%def header():
/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * This is a #include, not a %include, because we want the C pre-processor
 * to expand the macros into assembler assignment statements.
 */
#include "asm_support.h"
#include "arch/riscv64/asm_support_riscv64.S"

/**
 * RISC-V 64 ABI general notes
 *
 * References
 * - https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-cc.adoc
 * - runtime/arch/riscv64/registers_riscv64.h
 *
 * 32 general purpose registers
 * - fixed purpose: zero, ra, sp, gp, tp, s1
 *     gp/scs: shadow call stack - do not clobber!
 *     s1/tr: ART thread register - do not clobber!
 * - temporaries: t0-t6
 * - arguments: a0-a7
 * - callee saved: ra, s0/fp, s2-s11
 *     s0 is flexible, available to use as a frame pointer if needed.
 *
 * 32 floating point registers
 * - temporaries: ft0-ft11
 * - arguments: fa0-fa7
 * - callee saved: fs0-fs11
 */

// Android references
//   Bytecodes: https://source.android.com/docs/core/runtime/dalvik-bytecode
//   Instruction formats: https://source.android.com/docs/core/runtime/instruction-formats
//   Shorty: https://source.android.com/docs/core/runtime/dex-format#shortydescriptor

// Fixed register usages in Nterp.
//    nickname  ABI    reg   purpose
#define xSELF    s1  // x9,   Thread* self pointer
#define xFP      s2  // x18,  interpreted frame pointer: to access locals and args
#define xPC      s3  // x19,  interpreted program counter: to fetch instructions
#define xINST    s4  // x20,  first 16-bit code unit of current instruction
#define xIBASE   s5  // x21,  interpreted instruction base pointer: for computed goto
#define xREFS    s6  // x22,  base of object references of dex registers

// DWARF registers reference
// https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-dwarf.adoc
#define CFI_TMP  10  // DWARF register number for       a0/x10
#define CFI_DEX  19  // DWARF register number for xPC  /s3/x19
#define CFI_REFS 22  // DWARF register number for xREFS/s6/x22

// Synchronization
// This code follows the RISC-V atomics ABI specification [1].
//
// Object publication.
// new-instance and new-array operations must first perform a `fence w,w` "constructor fence" to
// ensure their new object references are correctly published with a subsequent SET_VREG_OBJECT.
//
// Volatile load/store.
// A volatile load is implemented as: fence rw,rw ; load ; fence r,rw.
// A 32-bit or 64-bit volatile store is implemented as: amoswap.{w,d}.rl
// A volatile store for a narrower type is implemented as: fence rw,w ; store ; fence rw,rw
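//
// For example (an illustrative sketch, not part of the spec): a 32-bit volatile load of OFF(t1)
// into t0 would be emitted as
//   fence rw, rw ; lw t0, OFF(t1) ; fence r, rw
// and the matching 32-bit volatile store of t0 would be the single instruction
//   amoswap.w.rl zero, t0, (t1)
// where OFF stands in for some field offset.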
//
// [1] https://github.com/riscv-non-isa/riscv-elf-psabi-doc/blob/master/riscv-atomic.adoc

// An assembly entry for nterp.
.macro OAT_ENTRY name
    .type \name, @function
    .hidden \name
    .global \name
    .balign 16
\name:
.endm

.macro SIZE name
    .size \name, .-\name
.endm

// Similar to ENTRY but without the CFI directives.
.macro NAME_START name
    .type \name, @function
    .hidden \name  // Hide this as a global symbol, so we do not incur plt calls.
    .global \name
    /* Cache alignment for function entry */
    .balign 16
\name:
.endm

.macro NAME_END name
  SIZE \name
.endm

// Macro for defining entrypoints into the runtime. We don't need to save registers (we're not
// holding references there), but there is no kDontSave runtime method. So just use the
// kSaveRefsOnly runtime method.
.macro NTERP_TRAMPOLINE name, helper
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME
    call \helper
    RESTORE_SAVE_REFS_ONLY_FRAME
    ld t0, THREAD_EXCEPTION_OFFSET(xSELF)
    bnez t0, nterp_deliver_pending_exception
    ret
END \name
.endm
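
// For example, `NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod` (as done in the footer below)
// emits an nterp_get_method entry that sets up the refs-only frame, calls NterpGetMethod,
// restores the frame, and then delivers any pending exception left by the helper.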

// Unpack code items from dex format.
// Input: \code_item
// Output:
//   - \regs: register count
//   - \outs: out count
//   - \ins: in count. If set to register "zero" (x0), load is skipped.
//   - \code_item: holds instruction array on exit
.macro FETCH_CODE_ITEM_INFO code_item, regs, outs, ins
    // Check LSB of \code_item. If 1, it's a compact dex file.
    BRANCH_IF_BIT_CLEAR \regs, \code_item, 0, 1f  // Regular dex.
    unimp  // Compact dex: unimplemented.
1:
    // Unpack values from regular dex format.
    lhu \regs, CODE_ITEM_REGISTERS_SIZE_OFFSET(\code_item)
    lhu \outs, CODE_ITEM_OUTS_SIZE_OFFSET(\code_item)
    .ifnc \ins, zero
      lhu \ins, CODE_ITEM_INS_SIZE_OFFSET(\code_item)
    .endif
    addi \code_item, \code_item, CODE_ITEM_INSNS_OFFSET
.endm
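
// For example (illustrative), `FETCH_CODE_ITEM_INFO a0, s7, s8, zero` leaves the register count
// in s7 and the outs count in s8, skips the ins count, and advances a0 to the first code unit of
// the instruction array.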

.macro EXPORT_PC
    sd xPC, -16(xREFS)
.endm
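
// Note: EXPORT_PC publishes the current dex PC into its reserved slot below the reference array,
// where the runtime expects to find it (e.g. for stack walks and exception delivery); it must be
// done before any operation that can throw or suspend.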

.macro TEST_IF_MARKING reg, label
    lb \reg, THREAD_IS_GC_MARKING_OFFSET(xSELF)
    bnez \reg, \label
.endm
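
// For example (illustrative), `TEST_IF_MARKING t2, .Lslow_path` takes the slow path while the
// concurrent GC is in its marking phase, i.e. when read barriers are required.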

.macro DO_SUSPEND_CHECK continue
    lwu t0, THREAD_FLAGS_OFFSET(xSELF)
    andi t0, t0, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
    beqz t0, \continue
    EXPORT_PC
    call art_quick_test_suspend
.endm
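
// Typical use (illustrative): `DO_SUSPEND_CHECK continue=1f` branches to 1f when no suspend or
// checkpoint request is pending; otherwise it exports the PC and calls art_quick_test_suspend.
// Clobbers t0.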

// Fetch one or more half-word units from an offset past the current PC.
// The offset is specified in 16-bit code units.
//
// A \width flag allows reading 32 bits (2 units) or 64 bits (4 units) from the offset.
// The RISC-V ISA supports unaligned accesses for these wider loads.
//
// If \width=8, \byte={0,1} indexes into the code unit at the offset.
//
// By default, one code unit is loaded and zero-extended (zext).
// The \signed flag selects sign extension (sext) instead, for the narrower loads.
//
// Does not advance xPC.
.macro FETCH reg, count, signed=0, width=16, byte=0
    .if \width == 8
      .if \signed
        lb  \reg, (\count*2 + \byte)(xPC)
      .else
        lbu \reg, (\count*2 + \byte)(xPC)
      .endif
    .elseif \width == 16
      .if \signed
        lh  \reg, (\count*2)(xPC)
      .else
        lhu \reg, (\count*2)(xPC)
      .endif
    .elseif \width == 32
      .if \signed
        lw  \reg, (\count*2)(xPC)
      .else
        lwu \reg, (\count*2)(xPC)
      .endif
    .elseif \width == 64
      ld  \reg, (\count*2)(xPC)
    .else
      unimp  // impossible
    .endif
.endm
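
// For example (illustrative), `FETCH t1, count=1` loads the second 16-bit code unit of the
// current instruction, zero-extended, while `FETCH t2, count=1, signed=1, width=32` loads code
// units 1 and 2 as one sign-extended 32-bit value (e.g. the branch offset of goto/32).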

// Fetch the next instruction, from xPC into xINST.
// Does not advance xPC.
.macro FETCH_INST
    lhu xINST, (xPC)  // zero in upper 48 bits
.endm

// Fetch the next instruction, from xPC into xINST. Advance xPC by \count units, each 2 bytes.
//
// Immediates have a 12-bit offset range from xPC. Thus, \count can range from -1024 to 1023.
//
// Note: Must be placed AFTER anything that can throw an exception, or the exception catch may miss.
// Thus, this macro must be placed after EXPORT_PC.
.macro FETCH_ADVANCE_INST count
    lhu xINST, (\count*2)(xPC)  // zero in upper 48 bits
    addi xPC, xPC, (\count*2)
.endm
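
// For example (illustrative), a 2-unit instruction finishes with `FETCH_ADVANCE_INST 2`, which
// loads the first code unit of the following instruction into xINST and advances xPC past the
// current instruction.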

// Clobbers: \reg
.macro GET_INST_OPCODE reg
    and \reg, xINST, 0xFF
.endm

// Clobbers: \reg
.macro GOTO_OPCODE reg
    slliw \reg, \reg, ${handler_size_bits}
    add \reg, xIBASE, \reg
    jr \reg
.endm
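
// For example (illustrative), with t0 holding opcode 0x01 (move vA, vB), `GOTO_OPCODE t0` jumps
// to xIBASE + (0x01 << handler_size_bits), the start of that opcode's fixed-size handler.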

.macro FETCH_FROM_THREAD_CACHE reg, miss_label, z0, z1
    // See art::InterpreterCache::IndexOf() for computing index of key within cache array.
    // Entry address:
    //   xSELF + OFFSET + ((xPC>>2 & xFF) << 4)
    // = xSELF + OFFSET + ((xPC & xFF<<2) << 2)
    // = xSELF + ((OFFSET>>2 + (xPC & xFF<<2)) << 2)
    // => ANDI, ADD, SH2ADD
#if (THREAD_INTERPRETER_CACHE_SIZE_LOG2 != 8)
#error Expected interpreter cache array size = 256 elements
#endif
#if (THREAD_INTERPRETER_CACHE_SIZE_SHIFT != 2)
#error Expected interpreter cache entry size = 16 bytes
#endif
#if ((THREAD_INTERPRETER_CACHE_OFFSET & 0x3) != 0)
#error Expected interpreter cache offset to be 4-byte aligned
#endif
    andi \z0, xPC, 0xFF << 2
    addi \z0, \z0, THREAD_INTERPRETER_CACHE_OFFSET >> 2
    sh2add \z0, \z0, xSELF  // z0 := entry's address
    ld \z1, (\z0)           // z1 := dex PC
    bne xPC, \z1, \miss_label
    ld \reg, 8(\z0)         // value: depends on context; see call site
.endm
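
// Typical use (illustrative): `FETCH_FROM_THREAD_CACHE a0, .Lslow, t0, t1` yields the value the
// runtime previously cached for the current dex PC (e.g. a resolved ArtMethod* or a field offset)
// in a0, or branches to the slow path on a cache miss.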

// Inputs:
//   - a0
//   - xSELF
// Clobbers: t0
.macro CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot, if_not_hot
    lwu t0, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
    BRANCH_IF_BIT_CLEAR t0, t0, ART_METHOD_IS_MEMORY_SHARED_FLAG_BIT, \if_hot

    lwu t0, THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)  // t0 := hotness
    beqz t0, \if_hot

    addi t0, t0, -1  // increase hotness
    sw t0,  THREAD_SHARED_METHOD_HOTNESS_OFFSET(xSELF)
    j \if_not_hot
.endm
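
// Note: for memory-shared methods the per-thread counter read above is decremented instead of the
// method's own hotness counter, presumably to avoid dirtying pages that are shared across
// processes.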

// Update xPC by \units code units. On back edges (non-positive \units), perform a hotness update
// and a suspend check.
.macro BRANCH units
    sh1add xPC, \units, xPC
    blez \units, 2f  // If branch is <= 0, increase hotness and do a suspend check.
1:
    FETCH_INST
    GET_INST_OPCODE t0
    GOTO_OPCODE t0
2:
    ld a0, (sp)
    lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)  // t0 := hotness
#if (NTERP_HOTNESS_VALUE != 0)
#error Expected 0 for hotness value
#endif
    // If the counter is at zero (hot), handle it in the runtime.
    beqz t0, 3f
    addi t0, t0, -1  // increase hotness
    sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
    DO_SUSPEND_CHECK continue=1b
    j 1b
3:
    tail NterpHandleHotnessOverflow  // arg a0 (ArtMethod*)
.endm
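
// For example (illustrative), the goto handler sign-extends its branch offset (in code units)
// into a register and invokes `BRANCH` with it; a non-positive offset is a back edge and takes
// the hotness/suspend path at 2: above.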

// Increase method hotness before starting the method.
// Hardcoded:
// - a0: ArtMethod*
// Clobbers: t0
.macro START_EXECUTING_INSTRUCTIONS
    ld a0, (sp)
    lhu t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)  // t0 := hotness
#if (NTERP_HOTNESS_VALUE != 0)
#error Expected 0 for hotness value
#endif
    // If the counter is at zero (hot), handle it in the runtime.
    beqz t0, 3f
    addi t0, t0, -1  // increase hotness
    sh t0, ART_METHOD_HOTNESS_COUNT_OFFSET(a0)
1:
    DO_SUSPEND_CHECK continue=2f
2:
    FETCH_INST
    GET_INST_OPCODE t0
    GOTO_OPCODE t0
3:
    CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=4f, if_not_hot=1b
4:
    mv a1, zero  // dex_pc_ptr=nullptr
    mv a2, zero  // vregs=nullptr
    call nterp_hot_method
    j 2b
.endm

// 64 bit read
// Clobbers: \reg
// Safe if \reg == \vreg.
.macro GET_VREG_WIDE reg, vreg
    sh2add \reg, \vreg, xFP  // vreg addr in register array
    ld \reg, (\reg)          // reg := fp[vreg](lo) | fp[vreg+1](hi)
.endm
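
// For example (illustrative), with t2 holding vreg index 4, `GET_VREG_WIDE t1, t2` loads the
// (v4, v5) pair from fp[4] and fp[5] as a single 64-bit value into t1.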

// 64 bit write
// Clobbers: z0
.macro SET_VREG_WIDE reg, vreg, z0
    sh2add \z0, \vreg, xFP    // vreg addr in register array
    sd \reg, (\z0)            // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
    sh2add \z0, \vreg, xREFS  // vreg addr in reference array
    sd zero, (\z0)            // refs[vreg] := null ; refs[vreg+1] := null
.endm

// Object read
// Clobbers: \reg
// Safe if \reg == \vreg.
.macro GET_VREG_OBJECT reg, vreg
    sh2add \reg, \vreg, xREFS  // vreg addr in reference array
    lwu \reg, (\reg)           // reg := refs[vreg]
.endm

// Object write
// Clobbers: z0
.macro SET_VREG_OBJECT reg, vreg, z0
    sh2add \z0, \vreg, xFP    // vreg addr in register array
    sw \reg, (\z0)            // fp[vreg] := reg
    sh2add \z0, \vreg, xREFS  // vreg addr in reference array
    sw \reg, (\z0)            // refs[vreg] := reg
.endm
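
// Note: object writes mirror the reference into xREFS so the GC can locate and update it, whereas
// the non-object setters (above and below) clear the corresponding xREFS slot(s) to null.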

// Floating-point 64 bit read
// Clobbers: \reg, \vreg
.macro GET_VREG_DOUBLE reg, vreg
    sh2add \vreg, \vreg, xFP  // vreg addr in register array
    fld \reg, (\vreg)         // reg := fp[vreg](lo) | fp[vreg+1](hi)
.endm

// Floating-point 64 bit write
// Clobbers: \reg, z0
.macro SET_VREG_DOUBLE reg, vreg, z0
    sh2add \z0, \vreg, xFP    // vreg addr in register array
    fsd \reg, (\z0)           // fp[vreg] := reg(lo) ; fp[vreg+1] := reg(hi)
    sh2add \z0, \vreg, xREFS  // vreg addr in reference array
    sd zero, (\z0)            // refs[vreg] := null ; refs[vreg+1] := null
.endm

// Put "%def" definitions after ".macro" definitions for proper expansion. %def is greedy.

// Typed read, defaults to 32-bit read
// Note: An object ref requires LWU, or LW;ZEXT.W.
// Clobbers: \reg
// Safe if \reg == \vreg.
%def get_vreg(reg, vreg, width=32, is_wide=False, is_unsigned=False):
%  if is_wide or width == 64:
     GET_VREG_WIDE $reg, $vreg
%  elif is_unsigned:
     sh2add $reg, $vreg, xFP  // vreg addr in register array
     lwu $reg, ($reg)         // reg := fp[vreg], zext
%  else:
     sh2add $reg, $vreg, xFP  // vreg addr in register array
     lw $reg, ($reg)          // reg := fp[vreg]
%#:

// Typed write, defaults to 32-bit write.
// Note: Incorrect for an object ref; it requires 2nd SW into xREFS.
// Clobbers: z0
%def set_vreg(reg, vreg, z0, width=32, is_wide=False):
%  if is_wide or width == 64:
     SET_VREG_WIDE $reg, $vreg, $z0
%  else:
     sh2add $z0, $vreg, xFP    // vreg addr in register array
     sw $reg, ($z0)            // fp[vreg] := reg
     sh2add $z0, $vreg, xREFS  // vreg addr in reference array
     sw zero, ($z0)            // refs[vreg] := null
%#:

// Floating-point read, defaults to 32-bit read.
// Clobbers: reg, vreg
%def get_vreg_float(reg, vreg, is_double=False):
%  if is_double:
     GET_VREG_DOUBLE $reg, $vreg
%  else:
     sh2add $vreg, $vreg, xFP  // vreg addr in register array
     flw $reg, ($vreg)         // reg := fp[vreg]
%#:

// Floating-point write, defaults to 32-bit write.
// Clobbers: reg, z0
%def set_vreg_float(reg, vreg, z0, is_double=False):
%  if is_double:
     SET_VREG_DOUBLE $reg, $vreg, $z0
%  else:
     sh2add $z0, $vreg, xFP    // vreg addr in register array
     fsw $reg, ($z0)           // fp[vreg] := reg
     sh2add $z0, $vreg, xREFS  // vreg addr in reference array
     sw zero, ($z0)            // refs[vreg] := null
%#:

%def entry():
/*
 * ArtMethod entry point.
 *
 * On entry:
 *  a0     ArtMethod* callee
 *  a1-a7  method parameters
 */
OAT_ENTRY ExecuteNterpWithClinitImpl
#if MIRROR_CLASS_STATUS_SHIFT < 12
#error mirror class status bits cannot use LUI load technique
#endif
    .cfi_startproc
    // For simplicity, we don't do a read barrier here, but instead rely
    // on art_quick_resolution_trampoline to always have a suspend point before
    // calling back here.
    lwu t0, ART_METHOD_DECLARING_CLASS_OFFSET(a0)
    lw t1, MIRROR_CLASS_STATUS_OFFSET(t0)  // t1 := status word, sext
    lui t2, MIRROR_CLASS_STATUS_VISIBLY_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
    // The unsigned comparison works in tandem with the 64-bit sign-extension of
    // the status bits at the top of the 32-bit word. The order of the status
    // constants (sign extended from LUI) is unchanged with unsigned comparison.
    bgeu t1, t2, ExecuteNterpImpl
    lui t2, MIRROR_CLASS_STATUS_INITIALIZED << (MIRROR_CLASS_STATUS_SHIFT - 12)
    bltu t1, t2, .Linitializing_check
    fence w, w
    j ExecuteNterpImpl
.Linitializing_check:
    lui t2, MIRROR_CLASS_STATUS_INITIALIZING << (MIRROR_CLASS_STATUS_SHIFT - 12)
    bltu t1, t2, .Lresolution_trampoline
    lwu t1, MIRROR_CLASS_CLINIT_THREAD_ID_OFFSET(t0)
    lwu t0, THREAD_TID_OFFSET(xSELF)
    beq t0, t1, ExecuteNterpImpl
.Lresolution_trampoline:
    tail art_quick_resolution_trampoline
    .cfi_endproc
    .type EndExecuteNterpWithClinitImpl, @function
    .hidden EndExecuteNterpWithClinitImpl
    .global EndExecuteNterpWithClinitImpl
EndExecuteNterpWithClinitImpl:

OAT_ENTRY ExecuteNterpImpl
   .cfi_startproc
%  setup_nterp_frame(cfi_refs="CFI_REFS", refs="xREFS", fp="xFP", pc="xPC", regs="s7", ins="s8", spills_sp="s9", z0="t0", z1="t1", z2="t2", z3="t3", uniq="entry")
                            // xREFS := callee refs array
                            // xFP   := callee fp array
                            // xPC   := callee dex array
                            // s7    := refs/fp vreg count
                            // s8    := ins count
                            // s9    := post-spills pre-frame sp
                            // sp    := post-frame sp
   CFI_DEFINE_DEX_PC_WITH_OFFSET(/*tmpReg*/CFI_TMP, /*dexReg*/CFI_DEX, /*dexOffset*/0)

   // Fast path: zero args.
   beqz s8, .Lentry_go

   sub s7, s7, s8           // s7 := a1 index in fp/refs
   lwu s10, ART_METHOD_ACCESS_FLAGS_OFFSET(a0)
                            // s10 := method flags

   // Fast path: all reference args.
   sh2add t0, s7, xFP       // t0 := &xFP[a1]
   sh2add t1, s7, xREFS     // t1 := &xREFS[a1]
   BRANCH_IF_BIT_CLEAR t2, s10, ART_METHOD_NTERP_ENTRY_POINT_FAST_PATH_FLAG_BIT, .Lentry_a1
%  setup_ref_args_and_go(fp="t0", refs="t1", refs_end="xFP", spills_sp="s9", z0="t2", z1="t3", done=".Lentry_go")

   // Fast path: instance with zero args.
.Lentry_a1:
   bexti s10, s10, ART_METHOD_IS_STATIC_FLAG_BIT
                            // s10 := 1 if static, 0 if instance
   bnez s10, .Lentry_shorty
   sw a1, (t0)
   sw a1, (t1)
   li t2, 1
   beq s8, t2, .Lentry_go

   // Slow path: runtime call to obtain shorty, full setup from managed ABI.
.Lentry_shorty:
   SPILL_ALL_ARGUMENTS
   // TODO: Better way to get shorty
   call NterpGetShorty      // arg a0
   mv s11, a0               // s11 := shorty
   RESTORE_ALL_ARGUMENTS

   // temporaries are trashed, recompute some values
   sh2add t0, s7, xFP       // t0 := &xFP[a1]
   sh2add t1, s7, xREFS     // t1 := &xREFS[a1]
   addi t2, s11, 1          // t2 := shorty arg (skip return type)
   xori s10, s10, 1         // s10 := 0 if static, 1 if instance
   slliw t3, s10, 2         // t3 := (static) 0, (instance) 4: fp/refs/outs byte offset
   // constant setup for gpr/fpr shorty comparisons
   li s0, 'D'               // s0 := double char (unused fp)
   li s4, 'F'               // s4 := float char (unused xINST)
   li s5, 'J'               // s5 := long char (unused xIBASE)
   li s8, 'L'               // s8 := ref char (unused ins count)
   bnez s10, .Lentry_args   // instance a1 already stored into callee's xFP and xREFS

%  store_gpr_to_vreg(gpr="a1", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")

.Lentry_args:
   // linear scan through shorty: extract non-float args
%  store_gpr_to_vreg(gpr="a2", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a3", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a4", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a5", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a6", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
%  store_gpr_to_vreg(gpr="a7", offset="t3", shorty="t2", fp="t0", refs="t1", z0="t4", z1="t5", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
   // We drained arg registers, so continue from caller stack's out array. Unlike the
   // reference-only fast path, the continuation offset in the out array can vary, depending on
   // the presence of 64-bit values in the arg registers. The offset operand (t3 here) tracks
   // this value as a byte offset.
   addi t5, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
                            // t5 := (caller) outs array base address
   add t4, t3, t0           // t4 := (callee) &FP[next]
   add t1, t3, t1           // t1 := (callee) &REFS[next]
   add t3, t3, t5           // t3 := (caller) &OUTS[next]
%  store_outs_to_vregs(outs="t3", shorty="t2", fp="t4", refs="t1", z0="t5", z1="t6", D="s0", F="s4", J="s5", L="s8", next=".Lentry_fargs")
                            // t0 = &xFP[a1], unclobbered
.Lentry_fargs:
   addi t1, s11, 1          // t1 := shorty arg (skip return type)
   slliw t2, s10, 2         // t2 := starting byte offset for fp/outs, static and instance
   // linear scan through shorty: extract float args
%  store_fpr_to_vreg(fpr="fa0", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa1", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa2", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa3", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa4", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa5", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa6", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
%  store_fpr_to_vreg(fpr="fa7", offset="t2", shorty="t1", fp="t0", z0="t3", z1="t4", D="s0", F="s4", J="s5", next=".Lentry_go")
   addi t3, s9, (NTERP_SIZE_SAVE_CALLEE_SAVES + 8)
                            // t3 := (caller) outs array base address
   add t0, t2, t0           // t0 := (callee) &FP[next]
   add t2, t2, t3           // t2 := (caller) &OUTS[next]
%  store_float_outs_to_vregs(outs="t2", shorty="t1", fp="t0", z0="t3", D="s0", F="s4", J="s5", next=".Lentry_go")

.Lentry_go:
    la xIBASE, artNterpAsmInstructionStart
    START_EXECUTING_INSTRUCTIONS
    // NOTE: no fallthrough
    // cfi info continues, and covers the whole nterp implementation.
    SIZE ExecuteNterpImpl

%def footer():
/*
 * ===========================================================================
 *  Common subroutines and data
 * ===========================================================================
 */

    .text
    .align  2


// Enclose all code below in a symbol (which gets printed in backtraces).
NAME_START nterp_helper

common_errArrayIndex:
    EXPORT_PC
    // CALL preserves RA for stack walking.
    call art_quick_throw_array_bounds  // args a0 (index), a1 (length)

common_errDivideByZero:
    EXPORT_PC
    // CALL preserves RA for stack walking.
    call art_quick_throw_div_zero

common_errNullObject:
    EXPORT_PC
    // CALL preserves RA for stack walking.
    call art_quick_throw_null_pointer_exception

NterpInvokeVirtual:
%  nterp_invoke_virtual()
NterpInvokeSuper:
%  nterp_invoke_super()
NterpInvokeDirect:
%  nterp_invoke_direct()
NterpInvokeStringInit:
%  nterp_invoke_string_init()
NterpInvokeStatic:
%  nterp_invoke_static()
NterpInvokeInterface:
%  nterp_invoke_interface()
NterpInvokePolymorphic:
%  nterp_invoke_polymorphic()
NterpInvokeCustom:
%  nterp_invoke_custom()
NterpInvokeVirtualRange:
%  nterp_invoke_virtual_range()
NterpInvokeSuperRange:
%  nterp_invoke_super_range()
NterpInvokeDirectRange:
%  nterp_invoke_direct_range()
NterpInvokeStringInitRange:
%  nterp_invoke_string_init_range()
NterpInvokeStaticRange:
%  nterp_invoke_static_range()
NterpInvokeInterfaceRange:
%  nterp_invoke_interface_range()
NterpInvokePolymorphicRange:
%  nterp_invoke_polymorphic_range()
NterpInvokeCustomRange:
%  nterp_invoke_custom_range()

// Arg a0: ArtMethod*
NterpHandleHotnessOverflow:
   CHECK_AND_UPDATE_SHARED_MEMORY_METHOD if_hot=.Lhotspill_hot, if_not_hot=.Lhotspill_suspend
.Lhotspill_hot:
   mv a1, xPC
   mv a2, xFP
   call nterp_hot_method  // args a0, a1, a2
   bnez a0, .Lhotspill_osr
.Lhotspill_advance:
   FETCH_INST
   GET_INST_OPCODE t0
   GOTO_OPCODE t0
.Lhotspill_osr:
   // a0 = OsrData*
   // Drop most of the current nterp frame, but keep the callee-saves.
   // The nterp callee-saves (count and layout) match the OSR frame's callee-saves.
   ld sp, -8(xREFS)  // caller's interpreted frame pointer
   .cfi_def_cfa sp, NTERP_SIZE_SAVE_CALLEE_SAVES
   lwu t0, OSR_DATA_FRAME_SIZE(a0)
   addi t0, t0, -NTERP_SIZE_SAVE_CALLEE_SAVES  // t0 := osr frame - callee saves, in bytes
   mv s7, sp         // Remember CFA in a callee-save register.
   .cfi_def_cfa_register s7
   sub sp, sp, t0    // OSR size guaranteed to be stack aligned (16 bytes).

   addi t1, a0, OSR_DATA_MEMORY  // t1 := read start
   add t1, t1, t0                // t1 := read end (exclusive)
   mv t2, s7                     // t2 := write end (exclusive)
   // t0 >= 8 (OSR places ArtMethod* at bottom of frame), so loop will terminate.
.Lhotspill_osr_copy_loop:
   addi t1, t1, -8
   ld t3, (t1)
   addi t2, t2, -8
   sd t3, (t2)
   bne t2, sp, .Lhotspill_osr_copy_loop

   ld s8, OSR_DATA_NATIVE_PC(a0)  // s8 := native PC; jump after free
   call free  // arg a0; release OsrData*
   jr s8      // Jump to the compiled code.
.Lhotspill_suspend:
   DO_SUSPEND_CHECK continue=.Lhotspill_advance
   j .Lhotspill_advance

// This is the logical end of ExecuteNterpImpl, where the frame info applies.
.cfi_endproc

NterpToNterpInstance:
%  nterp_to_nterp_instance()
NterpToNterpStringInit:
%  nterp_to_nterp_string_init()
NterpToNterpStatic:
%  nterp_to_nterp_static()
NterpToNterpInstanceRange:
%  nterp_to_nterp_instance_range()
NterpToNterpStringInitRange:
%  nterp_to_nterp_string_init_range()
NterpToNterpStaticRange:
%  nterp_to_nterp_static_range()

NAME_END nterp_helper

// EndExecuteNterpImpl includes the methods after .cfi_endproc, as we want the runtime to see them
// as part of the Nterp PCs. This label marks the end of PCs contained by the OatQuickMethodHeader
// created for the interpreter entry point.
    .type EndExecuteNterpImpl, @function
    .hidden EndExecuteNterpImpl
    .global EndExecuteNterpImpl
EndExecuteNterpImpl:

// Entrypoints into runtime.
NTERP_TRAMPOLINE nterp_allocate_object, NterpAllocateObject
NTERP_TRAMPOLINE nterp_filled_new_array, NterpFilledNewArray
NTERP_TRAMPOLINE nterp_filled_new_array_range, NterpFilledNewArrayRange
NTERP_TRAMPOLINE nterp_get_class, NterpGetClass
NTERP_TRAMPOLINE nterp_get_instance_field_offset, NterpGetInstanceFieldOffset
NTERP_TRAMPOLINE nterp_get_method, NterpGetMethod
NTERP_TRAMPOLINE nterp_get_static_field, NterpGetStaticField
NTERP_TRAMPOLINE nterp_hot_method, NterpHotMethod
NTERP_TRAMPOLINE nterp_load_object, NterpLoadObject

ENTRY nterp_deliver_pending_exception
    DELIVER_PENDING_EXCEPTION
END nterp_deliver_pending_exception

// gen_mterp.py will inline the following definitions
// within [ExecuteNterpImpl, EndExecuteNterpImpl).
%def instruction_start():
    .type artNterpAsmInstructionStart, @function
    .hidden artNterpAsmInstructionStart
    .global artNterpAsmInstructionStart
artNterpAsmInstructionStart = .L_op_nop
    .text

%def instruction_end():
    .type artNterpAsmInstructionEnd, @function
    .hidden artNterpAsmInstructionEnd
    .global artNterpAsmInstructionEnd
artNterpAsmInstructionEnd:
    // artNterpAsmInstructionEnd is used as a landing pad for exception handling.
    // xPC (s3) for the exception handler was set just prior to the long jump coming here.
    FETCH_INST
    GET_INST_OPCODE t0
    GOTO_OPCODE t0

%def opcode_pre():
%   pass
%def opcode_name_prefix():
%   return "nterp_"
%def opcode_start():
    NAME_START nterp_${opcode}
%def opcode_end():
    NAME_END nterp_${opcode}
%def opcode_slow_path_start(name):
    NAME_START ${name}
%def opcode_slow_path_end(name):
    NAME_END ${name}