• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
18 #define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
19 
20 #include <array>
21 #include <cstdint>
22 #include <type_traits>
23 
24 #include "berberis/assembler/x86_64.h"
25 #include "berberis/base/bit_util.h"
26 #include "berberis/base/dependent_false.h"
27 #include "berberis/intrinsics/macro_assembler.h"
28 #include "berberis/runtime_primitives/platform.h"
29 
30 namespace berberis::call_intrinsic {
31 
// General-purpose registers that must be preserved around an intrinsic call. Per the x86-64
// SysV calling convention these are the caller-saved GP registers (rsp is managed explicitly
// by Push/PopCallerSaved; rbx, rbp, r12-r15 are callee-saved and thus not listed).
constexpr x86_64::Assembler::Register kCallerSavedRegs[] = {
    x86_64::Assembler::rax,
    x86_64::Assembler::rcx,
    x86_64::Assembler::rdx,
    x86_64::Assembler::rdi,
    x86_64::Assembler::rsi,
    x86_64::Assembler::r8,
    x86_64::Assembler::r9,
    x86_64::Assembler::r10,
    x86_64::Assembler::r11,
};
43 
44 constexpr int8_t kRegIsNotOnStack = -1;
45 
46 // Map from register number to offset in CallIntrinsic save area. Counted in 8-byte slots.
47 inline constexpr auto kRegOffsetsOnStack = []() {
48   std::array<int8_t, 16> regs_on_stack = {};
49   // regs_on_stack.fill(kRegIsNotOnStack); - needs C++20
50   for (auto& num : regs_on_stack) {
51     num = kRegIsNotOnStack;
52   }
53 
54   int8_t stack_allocation_size = 0;
55   for (auto reg : kCallerSavedRegs) {
56     regs_on_stack[reg.num] = stack_allocation_size;
57     ++stack_allocation_size;
58   }
59   return regs_on_stack;
60 }();
61 
// XMM registers preserved around an intrinsic call. In the x86-64 SysV calling convention all
// XMM registers are caller-saved, so the full xmm0-xmm15 set is listed.
constexpr x86_64::Assembler::XMMRegister kCallerSavedXMMRegs[] = {
    x86_64::Assembler::xmm0,
    x86_64::Assembler::xmm1,
    x86_64::Assembler::xmm2,
    x86_64::Assembler::xmm3,
    x86_64::Assembler::xmm4,
    x86_64::Assembler::xmm5,
    x86_64::Assembler::xmm6,
    x86_64::Assembler::xmm7,
    x86_64::Assembler::xmm8,
    x86_64::Assembler::xmm9,
    x86_64::Assembler::xmm10,
    x86_64::Assembler::xmm11,
    x86_64::Assembler::xmm12,
    x86_64::Assembler::xmm13,
    x86_64::Assembler::xmm14,
    x86_64::Assembler::xmm15,
};
80 
81 // Map from register number to offset in CallIntrinsic save area. Counted in 8-byte slots.
82 inline constexpr auto kSimdRegOffsetsOnStack = []() {
83   std::array<int8_t, 16> simd_regs_on_stack = {};
84   // simd_regs_on_stack.fill(kRegIsNotOnStack); - needs C++20
85   for (auto& num : simd_regs_on_stack) {
86     num = kRegIsNotOnStack;
87   }
88 
89   int8_t stack_allocation_size = AlignUp(std::size(kCallerSavedRegs), 2);
90   for (auto reg : kCallerSavedXMMRegs) {
91     simd_regs_on_stack[reg.num] = stack_allocation_size;
92     stack_allocation_size += 2;
93   }
94   return simd_regs_on_stack;
95 }();
96 
// Save area size for CallIntrinsic save area. Counted in 8-byte slots.
// The GP area is rounded up to an even slot count so every XMM slot starts at a multiple of
// 16 bytes within the area (Movdqa is used for the XMM spills; this presumably relies on rsp
// being 16-byte aligned when PushCallerSaved runs — TODO confirm with the translator's stack
// discipline).
inline constexpr int8_t kSaveAreaSize =
    AlignUp(std::size(kCallerSavedRegs), 2) + std::size(kCallerSavedXMMRegs) * 2;
100 
// Per-call snapshot of which saved registers PopCallerSaved should restore. Entries equal to
// kRegIsNotOnStack are skipped during restoration (used to protect registers that now hold
// intrinsic results).
struct StoredRegsInfo {
  // GP register number -> save-area slot, or kRegIsNotOnStack.
  std::decay_t<decltype(kRegOffsetsOnStack)> regs_on_stack;
  // XMM register number -> save-area slot, or kRegIsNotOnStack.
  std::decay_t<decltype(kSimdRegOffsetsOnStack)> simd_regs_on_stack;
};
105 
PushCallerSaved(MacroAssembler<x86_64::Assembler> & as)106 inline void PushCallerSaved(MacroAssembler<x86_64::Assembler>& as) {
107   as.Subq(as.rsp, kSaveAreaSize * 8);
108 
109   for (auto reg : kCallerSavedRegs) {
110     as.Movq({.base = as.rsp, .disp = kRegOffsetsOnStack[reg.num] * 8}, reg);
111   }
112 
113   for (auto reg : kCallerSavedXMMRegs) {
114     as.Movdqa({.base = as.rsp, .disp = kSimdRegOffsetsOnStack[reg.num] * 8}, reg);
115   }
116 }
117 
118 // Note: regs_on_stack is usually copy of kRegOffsetsOnStack with some registers marked off as
119 // kRegIsNotOnStack, simd_regs_on_stack is kSimdRegOffsetsOnStack with some registers marked as
120 // kRegIsNotOnStack. These registers are skipped during restoration process.
PopCallerSaved(MacroAssembler<x86_64::Assembler> & as,const StoredRegsInfo regs_info)121 inline void PopCallerSaved(MacroAssembler<x86_64::Assembler>& as, const StoredRegsInfo regs_info) {
122   for (auto reg : kCallerSavedRegs) {
123     if (regs_info.regs_on_stack[reg.num] != kRegIsNotOnStack) {
124       as.Movq(reg, {.base = as.rsp, .disp = regs_info.regs_on_stack[reg.num] * 8});
125     }
126   }
127   for (auto reg : kCallerSavedXMMRegs) {
128     if (regs_info.simd_regs_on_stack[reg.num] != kRegIsNotOnStack) {
129       as.Movdqa(reg, {.base = as.rsp, .disp = regs_info.simd_regs_on_stack[reg.num] * 8});
130     }
131   }
132 
133   as.Addq(as.rsp, kSaveAreaSize * 8);
134 }
135 
// Nonfunctional assembler used by static_assert expression. It doesn't do anything but allows us
// to call InitArgs during compilation time with the same argument types as would happen during
// execution.
//
// This turns runtime check into compile time check and thus allows us to catch weird corner cases
// faster.
class ConstExprCheckAssembler {
 public:
  // Mirror the real MacroAssembler's operand/register types so InitArgs type-checks identically
  // against this stub and against the real assembler.
  using Operand = MacroAssembler<x86_64::Assembler>::Operand;
  using Register = MacroAssembler<x86_64::Assembler>::Register;
  using XMMRegister = MacroAssembler<x86_64::Assembler>::XMMRegister;
  static constexpr auto rsp = MacroAssembler<x86_64::Assembler>::rsp;

  constexpr ConstExprCheckAssembler() = default;

  // GP-register move stubs used by InitArgs; signatures must stay in sync with the real
  // MacroAssembler overloads (including the template parameters) or the compile-time check
  // would diverge from the runtime emission path.
  template <typename U, typename V>
  constexpr void Expand(Register, Operand) const {}
  template <typename U, typename V>
  constexpr void Expand(Register, Register) const {}

  template <typename U>
  constexpr void Mov(Operand, Register) const {}
  template <typename U>
  constexpr void Mov(Register, Operand) const {}
  template <typename U>
  constexpr void Mov(Register, Register) const {}

  constexpr void Movl(Register, int32_t) const {}

  // SSE scalar-move stubs (non-AVX path of InitArgs).
  template <typename U>
  constexpr void Movs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, XMMRegister) const {}

  // AVX scalar-move stubs (AVX path of InitArgs; three-operand form for reg-reg moves).
  template <typename U>
  constexpr void Vmovs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, XMMRegister, XMMRegister) const {}
};
179 
// Helper wrapper to pass the intrinsic type down the generic lambda.
// `value` carries the assembler-level argument (register, XMM register, or immediate), while
// IntrinsicType records the type the intrinsic function expects for this position — InitArgs
// needs both to pick the correct move/extension instruction.
template <typename T, typename U>
struct ArgWrap {
  using AssemblerType = T;
  using IntrinsicType = U;
  AssemblerType value;
};
187 
// Integer argument registers in x86-64 SysV ABI order (rdi, rsi, rdx, rcx, r8, r9).
static constexpr x86_64::Assembler::Register kAbiArgs[] = {
    x86_64::Assembler::rdi,
    x86_64::Assembler::rsi,
    x86_64::Assembler::rdx,
    x86_64::Assembler::rcx,
    x86_64::Assembler::r8,
    x86_64::Assembler::r9,
};
196 
// Floating-point argument registers in x86-64 SysV ABI order (xmm0-xmm7).
static constexpr x86_64::Assembler::XMMRegister kAbiSimdArgs[] = {
    x86_64::Assembler::xmm0,
    x86_64::Assembler::xmm1,
    x86_64::Assembler::xmm2,
    x86_64::Assembler::xmm3,
    x86_64::Assembler::xmm4,
    x86_64::Assembler::xmm5,
    x86_64::Assembler::xmm6,
    x86_64::Assembler::xmm7,
};
207 
208 // Assumes RSP points to preallocated stack args area.
209 template <typename IntrinsicResType,
210           typename... IntrinsicArgType,
211           typename MacroAssembler,
212           typename... AssemblerArgType>
InitArgs(MacroAssembler && as,bool has_avx,AssemblerArgType...args)213 constexpr bool InitArgs(MacroAssembler&& as, bool has_avx, AssemblerArgType... args) {
214   using Assembler = std::decay_t<MacroAssembler>;
215   using Register = typename Assembler::Register;
216   using XMMRegister = typename Assembler::XMMRegister;
217   using Float32 = intrinsics::Float32;
218   using Float64 = intrinsics::Float64;
219 
220   // All ABI argument registers are saved among caller-saved registers, so we can safely initialize
221   // them now. When intrinsic receives its argument from such register we'll read it from stack, so
222   // there is no early-clobbering problem. Callee-saved regs are never ABI arguments, so we can move
223   // them to ABI reg directly.
224 
225   size_t gp_index = 0;
226   size_t simd_index = 0;
227   bool success = ([&as, &gp_index, &simd_index, has_avx](auto arg) -> bool {
228     using AssemblerType = typename decltype(arg)::AssemblerType;
229     using IntrinsicType = typename decltype(arg)::IntrinsicType;
230 
231     if (std::is_integral_v<IntrinsicType>) {
232       if (gp_index == std::size(kAbiArgs)) {
233         return false;
234       }
235     } else if constexpr (std::is_same_v<IntrinsicType, Float32> ||
236                          std::is_same_v<IntrinsicType, Float64>) {
237       if (simd_index == std::size(kAbiSimdArgs)) {
238         return false;
239       }
240     } else {
241       return false;
242     }
243 
244     // Note, ABI mandates extension up to 32-bit and zero-filling the upper half.
245     if constexpr (std::is_integral_v<IntrinsicType> && sizeof(IntrinsicType) <= sizeof(int32_t) &&
246                   std::is_integral_v<AssemblerType> && sizeof(AssemblerType) <= sizeof(int32_t)) {
247       as.Movl(kAbiArgs[gp_index++], static_cast<int32_t>(arg.value));
248     } else if constexpr (std::is_integral_v<IntrinsicType> &&
249                          sizeof(IntrinsicType) == sizeof(int64_t) &&
250                          std::is_integral_v<AssemblerType> &&
251                          sizeof(AssemblerType) == sizeof(int64_t)) {
252       as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++],
253                                                  static_cast<int64_t>(arg.value));
254     } else if constexpr (std::is_integral_v<IntrinsicType> &&
255                          sizeof(IntrinsicType) <= sizeof(int32_t) &&
256                          std::is_same_v<AssemblerType, Register>) {
257       if (kRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
258         as.template Expand<int32_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
259       } else {
260         as.template Expand<int32_t, IntrinsicType>(
261             kAbiArgs[gp_index++],
262             {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.num] * 8});
263       }
264     } else if constexpr (std::is_integral_v<IntrinsicType> &&
265                          sizeof(IntrinsicType) == sizeof(int64_t) &&
266                          std::is_same_v<AssemblerType, Register>) {
267       if (kRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
268         as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
269       } else {
270         as.template Expand<int64_t, IntrinsicType>(
271             kAbiArgs[gp_index++],
272             {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.num] * 8});
273       }
274     } else if constexpr ((std::is_same_v<IntrinsicType, Float32> ||
275                           std::is_same_v<IntrinsicType, Float64>)&&std::is_same_v<AssemblerType,
276                                                                                   XMMRegister>) {
277       if (kSimdRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
278         if (has_avx) {
279           as.template Vmovs<IntrinsicType>(
280               kAbiSimdArgs[simd_index], kAbiSimdArgs[simd_index], arg.value);
281           simd_index++;
282         } else {
283           as.template Movs<IntrinsicType>(kAbiSimdArgs[simd_index++], arg.value);
284         }
285       } else {
286         if (has_avx) {
287           as.template Vmovs<IntrinsicType>(
288               kAbiSimdArgs[simd_index++],
289               {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.num] * 8});
290         } else {
291           as.template Movs<IntrinsicType>(
292               kAbiSimdArgs[simd_index++],
293               {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.num] * 8});
294         }
295       }
296     } else {
297       static_assert(kDependentTypeFalse<std::tuple<IntrinsicType, AssemblerType>>,
298                     "Unknown parameter type, please add support to CallIntrinsic");
299     }
300     return true;
301   }(ArgWrap<AssemblerArgType, IntrinsicArgType>{.value = args}) && ...);
302   return success;
303 }
304 
// Forward results from ABI registers to result-specified registers and mark registers in the
// returned StoredRegsInfo with kRegIsNotOnStack to prevent restoration from stack.
template <typename IntrinsicResType, typename AssemblerResType>
StoredRegsInfo ForwardResults(MacroAssembler<x86_64::Assembler>& as, AssemblerResType result) {
  using Assembler = MacroAssembler<x86_64::Assembler>;
  using Register = Assembler::Register;
  using XMMRegister = Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  // Start from the full save map; entries for registers that now carry results are knocked out
  // below so PopCallerSaved leaves them alone.
  StoredRegsInfo regs_info = {.regs_on_stack = kRegOffsetsOnStack,
                              .simd_regs_on_stack = kSimdRegOffsetsOnStack};

  if constexpr (Assembler::kFormatIs<IntrinsicResType, std::tuple<int32_t>, std::tuple<uint32_t>> &&
                std::is_same_v<AssemblerResType, Register>) {
    // Single 32-bit integer result: arrives in rax.
    // Note: even unsigned 32-bit results are sign-extended to 64bit register on RV64.
    regs_info.regs_on_stack[result.num] = kRegIsNotOnStack;
    as.Expand<int64_t, int32_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<int64_t>, std::tuple<uint64_t>> &&
                       std::is_same_v<AssemblerResType, Register>) {
    // Single 64-bit integer result: arrives in rax, moved as-is.
    regs_info.regs_on_stack[result.num] = kRegIsNotOnStack;
    as.Mov<int64_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<Float32>, std::tuple<Float64>> &&
                       std::is_same_v<AssemblerResType, XMMRegister>) {
    // Single floating-point result: arrives in xmm0; AVX uses the three-operand move form.
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    regs_info.simd_regs_on_stack[result.num] = kRegIsNotOnStack;
    if (host_platform::kHasAVX) {
      as.Vmovs<ResType0>(result, result, Assembler::xmm0);
    } else {
      as.Movs<ResType0>(result, Assembler::xmm0);
    }
  } else if constexpr (std::tuple_size_v<IntrinsicResType> == 2) {
    // Two-element tuple result: first element arrives in rax, second in rdx.
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    using ResType1 = std::tuple_element_t<1, IntrinsicResType>;
    auto [result0, result1] = result;
    if constexpr (Assembler::kFormatIs<ResType0, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.num] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result0, Assembler::rax);
    } else if constexpr (Assembler::kFormatIs<ResType0, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.num] = kRegIsNotOnStack;
      as.Mov<int64_t>(result0, Assembler::rax);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
    if constexpr (Assembler::kFormatIs<ResType1, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.num] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result1, Assembler::rdx);
    } else if constexpr (Assembler::kFormatIs<ResType1, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.num] = kRegIsNotOnStack;
      as.Mov<int64_t>(result1, Assembler::rdx);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
  } else {
    static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                  "Unknown result type, please add support to CallIntrinsic");
  }
  return regs_info;
}
372 
373 // Note: we can ignore status in the actual InitArgs call because we know that InitArgs would
374 // succeed if the call in static_assert succeeded.
375 //
376 // AVX flag shouldn't change the outcome, but better safe than sorry.
377 
378 template <typename IntrinsicResType, typename... IntrinsicArgType, typename... AssemblerArgType>
InitArgsVerify(AssemblerArgType...)379 void InitArgsVerify(AssemblerArgType...) {
380   static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
381       ConstExprCheckAssembler(), true, AssemblerArgType{0}...));
382   static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
383       ConstExprCheckAssembler(), false, AssemblerArgType{0}...));
384 }
385 
// Emits code that calls `function` (a host function implementing an intrinsic) with the given
// assembler-level arguments and forwards its result into `result`. Caller-saved registers are
// spilled around the call; registers that receive results are excluded from restoration.
template <typename AssemblerResType,
          typename IntrinsicResType,
          typename... IntrinsicArgType,
          typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
                   IntrinsicResType (*function)(IntrinsicArgType...),
                   AssemblerResType result,
                   AssemblerArgType... args) {
  PushCallerSaved(as);

  // Compile-time check that the arguments fit into ABI registers; the runtime status of the
  // InitArgs call below can then be ignored.
  InitArgsVerify<IntrinsicResType, IntrinsicArgType...>(args...);
  InitArgs<IntrinsicResType, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);

  as.Call(reinterpret_cast<void*>(function));

  // Move ABI result registers into `result` and mark them as not-to-be-restored.
  auto regs_info = ForwardResults<IntrinsicResType>(as, result);

  PopCallerSaved(as, regs_info);
}
405 
// Emits code that calls a void-returning intrinsic `function` with the given assembler-level
// arguments. With no result to forward, every spilled register is restored unconditionally.
template <typename AssemblerResType, typename... IntrinsicArgType, typename... AssemblerArgType>
void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
                   void (*function)(IntrinsicArgType...),
                   AssemblerArgType... args) {
  PushCallerSaved(as);

  // Compile-time check that the arguments fit into ABI registers; the runtime status of the
  // InitArgs call below can then be ignored.
  InitArgsVerify<void, IntrinsicArgType...>(args...);
  InitArgs<void, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);

  as.Call(reinterpret_cast<void*>(function));

  // Restore from the untouched save maps: no result registers to protect.
  PopCallerSaved(
      as, {.regs_on_stack = kRegOffsetsOnStack, .simd_regs_on_stack = kSimdRegOffsetsOnStack});
}
420 
421 }  // namespace berberis::call_intrinsic
422 
423 #endif  // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
424