1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
18 #define BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
19
20 #include <array>
21 #include <cstdint>
22 #include <type_traits>
23
24 #include "berberis/assembler/x86_64.h"
25 #include "berberis/base/bit_util.h"
26 #include "berberis/base/dependent_false.h"
27 #include "berberis/intrinsics/macro_assembler.h"
28 #include "berberis/runtime_primitives/platform.h"
29
30 namespace berberis::call_intrinsic {
31
32 constexpr x86_64::Assembler::Register kCallerSavedRegs[] = {
33 x86_64::Assembler::rax,
34 x86_64::Assembler::rcx,
35 x86_64::Assembler::rdx,
36 x86_64::Assembler::rdi,
37 x86_64::Assembler::rsi,
38 x86_64::Assembler::r8,
39 x86_64::Assembler::r9,
40 x86_64::Assembler::r10,
41 x86_64::Assembler::r11,
42 };
43
44 constexpr int8_t kRegIsNotOnStack = -1;
45
46 // Map from register number to offset in CallIntrinsic save area. Counted in 8-byte slots.
47 inline constexpr auto kRegOffsetsOnStack = []() {
48 std::array<int8_t, 16> regs_on_stack = {};
49 // regs_on_stack.fill(kRegIsNotOnStack); - needs C++20
50 for (auto& num : regs_on_stack) {
51 num = kRegIsNotOnStack;
52 }
53
54 int8_t stack_allocation_size = 0;
55 for (auto reg : kCallerSavedRegs) {
56 regs_on_stack[reg.num] = stack_allocation_size;
57 ++stack_allocation_size;
58 }
59 return regs_on_stack;
60 }();
61
62 constexpr x86_64::Assembler::XMMRegister kCallerSavedXMMRegs[] = {
63 x86_64::Assembler::xmm0,
64 x86_64::Assembler::xmm1,
65 x86_64::Assembler::xmm2,
66 x86_64::Assembler::xmm3,
67 x86_64::Assembler::xmm4,
68 x86_64::Assembler::xmm5,
69 x86_64::Assembler::xmm6,
70 x86_64::Assembler::xmm7,
71 x86_64::Assembler::xmm8,
72 x86_64::Assembler::xmm9,
73 x86_64::Assembler::xmm10,
74 x86_64::Assembler::xmm11,
75 x86_64::Assembler::xmm12,
76 x86_64::Assembler::xmm13,
77 x86_64::Assembler::xmm14,
78 x86_64::Assembler::xmm15,
79 };
80
81 // Map from register number to offset in CallIntrinsic save area. Counted in 8-byte slots.
82 inline constexpr auto kSimdRegOffsetsOnStack = []() {
83 std::array<int8_t, 16> simd_regs_on_stack = {};
84 // simd_regs_on_stack.fill(kRegIsNotOnStack); - needs C++20
85 for (auto& num : simd_regs_on_stack) {
86 num = kRegIsNotOnStack;
87 }
88
89 int8_t stack_allocation_size = AlignUp(std::size(kCallerSavedRegs), 2);
90 for (auto reg : kCallerSavedXMMRegs) {
91 simd_regs_on_stack[reg.num] = stack_allocation_size;
92 stack_allocation_size += 2;
93 }
94 return simd_regs_on_stack;
95 }();
96
// Save area size for CallIntrinsic save area. Counted in 8-byte slots.
// GP slots are padded to an even count (keeping XMM slots 16-byte aligned for Movdqa) and each
// XMM register takes two slots.
inline constexpr int8_t kSaveAreaSize =
    AlignUp(std::size(kCallerSavedRegs), 2) + std::size(kCallerSavedXMMRegs) * 2;

// Per-call snapshot of which save-area slots PopCallerSaved should restore.  Usually a copy of
// kRegOffsetsOnStack/kSimdRegOffsetsOnStack with entries overwritten by kRegIsNotOnStack for
// registers that now hold intrinsic results and must not be clobbered.
struct StoredRegsInfo {
  std::decay_t<decltype(kRegOffsetsOnStack)> regs_on_stack;
  std::decay_t<decltype(kSimdRegOffsetsOnStack)> simd_regs_on_stack;
};
105
PushCallerSaved(MacroAssembler<x86_64::Assembler> & as)106 inline void PushCallerSaved(MacroAssembler<x86_64::Assembler>& as) {
107 as.Subq(as.rsp, kSaveAreaSize * 8);
108
109 for (auto reg : kCallerSavedRegs) {
110 as.Movq({.base = as.rsp, .disp = kRegOffsetsOnStack[reg.num] * 8}, reg);
111 }
112
113 for (auto reg : kCallerSavedXMMRegs) {
114 as.Movdqa({.base = as.rsp, .disp = kSimdRegOffsetsOnStack[reg.num] * 8}, reg);
115 }
116 }
117
118 // Note: regs_on_stack is usually copy of kRegOffsetsOnStack with some registers marked off as
119 // kRegIsNotOnStack, simd_regs_on_stack is kSimdRegOffsetsOnStack with some registers marked as
120 // kRegIsNotOnStack. These registers are skipped during restoration process.
PopCallerSaved(MacroAssembler<x86_64::Assembler> & as,const StoredRegsInfo regs_info)121 inline void PopCallerSaved(MacroAssembler<x86_64::Assembler>& as, const StoredRegsInfo regs_info) {
122 for (auto reg : kCallerSavedRegs) {
123 if (regs_info.regs_on_stack[reg.num] != kRegIsNotOnStack) {
124 as.Movq(reg, {.base = as.rsp, .disp = regs_info.regs_on_stack[reg.num] * 8});
125 }
126 }
127 for (auto reg : kCallerSavedXMMRegs) {
128 if (regs_info.simd_regs_on_stack[reg.num] != kRegIsNotOnStack) {
129 as.Movdqa(reg, {.base = as.rsp, .disp = regs_info.simd_regs_on_stack[reg.num] * 8});
130 }
131 }
132
133 as.Addq(as.rsp, kSaveAreaSize * 8);
134 }
135
// Nonfunctional assembler used by static_assert expression. It doesn't do anything but allows us
// to call InitArgs during compilation time with the same argument types as would happen during
// execution.
//
// This turns runtime check into compile time check and thus allows us to catch weird corner cases
// faster.
class ConstExprCheckAssembler {
 public:
  // Mirror the real assembler's operand vocabulary so InitArgs type-checks identically against
  // this stub and against MacroAssembler<x86_64::Assembler>.
  using Operand = MacroAssembler<x86_64::Assembler>::Operand;
  using Register = MacroAssembler<x86_64::Assembler>::Register;
  using XMMRegister = MacroAssembler<x86_64::Assembler>::XMMRegister;
  static constexpr auto rsp = MacroAssembler<x86_64::Assembler>::rsp;

  constexpr ConstExprCheckAssembler() = default;

  // Each member below is a constexpr no-op stand-in for the identically-named instruction
  // emitter; only the overloads that InitArgs may instantiate are provided.
  template <typename U, typename V>
  constexpr void Expand(Register, Operand) const {}
  template <typename U, typename V>
  constexpr void Expand(Register, Register) const {}

  template <typename U>
  constexpr void Mov(Operand, Register) const {}
  template <typename U>
  constexpr void Mov(Register, Operand) const {}
  template <typename U>
  constexpr void Mov(Register, Register) const {}

  constexpr void Movl(Register, int32_t) const {}

  template <typename U>
  constexpr void Movs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Movs(XMMRegister, XMMRegister) const {}

  template <typename U>
  constexpr void Vmovs(Operand, XMMRegister) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, Operand) const {}
  template <typename U>
  constexpr void Vmovs(XMMRegister, XMMRegister, XMMRegister) const {}
};
179
// Helper wrapper to pass the intrinsic type down the generic lambda.
template <typename T, typename U>
struct ArgWrap {
  using AssemblerType = T;  // How the translator supplies the argument (register, immediate, …).
  using IntrinsicType = U;  // The parameter type of the intrinsic function itself.
  AssemblerType value;
};
187
188 static constexpr x86_64::Assembler::Register kAbiArgs[] = {
189 x86_64::Assembler::rdi,
190 x86_64::Assembler::rsi,
191 x86_64::Assembler::rdx,
192 x86_64::Assembler::rcx,
193 x86_64::Assembler::r8,
194 x86_64::Assembler::r9,
195 };
196
197 static constexpr x86_64::Assembler::XMMRegister kAbiSimdArgs[] = {
198 x86_64::Assembler::xmm0,
199 x86_64::Assembler::xmm1,
200 x86_64::Assembler::xmm2,
201 x86_64::Assembler::xmm3,
202 x86_64::Assembler::xmm4,
203 x86_64::Assembler::xmm5,
204 x86_64::Assembler::xmm6,
205 x86_64::Assembler::xmm7,
206 };
207
// Emits code moving the intrinsic's arguments into the ABI-mandated argument registers.
//
// Assumes RSP points to preallocated stack args area.
// Returns false (instead of asserting) when an argument cannot be placed; this keeps the
// function usable at compile time from InitArgsVerify's static_assert.
template <typename IntrinsicResType,
          typename... IntrinsicArgType,
          typename MacroAssembler,
          typename... AssemblerArgType>
constexpr bool InitArgs(MacroAssembler&& as, bool has_avx, AssemblerArgType... args) {
  using Assembler = std::decay_t<MacroAssembler>;
  using Register = typename Assembler::Register;
  using XMMRegister = typename Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  // All ABI argument registers are saved among caller-saved registers, so we can safely initialize
  // them now. When intrinsic receives its argument from such register we'll read it from stack, so
  // there is no early-clobbering problem. Callee-saved regs are never ABI arguments, so we can move
  // them to ABI reg directly.

  size_t gp_index = 0;
  size_t simd_index = 0;
  // Process arguments left-to-right via a fold over &&; the first argument that cannot be placed
  // short-circuits the whole fold to false.
  bool success = ([&as, &gp_index, &simd_index, has_avx](auto arg) -> bool {
    using AssemblerType = typename decltype(arg)::AssemblerType;
    using IntrinsicType = typename decltype(arg)::IntrinsicType;

    // Fail if we ran out of ABI registers of the required class — arguments are never spilled to
    // the stack here.
    if (std::is_integral_v<IntrinsicType>) {
      if (gp_index == std::size(kAbiArgs)) {
        return false;
      }
    } else if constexpr (std::is_same_v<IntrinsicType, Float32> ||
                         std::is_same_v<IntrinsicType, Float64>) {
      if (simd_index == std::size(kAbiSimdArgs)) {
        return false;
      }
    } else {
      return false;
    }

    // Note, ABI mandates extension up to 32-bit and zero-filling the upper half.
    if constexpr (std::is_integral_v<IntrinsicType> && sizeof(IntrinsicType) <= sizeof(int32_t) &&
                  std::is_integral_v<AssemblerType> && sizeof(AssemblerType) <= sizeof(int32_t)) {
      // Immediate narrower than or equal to 32 bits.
      as.Movl(kAbiArgs[gp_index++], static_cast<int32_t>(arg.value));
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) == sizeof(int64_t) &&
                         std::is_integral_v<AssemblerType> &&
                         sizeof(AssemblerType) == sizeof(int64_t)) {
      // 64-bit immediate.
      as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++],
                                                 static_cast<int64_t>(arg.value));
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) <= sizeof(int32_t) &&
                         std::is_same_v<AssemblerType, Register>) {
      // Narrow integer supplied in a register: caller-saved sources were spilled by
      // PushCallerSaved, so read those back from the save area instead of the (possibly already
      // clobbered) register itself.
      if (kRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
        as.template Expand<int32_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
      } else {
        as.template Expand<int32_t, IntrinsicType>(
            kAbiArgs[gp_index++],
            {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.num] * 8});
      }
    } else if constexpr (std::is_integral_v<IntrinsicType> &&
                         sizeof(IntrinsicType) == sizeof(int64_t) &&
                         std::is_same_v<AssemblerType, Register>) {
      // 64-bit integer supplied in a register; same save-area indirection as above.
      if (kRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
        as.template Expand<int64_t, IntrinsicType>(kAbiArgs[gp_index++], arg.value);
      } else {
        as.template Expand<int64_t, IntrinsicType>(
            kAbiArgs[gp_index++],
            {.base = Assembler::rsp, .disp = kRegOffsetsOnStack[arg.value.num] * 8});
      }
    } else if constexpr ((std::is_same_v<IntrinsicType, Float32> ||
                          std::is_same_v<IntrinsicType, Float64>)&&std::is_same_v<AssemblerType,
                                                                                  XMMRegister>) {
      // Floating-point argument supplied in an XMM register.
      if (kSimdRegOffsetsOnStack[arg.value.num] == kRegIsNotOnStack) {
        if (has_avx) {
          // Three-operand AVX form: the destination doubles as the merge source operand.
          as.template Vmovs<IntrinsicType>(
              kAbiSimdArgs[simd_index], kAbiSimdArgs[simd_index], arg.value);
          simd_index++;
        } else {
          as.template Movs<IntrinsicType>(kAbiSimdArgs[simd_index++], arg.value);
        }
      } else {
        // Source was spilled — reload it from the save area.
        if (has_avx) {
          as.template Vmovs<IntrinsicType>(
              kAbiSimdArgs[simd_index++],
              {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.num] * 8});
        } else {
          as.template Movs<IntrinsicType>(
              kAbiSimdArgs[simd_index++],
              {.base = as.rsp, .disp = kSimdRegOffsetsOnStack[arg.value.num] * 8});
        }
      }
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicType, AssemblerType>>,
                    "Unknown parameter type, please add support to CallIntrinsic");
    }
    return true;
  }(ArgWrap<AssemblerArgType, IntrinsicArgType>{.value = args}) && ...);
  return success;
}
304
// Forward results from ABI registers to result-specified registers and mark registers in the
// returned StoredRegsInfo with kRegIsNotOnStack to prevent restoration from stack.
//
// Supported result shapes (see the constexpr dispatch below): a single 32/64-bit integer in a GP
// register, a single Float32/Float64 in an XMM register, or a 2-tuple of integers (rax, rdx).
template <typename IntrinsicResType, typename AssemblerResType>
StoredRegsInfo ForwardResults(MacroAssembler<x86_64::Assembler>& as, AssemblerResType result) {
  using Assembler = MacroAssembler<x86_64::Assembler>;
  using Register = Assembler::Register;
  using XMMRegister = Assembler::XMMRegister;
  using Float32 = intrinsics::Float32;
  using Float64 = intrinsics::Float64;

  // Start from the full offset maps; entries are knocked out below for each register that now
  // holds a result so PopCallerSaved leaves it alone.
  StoredRegsInfo regs_info = {.regs_on_stack = kRegOffsetsOnStack,
                              .simd_regs_on_stack = kSimdRegOffsetsOnStack};

  if constexpr (Assembler::kFormatIs<IntrinsicResType, std::tuple<int32_t>, std::tuple<uint32_t>> &&
                std::is_same_v<AssemblerResType, Register>) {
    // Note: even unsigned 32-bit results are sign-extended to 64bit register on RV64.
    regs_info.regs_on_stack[result.num] = kRegIsNotOnStack;
    as.Expand<int64_t, int32_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<int64_t>, std::tuple<uint64_t>> &&
                       std::is_same_v<AssemblerResType, Register>) {
    regs_info.regs_on_stack[result.num] = kRegIsNotOnStack;
    as.Mov<int64_t>(result, Assembler::rax);
  } else if constexpr (Assembler::
                           kFormatIs<IntrinsicResType, std::tuple<Float32>, std::tuple<Float64>> &&
                       std::is_same_v<AssemblerResType, XMMRegister>) {
    // Floating-point result arrives in xmm0 per the SysV ABI.
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    regs_info.simd_regs_on_stack[result.num] = kRegIsNotOnStack;
    if (host_platform::kHasAVX) {
      as.Vmovs<ResType0>(result, result, Assembler::xmm0);
    } else {
      as.Movs<ResType0>(result, Assembler::xmm0);
    }
  } else if constexpr (std::tuple_size_v<IntrinsicResType> == 2) {
    // Two-element result: per the SysV ABI the first integer component arrives in rax, the
    // second in rdx.
    using ResType0 = std::tuple_element_t<0, IntrinsicResType>;
    using ResType1 = std::tuple_element_t<1, IntrinsicResType>;
    auto [result0, result1] = result;
    if constexpr (Assembler::kFormatIs<ResType0, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.num] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result0, Assembler::rax);
    } else if constexpr (Assembler::kFormatIs<ResType0, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<0, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result0.num] = kRegIsNotOnStack;
      as.Mov<int64_t>(result0, Assembler::rax);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
    if constexpr (Assembler::kFormatIs<ResType1, int32_t, uint32_t> &&
                  std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.num] = kRegIsNotOnStack;
      as.Expand<int64_t, int32_t>(result1, Assembler::rdx);
    } else if constexpr (Assembler::kFormatIs<ResType1, int64_t, uint64_t> &&
                         std::is_same_v<std::tuple_element_t<1, AssemblerResType>, Register>) {
      regs_info.regs_on_stack[result1.num] = kRegIsNotOnStack;
      as.Mov<int64_t>(result1, Assembler::rdx);
    } else {
      static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                    "Unknown result type, please add support to CallIntrinsic");
    }
  } else {
    static_assert(kDependentTypeFalse<std::tuple<IntrinsicResType, AssemblerResType>>,
                  "Unknown result type, please add support to CallIntrinsic");
  }
  return regs_info;
}
372
// Note: we can ignore status in the actual InitArgs call because we know that InitArgs would
// succeed if the call in static_assert succeeded.
//
// AVX flag shouldn't change the outcome, but better safe than sorry.

// Compile-time proof that every argument can be placed into ABI registers: runs InitArgs against
// the no-op ConstExprCheckAssembler with both AVX settings, so an unsupported signature fails the
// build instead of miscompiling.
template <typename IntrinsicResType, typename... IntrinsicArgType, typename... AssemblerArgType>
void InitArgsVerify(AssemblerArgType...) {
  static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
      ConstExprCheckAssembler(), true, AssemblerArgType{0}...));
  static_assert(InitArgs<IntrinsicResType, IntrinsicArgType...>(
      ConstExprCheckAssembler(), false, AssemblerArgType{0}...));
}
385
386 template <typename AssemblerResType,
387 typename IntrinsicResType,
388 typename... IntrinsicArgType,
389 typename... AssemblerArgType>
CallIntrinsic(MacroAssembler<x86_64::Assembler> & as,IntrinsicResType (* function)(IntrinsicArgType...),AssemblerResType result,AssemblerArgType...args)390 void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
391 IntrinsicResType (*function)(IntrinsicArgType...),
392 AssemblerResType result,
393 AssemblerArgType... args) {
394 PushCallerSaved(as);
395
396 InitArgsVerify<IntrinsicResType, IntrinsicArgType...>(args...);
397 InitArgs<IntrinsicResType, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);
398
399 as.Call(reinterpret_cast<void*>(function));
400
401 auto regs_info = ForwardResults<IntrinsicResType>(as, result);
402
403 PopCallerSaved(as, regs_info);
404 }
405
406 template <typename AssemblerResType, typename... IntrinsicArgType, typename... AssemblerArgType>
CallIntrinsic(MacroAssembler<x86_64::Assembler> & as,void (* function)(IntrinsicArgType...),AssemblerArgType...args)407 void CallIntrinsic(MacroAssembler<x86_64::Assembler>& as,
408 void (*function)(IntrinsicArgType...),
409 AssemblerArgType... args) {
410 PushCallerSaved(as);
411
412 InitArgsVerify<void, IntrinsicArgType...>(args...);
413 InitArgs<void, IntrinsicArgType...>(as, host_platform::kHasAVX, args...);
414
415 as.Call(reinterpret_cast<void*>(function));
416
417 PopCallerSaved(
418 as, {.regs_on_stack = kRegOffsetsOnStack, .simd_regs_on_stack = kSimdRegOffsetsOnStack});
419 }
420
421 } // namespace berberis::call_intrinsic
422
423 #endif // BERBERIS_LITE_TRANSLATOR_RISCV64_TO_X86_64_CALL_INTRINSIC_H_
424