/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gtest/gtest.h"

#include "berberis/backend/x86_64/loop_guest_context_optimizer.h"

#include "berberis/backend/code_emitter.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/backend/x86_64/machine_ir_check.h"
#include "berberis/base/arena_alloc.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state.h"
#include "berberis/guest_state/guest_state_opaque.h"

#include "x86_64/loop_guest_context_optimizer_test_checks.h"

namespace berberis::x86_64 {

namespace {

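// ReplaceGetAndUpdateMap should rewrite a guest-context load (a "get") into a
// copy from a newly mapped vreg and record the offset-to-register mapping in
// mem_reg_map. The mapping stays clean (not modified) since nothing is
// written back.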
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGet(reg1, GetThreadStateRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovq, false);
}

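// ReplacePutAndUpdateMap mirrors the get case for guest-context stores: the
// put becomes a copy into the mapped vreg, and the mapping is marked as
// modified so that the optimizer later writes the value back.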
TEST(MachineIRLoopGuestContextOptimizer, ReplacePutAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenPut(GetThreadStateRegOffset(1), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateRegOffset(1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovq, true);
}

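// A get followed by a put at the same offset must reuse the same mapped
// vreg; the put marks the shared mapping as modified.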
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetPutAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  builder.GenGet(reg1, GetThreadStateRegOffset(1));
  builder.GenPut(GetThreadStateRegOffset(1), reg2);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ReplacePutAndUpdateMap(&machine_ir, std::next(insn_it), mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 3UL);
  auto* get_copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(get_copy_insn, reg1);
  auto* put_copy_insn = *std::next(bb->insn_list().begin());
  auto mapped_reg_in_put = CheckCopyPutInsnAndObtainMappedReg(put_copy_insn, reg2);
  EXPECT_EQ(mapped_reg, mapped_reg_in_put);

  auto offset = GetThreadStateRegOffset(1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovq, true);
}

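// The next six tests exercise the same replacement logic for SIMD, FP, and
// vector guest registers. The MovType recorded in the map (kMovdqa for
// 16-byte accesses, kMovsd for 8-byte ones) has to match the access width.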
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetSimdAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedSimdRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have SIMD registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGetSimd<16>(reg1, GetThreadStateSimdRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateSimdRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutSimdAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedSimdRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have SIMD registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateSimdRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, true);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetFAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedFpRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have dedicated Fp registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGetSimd<8>(reg1, GetThreadStateFRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateFRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovsd, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutFAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedFpRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have dedicated Fp registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenSetSimd<8>(GetThreadStateFRegOffset(0), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateFRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovsd, true);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetVAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedVecRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have Vector registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGetSimd<16>(reg1, GetThreadStateVRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateVRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutVAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedVecRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have Vector registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenSetSimd<16>(GetThreadStateVRegOffset(0), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateVRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, true);
}

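// 16-bit guest-context accesses expressed directly as MovwRegMemBaseDisp /
// MovwMemBaseDispReg (as generated for RISC-V guests) are replaced the same
// way, with MovType::kMovw recorded in the map.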
TEST(MachineIRLoopGuestContextOptimizerRiscv64, ReplaceGetMovwAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = 0;
  builder.Gen<MovwRegMemBaseDisp>(reg1, kMachineRegRBP, offset);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, false);
}

TEST(MachineIRLoopGuestContextOptimizerRiscv64, ReplacePutMovwAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = 0;
  builder.Gen<MovwMemBaseDispReg>(kMachineRegRBP, offset, reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, true);
}

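// GenerateGetInsns should emit one load per mapped offset into the given
// basic block, using the mov variant recorded in mem_reg_map.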
TEST(MachineIRLoopGuestContextOptimizer, GenerateGetInsns) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* bb = machine_ir.NewBasicBlock();

  // Add an out-edge for the CHECK in GenerateGetInsns.
  auto* dst = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(bb, dst);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, false};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, false};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, false};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GenerateGetInsns(&machine_ir, bb, mem_reg_map);

  EXPECT_EQ(
      bb->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 2UL : 1UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = bb->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckGetInsn(*insn_it, kMachineOpMovwRegMemBaseDisp, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovsdXRegMemBaseDisp, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

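// GeneratePutInsns is the symmetric case: one store per mapped offset,
// writing each mapped vreg back to its guest-context slot.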
TEST(MachineIRLoopGuestContextOptimizer, GeneratePutInsns) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* bb = machine_ir.NewBasicBlock();
  auto* src = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(src, bb);
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, true};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, true};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GeneratePutInsns(&machine_ir, bb, mem_reg_map);

  EXPECT_EQ(
      bb->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 2UL : 1UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = bb->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckPutInsn(*insn_it, kMachineOpMovwMemBaseDispReg, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovsdMemBaseDispXReg, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

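// GenerateGetsInPreloop should place the loads in the loop's preloop block,
// in front of its terminating branch.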
TEST(MachineIRLoopGuestContextOptimizer, GeneratePreloop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* loop_body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, loop_body);
  machine_ir.AddEdge(loop_body, loop_body);
  machine_ir.AddEdge(loop_body, afterloop);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(loop_body);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body, afterloop, kMachineRegFLAGS);
  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, false};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, false};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, false};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GenerateGetsInPreloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(
      preloop->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 3UL : 2UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = preloop->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckGetInsn(*insn_it, kMachineOpMovwRegMemBaseDisp, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovsdXRegMemBaseDisp, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

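// GeneratePutsInPostloop should place the write-back stores in the block
// that follows the loop, before its terminating jump.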
TEST(MachineIRLoopGuestContextOptimizer, GenerateAfterloop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* loop_body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, loop_body);
  machine_ir.AddEdge(loop_body, loop_body);
  machine_ir.AddEdge(loop_body, afterloop);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(loop_body);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body, afterloop, kMachineRegFLAGS);
  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, true};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, true};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GeneratePutsInPostloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(
      afterloop->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 3UL : 2UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = afterloop->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckPutInsn(*insn_it, kMachineOpMovwMemBaseDispReg, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovsdMemBaseDispXReg, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

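// When a loop has several preloop predecessors, the gets must be emitted
// into every one of them.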
TEST(MachineIRLoopGuestContextOptimizer, GenerateMultiplePreloops) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop1 = machine_ir.NewBasicBlock();
  auto* preloop2 = machine_ir.NewBasicBlock();
  auto* loop_body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop1, loop_body);
  machine_ir.AddEdge(preloop2, loop_body);
  machine_ir.AddEdge(loop_body, loop_body);
  machine_ir.AddEdge(loop_body, afterloop);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop1);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(preloop2);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(loop_body);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body, afterloop, kMachineRegFLAGS);
  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;

  GenerateGetsInPreloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop1->insn_list().size(), 2UL);
  auto insn_it = preloop1->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));

  EXPECT_EQ(preloop2->insn_list().size(), 2UL);
  insn_it = preloop2->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));
}

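// Likewise, a loop with several exits needs the puts emitted into every
// postloop block.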
TEST(MachineIRLoopGuestContextOptimizer, GenerateMultiplePostloops) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* loop_body1 = machine_ir.NewBasicBlock();
  auto* loop_body2 = machine_ir.NewBasicBlock();
  auto* postloop1 = machine_ir.NewBasicBlock();
  auto* postloop2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, loop_body1);
  machine_ir.AddEdge(loop_body1, loop_body2);
  machine_ir.AddEdge(loop_body1, postloop1);
  machine_ir.AddEdge(loop_body2, loop_body1);
  machine_ir.AddEdge(loop_body2, postloop2);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(loop_body1);
  builder.StartBasicBlock(loop_body1);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body2, postloop1, kMachineRegFLAGS);
  builder.StartBasicBlock(loop_body2);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body1, postloop2, kMachineRegFLAGS);
  builder.StartBasicBlock(postloop1);
  builder.Gen<PseudoJump>(kNullGuestAddr);
  builder.StartBasicBlock(postloop2);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body1);
  loop.push_back(loop_body2);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;

  GeneratePutsInPostloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(postloop1->insn_list().size(), 2UL);
  auto insn_it = postloop1->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));

  EXPECT_EQ(postloop2->insn_list().size(), 2UL);
  insn_it = postloop2->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));
}

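// The remaining tests drive the full RemoveLoopGuestContextAccesses pass.
// For a put inside a self-loop it should hoist a get into the preloop, turn
// the in-loop put into a copy, and sink the write-back put into the
// afterloop block.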
TEST(MachineIRLoopGuestContextOptimizer, RemovePutInSelfLoop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  builder.StartBasicBlock(body);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body->insn_list().size(), 2UL);
  auto* copy_insn = body->insn_list().front();
  EXPECT_EQ(CheckCopyPutInsnAndObtainMappedReg(copy_insn, vreg1), mapped_reg);

  EXPECT_EQ(afterloop->insn_list().size(), 2UL);
  auto* put_insn = afterloop->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));
}

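// For a get inside a self-loop only the preloop load is needed; no put is
// sunk into the afterloop since the mapping is never modified.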
TEST(MachineIRLoopGuestContextOptimizer, RemoveGetInSelfLoop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  builder.StartBasicBlock(body);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body->insn_list().size(), 2UL);
  auto* copy_insn = body->insn_list().front();
  EXPECT_EQ(mapped_reg, CheckCopyGetInsnAndObtainMappedReg(copy_insn, vreg1));

  EXPECT_EQ(afterloop->insn_list().size(), 1UL);
}

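// A get/put pair on the same offset inside the loop shares one mapped vreg:
// both become copies, and a single write-back lands in the afterloop.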
TEST(MachineIRLoopGuestContextOptimizer, RemoveGetPutInSelfLoop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();
  MachineReg vreg2 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  builder.StartBasicBlock(body);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenPut(GetThreadStateRegOffset(0), vreg2);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body->insn_list().size(), 3UL);
  auto* copy_insn1 = body->insn_list().front();
  EXPECT_EQ(mapped_reg, CheckCopyGetInsnAndObtainMappedReg(copy_insn1, vreg1));
  auto* copy_insn2 = *(std::next(body->insn_list().begin()));
  EXPECT_EQ(mapped_reg, CheckCopyPutInsnAndObtainMappedReg(copy_insn2, vreg2));

  EXPECT_EQ(afterloop->insn_list().size(), 2UL);
  auto* put_insn = afterloop->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));
}

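// With multiple loop exits the write-back must be duplicated into each exit
// block, even along the path where the put was not executed.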
TEST(MachineIRLoopGuestContextOptimizer, RemovePutInLoopWithMultipleExits) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body1 = machine_ir.NewBasicBlock();
  auto* body2 = machine_ir.NewBasicBlock();
  auto* afterloop1 = machine_ir.NewBasicBlock();
  auto* afterloop2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body1);
  machine_ir.AddEdge(body1, body2);
  machine_ir.AddEdge(body1, afterloop1);
  machine_ir.AddEdge(body2, body1);
  machine_ir.AddEdge(body2, afterloop2);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body1);

  builder.StartBasicBlock(body1);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body2, afterloop1, kMachineRegFLAGS);

  builder.StartBasicBlock(body2);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body1, afterloop2, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  builder.StartBasicBlock(afterloop2);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body1->insn_list().size(), 1UL);
  EXPECT_EQ(body2->insn_list().size(), 2UL);
  auto* copy_insn = body2->insn_list().front();
  EXPECT_EQ(CheckCopyPutInsnAndObtainMappedReg(copy_insn, vreg1), mapped_reg);

  EXPECT_EQ(afterloop1->insn_list().size(), 2UL);
  auto* put_insn = afterloop1->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));

  EXPECT_EQ(afterloop2->insn_list().size(), 2UL);
  put_insn = afterloop2->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));
}

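// CountGuestRegAccesses tallies how many times each guest-context offset is
// read or written inside the loop.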
TEST(MachineIRLoopGuestContextOptimizer, CountGuestRegAccesses) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body1 = machine_ir.NewBasicBlock();
  auto* body2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body1);
  machine_ir.AddEdge(body1, body2);
  machine_ir.AddEdge(body2, body1);

  MachineReg vreg1 = machine_ir.AllocVReg();
  MachineReg vreg2 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body1);

  builder.StartBasicBlock(body1);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(0));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    builder.GenGetSimd<16>(vreg2, GetThreadStateVRegOffset(0));
  }
  builder.Gen<PseudoBranch>(body2);

  builder.StartBasicBlock(body2);
  builder.GenGet(vreg1, GetThreadStateRegOffset(1));
  builder.GenPut(GetThreadStateRegOffset(1), vreg1);
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), vreg2);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    builder.GenSetSimd<16>(GetThreadStateVRegOffset(0), vreg2);
  }
  builder.Gen<PseudoBranch>(body1);

  Loop loop({body1, body2}, machine_ir.arena());
  auto guest_access_count = CountGuestRegAccesses(&machine_ir, &loop);
  EXPECT_EQ(guest_access_count[GetThreadStateRegOffset(0)], 1);
  EXPECT_EQ(guest_access_count[GetThreadStateRegOffset(1)], 2);
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    EXPECT_EQ(guest_access_count[GetThreadStateSimdRegOffset(0)], 2);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    EXPECT_EQ(guest_access_count[GetThreadStateVRegOffset(0)], 2);
  }
}

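// GetSortedOffsetCounters should return (offset, count) tuples sorted by
// access count in descending order.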
TEST(MachineIRLoopGuestContextOptimizer, GetOffsetCounters) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body1 = machine_ir.NewBasicBlock();
  auto* body2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body1);
  machine_ir.AddEdge(body1, body2);
  machine_ir.AddEdge(body2, body1);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body1);

  builder.StartBasicBlock(body1);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenGet(vreg1, GetThreadStateRegOffset(1));
  builder.Gen<PseudoBranch>(body2);

  builder.StartBasicBlock(body2);
  builder.GenGet(vreg1, GetThreadStateRegOffset(2));
  builder.GenPut(GetThreadStateRegOffset(2), vreg1);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.Gen<PseudoBranch>(body1);

  Loop loop({body1, body2}, machine_ir.arena());
  auto counters = GetSortedOffsetCounters(&machine_ir, &loop);
  EXPECT_EQ(counters.size(), 3UL);
  EXPECT_EQ(std::get<0>(counters[0]), GetThreadStateRegOffset(0));
  EXPECT_EQ(std::get<1>(counters[0]), 3);

  EXPECT_EQ(std::get<0>(counters[1]), GetThreadStateRegOffset(2));
  EXPECT_EQ(std::get<1>(counters[1]), 2);

  EXPECT_EQ(std::get<0>(counters[2]), GetThreadStateRegOffset(1));
  EXPECT_EQ(std::get<1>(counters[2]), 1);
}

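// OptimizeLoop honors the register limits: with a limit of one general and
// one SIMD register, only the most frequently accessed offset of each kind
// is promoted to a vreg; the rest keep their memory accesses.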
TEST(MachineIRLoopGuestContextOptimizer, OptimizeLoopWithPriority) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();
  MachineReg vreg2 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  // Regular reg 0 has 3 uses.
  // Regular reg 1 has 1 use.
  builder.StartBasicBlock(body);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenGet(vreg1, GetThreadStateRegOffset(1));

  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    // Simd reg 0 has 2 uses.
    // Simd reg 1 has 1 use.
    builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(0));
    builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), vreg2);
    builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(1));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    // Vector reg 0 has 2 uses.
    // Vector reg 1 has 1 use.
    builder.GenGetSimd<16>(vreg2, GetThreadStateVRegOffset(0));
    builder.GenSetSimd<16>(GetThreadStateVRegOffset(0), vreg2);
    builder.GenGetSimd<16>(vreg2, GetThreadStateVRegOffset(1));
  }
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
  Loop loop({body}, machine_ir.arena());
  OptimizeLoop(&machine_ir,
               &loop,
               OptimizeLoopParams{
                   .general_reg_limit = 1,
                   .simd_reg_limit = 1,
               });

  EXPECT_EQ(preloop->insn_list().size(), 3UL);
  auto* get_insn_1 = preloop->insn_list().front();
  EXPECT_EQ(get_insn_1->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg_1 = get_insn_1->RegAt(0);
  auto disp_1 = AsMachineInsnX86_64(get_insn_1)->disp();
  EXPECT_EQ(disp_1, GetThreadStateRegOffset(0));

  auto* get_insn_2 = *std::next(preloop->insn_list().begin());
  EXPECT_EQ(get_insn_2->opcode(), kMachineOpMovdqaXRegMemBaseDisp);
  auto mapped_reg_2 = get_insn_2->RegAt(0);
  auto disp_2 = AsMachineInsnX86_64(get_insn_2)->disp();
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    EXPECT_EQ(disp_2, GetThreadStateSimdRegOffset(0));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    EXPECT_EQ(disp_2, GetThreadStateVRegOffset(0));
  }

  // Since the general reg limit is 1, only reg 0 is optimized. The same holds
  // for SIMD/vector regs.
  EXPECT_EQ(body->insn_list().size(), 8UL);
  auto insn_it = body->insn_list().begin();
  EXPECT_EQ(mapped_reg_1, CheckCopyGetInsnAndObtainMappedReg(*insn_it++, vreg1));
  EXPECT_EQ(mapped_reg_1, CheckCopyPutInsnAndObtainMappedReg(*insn_it++, vreg1));
  EXPECT_EQ(mapped_reg_1, CheckCopyGetInsnAndObtainMappedReg(*insn_it++, vreg1));
  EXPECT_EQ((*insn_it++)->opcode(), kMachineOpMovqRegMemBaseDisp);
  EXPECT_EQ(mapped_reg_2, CheckCopyGetInsnAndObtainMappedReg(*insn_it++, vreg2));
  EXPECT_EQ(mapped_reg_2, CheckCopyPutInsnAndObtainMappedReg(*insn_it++, vreg2));
  EXPECT_EQ((*insn_it++)->opcode(), kMachineOpMovdqaXRegMemBaseDisp);

  EXPECT_EQ(afterloop->insn_list().size(), 3UL);
  auto* put_insn_1 = afterloop->insn_list().front();
  CheckPutInsn(put_insn_1, kMachineOpMovqMemBaseDispReg, mapped_reg_1, GetThreadStateRegOffset(0));
  auto* put_insn_2 = *std::next(afterloop->insn_list().begin());
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckPutInsn(
        put_insn_2, kMachineOpMovdqaMemBaseDispXReg, mapped_reg_2, GetThreadStateSimdRegOffset(0));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckPutInsn(
        put_insn_2, kMachineOpMovdqaMemBaseDispXReg, mapped_reg_2, GetThreadStateVRegOffset(0));
  }
}

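// Flag accesses live at GetThreadStateFlagOffset() and are 16 bits wide, so
// their mappings are recorded with MovType::kMovw.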
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetFlagsAndUpdateMap) {
  if (!DoesCpuStateHaveFlags()) {
    GTEST_SKIP() << "Guest CPU doesn't support flags";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = GetThreadStateFlagOffset();
  builder.Gen<MovwRegMemBaseDisp>(reg1, kMachineRegRBP, offset);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutFlagsAndUpdateMap) {
  if (!DoesCpuStateHaveFlags()) {
    GTEST_SKIP() << "Guest CPU doesn't support flags";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = GetThreadStateFlagOffset();
  builder.Gen<MovwMemBaseDispReg>(kMachineRegRBP, offset, reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, true);
}

}  // namespace

}  // namespace berberis::x86_64