/*
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gtest/gtest.h"

#include <iterator>  // std::advance, std::next
#include <optional>  // std::nullopt

#include "berberis/backend/x86_64/loop_guest_context_optimizer.h"

#include "berberis/backend/code_emitter.h"
#include "berberis/backend/x86_64/machine_ir_builder.h"
#include "berberis/backend/x86_64/machine_ir_check.h"
#include "berberis/base/arena_alloc.h"
#include "berberis/guest_state/guest_addr.h"
#include "berberis/guest_state/guest_state.h"
#include "berberis/guest_state/guest_state_opaque.h"

#include "x86_64/loop_guest_context_optimizer_test_checks.h"

namespace berberis::x86_64 {

namespace {

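// Common pattern for the tests below: build a minimal MachineIR in an Arena,
// run the optimizer helper under test, verify the IR is still well-formed via
// CheckMachineIR, and then inspect the rewritten instructions and the
// offset-to-register map with the shared helpers from
// loop_guest_context_optimizer_test_checks.h.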
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGet(reg1, GetThreadStateRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovq, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenPut(GetThreadStateRegOffset(1), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateRegOffset(1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovq, true);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetPutAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  builder.GenGet(reg1, GetThreadStateRegOffset(1));
  builder.GenPut(GetThreadStateRegOffset(1), reg2);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ReplacePutAndUpdateMap(&machine_ir, std::next(insn_it), mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 3UL);
  auto* get_copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(get_copy_insn, reg1);
  auto* put_copy_insn = *std::next(bb->insn_list().begin());
  auto mapped_reg_in_put = CheckCopyPutInsnAndObtainMappedReg(put_copy_insn, reg2);
  EXPECT_EQ(mapped_reg, mapped_reg_in_put);

  auto offset = GetThreadStateRegOffset(1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovq, true);
}

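// SIMD variants of the tests above: 16-byte guest context accesses are
// expected to be mapped with MovType::kMovdqa.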
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetSimdAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedSimdRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have SIMD registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGetSimd<16>(reg1, GetThreadStateSimdRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateSimdRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutSimdAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedSimdRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have SIMD registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateSimdRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, true);
}

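// F-register (floating-point) variants: 8-byte accesses are expected to be
// mapped with MovType::kMovsd.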
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetFAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedFpRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have dedicated Fp registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGetSimd<8>(reg1, GetThreadStateFRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateFRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovsd, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutFAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedFpRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have dedicated Fp registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenSetSimd<8>(GetThreadStateFRegOffset(0), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateFRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovsd, true);
}

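// Vector-register variants: same 16-byte/kMovdqa expectations as the SIMD
// tests, but against the dedicated vector register offsets.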
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetVAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedVecRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have Vector registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenGetSimd<16>(reg1, GetThreadStateVRegOffset(0));
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateVRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutVAndUpdateMap) {
  if (!DoesCpuStateHaveDedicatedVecRegs()) {
    GTEST_SKIP() << "Guest CPU doesn't have Vector registers";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  builder.GenSetSimd<16>(GetThreadStateVRegOffset(0), reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);

  auto offset = GetThreadStateVRegOffset(0);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovdqa, true);
}

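// 16-bit guest context accesses emitted directly as Movw base-disp
// instructions (as on riscv64 guests) are expected to be mapped with
// MovType::kMovw.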
TEST(MachineIRLoopGuestContextOptimizerRiscv64, ReplaceGetMovwAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = 0;
  builder.Gen<MovwRegMemBaseDisp>(reg1, kMachineRegRBP, offset);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, false);
}

TEST(MachineIRLoopGuestContextOptimizerRiscv64, ReplacePutMovwAndUpdateMap) {
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = 0;
  builder.Gen<MovwMemBaseDispReg>(kMachineRegRBP, offset, reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, true);
}

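// GenerateGetInsns emits one load per mapped offset into the given block. The
// checks below rely on the emission order: general reg, then flags, FP reg,
// and finally the SIMD/vector reg, depending on what the guest CPU state
// provides.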
TEST(MachineIRLoopGuestContextOptimizer, GenerateGetInsns) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* bb = machine_ir.NewBasicBlock();

  // Add an out-edge for the CHECK in GenerateGetInsns.
  auto* dst = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(bb, dst);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, false};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, false};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, false};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GenerateGetInsns(&machine_ir, bb, mem_reg_map);

  EXPECT_EQ(
      bb->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 2UL : 1UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = bb->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckGetInsn(*insn_it, kMachineOpMovwRegMemBaseDisp, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovsdXRegMemBaseDisp, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

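// GeneratePutInsns mirrors GenerateGetInsns: one store per mapped offset, with
// the same emission order relied upon by the checks below.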
TEST(MachineIRLoopGuestContextOptimizer, GeneratePutInsns) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* bb = machine_ir.NewBasicBlock();
  auto* src = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(src, bb);
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, true};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, true};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GeneratePutInsns(&machine_ir, bb, mem_reg_map);

  EXPECT_EQ(
      bb->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 2UL : 1UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = bb->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckPutInsn(*insn_it, kMachineOpMovwMemBaseDispReg, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovsdMemBaseDispXReg, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

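// GenerateGetsInPreloop should insert the loads in the loop's preheader before
// its terminating branch; hence the expected size is one larger (for the
// PseudoBranch) than in the GenerateGetInsns test above.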
TEST(MachineIRLoopGuestContextOptimizer, GeneratePreloop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* loop_body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, loop_body);
  machine_ir.AddEdge(loop_body, loop_body);
  machine_ir.AddEdge(loop_body, afterloop);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(loop_body);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body, afterloop, kMachineRegFLAGS);
  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, false};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, false};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, false};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GenerateGetsInPreloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(
      preloop->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 3UL : 2UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = preloop->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckGetInsn(*insn_it, kMachineOpMovwRegMemBaseDisp, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovsdXRegMemBaseDisp, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckGetInsn(*insn_it, kMachineOpMovdqaXRegMemBaseDisp, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

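// GeneratePutsInPostloop should insert the stores at the start of the loop's
// exit block, ahead of its terminating PseudoJump.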
TEST(MachineIRLoopGuestContextOptimizer, GenerateAfterloop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* loop_body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, loop_body);
  machine_ir.AddEdge(loop_body, loop_body);
  machine_ir.AddEdge(loop_body, afterloop);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(loop_body);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body, afterloop, kMachineRegFLAGS);
  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  auto reg2 = machine_ir.AllocVReg();
  auto reg3 = machine_ir.AllocVReg();
  auto reg4 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  MappedRegInfo mapped_reg2 = {reg2, MovType::kMovdqa, true};
  MappedRegInfo mapped_reg3 = {reg3, MovType::kMovsd, true};
  MappedRegInfo mapped_reg4 = {reg4, MovType::kMovw, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    mem_reg_map[GetThreadStateSimdRegOffset(0)] = mapped_reg2;
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    mem_reg_map[GetThreadStateVRegOffset(0)] = mapped_reg2;
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    mem_reg_map[GetThreadStateFRegOffset(0)] = mapped_reg3;
  }
  if (DoesCpuStateHaveFlags()) {
    mem_reg_map[GetThreadStateFlagOffset()] = mapped_reg4;
  }

  GeneratePutsInPostloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(
      afterloop->insn_list().size(),
      (DoesCpuStateHaveFlags() ? 3UL : 2UL) + (DoesCpuStateHaveDedicatedFpRegs() ? 1UL : 0UL) +
          ((DoesCpuStateHaveDedicatedSimdRegs() || DoesCpuStateHaveDedicatedVecRegs()) ? 1UL
                                                                                       : 0UL));
  auto insn_it = afterloop->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));
  std::advance(insn_it, 1);
  if (DoesCpuStateHaveFlags()) {
    CheckPutInsn(*insn_it, kMachineOpMovwMemBaseDispReg, reg4, GetThreadStateFlagOffset());
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedFpRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovsdMemBaseDispXReg, reg3, GetThreadStateFRegOffset(0));
    std::advance(insn_it, 1);
  }
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateSimdRegOffset(0));
    std::advance(insn_it, 1);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckPutInsn(*insn_it, kMachineOpMovdqaMemBaseDispXReg, reg2, GetThreadStateVRegOffset(0));
    std::advance(insn_it, 1);
  }
}

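// When the loop has several preheaders, every one of them must receive the
// generated gets.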
TEST(MachineIRLoopGuestContextOptimizer, GenerateMultiplePreloops) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop1 = machine_ir.NewBasicBlock();
  auto* preloop2 = machine_ir.NewBasicBlock();
  auto* loop_body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop1, loop_body);
  machine_ir.AddEdge(preloop2, loop_body);
  machine_ir.AddEdge(loop_body, loop_body);
  machine_ir.AddEdge(loop_body, afterloop);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop1);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(preloop2);
  builder.Gen<PseudoBranch>(loop_body);
  builder.StartBasicBlock(loop_body);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body, afterloop, kMachineRegFLAGS);
  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;

  GenerateGetsInPreloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop1->insn_list().size(), 2UL);
  auto insn_it = preloop1->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));

  EXPECT_EQ(preloop2->insn_list().size(), 2UL);
  insn_it = preloop2->insn_list().begin();
  CheckGetInsn(*insn_it, kMachineOpMovqRegMemBaseDisp, reg1, GetThreadStateRegOffset(0));
}

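// Likewise, every exit block of the loop must receive the generated puts.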
TEST(MachineIRLoopGuestContextOptimizer, GenerateMultiplePostloops) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* loop_body1 = machine_ir.NewBasicBlock();
  auto* loop_body2 = machine_ir.NewBasicBlock();
  auto* postloop1 = machine_ir.NewBasicBlock();
  auto* postloop2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, loop_body1);
  machine_ir.AddEdge(loop_body1, loop_body2);
  machine_ir.AddEdge(loop_body1, postloop1);
  machine_ir.AddEdge(loop_body2, loop_body1);
  machine_ir.AddEdge(loop_body2, postloop2);

  MachineIRBuilder builder(&machine_ir);
  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(loop_body1);
  builder.StartBasicBlock(loop_body1);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body2, postloop1, kMachineRegFLAGS);
  builder.StartBasicBlock(loop_body2);
  builder.Gen<PseudoCondBranch>(
      CodeEmitter::Condition::kZero, loop_body1, postloop2, kMachineRegFLAGS);
  builder.StartBasicBlock(postloop1);
  builder.Gen<PseudoJump>(kNullGuestAddr);
  builder.StartBasicBlock(postloop2);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  Loop loop(machine_ir.arena());
  loop.push_back(loop_body1);
  loop.push_back(loop_body2);

  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  auto reg1 = machine_ir.AllocVReg();
  MappedRegInfo mapped_reg1 = {reg1, MovType::kMovq, true};
  mem_reg_map[GetThreadStateRegOffset(0)] = mapped_reg1;

  GeneratePutsInPostloop(&machine_ir, &loop, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(postloop1->insn_list().size(), 2UL);
  auto insn_it = postloop1->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));

  EXPECT_EQ(postloop2->insn_list().size(), 2UL);
  insn_it = postloop2->insn_list().begin();
  CheckPutInsn(*insn_it, kMachineOpMovqMemBaseDispReg, reg1, GetThreadStateRegOffset(0));
}

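// End-to-end tests for RemoveLoopGuestContextAccesses: in-loop Gets/Puts are
// rewritten as copies through a mapped vreg, the initial load is hoisted into
// the preloop, and a store is sunk into each loop exit, but only for offsets
// written inside the loop (RemoveGetInSelfLoop expects no store in the
// afterloop).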
TEST(MachineIRLoopGuestContextOptimizer, RemovePutInSelfLoop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  builder.StartBasicBlock(body);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body->insn_list().size(), 2UL);
  auto* copy_insn = body->insn_list().front();
  EXPECT_EQ(CheckCopyPutInsnAndObtainMappedReg(copy_insn, vreg1), mapped_reg);

  EXPECT_EQ(afterloop->insn_list().size(), 2UL);
  auto* put_insn = afterloop->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));
}

TEST(MachineIRLoopGuestContextOptimizer, RemoveGetInSelfLoop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  builder.StartBasicBlock(body);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body->insn_list().size(), 2UL);
  auto* copy_insn = body->insn_list().front();
  EXPECT_EQ(mapped_reg, CheckCopyGetInsnAndObtainMappedReg(copy_insn, vreg1));

  EXPECT_EQ(afterloop->insn_list().size(), 1UL);
}

TEST(MachineIRLoopGuestContextOptimizer, RemoveGetPutInSelfLoop) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();
  MachineReg vreg2 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  builder.StartBasicBlock(body);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenPut(GetThreadStateRegOffset(0), vreg2);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body->insn_list().size(), 3UL);
  auto* copy_insn1 = body->insn_list().front();
  EXPECT_EQ(mapped_reg, CheckCopyGetInsnAndObtainMappedReg(copy_insn1, vreg1));
  auto* copy_insn2 = *(std::next(body->insn_list().begin()));
  EXPECT_EQ(mapped_reg, CheckCopyPutInsnAndObtainMappedReg(copy_insn2, vreg2));

  EXPECT_EQ(afterloop->insn_list().size(), 2UL);
  auto* put_insn = afterloop->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));
}

TEST(MachineIRLoopGuestContextOptimizer, RemovePutInLoopWithMultipleExits) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body1 = machine_ir.NewBasicBlock();
  auto* body2 = machine_ir.NewBasicBlock();
  auto* afterloop1 = machine_ir.NewBasicBlock();
  auto* afterloop2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body1);
  machine_ir.AddEdge(body1, body2);
  machine_ir.AddEdge(body1, afterloop1);
  machine_ir.AddEdge(body2, body1);
  machine_ir.AddEdge(body2, afterloop2);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body1);

  builder.StartBasicBlock(body1);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body2, afterloop1, kMachineRegFLAGS);

  builder.StartBasicBlock(body2);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body1, afterloop2, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  builder.StartBasicBlock(afterloop2);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  RemoveLoopGuestContextAccesses(&machine_ir);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(preloop->insn_list().size(), 2UL);
  auto* get_insn = preloop->insn_list().front();
  EXPECT_EQ(get_insn->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg = get_insn->RegAt(0);
  auto disp = AsMachineInsnX86_64(get_insn)->disp();
  EXPECT_EQ(disp, GetThreadStateRegOffset(0));

  EXPECT_EQ(body1->insn_list().size(), 1UL);
  EXPECT_EQ(body2->insn_list().size(), 2UL);
  auto* copy_insn = body2->insn_list().front();
  EXPECT_EQ(CheckCopyPutInsnAndObtainMappedReg(copy_insn, vreg1), mapped_reg);

  EXPECT_EQ(afterloop1->insn_list().size(), 2UL);
  auto* put_insn = afterloop1->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));

  EXPECT_EQ(afterloop2->insn_list().size(), 2UL);
  put_insn = afterloop2->insn_list().front();
  CheckPutInsn(put_insn, kMachineOpMovqMemBaseDispReg, mapped_reg, GetThreadStateRegOffset(0));
}

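// CountGuestRegAccesses tallies Gets and Puts per guest context offset across
// the loop's blocks.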
TEST(MachineIRLoopGuestContextOptimizer, CountGuestRegAccesses) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body1 = machine_ir.NewBasicBlock();
  auto* body2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body1);
  machine_ir.AddEdge(body1, body2);
  machine_ir.AddEdge(body2, body1);

  MachineReg vreg1 = machine_ir.AllocVReg();
  MachineReg vreg2 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body1);

  builder.StartBasicBlock(body1);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(0));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    builder.GenGetSimd<16>(vreg2, GetThreadStateVRegOffset(0));
  }
  builder.Gen<PseudoBranch>(body2);

  builder.StartBasicBlock(body2);
  builder.GenGet(vreg1, GetThreadStateRegOffset(1));
  builder.GenPut(GetThreadStateRegOffset(1), vreg1);
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), vreg2);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    builder.GenSetSimd<16>(GetThreadStateVRegOffset(0), vreg2);
  }
  builder.Gen<PseudoBranch>(body1);

  Loop loop({body1, body2}, machine_ir.arena());
  auto guest_access_count = CountGuestRegAccesses(&machine_ir, &loop);
  EXPECT_EQ(guest_access_count[GetThreadStateRegOffset(0)], 1);
  EXPECT_EQ(guest_access_count[GetThreadStateRegOffset(1)], 2);
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    EXPECT_EQ(guest_access_count[GetThreadStateSimdRegOffset(0)], 2);
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    EXPECT_EQ(guest_access_count[GetThreadStateVRegOffset(0)], 2);
  }
}

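// GetSortedOffsetCounters returns (offset, access count) entries sorted by
// descending access count.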
TEST(MachineIRLoopGuestContextOptimizer, GetOffsetCounters) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body1 = machine_ir.NewBasicBlock();
  auto* body2 = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body1);
  machine_ir.AddEdge(body1, body2);
  machine_ir.AddEdge(body2, body1);

  MachineReg vreg1 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body1);

  builder.StartBasicBlock(body1);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenGet(vreg1, GetThreadStateRegOffset(1));
  builder.Gen<PseudoBranch>(body2);

  builder.StartBasicBlock(body2);
  builder.GenGet(vreg1, GetThreadStateRegOffset(2));
  builder.GenPut(GetThreadStateRegOffset(2), vreg1);
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.Gen<PseudoBranch>(body1);

  Loop loop({body1, body2}, machine_ir.arena());
  auto counters = GetSortedOffsetCounters(&machine_ir, &loop);
  EXPECT_EQ(counters.size(), 3UL);
  EXPECT_EQ(std::get<0>(counters[0]), GetThreadStateRegOffset(0));
  EXPECT_EQ(std::get<1>(counters[0]), 3);

  EXPECT_EQ(std::get<0>(counters[1]), GetThreadStateRegOffset(2));
  EXPECT_EQ(std::get<1>(counters[1]), 2);

  EXPECT_EQ(std::get<0>(counters[2]), GetThreadStateRegOffset(1));
  EXPECT_EQ(std::get<1>(counters[2]), 1);
}

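// With general_reg_limit and simd_reg_limit both set to 1, OptimizeLoop should
// map only the most frequently accessed offset of each kind and leave the
// rest untouched.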
TEST(MachineIRLoopGuestContextOptimizer, OptimizeLoopWithPriority) {
  Arena arena;
  MachineIR machine_ir(&arena);

  auto* preloop = machine_ir.NewBasicBlock();
  auto* body = machine_ir.NewBasicBlock();
  auto* afterloop = machine_ir.NewBasicBlock();
  machine_ir.AddEdge(preloop, body);
  machine_ir.AddEdge(body, body);
  machine_ir.AddEdge(body, afterloop);

  MachineReg vreg1 = machine_ir.AllocVReg();
  MachineReg vreg2 = machine_ir.AllocVReg();

  MachineIRBuilder builder(&machine_ir);

  builder.StartBasicBlock(preloop);
  builder.Gen<PseudoBranch>(body);

  // Regular reg 0 has 3 uses.
  // Regular reg 1 has 1 use.
  builder.StartBasicBlock(body);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenPut(GetThreadStateRegOffset(0), vreg1);
  builder.GenGet(vreg1, GetThreadStateRegOffset(0));
  builder.GenGet(vreg1, GetThreadStateRegOffset(1));

  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    // Simd reg 0 has 2 uses.
    // Simd reg 1 has 1 use.
    builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(0));
    builder.GenSetSimd<16>(GetThreadStateSimdRegOffset(0), vreg2);
    builder.GenGetSimd<16>(vreg2, GetThreadStateSimdRegOffset(1));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    // Vector reg 0 has 2 uses.
    // Vector reg 1 has 1 use.
    builder.GenGetSimd<16>(vreg2, GetThreadStateVRegOffset(0));
    builder.GenSetSimd<16>(GetThreadStateVRegOffset(0), vreg2);
    builder.GenGetSimd<16>(vreg2, GetThreadStateVRegOffset(1));
  }
  builder.Gen<PseudoCondBranch>(CodeEmitter::Condition::kZero, body, afterloop, kMachineRegFLAGS);

  builder.StartBasicBlock(afterloop);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);
  Loop loop({body}, machine_ir.arena());
  OptimizeLoop(&machine_ir,
               &loop,
               OptimizeLoopParams{
                   .general_reg_limit = 1,
                   .simd_reg_limit = 1,
               });

  EXPECT_EQ(preloop->insn_list().size(), 3UL);
  auto* get_insn_1 = preloop->insn_list().front();
  EXPECT_EQ(get_insn_1->opcode(), kMachineOpMovqRegMemBaseDisp);
  auto mapped_reg_1 = get_insn_1->RegAt(0);
  auto disp_1 = AsMachineInsnX86_64(get_insn_1)->disp();
  EXPECT_EQ(disp_1, GetThreadStateRegOffset(0));

  auto* get_insn_2 = *std::next(preloop->insn_list().begin());
  EXPECT_EQ(get_insn_2->opcode(), kMachineOpMovdqaXRegMemBaseDisp);
  auto mapped_reg_2 = get_insn_2->RegAt(0);
  auto disp_2 = AsMachineInsnX86_64(get_insn_2)->disp();
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    EXPECT_EQ(disp_2, GetThreadStateSimdRegOffset(0));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    EXPECT_EQ(disp_2, GetThreadStateVRegOffset(0));
  }

  // Since the regular reg limit is 1, only reg 0 is optimized. The same
  // applies to SIMD/vector regs.
  EXPECT_EQ(body->insn_list().size(), 8UL);
  auto insn_it = body->insn_list().begin();
  EXPECT_EQ(mapped_reg_1, CheckCopyGetInsnAndObtainMappedReg(*insn_it++, vreg1));
  EXPECT_EQ(mapped_reg_1, CheckCopyPutInsnAndObtainMappedReg(*insn_it++, vreg1));
  EXPECT_EQ(mapped_reg_1, CheckCopyGetInsnAndObtainMappedReg(*insn_it++, vreg1));
  EXPECT_EQ((*insn_it++)->opcode(), kMachineOpMovqRegMemBaseDisp);
  EXPECT_EQ(mapped_reg_2, CheckCopyGetInsnAndObtainMappedReg(*insn_it++, vreg2));
  EXPECT_EQ(mapped_reg_2, CheckCopyPutInsnAndObtainMappedReg(*insn_it++, vreg2));
  EXPECT_EQ((*insn_it++)->opcode(), kMachineOpMovdqaXRegMemBaseDisp);

  EXPECT_EQ(afterloop->insn_list().size(), 3UL);
  auto* put_insn_1 = afterloop->insn_list().front();
  CheckPutInsn(put_insn_1, kMachineOpMovqMemBaseDispReg, mapped_reg_1, GetThreadStateRegOffset(0));
  auto* put_insn_2 = *std::next(afterloop->insn_list().begin());
  if (DoesCpuStateHaveDedicatedSimdRegs()) {
    CheckPutInsn(
        put_insn_2, kMachineOpMovdqaMemBaseDispXReg, mapped_reg_2, GetThreadStateSimdRegOffset(0));
  } else if (DoesCpuStateHaveDedicatedVecRegs()) {
    CheckPutInsn(
        put_insn_2, kMachineOpMovdqaMemBaseDispXReg, mapped_reg_2, GetThreadStateVRegOffset(0));
  }
}

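// Flags accesses are 16-bit Movw accesses at GetThreadStateFlagOffset and are
// expected to be mapped with MovType::kMovw.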
TEST(MachineIRLoopGuestContextOptimizer, ReplaceGetFlagsAndUpdateMap) {
  if (!DoesCpuStateHaveFlags()) {
    GTEST_SKIP() << "Guest CPU doesn't support flags";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = GetThreadStateFlagOffset();
  builder.Gen<MovwRegMemBaseDisp>(reg1, kMachineRegRBP, offset);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplaceGetAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyGetInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, false);
}

TEST(MachineIRLoopGuestContextOptimizer, ReplacePutFlagsAndUpdateMap) {
  if (!DoesCpuStateHaveFlags()) {
    GTEST_SKIP() << "Guest CPU doesn't support flags";
  }
  Arena arena;
  MachineIR machine_ir(&arena);

  MachineIRBuilder builder(&machine_ir);

  auto bb = machine_ir.NewBasicBlock();
  builder.StartBasicBlock(bb);
  auto reg1 = machine_ir.AllocVReg();
  auto offset = GetThreadStateFlagOffset();
  builder.Gen<MovwMemBaseDispReg>(kMachineRegRBP, offset, reg1);
  builder.Gen<PseudoJump>(kNullGuestAddr);

  auto insn_it = bb->insn_list().begin();
  MemRegMap mem_reg_map(sizeof(CPUState), std::nullopt, machine_ir.arena());
  ReplacePutAndUpdateMap(&machine_ir, insn_it, mem_reg_map);
  ASSERT_EQ(CheckMachineIR(machine_ir), x86_64::kMachineIRCheckSuccess);

  EXPECT_EQ(bb->insn_list().size(), 2UL);
  auto* copy_insn = *bb->insn_list().begin();
  auto mapped_reg = CheckCopyPutInsnAndObtainMappedReg(copy_insn, reg1);
  CheckMemRegMap(mem_reg_map, offset, mapped_reg, MovType::kMovw, true);
}

}  // namespace

}  // namespace berberis::x86_64