/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "instruction_simplifier_shared.h"

#include "code_generator.h"
#include "mirror/array-inl.h"
#include "nodes.h"

namespace art HIDDEN {

namespace {

bool TrySimpleMultiplyAccumulatePatterns(HMul* mul,
                                         HBinaryOperation* input_binop,
                                         HInstruction* input_other) {
  DCHECK(DataType::IsIntOrLongType(mul->GetType()));
  DCHECK(input_binop->IsAdd() || input_binop->IsSub());
  DCHECK_NE(input_binop, input_other);
  if (!input_binop->HasOnlyOneNonEnvironmentUse()) {
    return false;
  }

  // Try to interpret patterns like
  //    a * (b <+/-> 1)
  // as
  //    (a * b) <+/-> a
  HInstruction* input_a = input_other;
  HInstruction* input_b = nullptr;  // Set to a non-null value if we found a pattern to optimize.
  HInstruction::InstructionKind op_kind;

  if (input_binop->IsAdd()) {
    if ((input_binop->GetConstantRight() != nullptr) && input_binop->GetConstantRight()->IsOne()) {
      // Interpret
      //    a * (b + 1)
      // as
      //    (a * b) + a
      input_b = input_binop->GetLeastConstantLeft();
      op_kind = HInstruction::kAdd;
    }
  } else {
    DCHECK(input_binop->IsSub());
    if (input_binop->GetRight()->IsConstant() &&
        input_binop->GetRight()->AsConstant()->IsMinusOne()) {
      // Interpret
      //    a * (b - (-1))
      // as
      //    a + (a * b)
      input_b = input_binop->GetLeft();
      op_kind = HInstruction::kAdd;
    } else if (input_binop->GetLeft()->IsConstant() &&
               input_binop->GetLeft()->AsConstant()->IsOne()) {
      // Interpret
      //    a * (1 - b)
      // as
      //    a - (a * b)
      input_b = input_binop->GetRight();
      op_kind = HInstruction::kSub;
    }
  }

  if (input_b == nullptr) {
    // We did not find a pattern we can optimize.
    return false;
  }

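  // Create (input_a * input_b) <op_kind> input_a: the first `input_a` below
  // is the accumulator; the following two arguments are the multiply
  // operands.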
  ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();
  HMultiplyAccumulate* mulacc = new (allocator) HMultiplyAccumulate(
      mul->GetType(), op_kind, input_a, input_a, input_b, mul->GetDexPc());

  mul->GetBlock()->ReplaceAndRemoveInstructionWith(mul, mulacc);
  input_binop->GetBlock()->RemoveInstruction(input_binop);

  return true;
}

}  // namespace

bool TryCombineMultiplyAccumulate(HMul* mul, InstructionSet isa) {
  DataType::Type type = mul->GetType();
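  // Multiply-accumulate is only supported where the ISA provides a suitable
  // instruction: 32-bit MLA/MLS on ARM/Thumb2, and MADD/MSUB (for both
  // 32-bit and 64-bit operands) on ARM64.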
  switch (isa) {
    case InstructionSet::kArm:
    case InstructionSet::kThumb2:
      if (type != DataType::Type::kInt32) {
        return false;
      }
      break;
    case InstructionSet::kArm64:
      if (!DataType::IsIntOrLongType(type)) {
        return false;
      }
      break;
    default:
      return false;
  }

  ArenaAllocator* allocator = mul->GetBlock()->GetGraph()->GetAllocator();

  if (mul->HasOnlyOneNonEnvironmentUse()) {
    HInstruction* use = mul->GetUses().front().GetUser();
    if (use->IsAdd() || use->IsSub()) {
      // Replace code looking like
      //    MUL tmp, x, y
      //    SUB dst, acc, tmp
      // with
      //    MULSUB dst, acc, x, y
      // (and similarly for ADD/MULADD). Note that we do not want to
      // (unconditionally) perform the merge when the multiplication has
      // multiple uses, even if it could be merged into all of them. Multiple
      // uses could happen on the same control-flow path, and we would then
      // increase the amount of work. In the future we could try to evaluate
      // whether all uses are on different control-flow paths (using dominance
      // and reverse-dominance information) and only perform the merge when
      // they are.
      HInstruction* accumulator = nullptr;
      HBinaryOperation* binop = use->AsBinaryOperation();
      HInstruction* binop_left = binop->GetLeft();
      HInstruction* binop_right = binop->GetRight();
      // Be careful after GVN: the two inputs of `binop` could have been
      // merged into the same instruction. That cannot happen here, since the
      // `HMul` has only one use.
      DCHECK_NE(binop_left, binop_right);
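      // For HAdd the multiply can be either input; for HSub it must be the
      // right input (acc - (x * y)), since `mul - acc` cannot be expressed
      // as a multiply-subtract.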
      if (binop_right == mul) {
        accumulator = binop_left;
      } else if (use->IsAdd()) {
        DCHECK_EQ(binop_left, mul);
        accumulator = binop_right;
      }

      if (accumulator != nullptr) {
        HMultiplyAccumulate* mulacc =
            new (allocator) HMultiplyAccumulate(type,
                                                binop->GetKind(),
                                                accumulator,
                                                mul->GetLeft(),
                                                mul->GetRight());

        binop->GetBlock()->ReplaceAndRemoveInstructionWith(binop, mulacc);
        DCHECK(!mul->HasUses());
        mul->GetBlock()->RemoveInstruction(mul);
        return true;
      }
    } else if (use->IsNeg() && isa != InstructionSet::kArm) {
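      // Replace code looking like
      //    MUL tmp, x, y
      //    NEG dst, tmp
      // with
      //    MULSUB dst, 0, x, y
      // i.e. a multiply-negate, which ARM64 can encode as MNEG (an alias of
      // MSUB with the zero register).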
      HMultiplyAccumulate* mulacc =
          new (allocator) HMultiplyAccumulate(type,
                                              HInstruction::kSub,
                                              mul->GetBlock()->GetGraph()->GetConstant(type, 0),
                                              mul->GetLeft(),
                                              mul->GetRight());

      use->GetBlock()->ReplaceAndRemoveInstructionWith(use, mulacc);
      DCHECK(!mul->HasUses());
      mul->GetBlock()->RemoveInstruction(mul);
      return true;
    }
  }

  // Use a multiply-accumulate instruction for a few simple patterns.
  // We prefer not to apply the following transformations when the left and
  // right inputs perform the same operation.
  // We rely on GVN to have squashed such inputs where appropriate; however,
  // the result is still correct even if that did not happen.
  if (mul->GetLeft() == mul->GetRight()) {
    return false;
  }

  HInstruction* left = mul->GetLeft();
  HInstruction* right = mul->GetRight();
  if ((right->IsAdd() || right->IsSub()) &&
      TrySimpleMultiplyAccumulatePatterns(mul, right->AsBinaryOperation(), left)) {
    return true;
  }
  if ((left->IsAdd() || left->IsSub()) &&
      TrySimpleMultiplyAccumulatePatterns(mul, left->AsBinaryOperation(), right)) {
    return true;
  }
  return false;
}

bool TryExtractArrayAccessAddress(CodeGenerator* codegen,
                                  HInstruction* access,
                                  HInstruction* array,
                                  HInstruction* index,
                                  size_t data_offset) {
  if (index->IsConstant() ||
      (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
    // When the index is a constant, all of the addressing can be folded into
    // the memory access instruction, so do not split the access.
    return false;
  }
  if (access->IsArraySet() &&
      access->AsArraySet()->GetValue()->GetType() == DataType::Type::kReference) {
    // The access may require a runtime call or the original array pointer.
    return false;
  }
  if (codegen->EmitNonBakerReadBarrier() &&
      access->IsArrayGet() &&
      access->GetType() == DataType::Type::kReference) {
    // For object arrays, the non-Baker read barrier instrumentation requires
    // the original array pointer.
    return false;
  }

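  // Rewrite, e.g.,
  //    ArrayGet [array, index]
  // as
  //    IntermediateAddress [array, data_offset]
  //    ArrayGet [address, index]
  // so that the backend can fold the constant data offset into the base
  // register and use a scaled register-offset addressing mode for the
  // element access.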
  // Proceed to extract the base address computation.
  HGraph* graph = access->GetBlock()->GetGraph();
  ArenaAllocator* allocator = graph->GetAllocator();

  HIntConstant* offset = graph->GetIntConstant(data_offset);
  HIntermediateAddress* address = new (allocator) HIntermediateAddress(array, offset, kNoDexPc);
  // TODO: Is it ok to not have this on the intermediate address?
  // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
  access->GetBlock()->InsertInstructionBefore(address, access);
  access->ReplaceInput(address, 0);
  // Both instructions must depend on GC to prevent any instruction that can
  // trigger GC from being inserted between the two.
  access->AddSideEffects(SideEffects::DependsOnGC());
  DCHECK(address->GetSideEffects().Includes(SideEffects::DependsOnGC()));
  DCHECK(access->GetSideEffects().Includes(SideEffects::DependsOnGC()));
  // TODO: Code generation for HArrayGet and HArraySet will check whether the input address
  // is an HIntermediateAddress and generate appropriate code.
  // We would like to replace the `HArrayGet` and `HArraySet` with custom instructions (maybe
  // `HArm64Load` and `HArm64Store`, `HArmLoad` and `HArmStore`). We defer these changes
  // because these new instructions would not bring any advantages yet.
  // Also see the comments in
  // `InstructionCodeGeneratorARMVIXL::VisitArrayGet()`
  // `InstructionCodeGeneratorARMVIXL::VisitArraySet()`
  // `InstructionCodeGeneratorARM64::VisitArrayGet()`
  // `InstructionCodeGeneratorARM64::VisitArraySet()`.
  return true;
}

bool TryExtractVecArrayAccessAddress(HVecMemoryOperation* access, HInstruction* index) {
  if (index->IsConstant()) {
    // If the index is a constant, the whole address calculation can often be
    // done by the LDR/STR instructions themselves.
    // TODO: Treat the case of a non-embeddable constant.
    return false;
  }

  HGraph* graph = access->GetBlock()->GetGraph();
  ArenaAllocator* allocator = graph->GetAllocator();
  DataType::Type packed_type = access->GetPackedType();
  uint32_t data_offset = mirror::Array::DataOffset(
      DataType::Size(packed_type)).Uint32Value();
  size_t component_shift = DataType::SizeShift(packed_type);

  bool is_extracting_beneficial = false;
  // Extracting the intermediate address is beneficial only if it can be
  // shared, i.e. the index has at least two users with a matching data
  // offset and component shift.
  for (const HUseListNode<HInstruction*>& use : index->GetUses()) {
    HInstruction* user = use.GetUser();
    if (user->IsVecMemoryOperation() && user != access) {
      HVecMemoryOperation* another_access = user->AsVecMemoryOperation();
      DataType::Type another_packed_type = another_access->GetPackedType();
      uint32_t another_data_offset = mirror::Array::DataOffset(
          DataType::Size(another_packed_type)).Uint32Value();
      size_t another_component_shift = DataType::SizeShift(another_packed_type);
      if (another_data_offset == data_offset && another_component_shift == component_shift) {
        is_extracting_beneficial = true;
        break;
      }
    } else if (user->IsIntermediateAddressIndex()) {
      HIntermediateAddressIndex* another_access = user->AsIntermediateAddressIndex();
      uint32_t another_data_offset = another_access->GetOffset()->AsIntConstant()->GetValue();
      size_t another_component_shift = another_access->GetShift()->AsIntConstant()->GetValue();
      if (another_data_offset == data_offset && another_component_shift == component_shift) {
        is_extracting_beneficial = true;
        break;
      }
    }
  }

  if (!is_extracting_beneficial) {
    return false;
  }

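  // Rewrite, e.g.,
  //    VecLoad [array, index]
  // as
  //    IntermediateAddressIndex [index, data_offset, component_shift]
  //    VecLoad [array, address]
  // so that the (index << shift) + data_offset computation is shared by all
  // of the matching accesses found above.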
  // Proceed to extract the index + data_offset address computation.
  HIntConstant* offset = graph->GetIntConstant(data_offset);
  HIntConstant* shift = graph->GetIntConstant(component_shift);
  HIntermediateAddressIndex* address =
      new (allocator) HIntermediateAddressIndex(index, offset, shift, kNoDexPc);

  access->GetBlock()->InsertInstructionBefore(address, access);
  access->ReplaceInput(address, 1);

  return true;
}

bool TryReplaceSubSubWithSubAdd(HSub* last_sub) {
  DCHECK(last_sub->GetRight()->IsSub());
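  // Transform Sub(c, Sub(a, b)) into Add(c, Sub(b, a)), using the identity
  // c - (a - b) == c + (b - a).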
  HBasicBlock* basic_block = last_sub->GetBlock();
  ArenaAllocator* allocator = basic_block->GetGraph()->GetAllocator();
  HInstruction* last_sub_right = last_sub->GetRight();
  HInstruction* last_sub_left = last_sub->GetLeft();
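  // The inner Sub is modified in place below, so the rewrite is only safe
  // when `last_sub` is its sole user.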
  if (last_sub_right->GetUses().HasExactlyOneElement()) {
    // Reorder the operands of last_sub_right: Sub(a, b) -> Sub(b, a).
    HInstruction* a = last_sub_right->InputAt(0);
    HInstruction* b = last_sub_right->InputAt(1);
    last_sub_right->ReplaceInput(b, 0);
    last_sub_right->ReplaceInput(a, 1);

    // Replace Sub(c, Sub(a, b)) with Add(c, Sub(b, a)).
    HAdd* add = new (allocator) HAdd(last_sub->GetType(), last_sub_left, last_sub_right);
    basic_block->ReplaceAndRemoveInstructionWith(last_sub, add);
    return true;
  } else {
    return false;
  }
}

}  // namespace art