1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_H_
18 #define ART_COMPILER_OPTIMIZING_INTRINSICS_H_
19 
20 #include "base/macros.h"
21 #include "code_generator.h"
22 #include "intrinsics_list.h"
23 #include "nodes.h"
24 #include "optimization.h"
25 #include "parallel_move_resolver.h"
26 
27 namespace art HIDDEN {
28 
29 class DexFile;
30 
// Raw IEEE 754 bit patterns of positive infinity (single and double precision).
static constexpr uint32_t kPositiveInfinityFloat = 0x7f800000U;
static constexpr uint64_t kPositiveInfinityDouble = UINT64_C(0x7ff0000000000000);

// Raw IEEE 754 bit patterns of a quiet NaN with an otherwise zero mantissa
// (single and double precision).
static constexpr uint32_t kNanFloat = 0x7fc00000U;
static constexpr uint64_t kNanDouble = UINT64_C(0x7ff8000000000000);
37 
38 class IntrinsicVisitor : public ValueObject {
39  public:
~IntrinsicVisitor()40   virtual ~IntrinsicVisitor() {}
41 
42   // Dispatch logic.
43 
Dispatch(HInvoke * invoke)44   void Dispatch(HInvoke* invoke) {
45     switch (invoke->GetIntrinsic()) {
46       case Intrinsics::kNone:
47         return;
48 
49 #define OPTIMIZING_INTRINSICS_WITH_SPECIALIZED_HIR(Name, ...) \
50       case Intrinsics::k ## Name:
51         ART_INTRINSICS_WITH_SPECIALIZED_HIR_LIST(OPTIMIZING_INTRINSICS_WITH_SPECIALIZED_HIR)
52 #undef OPTIMIZING_INTRINSICS_WITH_SPECIALIZED_HIR
53         // Note: clang++ can optimize this `switch` to a range check and a virtual dispatch
54         // with indexed load from the vtable using an adjusted `invoke->GetIntrinsic()`
55         // as the index. However, a non-empty `case` causes clang++ to produce much worse
56         // code, so we want to limit this check to debug builds only.
57         DCHECK(false) << "Unexpected intrinsic with HIR: " << invoke->GetIntrinsic();
58         return;
59 
60 #define OPTIMIZING_INTRINSICS(Name, ...) \
61       case Intrinsics::k ## Name: \
62         Visit ## Name(invoke);    \
63         return;
64         ART_INTRINSICS_WITH_HINVOKE_LIST(OPTIMIZING_INTRINSICS)
65 #undef OPTIMIZING_INTRINSICS
66 
67       // Do not put a default case. That way the compiler will complain if we missed a case.
68     }
69   }
70 
71   // Define visitor methods.
72 
73 #define DECLARE_VISIT_INTRINSIC(Name, ...) \
74   virtual void Visit##Name([[maybe_unused]] HInvoke* invoke) = 0;
ART_INTRINSICS_WITH_HINVOKE_LIST(DECLARE_VISIT_INTRINSIC)75   ART_INTRINSICS_WITH_HINVOKE_LIST(DECLARE_VISIT_INTRINSIC)
76 #undef DECLARE_VISIT_INTRINSIC
77 
78   static void MoveArguments(HInvoke* invoke,
79                             CodeGenerator* codegen,
80                             InvokeDexCallingConventionVisitor* calling_convention_visitor) {
81     if (kIsDebugBuild && invoke->IsInvokeStaticOrDirect()) {
82       HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
83       // Explicit clinit checks triggered by static invokes must have been
84       // pruned by art::PrepareForRegisterAllocation.
85       DCHECK(!invoke_static_or_direct->IsStaticWithExplicitClinitCheck());
86     }
87 
88     if (invoke->GetNumberOfArguments() == 0) {
89       // No argument to move.
90       return;
91     }
92 
93     LocationSummary* locations = invoke->GetLocations();
94 
95     // We're moving potentially two or more locations to locations that could overlap, so we need
96     // a parallel move resolver.
97     HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
98 
99     for (size_t i = 0; i < invoke->GetNumberOfArguments(); i++) {
100       HInstruction* input = invoke->InputAt(i);
101       Location cc_loc = calling_convention_visitor->GetNextLocation(input->GetType());
102       Location actual_loc = locations->InAt(i);
103 
104       parallel_move.AddMove(actual_loc, cc_loc, input->GetType(), nullptr);
105     }
106 
107     codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
108   }
109 
110   static void ComputeValueOfLocations(HInvoke* invoke,
111                                       CodeGenerator* codegen,
112                                       int32_t low,
113                                       int32_t length,
114                                       Location return_location,
115                                       Location first_argument_location);
116 
117   // Temporary data structure for holding BoxedType.valueOf data for generating code.
118   struct ValueOfInfo {
119     static constexpr uint32_t kInvalidReference = static_cast<uint32_t>(-1);
120 
121     ValueOfInfo();
122 
123     // Offset of the value field of the boxed object for initializing a newly allocated instance.
124     uint32_t value_offset;
125     // The low value in the cache.
126     int32_t low;
127     // The length of the cache array.
128     uint32_t length;
129 
130     // This union contains references to the boot image. For app AOT or JIT compilation,
131     // these are the boot image offsets of the target. For boot image compilation, the
132     // location shall be known only at link time, so we encode a symbolic reference using
133     // IntrinsicObjects::EncodePatch().
134     union {
135       // The target value for a constant input in the cache range. If the constant input
136       // is out of range (use `low` and `length` to check), this value is bogus (set to
137       // kInvalidReference) and the code must allocate a new Integer.
138       uint32_t value_boot_image_reference;
139 
140       // The cache array data used for a non-constant input in the cache range.
141       // If the input is out of range, the code must allocate a new Integer.
142       uint32_t array_data_boot_image_reference;
143     };
144   };
145 
146   static ValueOfInfo ComputeValueOfInfo(
147       HInvoke* invoke,
148       const CompilerOptions& compiler_options,
149       ArtField* value_field,
150       int32_t low,
151       int32_t length,
152       size_t base);
153 
154   static MemberOffset GetReferenceDisableIntrinsicOffset();
155   static MemberOffset GetReferenceSlowPathEnabledOffset();
156   static void CreateReferenceGetReferentLocations(HInvoke* invoke, CodeGenerator* codegen);
157   static void CreateReferenceRefersToLocations(HInvoke* invoke, CodeGenerator* codegen);
158 
159  protected:
IntrinsicVisitor()160   IntrinsicVisitor() {}
161 
162   static void AssertNonMovableStringClass();
163 
164  private:
165   DISALLOW_COPY_AND_ASSIGN(IntrinsicVisitor);
166 };
167 
IsIntrinsicWithSpecializedHir(Intrinsics intrinsic)168 static inline bool IsIntrinsicWithSpecializedHir(Intrinsics intrinsic) {
169   switch (intrinsic) {
170 #define OPTIMIZING_INTRINSICS_WITH_SPECIALIZED_HIR(Name, ...) \
171     case Intrinsics::k ## Name:
172       ART_INTRINSICS_WITH_SPECIALIZED_HIR_LIST(OPTIMIZING_INTRINSICS_WITH_SPECIALIZED_HIR)
173 #undef OPTIMIZING_INTRINSICS_WITH_SPECIALIZED_HIR
174       return true;
175     default:
176       return false;
177   }
178 }
179 
IsValidIntrinsicAfterBuilder(Intrinsics intrinsic)180 static inline bool IsValidIntrinsicAfterBuilder(Intrinsics intrinsic) {
181   return !IsIntrinsicWithSpecializedHir(intrinsic) ||
182          // FIXME: The inliner can currently create graphs with any of the intrinsics with HIR.
183          // However, we are able to compensate for `StringCharAt` and `StringLength` in the
184          // `HInstructionSimplifier`, so we're allowing these two intrinsics for now, preserving
185          // the old behavior. Besides fixing the bug, we should also clean up the simplifier
186          // and remove `SimplifyStringCharAt` and `SimplifyStringLength`. Bug: 319045458
187          intrinsic == Intrinsics::kStringCharAt ||
188          intrinsic == Intrinsics::kStringLength;
189 }
190 
191 #define GENERIC_OPTIMIZATION(name, bit)                \
192 public:                                                \
193 void Set##name() { SetBit(k##name); }                  \
194 bool Get##name() const { return IsBitSet(k##name); }   \
195 private:                                               \
196 static constexpr size_t k##name = bit
197 
198 class IntrinsicOptimizations : public ValueObject {
199  public:
IntrinsicOptimizations(HInvoke * invoke)200   explicit IntrinsicOptimizations(HInvoke* invoke)
201       : value_(invoke->GetIntrinsicOptimizations()) {}
IntrinsicOptimizations(const HInvoke & invoke)202   explicit IntrinsicOptimizations(const HInvoke& invoke)
203       : value_(invoke.GetIntrinsicOptimizations()) {}
204 
205   static constexpr int kNumberOfGenericOptimizations = 1;
206   GENERIC_OPTIMIZATION(DoesNotNeedEnvironment, 0);
207 
208  protected:
IsBitSet(uint32_t bit)209   bool IsBitSet(uint32_t bit) const {
210     DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte);
211     return (*value_ & (1 << bit)) != 0u;
212   }
213 
SetBit(uint32_t bit)214   void SetBit(uint32_t bit) {
215     DCHECK_LT(bit, sizeof(uint32_t) * kBitsPerByte);
216     *(const_cast<uint32_t* const>(value_)) |= (1 << bit);
217   }
218 
219  private:
220   const uint32_t* const value_;
221 
222   DISALLOW_COPY_AND_ASSIGN(IntrinsicOptimizations);
223 };
224 
225 #undef GENERIC_OPTIMIZATION
226 
227 #define INTRINSIC_OPTIMIZATION(name, bit)                             \
228 public:                                                               \
229 void Set##name() { SetBit(k##name); }                                 \
230 bool Get##name() const { return IsBitSet(k##name); }                  \
231 private:                                                              \
232 static constexpr size_t k##name = (bit) + kNumberOfGenericOptimizations
233 
234 class StringEqualsOptimizations : public IntrinsicOptimizations {
235  public:
StringEqualsOptimizations(HInvoke * invoke)236   explicit StringEqualsOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
237 
238   INTRINSIC_OPTIMIZATION(ArgumentNotNull, 0);
239   INTRINSIC_OPTIMIZATION(ArgumentIsString, 1);
240 
241  private:
242   DISALLOW_COPY_AND_ASSIGN(StringEqualsOptimizations);
243 };
244 
245 class SystemArrayCopyOptimizations : public IntrinsicOptimizations {
246  public:
SystemArrayCopyOptimizations(HInvoke * invoke)247   explicit SystemArrayCopyOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
248 
249   INTRINSIC_OPTIMIZATION(SourceIsNotNull, 0);
250   INTRINSIC_OPTIMIZATION(DestinationIsNotNull, 1);
251   INTRINSIC_OPTIMIZATION(DestinationIsSource, 2);
252   INTRINSIC_OPTIMIZATION(CountIsSourceLength, 3);
253   INTRINSIC_OPTIMIZATION(CountIsDestinationLength, 4);
254   INTRINSIC_OPTIMIZATION(DoesNotNeedTypeCheck, 5);
255   INTRINSIC_OPTIMIZATION(DestinationIsTypedObjectArray, 6);
256   INTRINSIC_OPTIMIZATION(DestinationIsNonPrimitiveArray, 7);
257   INTRINSIC_OPTIMIZATION(DestinationIsPrimitiveArray, 8);
258   INTRINSIC_OPTIMIZATION(SourceIsNonPrimitiveArray, 9);
259   INTRINSIC_OPTIMIZATION(SourceIsPrimitiveArray, 10);
260   INTRINSIC_OPTIMIZATION(SourcePositionIsDestinationPosition, 11);
261 
262  private:
263   DISALLOW_COPY_AND_ASSIGN(SystemArrayCopyOptimizations);
264 };
265 
266 class VarHandleOptimizations : public IntrinsicOptimizations {
267  public:
VarHandleOptimizations(HInvoke * invoke)268   explicit VarHandleOptimizations(HInvoke* invoke) : IntrinsicOptimizations(invoke) {}
269 
270   INTRINSIC_OPTIMIZATION(DoNotIntrinsify, 0);  // One of the checks is statically known to fail.
271   INTRINSIC_OPTIMIZATION(SkipObjectNullCheck, 1);  // Not applicable for static fields.
272 
273   // Use known `VarHandle` from the boot image. To apply this optimization, the following
274   // `VarHandle` checks must pass based on static analysis:
275   //   - `VarHandle` type check (must match the coordinate count),
276   //   - access mode check,
277   //   - var type check (including assignability for reference types),
278   //   - object type check (except for static field VarHandles that do not take an object).
279   // Note that the object null check is controlled by the above flag `SkipObjectNullCheck`
280   // and arrays and byte array views (which always need a range check and sometimes also
281   // array type check) are currently unsupported.
282   INTRINSIC_OPTIMIZATION(UseKnownImageVarHandle, 2);
283 };
284 
285 #undef INTRISIC_OPTIMIZATION
286 
287 //
288 // Macros for use in the intrinsics code generators.
289 //
290 
// Defines an unimplemented intrinsic: that is, a method call that is recognized as an
// intrinsic to exploit e.g. no side-effects or exceptions, but otherwise not handled
// by this architecture-specific intrinsics code generator. Eventually it is implemented
// as a true method call.
//
// Expands to empty definitions of `IntrinsicLocationsBuilder<Arch>::Visit<Name>()` and
// `IntrinsicCodeGenerator<Arch>::Visit<Name>()`, i.e. neither visitor does anything
// for the invoke.
#define UNIMPLEMENTED_INTRINSIC(Arch, Name)                                              \
  void IntrinsicLocationsBuilder##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {} \
  void IntrinsicCodeGenerator##Arch::Visit##Name([[maybe_unused]] HInvoke* invoke) {}
298 
// Defines the visitor methods of an unreached intrinsic: that is, a method call that is
// recognized as an intrinsic and then always converted into HIR instructions before it
// reaches any architecture-specific intrinsics code generator. This only applies to
// non-baseline compilation, so the locations builder aborts only when running as an
// AOT compiler without the baseline option, whereas the code generator always aborts.
#define UNREACHABLE_INTRINSIC(Arch, Name)                               \
  void IntrinsicLocationsBuilder##Arch::Visit##Name(HInvoke* invoke) {  \
    if (Runtime::Current()->IsAotCompiler() &&                          \
        !codegen_->GetCompilerOptions().IsBaseline()) {                 \
      LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic() \
                 << " should have been converted to HIR";               \
    }                                                                   \
  }                                                                     \
  void IntrinsicCodeGenerator##Arch::Visit##Name(HInvoke* invoke) {     \
    LOG(FATAL) << "Unreachable: intrinsic " << invoke->GetIntrinsic()   \
               << " should have been converted to HIR";                 \
  }

// Defines the full list of unreached intrinsics for the given architecture.
#define UNREACHABLE_INTRINSICS(Arch)                 \
  UNREACHABLE_INTRINSIC(Arch, FloatFloatToIntBits)   \
  UNREACHABLE_INTRINSIC(Arch, DoubleDoubleToLongBits)
318 
319 template <typename IntrinsicLocationsBuilder, typename Codegenerator>
IsCallFreeIntrinsic(HInvoke * invoke,Codegenerator * codegen)320 bool IsCallFreeIntrinsic(HInvoke* invoke, Codegenerator* codegen) {
321   if (invoke->GetIntrinsic() != Intrinsics::kNone) {
322     // This invoke may have intrinsic code generation defined. However, we must
323     // now also determine if this code generation is truly there and call-free
324     // (not unimplemented, no bail on instruction features, or call on slow path).
325     // This is done by actually calling the locations builder on the instruction
326     // and clearing out the locations once result is known. We assume this
327     // call only has creating locations as side effects!
328     // TODO: Avoid wasting Arena memory.
329     IntrinsicLocationsBuilder builder(codegen);
330     bool success = builder.TryDispatch(invoke) && !invoke->GetLocations()->CanCall();
331     invoke->SetLocations(nullptr);
332     return success;
333   }
334   return false;
335 }
336 
// Insert a `Float.floatToRawIntBits()` or `Double.doubleToRawLongBits()` intrinsic for a
// given input. These fake calls are needed on arm and riscv64 to satisfy type consistency
// checks while passing certain FP args in core registers for direct @CriticalNative calls.
//
// NOTE(review): `input_index` presumably selects which input of `invoke` gets the
// conversion; the definition is not in this header, confirm there.
void InsertFpToIntegralIntrinsic(HInvokeStaticOrDirect* invoke, size_t input_index);
341 
342 }  // namespace art
343 
344 #endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_H_
345