/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "load_store_elimination.h"

#include <algorithm>
#include <optional>
#include <sstream>
#include <variant>

#include "base/arena_allocator.h"
#include "base/arena_bit_vector.h"
#include "base/array_ref.h"
#include "base/bit_vector-inl.h"
#include "base/bit_vector.h"
#include "base/globals.h"
#include "base/indenter.h"
#include "base/iteration_range.h"
#include "base/scoped_arena_allocator.h"
#include "base/scoped_arena_containers.h"
#include "base/transform_iterator.h"
#include "escape.h"
#include "handle.h"
#include "load_store_analysis.h"
#include "mirror/class_loader.h"
#include "mirror/dex_cache.h"
#include "nodes.h"
#include "oat/stack_map.h"
#include "optimizing_compiler_stats.h"
#include "reference_type_propagation.h"
#include "side_effects_analysis.h"

/**
 * The general algorithm of load-store elimination (LSE).
 *
 * We use load-store analysis to collect a list of heap locations and perform
 * alias analysis of those heap locations. LSE then keeps track of a list of
 * heap values corresponding to the heap locations and stores that put those
 * values in these locations.
 *  - In phase 1, we visit basic blocks in reverse post order and for each basic
 *    block, visit instructions sequentially, recording heap values and looking
 *    for loads and stores to eliminate without relying on loop Phis.
 *  - In phase 2, we look for loads that can be replaced by creating loop Phis
 *    or using a loop-invariant value.
 *  - In phase 3, we determine which stores are dead and can be eliminated and
 *    based on that information we re-evaluate whether some kept stores are
 *    storing the same value as the value in the heap location; such stores are
 *    also marked for elimination.
 *  - In phase 4, we commit the changes, replacing loads marked for elimination
 *    in previous processing and removing stores not marked for keeping. We also
 *    remove allocations that are no longer needed.
 *  - In phase 5, we move allocations which only escape along some executions
 *    closer to their escape points and fix up non-escaping paths with their
 *    actual values, creating PHIs when needed.
 *
 * 1. Walk over blocks and their instructions.
 *
 * The initial set of heap values for a basic block is
 *  - For a loop header of an irreducible loop, all heap values are unknown.
 *  - For a loop header of a normal loop, all values unknown at the end of the
 *    preheader are initialized to unknown, other heap values are set to Phi
 *    placeholders as we cannot determine yet whether these values are known on
 *    all back-edges. We use Phi placeholders also for array heap locations with
 *    index defined inside the loop but this helps only when the value remains
 *    zero from the array allocation throughout the loop.
 *  - For catch blocks, we clear all assumptions since we arrived due to an
 *    instruction throwing.
 *  - For other basic blocks, we merge incoming values from the end of all
 *    predecessors. If any incoming value is unknown, the start value for this
 *    block is also unknown.
 *    Otherwise, if all the incoming values are the same
 *    (including the case of a single predecessor), the incoming value is used.
 *    Otherwise, we use a Phi placeholder to indicate different incoming values.
 *    We record whether such a Phi placeholder depends on a loop Phi placeholder.
 *
 * For each instruction in the block
 *  - If the instruction is a load from a heap location with a known value not
 *    dependent on a loop Phi placeholder, the load can be eliminated, either by
 *    using an existing instruction or by creating new Phi(s) instead. In order
 *    to maintain the validity of all heap locations during the optimization
 *    phase, we only record substitutes at this phase and the real elimination
 *    is delayed till the end of LSE. Loads that require a loop Phi placeholder
 *    replacement are recorded for processing later.
 *  - If the instruction is a store, it updates the heap value for the heap
 *    location with the stored value and records the store itself so that we can
 *    mark it for keeping if the value becomes observable. Heap values are
 *    invalidated for heap locations that may alias with the store instruction's
 *    heap location and their recorded stores are marked for keeping as they are
 *    now potentially observable. The store instruction can be eliminated unless
 *    the value stored is later needed, e.g. by a load from the same/aliased heap
 *    location, or the heap location persists at method return/deoptimization.
 *  - A store that stores the same value as the heap value is eliminated.
 *  - For newly instantiated instances, their heap values are initialized to
 *    language-defined default values.
 *  - Finalizable objects are considered as persisting at method
 *    return/deoptimization.
 *  - Some instructions such as invokes are treated as loading and invalidating
 *    all the heap values, depending on the instruction's side effects.
 *  - SIMD graphs (with VecLoad and VecStore instructions) are also handled. Any
 *    partial overlap access among ArrayGet/ArraySet/VecLoad/VecStore is seen as
 *    an alias and no load/store is eliminated in such a case.
 *
 * The time complexity of the initial phase has several components. The total
 * time for the initialization of heap values for all blocks is
 *    O(heap_locations * edges)
 * and the time complexity for simple instruction processing is
 *    O(instructions).
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 2. Process loads that depend on loop Phi placeholders.
 *
 * We go over these loads to determine whether they can be eliminated. We look
 * for the set of all Phi placeholders that feed the load and depend on a loop
 * Phi placeholder and, if we find no unknown value, we construct the necessary
 * Phi(s) or, if all other inputs are identical, i.e. the location does not
 * change in the loop, just use that input. If we do find an unknown input, this
 * must be from a loop back-edge and we replace the loop Phi placeholder with
 * unknown value and re-process loads and stores that previously depended on
 * loop Phi placeholders. This shall find at least one load of an unknown value
 * which is now known to be unreplaceable or a new unknown value on a back-edge
 * and we repeat this process until each load is either marked for replacement
 * or found to be unreplaceable. As we mark at least one additional loop Phi
 * placeholder as unreplaceable in each iteration, this process shall terminate.
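 *
 * For example (a hypothetical input sketch, not code from this compiler), a
 * location that does not change in the loop lets the loop Phi placeholder
 * collapse to its single non-loop input, so no Phi is created:
 *
 *   int sum(Foo f) {
 *     int x = f.x;   // pre-header: the value of f.x becomes known
 *     int s = 0;
 *     for (int i = 0; i < 10; ++i) {
 *       s += f.x;    // f.x is not written in the loop; the placeholder's only
 *                    // other input is the pre-header value, so this load is
 *                    // replaced by `x` without materializing a loop Phi
 *     }
 *     return s + x;
 *   }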
 *
 * The depth-first search for Phi placeholders in FindLoopPhisToMaterialize()
 * is limited by the number of Phi placeholders and their dependencies we need
 * to search with worst-case time complexity
 *    O(phi_placeholder_dependencies) .
 * The dependencies are usually just the Phi placeholders' potential inputs,
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault() for default value
 * replacement search, there are additional dependencies to consider, see below.
 *
 * In the successful case (no unknown inputs found) we use the Floyd-Warshall
 * algorithm to determine transitive closures for each found Phi placeholder,
 * and then match or materialize Phis from the smallest transitive closure,
 * so that we can determine if such a subset has a single other input. This has
 * time complexity
 *    O(phi_placeholders_found^3) .
 * Note that a successful TryReplacingLoopPhiPlaceholderWithDefault() does not
 * contribute to this as such Phi placeholders are replaced immediately.
 * The total time of all such successful cases has time complexity
 *    O(phi_placeholders^3)
 * because the found sets are disjoint and `Sum(n_i^3) <= Sum(n_i)^3`. A similar
 * argument applies to the searches used to find all successful cases, so their
 * total contribution is also just an insignificant
 *    O(phi_placeholder_dependencies) .
 * The materialization of Phis has an insignificant total time complexity
 *    O(phi_placeholders * edges) .
 *
 * If we find an unknown input, we re-process heap values and loads with a time
 * complexity that's the same as the phase 1 in the worst case. Adding this to
 * the depth-first search time complexity yields
 *    O(phi_placeholder_dependencies + heap_locations * edges + instructions)
 * for a single iteration. We can ignore the middle term as it's proportional
 * to the number of Phi placeholder inputs included in the first term. Using
 * the upper limit of the number of such iterations, the total time complexity is
 *    O((phi_placeholder_dependencies + instructions) * phi_placeholders) .
 *
 * The upper bound of Phi placeholder inputs is
 *    heap_locations * edges
 * but if we use TryReplacingLoopPhiPlaceholderWithDefault(), the dependencies
 * include other heap locations in predecessor blocks with the upper bound of
 *    heap_locations^2 * edges .
 * Using the estimate
 *    edges <= blocks^2
 * and
 *    phi_placeholders <= heap_locations * blocks ,
 * the worst-case time complexity of the
 *    O(phi_placeholder_dependencies * phi_placeholders)
 * term from unknown input cases is actually
 *    O(heap_locations^3 * blocks^3) ,
 * exactly as the estimate for the Floyd-Warshall parts of successful cases.
 * Adding the other term from the unknown input cases (to account for the case
 * with significantly more instructions than blocks and heap locations), the
 * phase 2 time complexity is
 *    O(heap_locations^3 * blocks^3 + heap_locations * blocks * instructions) .
 *
 * See the description of phase 3 for additional complexity due to matching of
 * existing Phis for replacing loads.
 *
 * 3. Determine which stores to keep and which to eliminate.
 *
 * During instruction processing in phase 1 and re-processing in phase 2, we are
 * keeping a record of the stores and Phi placeholders that become observable
 * and now propagate the observable Phi placeholders to all actual stores that
 * feed them. Having determined observable stores, we look for stores that just
 * overwrite the old value with the same value.
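 *
 * A simple shape of such a store (hypothetical input sketch; here the equality
 * is already visible during phase 1 processing, while phase 3 handles old
 * values that are only proven equal once Phi placeholders are resolved):
 *
 *   void reset(Foo f) {
 *     f.x = 0;
 *     f.x = 0;  // writes the value the location is already known to hold,
 *               // so this store is marked for elimination
 *   }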
 * Since ignoring non-observable stores
 * actually changes the old values in heap locations, we need to recalculate
 * Phi placeholder replacements but we proceed similarly to the previous phase.
 * We look for the set of all Phis that feed the old value replaced by the store
 * (but ignoring whether they depend on a loop Phi) and, if we find no unknown
 * value, we try to match existing Phis (we do not create new Phis anymore) or,
 * if all other inputs are identical, i.e. the location does not change in the
 * loop, just use that input. If this succeeds and the old value is identical to
 * the value we're storing, such a store shall be eliminated.
 *
 * The work is similar to the phase 2, except that we're not re-processing loads
 * and stores anymore, so the time complexity of phase 3 is
 *    O(heap_locations^3 * blocks^3) .
 *
 * There is additional complexity in matching existing Phis shared between the
 * phases 1, 2 and 3. We are never trying to match two or more Phis at the same
 * time (this could be difficult and slow), so each matching attempt is just
 * looking at Phis in the block (both old Phis and newly created Phis) and their
 * inputs. As we create at most `heap_locations` Phis in each block, the upper
 * bound on the number of Phis we look at is
 *    heap_locations * (old_phis + heap_locations)
 * and the worst-case time complexity is
 *    O(heap_locations^2 * edges + heap_locations * old_phis * edges) .
 * The first term is lower than one term in phase 2, so the relevant part is
 *    O(heap_locations * old_phis * edges) .
 *
 * 4. Replace loads and remove unnecessary stores and singleton allocations.
 *
 * A special type of objects called singletons are instantiated in the method
 * and have a single name, i.e. no aliases. Singletons have exclusive heap
 * locations since they have no aliases. Singletons are helpful in narrowing
 * down the life span of a heap location such that they do not always need to
 * participate in merging heap values. Allocation of a singleton can be
 * eliminated if that singleton is not used and does not persist at method
 * return/deoptimization.
 *
 * The time complexity of this phase is
 *    O(instructions + instruction_uses) .
 *
 * FIXME: The time complexities described above assume that the
 * HeapLocationCollector finds a heap location for an instruction in O(1)
 * time but it is currently O(heap_locations); this can be fixed by adding
 * a hash map to the HeapLocationCollector.
 */

namespace art HIDDEN {

#define LSE_VLOG \
  if (::art::LoadStoreElimination::kVerboseLoggingMode && VLOG_IS_ON(compiler)) LOG(INFO)

class HeapRefHolder;

// Use HGraphDelegateVisitor for which all VisitInvokeXXX() delegate to VisitInvoke().
class LSEVisitor final : private HGraphDelegateVisitor {
 public:
  LSEVisitor(HGraph* graph,
             const HeapLocationCollector& heap_location_collector,
             OptimizingCompilerStats* stats);

  void Run();

 private:
  class PhiPlaceholder {
   public:
    constexpr PhiPlaceholder() : block_id_(-1), heap_location_(-1) {}
    constexpr PhiPlaceholder(uint32_t block_id, size_t heap_location)
        : block_id_(block_id),
          heap_location_(dchecked_integral_cast<uint32_t>(heap_location)) {}

    constexpr PhiPlaceholder(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder(PhiPlaceholder&& p) = default;
    constexpr PhiPlaceholder& operator=(const PhiPlaceholder& p) = default;
    constexpr PhiPlaceholder& operator=(PhiPlaceholder&& p) = default;

    constexpr uint32_t GetBlockId() const {
      return block_id_;
    }

    constexpr size_t GetHeapLocation() const {
      return heap_location_;
    }

    constexpr bool Equals(const PhiPlaceholder& p2) const {
      return block_id_ == p2.block_id_ && heap_location_ == p2.heap_location_;
    }

    void Dump(std::ostream& oss) const {
      oss << "PhiPlaceholder[blk: " << block_id_ << ", heap_location_: " << heap_location_ << "]";
    }

   private:
    uint32_t block_id_;
    uint32_t heap_location_;
  };

  struct Marker {};

  class Value;

  class PriorValueHolder {
   public:
    constexpr explicit PriorValueHolder(Value prior);

    constexpr bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }
    constexpr bool IsPhi() const {
      return std::holds_alternative<PhiPlaceholder>(value_);
    }
    constexpr bool IsDefault() const {
      return std::holds_alternative<Marker>(value_);
    }
    constexpr PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(IsPhi());
      return std::get<PhiPlaceholder>(value_);
    }
    constexpr HInstruction* GetInstruction() const {
      DCHECK(IsInstruction());
      return std::get<HInstruction*>(value_);
    }

    Value ToValue() const;
    void Dump(std::ostream& oss) const;

    constexpr bool Equals(PriorValueHolder other) const {
      return value_ == other.value_;
    }

   private:
    std::variant<Marker, HInstruction*, PhiPlaceholder> value_;
  };

  friend constexpr bool operator==(const Marker&, const Marker&);
  friend constexpr bool operator==(const PriorValueHolder& p1, const PriorValueHolder& p2);
  friend constexpr bool operator==(const PhiPlaceholder& p1, const PhiPlaceholder& p2);
  friend std::ostream& operator<<(std::ostream& oss, const PhiPlaceholder& p2);

  class Value {
   public:
    enum class ValuelessType {
      kInvalid,
      kPureUnknown,
      kDefault,
    };
    struct MergedUnknownMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsNonLoopPhiMarker {
      PhiPlaceholder phi_;
    };
    struct NeedsLoopPhiMarker {
      PhiPlaceholder phi_;
    };

    static constexpr Value Invalid() {
      return Value(ValuelessType::kInvalid);
    }

    // An unknown heap value. Loads with such a value in the heap location cannot be eliminated.
    // A heap location can be set to an unknown heap value when:
    // - it is coming from outside the method,
    // - it is killed due to aliasing, or side effects, or merging with an unknown value.
    static constexpr Value PureUnknown() {
      return Value(ValuelessType::kPureUnknown);
    }

    static constexpr Value PartialUnknown(Value old_value) {
      if (old_value.IsInvalid() || old_value.IsPureUnknown()) {
        return PureUnknown();
      } else {
        return Value(PriorValueHolder(old_value));
      }
    }

    static constexpr Value MergedUnknown(PhiPlaceholder phi_placeholder) {
      return Value(MergedUnknownMarker{phi_placeholder});
    }

    // Default heap value after an allocation.
    // A heap location can be set to that value right after an allocation.
    static constexpr Value Default() {
      return Value(ValuelessType::kDefault);
    }

    static constexpr Value ForInstruction(HInstruction* instruction) {
      return Value(instruction);
    }

    static constexpr Value ForNonLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsNonLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForLoopPhiPlaceholder(PhiPlaceholder phi_placeholder) {
      return Value(NeedsLoopPhiMarker{phi_placeholder});
    }

    static constexpr Value ForPhiPlaceholder(PhiPlaceholder phi_placeholder, bool needs_loop_phi) {
      return needs_loop_phi ? ForLoopPhiPlaceholder(phi_placeholder)
                            : ForNonLoopPhiPlaceholder(phi_placeholder);
    }

    constexpr bool IsValid() const {
      return !IsInvalid();
    }

    constexpr bool IsInvalid() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kInvalid;
    }

    bool IsPartialUnknown() const {
      return std::holds_alternative<PriorValueHolder>(value_);
    }

    bool IsMergedUnknown() const {
      return std::holds_alternative<MergedUnknownMarker>(value_);
    }

    bool IsPureUnknown() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kPureUnknown;
    }

    bool IsUnknown() const {
      return IsPureUnknown() || IsMergedUnknown() || IsPartialUnknown();
    }

    bool IsDefault() const {
      return std::holds_alternative<ValuelessType>(value_) &&
             GetValuelessType() == ValuelessType::kDefault;
    }

    bool IsInstruction() const {
      return std::holds_alternative<HInstruction*>(value_);
    }

    bool NeedsNonLoopPhi() const {
      return std::holds_alternative<NeedsNonLoopPhiMarker>(value_);
    }

    bool NeedsLoopPhi() const {
      return std::holds_alternative<NeedsLoopPhiMarker>(value_);
    }

    bool NeedsPhi() const {
      return NeedsNonLoopPhi() || NeedsLoopPhi();
    }

    HInstruction* GetInstruction() const {
      DCHECK(IsInstruction()) << *this;
      return std::get<HInstruction*>(value_);
    }

    PriorValueHolder GetPriorValue() const {
      DCHECK(IsPartialUnknown());
      return std::get<PriorValueHolder>(value_);
    }

    PhiPlaceholder GetPhiPlaceholder() const {
      DCHECK(NeedsPhi() || IsMergedUnknown());
      if (NeedsNonLoopPhi()) {
        return std::get<NeedsNonLoopPhiMarker>(value_).phi_;
      } else if (NeedsLoopPhi()) {
        return std::get<NeedsLoopPhiMarker>(value_).phi_;
      } else {
        return std::get<MergedUnknownMarker>(value_).phi_;
      }
    }

    uint32_t GetMergeBlockId() const {
      DCHECK(IsMergedUnknown()) << this;
      return std::get<MergedUnknownMarker>(value_).phi_.GetBlockId();
    }

    HBasicBlock* GetMergeBlock(const HGraph* graph) const {
      DCHECK(IsMergedUnknown()) << *this;
      return graph->GetBlocks()[GetMergeBlockId()];
    }

    size_t GetHeapLocation() const {
      DCHECK(IsMergedUnknown() || NeedsPhi()) << this;
      return GetPhiPlaceholder().GetHeapLocation();
    }

    constexpr bool ExactEquals(Value other) const;
    constexpr bool Equals(Value other) const;

    constexpr bool Equals(HInstruction* instruction) const {
      return Equals(ForInstruction(instruction));
    }

    std::ostream& Dump(std::ostream& os) const;

    // Public for use with lists.
    constexpr Value() : value_(ValuelessType::kInvalid) {}

   private:
    using ValueHolder = std::variant<ValuelessType,
                                     HInstruction*,
                                     MergedUnknownMarker,
                                     NeedsNonLoopPhiMarker,
                                     NeedsLoopPhiMarker,
                                     PriorValueHolder>;
    constexpr ValuelessType GetValuelessType() const {
      return std::get<ValuelessType>(value_);
    }

    constexpr explicit Value(ValueHolder v) : value_(v) {}

    friend std::ostream& operator<<(std::ostream& os, const Value& v);

    ValueHolder value_;

    static_assert(std::is_move_assignable<PhiPlaceholder>::value);
  };

  friend constexpr bool operator==(const Value::NeedsLoopPhiMarker& p1,
                                   const Value::NeedsLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::NeedsNonLoopPhiMarker& p1,
                                   const Value::NeedsNonLoopPhiMarker& p2);
  friend constexpr bool operator==(const Value::MergedUnknownMarker& p1,
                                   const Value::MergedUnknownMarker& p2);
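  // Illustration of the placeholder indexing used below (hypothetical numbers):
  // with 3 heap locations, the placeholder for (block 2, heap location 1) lives
  // at index 2 * 3 + 1 = 7, and GetPhiPlaceholderAt(7) decomposes it back into
  // blk_id = 7 / 3 = 2 and id = 7 % 3 = 1.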
  // Get Phi placeholder index for access to `phi_placeholder_replacements_`
  // and "visited" bit vectors during depth-first searches.
  size_t PhiPlaceholderIndex(PhiPlaceholder phi_placeholder) const {
    size_t res =
        phi_placeholder.GetBlockId() * heap_location_collector_.GetNumberOfHeapLocations() +
        phi_placeholder.GetHeapLocation();
    DCHECK_EQ(phi_placeholder, GetPhiPlaceholderAt(res))
        << res << " blks: " << GetGraph()->GetBlocks().size()
        << " hls: " << heap_location_collector_.GetNumberOfHeapLocations();
    return res;
  }

  size_t PhiPlaceholderIndex(Value phi_placeholder) const {
    return PhiPlaceholderIndex(phi_placeholder.GetPhiPlaceholder());
  }

  bool IsEscapingObject(ReferenceInfo* info) {
    return !info->IsSingletonAndRemovable();
  }

  PhiPlaceholder GetPhiPlaceholderAt(size_t off) const {
    DCHECK_LT(off, num_phi_placeholders_);
    size_t id = off % heap_location_collector_.GetNumberOfHeapLocations();
    // Technically this should be (off - id) / NumberOfHeapLocations
    // but due to truncation it's all the same.
    size_t blk_id = off / heap_location_collector_.GetNumberOfHeapLocations();
    return GetPhiPlaceholder(blk_id, id);
  }

  PhiPlaceholder GetPhiPlaceholder(uint32_t block_id, size_t idx) const {
    DCHECK(GetGraph()->GetBlocks()[block_id] != nullptr) << block_id;
    return PhiPlaceholder(block_id, idx);
  }

  Value Replacement(Value value) const {
    DCHECK(value.NeedsPhi()) << value << " phase: " << current_phase_;
    Value replacement = phi_placeholder_replacements_[PhiPlaceholderIndex(value)];
    DCHECK(replacement.IsUnknown() || replacement.IsInstruction());
    DCHECK(replacement.IsUnknown() ||
           FindSubstitute(replacement.GetInstruction()) == replacement.GetInstruction());
    return replacement;
  }

  Value ReplacementOrValue(Value value) const {
    if (value.NeedsPhi() && phi_placeholder_replacements_[PhiPlaceholderIndex(value)].IsValid()) {
      return Replacement(value);
    } else {
      DCHECK_IMPLIES(value.IsInstruction(),
                     FindSubstitute(value.GetInstruction()) == value.GetInstruction());
      return value;
    }
  }

  // The record of a heap value and instruction(s) that feed that value.
  struct ValueRecord {
    Value value;
    Value stored_by;
  };
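  // Why a removed load may need a type conversion (hypothetical input sketch,
  // field names are illustrative): the tracked heap value can have a wider
  // type than the load that it replaces, e.g.
  //
  //   void f(Foo o) {
  //     o.byteField = 5;       // tracked heap value: the int constant 5
  //     byte b = o.byteField;  // load eliminated; an int->byte TypeConversion
  //                            // of the tracked value is used as substitute
  //   }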
  HTypeConversion* FindOrAddTypeConversionIfNecessary(HInstruction* instruction,
                                                      HInstruction* value,
                                                      DataType::Type expected_type) {
    // Should never add type conversion into boolean value.
    if (expected_type == DataType::Type::kBool ||
        DataType::IsTypeConversionImplicit(value->GetType(), expected_type) ||
        // TODO: This prevents type conversion of default values but we can still insert
        // type conversion of other constants and there is no constant folding pass after LSE.
        IsZeroBitPattern(value)) {
      return nullptr;
    }

    // Check if there is already a suitable TypeConversion we can reuse.
    for (const HUseListNode<HInstruction*>& use : value->GetUses()) {
      if (use.GetUser()->IsTypeConversion() &&
          use.GetUser()->GetType() == expected_type &&
          // TODO: We could move the TypeConversion to a common dominator
          // if it does not cross irreducible loop header.
          use.GetUser()->GetBlock()->Dominates(instruction->GetBlock()) &&
          // Don't share across irreducible loop headers.
          // TODO: can be more fine-grained than this by testing each dominator.
          (use.GetUser()->GetBlock() == instruction->GetBlock() ||
           !GetGraph()->HasIrreducibleLoops())) {
        if (use.GetUser()->GetBlock() == instruction->GetBlock() &&
            use.GetUser()->GetBlock()->GetInstructions().FoundBefore(instruction, use.GetUser())) {
          // Move the TypeConversion before the instruction.
          use.GetUser()->MoveBefore(instruction);
        }
        DCHECK(use.GetUser()->StrictlyDominates(instruction));
        return use.GetUser()->AsTypeConversion();
      }
    }

    // We must create a new TypeConversion instruction.
    HTypeConversion* type_conversion = new (GetGraph()->GetAllocator())
        HTypeConversion(expected_type, value, instruction->GetDexPc());
    instruction->GetBlock()->InsertInstructionBefore(type_conversion, instruction);
    return type_conversion;
  }

  // Find an instruction's substitute if it's a removed load.
  // Return the same instruction if it should not be removed.
  HInstruction* FindSubstitute(HInstruction* instruction) const {
    size_t id = static_cast<size_t>(instruction->GetId());
    if (id >= substitute_instructions_for_loads_.size()) {
      // New Phi (may not be in the graph yet), or default value.
      DCHECK(!IsLoad(instruction));
      return instruction;
    }
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    DCHECK(substitute == nullptr || IsLoad(instruction));
    return (substitute != nullptr) ? substitute : instruction;
  }

  void AddRemovedLoad(HInstruction* load, HInstruction* heap_value) {
    DCHECK(IsLoad(load));
    DCHECK_EQ(FindSubstitute(load), load);
    DCHECK_EQ(FindSubstitute(heap_value), heap_value)
        << "Unexpected heap_value that has a substitute " << heap_value->DebugName();

    // The load expects to load the heap value as type load->GetType().
    // However the tracked heap value may not be of that type. An explicit
    // type conversion may be needed.
    // There are actually three types involved here:
    // (1) tracked heap value's type (type A)
    // (2) heap location (field or element)'s type (type B)
    // (3) load's type (type C)
    // We guarantee that type A stored as type B and then fetched out as
    // type C is the same as casting from type A to type C directly, since
    // type B and type C will have the same size which is guaranteed in
    // HInstanceFieldGet/HStaticFieldGet/HArrayGet/HVecLoad's SetType().
    // So we only need one type conversion from type A to type C.
    HTypeConversion* type_conversion =
        FindOrAddTypeConversionIfNecessary(load, heap_value, load->GetType());

    substitute_instructions_for_loads_[load->GetId()] =
        type_conversion != nullptr ? type_conversion : heap_value;
  }

  static bool IsLoad(HInstruction* instruction) {
    // Unresolved load is not treated as a load.
    return instruction->IsInstanceFieldGet() ||
           instruction->IsStaticFieldGet() ||
           instruction->IsVecLoad() ||
           instruction->IsArrayGet();
  }

  static bool IsStore(HInstruction* instruction) {
    // Unresolved store is not treated as a store.
    return instruction->IsInstanceFieldSet() ||
           instruction->IsArraySet() ||
           instruction->IsVecStore() ||
           instruction->IsStaticFieldSet();
  }

  // Check if it is allowed to use default values or Phis for the specified load.
  static bool IsDefaultOrPhiAllowedForLoad(HInstruction* instruction) {
    DCHECK(IsLoad(instruction));
    // Using defaults for VecLoads requires creating additional vector operations.
    // As there are some issues with scheduling vector operations it is better to avoid
    // creating them.
    return !instruction->IsVecOperation();
  }

  // Keep the store referenced by the instruction, or all stores that feed a Phi placeholder.
  // This is necessary if the stored heap value can be observed.
  void KeepStores(Value value) {
    if (value.IsPureUnknown() || value.IsPartialUnknown()) {
      return;
    }
    if (value.IsMergedUnknown() || value.NeedsPhi()) {
      phi_placeholders_to_search_for_kept_stores_.SetBit(PhiPlaceholderIndex(value));
    } else {
      HInstruction* instruction = value.GetInstruction();
      DCHECK(IsStore(instruction));
      kept_stores_.SetBit(instruction->GetId());
    }
  }

  // If a heap location X may alias with heap location at `loc_index`
  // and heap_values of that heap location X holds a store, keep that store.
  // It's needed for a dependent load that's not eliminated since any store
  // that may put value into the load's heap location needs to be kept.
  void KeepStoresIfAliasedToLocation(ScopedArenaVector<ValueRecord>& heap_values,
                                     size_t loc_index) {
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      if (i == loc_index) {
        // We use this function when reading a location with unknown value and
        // therefore we cannot know what exact store wrote that unknown value.
        // But we can have a phi placeholder here marking multiple stores to keep.
        DCHECK(!heap_values[i].stored_by.IsInstruction());
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      } else if (heap_location_collector_.MayAlias(i, loc_index)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  HInstruction* GetDefaultValue(DataType::Type type) {
    switch (type) {
      case DataType::Type::kReference:
        return GetGraph()->GetNullConstant();
      case DataType::Type::kBool:
      case DataType::Type::kUint8:
      case DataType::Type::kInt8:
      case DataType::Type::kUint16:
      case DataType::Type::kInt16:
      case DataType::Type::kInt32:
        return GetGraph()->GetIntConstant(0);
      case DataType::Type::kInt64:
        return GetGraph()->GetLongConstant(0);
      case DataType::Type::kFloat32:
        return GetGraph()->GetFloatConstant(0);
      case DataType::Type::kFloat64:
        return GetGraph()->GetDoubleConstant(0);
      default:
        UNREACHABLE();
    }
  }

  bool CanValueBeKeptIfSameAsNew(Value value,
                                 HInstruction* new_value,
                                 HInstruction* new_value_set_instr) {
    // For field/array set location operations, if the value is the same as the new_value
    // it can be kept even if aliasing happens. All aliased operations will access the same memory
    // range.
    // For vector values, this is not true. For example:
    //  packed_data = [0xA, 0xB, 0xC, 0xD];            <-- Different values in each lane.
    //  VecStore array[i ,i+1,i+2,i+3] = packed_data;
    //  VecStore array[i+1,i+2,i+3,i+4] = packed_data; <-- We are here (partial overlap).
    //  VecLoad vx = array[i,i+1,i+2,i+3];             <-- Cannot be eliminated because the value
    //                                                     here is not packed_data anymore.
    //
    // TODO: to allow such 'same value' optimization on vector data,
    // LSA needs to report more fine-grained MAY alias information:
    //  (1) May alias due to two vector data partial overlap.
    //      e.g. a[i..i+3] and a[i+1,..,i+4].
    //  (2) May alias due to two vector data may complete overlap each other.
    //      e.g. a[i..i+3] and b[i..i+3].
    //  (3) May alias but the exact relationship between two locations is unknown.
    //      e.g. a[i..i+3] and b[j..j+3], where values of a,b,i,j are all unknown.
    // This 'same value' optimization can apply only on case (2).
    if (new_value_set_instr->IsVecOperation()) {
      return false;
    }

    return value.Equals(new_value);
  }

  Value PrepareLoopValue(HBasicBlock* block, size_t idx);
  Value PrepareLoopStoredBy(HBasicBlock* block, size_t idx);
  void PrepareLoopRecords(HBasicBlock* block);
  Value MergePredecessorValues(HBasicBlock* block, size_t idx);
  void MergePredecessorRecords(HBasicBlock* block);

  void MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type);

  void VisitGetLocation(HInstruction* instruction, size_t idx);
  void VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value);
  void RecordFieldInfo(const FieldInfo* info, size_t heap_loc) {
    field_infos_[heap_loc] = info;
  }

  void VisitBasicBlock(HBasicBlock* block) override;

  enum class Phase {
    kLoadElimination,
    kStoreElimination,
  };

  bool MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const;

  bool TryReplacingLoopPhiPlaceholderWithDefault(
      PhiPlaceholder phi_placeholder,
      DataType::Type type,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  bool TryReplacingLoopPhiPlaceholderWithSingleInput(
      PhiPlaceholder phi_placeholder,
      /*inout*/ ArenaBitVector* phi_placeholders_to_materialize);
  std::optional<PhiPlaceholder> FindLoopPhisToMaterialize(
      PhiPlaceholder phi_placeholder,
      /*out*/ ArenaBitVector* phi_placeholders_to_materialize,
      DataType::Type type,
      bool can_use_default_or_phi);
  bool MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                           DataType::Type type);
  bool MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes, DataType::Type type);
  bool MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                           DataType::Type type);
  bool FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type);
  std::optional<PhiPlaceholder> TryToMaterializeLoopPhis(PhiPlaceholder phi_placeholder,
                                                         HInstruction* load);
  void ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input);
  void ProcessLoadsRequiringLoopPhis();

  void SearchPhiPlaceholdersForKeptStores();
  void UpdateValueRecordForStoreElimination(/*inout*/ ValueRecord* value_record);
  void FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder, DataType::Type type);
  void FindStoresWritingOldValues();
  void FinishFullLSE();

  void HandleAcquireLoad(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldGet() && instruction->AsInstanceFieldGet()->IsVolatile()) ||
           (instruction->IsStaticFieldGet() && instruction->AsStaticFieldGet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Acquire operations e.g. MONITOR_ENTER change the thread's view of the memory, so we must
    // invalidate all current values.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::PureUnknown();
      heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
    }

    // Note that there's no need to record the load as subsequent acquire loads shouldn't be
    // eliminated either.
  }
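  // Illustration (hypothetical input sketch): an acquire load drops every
  // tracked value, so later loads cannot reuse values cached across it:
  //
  //   int f(Foo f) {       // with 'volatile int v;' declared in Foo
  //     int a = f.x;       // f.x is now tracked
  //     int b = f.v;       // acquire load: all tracked values invalidated
  //     return a + f.x;    // this f.x load cannot be replaced by 'a'
  //   }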
  void HandleReleaseStore(HInstruction* instruction) {
    DCHECK((instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->IsVolatile()) ||
           (instruction->IsStaticFieldSet() && instruction->AsStaticFieldSet()->IsVolatile()) ||
           (instruction->IsMonitorOperation() && !instruction->AsMonitorOperation()->IsEnter()))
        << "Unexpected instruction " << instruction->GetId() << ": " << instruction->DebugName();

    // Release operations e.g. MONITOR_EXIT do not affect this thread's view of the memory, but
    // they will push the modifications for other threads to see. Therefore, we must keep the
    // stores but there's no need to clobber the value.
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      KeepStores(heap_values[i].stored_by);
      heap_values[i].stored_by = Value::PureUnknown();
    }

    // Note that there's no need to record the store as subsequent release stores shouldn't be
    // eliminated either.
  }

  void VisitInstanceFieldGet(HInstanceFieldGet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleAcquireLoad(instruction);
        return;
      }
      // Treat it as a normal load if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(object, &field));
  }

  void VisitInstanceFieldSet(HInstanceFieldSet* instruction) override {
    HInstruction* object = instruction->InputAt(0);
    if (instruction->IsVolatile()) {
      ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
          heap_location_collector_.HuntForOriginalReference(object));
      if (!ref_info->IsSingletonAndRemovable()) {
        HandleReleaseStore(instruction);
        return;
      }
      // Treat it as a normal store if it is a removable singleton.
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(object, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitStaticFieldGet(HStaticFieldGet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleAcquireLoad(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    VisitGetLocation(instruction, heap_location_collector_.GetFieldHeapLocation(cls, &field));
  }

  void VisitStaticFieldSet(HStaticFieldSet* instruction) override {
    if (instruction->IsVolatile()) {
      HandleReleaseStore(instruction);
      return;
    }

    const FieldInfo& field = instruction->GetFieldInfo();
    HInstruction* cls = instruction->InputAt(0);
    HInstruction* value = instruction->InputAt(1);
    size_t idx = heap_location_collector_.GetFieldHeapLocation(cls, &field);
    VisitSetLocation(instruction, idx, value);
  }

  void VisitMonitorOperation(HMonitorOperation* monitor_op) override {
    HInstruction* object = monitor_op->InputAt(0);
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(
        heap_location_collector_.HuntForOriginalReference(object));
    if (ref_info->IsSingletonAndRemovable()) {
      // If the object is a removable singleton, we know that no other threads will have
      // access to it, and we can remove the MonitorOperation instruction.
      // MONITOR_ENTER throws when encountering a null object.
      // If `object` is a removable singleton, it is guaranteed to be non-null
      // so we don't have to worry about the NullCheck.
      DCHECK(!object->CanBeNull());
      monitor_op->GetBlock()->RemoveInstruction(monitor_op);
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedMonitorOp);
      return;
    }

    // We detected a monitor operation that we couldn't remove. See also LSEVisitor::Run().
    monitor_op->GetBlock()->GetGraph()->SetHasMonitorOperations(true);

    if (monitor_op->IsEnter()) {
      HandleAcquireLoad(monitor_op);
    } else {
      HandleReleaseStore(monitor_op);
    }
  }

  void VisitArrayGet(HArrayGet* instruction) override {
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitArraySet(HArraySet* instruction) override {
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitVecLoad(HVecLoad* instruction) override {
    DCHECK(!instruction->IsPredicated());
    VisitGetLocation(instruction, heap_location_collector_.GetArrayHeapLocation(instruction));
  }

  void VisitVecStore(HVecStore* instruction) override {
    DCHECK(!instruction->IsPredicated());
    size_t idx = heap_location_collector_.GetArrayHeapLocation(instruction);
    VisitSetLocation(instruction, idx, instruction->GetValue());
  }

  void VisitDeoptimize(HDeoptimize* instruction) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = instruction->GetBlock()->IsTryBlock();
    HBasicBlock* block = instruction->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      Value* stored_by = &heap_values[i].stored_by;
      if (stored_by->IsUnknown()) {
        continue;
      }
      // Stores are generally observable after deoptimization, except
      // for singletons that don't escape in the deoptimization environment.
      bool observable = true;
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (!inside_a_try && info->IsSingleton()) {
        HInstruction* reference = info->GetReference();
        // Finalizable objects always escape.
        const bool finalizable_object =
            reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
        if (!finalizable_object && !IsEscapingObject(info)) {
          // Check whether the reference for a store is used by an environment local of
          // the HDeoptimize. If not, the singleton is not observed after deoptimization.
          const HUseList<HEnvironment*>& env_uses = reference->GetEnvUses();
          observable = std::any_of(
              env_uses.begin(),
              env_uses.end(),
              [instruction](const HUseListNode<HEnvironment*>& use) {
                return use.GetUser()->GetHolder() == instruction;
              });
        }
      }
      if (observable) {
        KeepStores(*stored_by);
        *stored_by = Value::PureUnknown();
      }
    }
  }
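  // Illustration (hypothetical input sketch): stores into a non-escaping
  // singleton that is not referenced by the deoptimization environment do not
  // have to survive an HDeoptimize:
  //
  //   int f(int x) {
  //     Point p = new Point();  // singleton, never escapes
  //     p.x = x;                // 'p' not in the deopt environment below
  //     ...                     // guard that may deoptimize
  //     return p.x;             // load replaced by 'x'; the store can go away
  //   }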
  // Keep necessary stores before exiting a method via return/throw.
  void HandleExit(HBasicBlock* block, bool must_keep_stores = false) {
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      if (must_keep_stores || IsEscapingObject(ref_info)) {
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  void VisitReturn(HReturn* instruction) override {
    HandleExit(instruction->GetBlock());
  }

  void VisitReturnVoid(HReturnVoid* return_void) override {
    HandleExit(return_void->GetBlock());
  }

  void HandleThrowingInstruction(HInstruction* instruction) {
    DCHECK(instruction->CanThrow());
    // If we are inside of a try, singletons can become visible since we may not exit the method.
    HandleExit(instruction->GetBlock(), instruction->GetBlock()->IsTryBlock());
  }

  void VisitMethodEntryHook(HMethodEntryHook* method_entry) override {
    HandleThrowingInstruction(method_entry);
  }

  void VisitMethodExitHook(HMethodExitHook* method_exit) override {
    HandleThrowingInstruction(method_exit);
  }

  void VisitDivZeroCheck(HDivZeroCheck* div_zero_check) override {
    HandleThrowingInstruction(div_zero_check);
  }

  void VisitNullCheck(HNullCheck* null_check) override {
    HandleThrowingInstruction(null_check);
  }

  void VisitBoundsCheck(HBoundsCheck* bounds_check) override {
    HandleThrowingInstruction(bounds_check);
  }

  void VisitLoadClass(HLoadClass* load_class) override {
    if (load_class->CanThrow()) {
      HandleThrowingInstruction(load_class);
    }
  }

  void VisitLoadString(HLoadString* load_string) override {
    if (load_string->CanThrow()) {
      HandleThrowingInstruction(load_string);
    }
  }

  void VisitLoadMethodHandle(HLoadMethodHandle* load_method_handle) override {
    HandleThrowingInstruction(load_method_handle);
  }

  void VisitLoadMethodType(HLoadMethodType* load_method_type) override {
    HandleThrowingInstruction(load_method_type);
  }

  void VisitStringBuilderAppend(HStringBuilderAppend* sb_append) override {
    HandleThrowingInstruction(sb_append);
  }

  void VisitThrow(HThrow* throw_instruction) override {
    HandleThrowingInstruction(throw_instruction);
  }

  void VisitCheckCast(HCheckCast* check_cast) override {
    HandleThrowingInstruction(check_cast);
  }
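  // Illustration (hypothetical input sketch): a call makes previously recorded
  // stores visible to the callee, so they must be kept and the tracked values
  // dropped for anything that may have escaped:
  //
  //   int f(Foo f) {
  //     f.x = 1;      // must be kept: the callee below may read it
  //     helper(f);    // invoke: reads/writes possible, values clobbered
  //     return f.x;   // load cannot be eliminated
  //   }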
  void HandleInvoke(HInstruction* instruction) {
    // If `instruction` can throw we have to presume all stores are visible.
    const bool can_throw = instruction->CanThrow();
    // If we are in a try, even singletons are observable.
    const bool can_throw_inside_a_try = can_throw && instruction->GetBlock()->IsTryBlock();
    SideEffects side_effects = instruction->GetSideEffects();
    ScopedArenaVector<ValueRecord>& heap_values =
        heap_values_for_[instruction->GetBlock()->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      // We don't need to do anything if the reference has not escaped at this point.
      // This is true if we never escape.
      if (!can_throw_inside_a_try && ref_info->IsSingleton()) {
        // Singleton references cannot be seen by the callee.
      } else {
        if (can_throw || side_effects.DoesAnyRead() || side_effects.DoesAnyWrite()) {
          // Previous stores may become visible (read) and/or impossible for LSE to track (write).
          KeepStores(heap_values[i].stored_by);
          heap_values[i].stored_by = Value::PureUnknown();
        }
        if (side_effects.DoesAnyWrite()) {
          // The value may be clobbered.
          heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
        }
      }
    }
  }

  void VisitInvoke(HInvoke* invoke) override {
    HandleInvoke(invoke);
  }

  void VisitClinitCheck(HClinitCheck* clinit) override {
    // Class initialization check can result in class initializer calling arbitrary methods.
    HandleInvoke(clinit);
  }

  void VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet* instruction) override {
    // Conservatively treat it as an invocation.
    HandleInvoke(instruction);
  }

  void VisitNewInstance(HNewInstance* new_instance) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_instance->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_instance);
    if (ref_info == nullptr) {
      // new_instance isn't used for field accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable() && !new_instance->NeedsChecks()) {
      DCHECK(!new_instance->IsFinalizable());
      // new_instance can potentially be eliminated.
      singleton_new_instances_.push_back(new_instance);
    }
    HBasicBlock* block = new_instance->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      ReferenceInfo* info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      size_t offset = heap_location_collector_.GetHeapLocation(i)->GetOffset();
      if (ref == new_instance) {
        if (offset >= mirror::kObjectHeaderSize ||
            MemberOffset(offset) == mirror::Object::MonitorOffset()) {
          // Instance fields except the header fields are set to default heap values.
          // The shadow$_monitor_ field is set to the default value however.
          heap_values[i].value = Value::Default();
          heap_values[i].stored_by = Value::PureUnknown();
        } else if (MemberOffset(offset) == mirror::Object::ClassOffset()) {
          // The shadow$_klass_ field is special and has an actual value however.
          heap_values[i].value = Value::ForInstruction(new_instance->GetLoadClass());
          heap_values[i].stored_by = Value::PureUnknown();
        }
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewInstance can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  void VisitNewArray(HNewArray* new_array) override {
    // If we are in a try, even singletons are observable.
    const bool inside_a_try = new_array->GetBlock()->IsTryBlock();
    ReferenceInfo* ref_info = heap_location_collector_.FindReferenceInfoOf(new_array);
    if (ref_info == nullptr) {
      // new_array isn't used for array accesses. No need to process it.
      return;
    }
    if (ref_info->IsSingletonAndRemovable()) {
      if (new_array->GetLength()->IsIntConstant() &&
          new_array->GetLength()->AsIntConstant()->GetValue() >= 0) {
        // new_array can potentially be eliminated.
        singleton_new_instances_.push_back(new_array);
      } else {
        // new_array may throw NegativeArraySizeException. Keep it.
      }
    }
    HBasicBlock* block = new_array->GetBlock();
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
      HeapLocation* location = heap_location_collector_.GetHeapLocation(i);
      ReferenceInfo* info = location->GetReferenceInfo();
      HInstruction* ref = info->GetReference();
      if (ref == new_array && location->GetIndex() != nullptr) {
        // Array elements are set to default heap values.
        heap_values[i].value = Value::Default();
        heap_values[i].stored_by = Value::PureUnknown();
      } else if (inside_a_try || IsEscapingObject(info)) {
        // Since NewArray can throw, we presume all previous stores could be visible.
        KeepStores(heap_values[i].stored_by);
        heap_values[i].stored_by = Value::PureUnknown();
      }
    }
  }

  void VisitInstruction(HInstruction* instruction) override {
    // Throwing instructions must be handled specially.
    DCHECK(!instruction->CanThrow());
  }

  const HeapLocationCollector& heap_location_collector_;

  // Use local allocator for allocating memory.
  ScopedArenaAllocator allocator_;

  // The number of unique phi_placeholders there possibly are.
  size_t num_phi_placeholders_;

  // One array of heap value records for each block.
  ScopedArenaVector<ScopedArenaVector<ValueRecord>> heap_values_for_;

  // We record loads and stores for re-processing when we find a loop Phi placeholder
  // with unknown value from a predecessor, and also for removing stores that are
  // found to be dead, i.e. not marked in `kept_stores_` at the end.
  struct LoadStoreRecord {
    HInstruction* load_or_store;
    size_t heap_location_index;
  };
  ScopedArenaVector<LoadStoreRecord> loads_and_stores_;

  // We record the substitute instructions for loads that should be
  // eliminated but may be used by heap locations. They'll be removed
  // in the end. These are indexed by the load's id.
  ScopedArenaVector<HInstruction*> substitute_instructions_for_loads_;

  // Record stores to keep in a bit vector indexed by instruction ID.
  ArenaBitVector kept_stores_;

  // When we need to keep all stores that feed a Phi placeholder, we just record the
  // index of that placeholder for processing after graph traversal.
  ArenaBitVector phi_placeholders_to_search_for_kept_stores_;

  // Loads that would require a loop Phi to replace are recorded for processing
  // later as we do not have enough information from back-edges to determine if
  // a suitable Phi can be found or created when we visit these loads.
  // This is a flat "map" indexed by the load instruction id.
  ScopedArenaVector<ValueRecord> loads_requiring_loop_phi_;

  // For stores, record the old value records that were replaced and the stored values.
  struct StoreRecord {
    StoreRecord(ValueRecord old_value_record_in, HInstruction* stored_value_in)
        : old_value_record(old_value_record_in), stored_value(stored_value_in) {}
    ValueRecord old_value_record;
    HInstruction* stored_value;
  };
  // This is a flat "map" indexed by the store instruction id.
  ScopedArenaVector<StoreRecord> store_records_;

  // Replacements for Phi placeholders.
  // The invalid heap value is used to mark Phi placeholders that cannot be replaced.
  ScopedArenaVector<Value> phi_placeholder_replacements_;

  ScopedArenaVector<HInstruction*> singleton_new_instances_;

  // The field infos for each heap location (if relevant).
  ScopedArenaVector<const FieldInfo*> field_infos_;

  Phase current_phase_;

  friend struct ScopedRestoreHeapValues;

  friend std::ostream& operator<<(std::ostream& os, const Value& v);
  friend std::ostream& operator<<(std::ostream& os, const PriorValueHolder& v);
  friend std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase);

  DISALLOW_COPY_AND_ASSIGN(LSEVisitor);
};

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PriorValueHolder& p) {
  p.Dump(oss);
  return oss;
}

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::Phase& phase) {
  switch (phase) {
    case LSEVisitor::Phase::kLoadElimination:
      return oss << "kLoadElimination";
    case LSEVisitor::Phase::kStoreElimination:
      return oss << "kStoreElimination";
  }
}

void LSEVisitor::PriorValueHolder::Dump(std::ostream& oss) const {
  if (IsDefault()) {
    oss << "Default";
  } else if (IsPhi()) {
    oss << "Phi: " << GetPhiPlaceholder();
  } else {
    oss << "Instruction: " << *GetInstruction();
  }
}

constexpr LSEVisitor::PriorValueHolder::PriorValueHolder(Value val) : value_(Marker{}) {
  DCHECK(!val.IsInvalid() && !val.IsPureUnknown());
  if (val.IsPartialUnknown()) {
    value_ = val.GetPriorValue().value_;
  } else if (val.IsMergedUnknown() || val.NeedsPhi()) {
    value_ = val.GetPhiPlaceholder();
  } else if (val.IsInstruction()) {
    value_ = val.GetInstruction();
  } else {
    DCHECK(val.IsDefault());
  }
}

constexpr bool operator==(const LSEVisitor::Marker&, const LSEVisitor::Marker&) {
  return true;
}

constexpr bool operator==(const LSEVisitor::PriorValueHolder& p1,
                          const LSEVisitor::PriorValueHolder& p2) {
  return p1.Equals(p2);
}

constexpr bool operator==(const LSEVisitor::PhiPlaceholder& p1,
                          const LSEVisitor::PhiPlaceholder& p2) {
  return p1.Equals(p2);
}

constexpr bool operator==(const LSEVisitor::Value::NeedsLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

constexpr bool operator==(const LSEVisitor::Value::NeedsNonLoopPhiMarker& p1,
                          const LSEVisitor::Value::NeedsNonLoopPhiMarker& p2) {
  return p1.phi_ == p2.phi_;
}

constexpr bool operator==(const LSEVisitor::Value::MergedUnknownMarker& p1,
                          const LSEVisitor::Value::MergedUnknownMarker& p2) {
  return p1.phi_ == p2.phi_;
}

std::ostream& operator<<(std::ostream& oss, const LSEVisitor::PhiPlaceholder& p) {
  p.Dump(oss);
  return oss;
}

LSEVisitor::Value LSEVisitor::PriorValueHolder::ToValue() const {
  if (IsDefault()) {
    return Value::Default();
  } else if (IsPhi()) {
    return Value::ForLoopPhiPlaceholder(GetPhiPlaceholder());
  } else {
    return Value::ForInstruction(GetInstruction());
  }
}

constexpr bool LSEVisitor::Value::ExactEquals(LSEVisitor::Value other) const {
  return value_ == other.value_;
}
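// Illustration for Equals() below (hypothetical input sketch): treating the
// default value as equal to zero-bit-pattern constants lets LSE drop stores
// that re-write the post-allocation default:
//
//   int[] a = new int[4];  // elements tracked as Default
//   a[0] = 0;              // Equals(Default, IntConstant(0)) -> same value,
//                          // so this store can be eliminated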
constexpr bool LSEVisitor::Value::Equals(LSEVisitor::Value other) const {
  // Only valid values can be compared.
  DCHECK(IsValid());
  DCHECK(other.IsValid());
  if (value_ == other.value_) {
    // Note: Two unknown values are considered different.
    return !IsUnknown();
  } else {
    // Default is considered equal to zero-bit-pattern instructions.
    return (IsDefault() && other.IsInstruction() && IsZeroBitPattern(other.GetInstruction())) ||
           (other.IsDefault() && IsInstruction() && IsZeroBitPattern(GetInstruction()));
  }
}

std::ostream& LSEVisitor::Value::Dump(std::ostream& os) const {
  if (std::holds_alternative<ValuelessType>(value_)) {
    switch (GetValuelessType()) {
      case ValuelessType::kDefault:
        return os << "Default";
      case ValuelessType::kPureUnknown:
        return os << "PureUnknown";
      case ValuelessType::kInvalid:
        return os << "Invalid";
    }
  } else if (IsPartialUnknown()) {
    return os << "PartialUnknown[" << GetPriorValue() << "]";
  } else if (IsInstruction()) {
    return os << "Instruction[id: " << GetInstruction()->GetId()
              << ", block: " << GetInstruction()->GetBlock()->GetBlockId() << "]";
  } else if (IsMergedUnknown()) {
    return os << "MergedUnknown[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  } else if (NeedsLoopPhi()) {
    return os << "NeedsLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  } else {
    return os << "NeedsNonLoopPhi[block: " << GetPhiPlaceholder().GetBlockId()
              << ", heap_loc: " << GetPhiPlaceholder().GetHeapLocation() << "]";
  }
}

std::ostream& operator<<(std::ostream& os, const LSEVisitor::Value& v) {
  return v.Dump(os);
}

LSEVisitor::LSEVisitor(HGraph* graph,
                       const HeapLocationCollector& heap_location_collector,
                       OptimizingCompilerStats* stats)
    : HGraphDelegateVisitor(graph, stats),
      heap_location_collector_(heap_location_collector),
      allocator_(graph->GetArenaStack()),
      num_phi_placeholders_(GetGraph()->GetBlocks().size() *
                            heap_location_collector_.GetNumberOfHeapLocations()),
      heap_values_for_(graph->GetBlocks().size(),
                       ScopedArenaVector<ValueRecord>(allocator_.Adapter(kArenaAllocLSE)),
                       allocator_.Adapter(kArenaAllocLSE)),
      loads_and_stores_(allocator_.Adapter(kArenaAllocLSE)),
      // We may add new instructions (default values, Phis) but we're not adding loads
      // or stores, so we shall not need to resize following vector and BitVector.
      substitute_instructions_for_loads_(
          graph->GetCurrentInstructionId(), nullptr, allocator_.Adapter(kArenaAllocLSE)),
      kept_stores_(&allocator_,
                   /*start_bits=*/graph->GetCurrentInstructionId(),
                   /*expandable=*/false,
                   kArenaAllocLSE),
      phi_placeholders_to_search_for_kept_stores_(&allocator_,
                                                  num_phi_placeholders_,
                                                  /*expandable=*/false,
                                                  kArenaAllocLSE),
      loads_requiring_loop_phi_(allocator_.Adapter(kArenaAllocLSE)),
      store_records_(allocator_.Adapter(kArenaAllocLSE)),
      phi_placeholder_replacements_(
          num_phi_placeholders_, Value::Invalid(), allocator_.Adapter(kArenaAllocLSE)),
      singleton_new_instances_(allocator_.Adapter(kArenaAllocLSE)),
      field_infos_(heap_location_collector_.GetNumberOfHeapLocations(),
                   allocator_.Adapter(kArenaAllocLSE)),
      current_phase_(Phase::kLoadElimination) {}

LSEVisitor::Value LSEVisitor::PrepareLoopValue(HBasicBlock* block, size_t idx) {
  // If the pre-header value is known (which implies that the reference dominates this
  // block), use a Phi placeholder for the value in the loop header. If all predecessors
  // are later found to have a known value, we can replace loads from this location,
  // either with the pre-header value or with a new Phi. For array locations, the index
  // may be defined inside the loop but the only known value in that case should be the
  // default value or a Phi placeholder that can be replaced only with the default value.
  HLoopInformation* loop_info = block->GetLoopInformation();
  uint32_t pre_header_block_id = loop_info->GetPreHeader()->GetBlockId();
  Value pre_header_value = ReplacementOrValue(heap_values_for_[pre_header_block_id][idx].value);
  if (pre_header_value.IsUnknown()) {
    return pre_header_value;
  }
  if (kIsDebugBuild) {
    // Check that the reference indeed dominates this loop.
    HeapLocation* location = heap_location_collector_.GetHeapLocation(idx);
    HInstruction* ref = location->GetReferenceInfo()->GetReference();
    CHECK(ref->GetBlock() != block && ref->GetBlock()->Dominates(block))
        << GetGraph()->PrettyMethod();
    // Check that the index, if defined inside the loop, tracks a default value
    // or a Phi placeholder requiring a loop Phi.
    HInstruction* index = location->GetIndex();
    if (index != nullptr && loop_info->Contains(*index->GetBlock())) {
      CHECK(pre_header_value.NeedsLoopPhi() || pre_header_value.Equals(Value::Default()))
          << GetGraph()->PrettyMethod() << " blk: " << block->GetBlockId() << " "
          << pre_header_value;
    }
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return ReplacementOrValue(Value::ForLoopPhiPlaceholder(phi_placeholder));
}

LSEVisitor::Value LSEVisitor::PrepareLoopStoredBy(HBasicBlock* block, size_t idx) {
  // Use the Phi placeholder for `stored_by` to make sure all incoming stores are kept
  // if the value in the location escapes. This is not applicable to singletons that are
  // defined inside the loop as they shall be dead in the loop header.
  const ReferenceInfo* ref_info =
      heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo();
  const HInstruction* reference = ref_info->GetReference();
  // Finalizable objects always escape.
  const bool is_finalizable =
      reference->IsNewInstance() && reference->AsNewInstance()->IsFinalizable();
  if (ref_info->IsSingleton() &&
      block->GetLoopInformation()->Contains(*reference->GetBlock()) &&
      !is_finalizable) {
    return Value::PureUnknown();
  }
  PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
  return Value::ForLoopPhiPlaceholder(phi_placeholder);
}

void LSEVisitor::PrepareLoopRecords(HBasicBlock* block) {
  DCHECK(block->IsLoopHeader());
  int block_id = block->GetBlockId();
  HBasicBlock* pre_header = block->GetLoopInformation()->GetPreHeader();
  ScopedArenaVector<ValueRecord>& pre_header_heap_values =
      heap_values_for_[pre_header->GetBlockId()];
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  DCHECK_EQ(num_heap_locations, pre_header_heap_values.size());
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  DCHECK(heap_values.empty());

  // Don't eliminate loads in irreducible loops.
  if (block->GetLoopInformation()->IsIrreducible()) {
    heap_values.resize(num_heap_locations,
                       {/*value=*/Value::Invalid(), /*stored_by=*/Value::PureUnknown()});
    // Also keep the stores before the loop header, including in blocks that were not visited yet.
    bool is_osr = GetGraph()->IsCompilingOsr();
    for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
      heap_values[idx].value =
          is_osr ? Value::PureUnknown()
                 : Value::MergedUnknown(GetPhiPlaceholder(block->GetBlockId(), idx));
      KeepStores(Value::ForLoopPhiPlaceholder(GetPhiPlaceholder(block->GetBlockId(), idx)));
    }
    return;
  }

  // Fill `heap_values` based on values from pre-header.
  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    heap_values.push_back({ PrepareLoopValue(block, idx), PrepareLoopStoredBy(block, idx) });
  }
}

LSEVisitor::Value LSEVisitor::MergePredecessorValues(HBasicBlock* block, size_t idx) {
  ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
  DCHECK(!predecessors.empty());
  Value merged_value =
      ReplacementOrValue(heap_values_for_[predecessors[0]->GetBlockId()][idx].value);
  for (size_t i = 1u, size = predecessors.size(); i != size; ++i) {
    Value pred_value =
        ReplacementOrValue(heap_values_for_[predecessors[i]->GetBlockId()][idx].value);
    if (pred_value.Equals(merged_value)) {
      // Value is the same. No need to update our merged value.
      continue;
    } else if (pred_value.IsUnknown() || merged_value.IsUnknown()) {
      // If one is unknown and the other is a different type of unknown, the merge
      // must be a MergedUnknown tied to this block.
      PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
      merged_value = Value::MergedUnknown(phi_placeholder);
      // We know that at least one of the merge points is unknown (and both are
      // not pure-unknowns since that's captured above). This means that the
      // overall value needs to be a MergedUnknown. Just return that.
      break;
    } else {
      // There are conflicting known values. We may still be able to replace loads with a Phi.
      PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
      // Propagate the need for a new loop Phi from all predecessors.
      bool needs_loop_phi = merged_value.NeedsLoopPhi() || pred_value.NeedsLoopPhi();
      merged_value = ReplacementOrValue(Value::ForPhiPlaceholder(phi_placeholder, needs_loop_phi));
    }
  }
  DCHECK_IMPLIES(merged_value.IsPureUnknown(), block->GetPredecessors().size() <= 1)
      << merged_value << " in " << GetGraph()->PrettyMethod();
  return merged_value;
}

void LSEVisitor::MergePredecessorRecords(HBasicBlock* block) {
  if (block->IsExitBlock()) {
    // Exit block doesn't really merge values since the control flow ends in
    // its predecessors. Each predecessor needs to make sure stores are kept
    // if necessary.
    return;
  }

  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  DCHECK(heap_values.empty());
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  if (block->GetPredecessors().empty() || block->IsCatchBlock()) {
    DCHECK_IMPLIES(block->GetPredecessors().empty(), block->IsEntryBlock());
    heap_values.resize(num_heap_locations,
                       {/*value=*/ Value::PureUnknown(), /*stored_by=*/ Value::PureUnknown()});
    return;
  }

  heap_values.reserve(num_heap_locations);
  for (size_t idx = 0u; idx != num_heap_locations; ++idx) {
    Value merged_value = MergePredecessorValues(block, idx);
    if (kIsDebugBuild) {
      if (merged_value.NeedsPhi()) {
        uint32_t block_id = merged_value.GetPhiPlaceholder().GetBlockId();
        CHECK(GetGraph()->GetBlocks()[block_id]->Dominates(block));
      } else if (merged_value.IsInstruction()) {
        CHECK(merged_value.GetInstruction()->GetBlock()->Dominates(block));
      }
    }
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    Value merged_stored_by = heap_values_for_[predecessors[0]->GetBlockId()][idx].stored_by;
    for (size_t predecessor_idx = 1u; predecessor_idx != predecessors.size(); ++predecessor_idx) {
      uint32_t predecessor_block_id = predecessors[predecessor_idx]->GetBlockId();
      Value stored_by = heap_values_for_[predecessor_block_id][idx].stored_by;
      if ((!stored_by.IsUnknown() || !merged_stored_by.IsUnknown()) &&
          !merged_stored_by.Equals(stored_by)) {
        // Use the Phi placeholder to track that we need to keep stores from all predecessors.
        PhiPlaceholder phi_placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
        merged_stored_by = Value::ForNonLoopPhiPlaceholder(phi_placeholder);
        break;
      }
    }
    heap_values.push_back({ merged_value, merged_stored_by });
  }
}

static HInstruction* FindOrConstructNonLoopPhi(
    HBasicBlock* block,
    const ScopedArenaVector<HInstruction*>& phi_inputs,
    DataType::Type type) {
  for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
    HInstruction* phi = phi_it.Current();
    DCHECK_EQ(phi->InputCount(), phi_inputs.size());
    auto cmp = [](HInstruction* lhs, const HUserRecord<HInstruction*>& rhs) {
      return lhs == rhs.GetInstruction();
    };
    if (std::equal(phi_inputs.begin(), phi_inputs.end(), phi->GetInputRecords().begin(), cmp)) {
      return phi;
    }
  }
  ArenaAllocator* allocator = block->GetGraph()->GetAllocator();
  HPhi* phi = new (allocator) HPhi(allocator, kNoRegNumber, phi_inputs.size(), type);
  for (size_t i = 0, size = phi_inputs.size(); i != size; ++i) {
    DCHECK_NE(phi_inputs[i]->GetType(), DataType::Type::kVoid) << phi_inputs[i]->DebugName();
    phi->SetRawInputAt(i, phi_inputs[i]);
  }
  block->AddPhi(phi);
  if (type == DataType::Type::kReference) {
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(block->GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(phi);
  }
  return phi;
}

void LSEVisitor::MaterializeNonLoopPhis(PhiPlaceholder phi_placeholder, DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t idx = phi_placeholder.GetHeapLocation();

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse the same vector for collecting phi inputs.
  ScopedArenaVector<HInstruction*> phi_inputs(allocator.Adapter(kArenaAllocLSE));

  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    if (phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].IsValid()) {
      // This Phi placeholder was pushed to the `work_queue` followed by another Phi placeholder
      // that directly or indirectly depends on it, so it was already processed as part of the
      // other Phi placeholder's dependencies before this one got back to the top of the stack.
      work_queue.pop_back();
      continue;
    }
    uint32_t current_block_id = current_phi_placeholder.GetBlockId();
    HBasicBlock* current_block = blocks[current_block_id];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);

    // Non-loop Phis cannot depend on a loop Phi, so we should not see any loop header here.
    // And the only way for such merged value to reach a different heap location is through
    // a load at which point we materialize the Phi. Therefore all non-loop Phi placeholders
    // seen here are tied to one heap location.
    DCHECK(!current_block->IsLoopHeader())
        << current_phi_placeholder << " phase: " << current_phase_;
    DCHECK_EQ(current_phi_placeholder.GetHeapLocation(), idx);

    phi_inputs.clear();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value =
          ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      DCHECK(!pred_value.IsPureUnknown()) << pred_value << " block " << current_block->GetBlockId()
                                          << " pred: " << predecessor->GetBlockId();
      if (pred_value.NeedsNonLoopPhi()) {
        // We need to process the Phi placeholder first.
        work_queue.push_back(pred_value.GetPhiPlaceholder());
      } else if (pred_value.IsDefault()) {
        phi_inputs.push_back(GetDefaultValue(type));
      } else {
        DCHECK(pred_value.IsInstruction()) << pred_value << " block " << current_block->GetBlockId()
                                           << " pred: " << predecessor->GetBlockId();
        phi_inputs.push_back(pred_value.GetInstruction());
      }
    }
    if (phi_inputs.size() == current_block->GetPredecessors().size()) {
      // All inputs are available. Find or construct the Phi replacement.
      phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)] =
          Value::ForInstruction(FindOrConstructNonLoopPhi(current_block, phi_inputs, type));
      // Remove the block from the queue.
      DCHECK_EQ(current_phi_placeholder, work_queue.back());
      work_queue.pop_back();
    }
  }
}

void LSEVisitor::VisitGetLocation(HInstruction* instruction, size_t idx) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  uint32_t block_id = instruction->GetBlock()->GetBlockId();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block_id];
  ValueRecord& record = heap_values[idx];
  if (instruction->IsFieldAccess()) {
    RecordFieldInfo(&instruction->GetFieldInfo(), idx);
  }
  DCHECK(record.value.IsUnknown() || record.value.Equals(ReplacementOrValue(record.value)));
  loads_and_stores_.push_back({ instruction, idx });
  if ((record.value.IsDefault() || record.value.NeedsNonLoopPhi()) &&
      !IsDefaultOrPhiAllowedForLoad(instruction)) {
    record.value = Value::PureUnknown();
  }
  if (record.value.IsDefault()) {
    KeepStores(record.stored_by);
    HInstruction* constant = GetDefaultValue(instruction->GetType());
    AddRemovedLoad(instruction, constant);
    record.value = Value::ForInstruction(constant);
  } else if (record.value.IsUnknown()) {
    // Load isn't eliminated. Put the load as the value into the HeapLocation.
    // This acts like GVN but with better aliasing analysis.
    Value old_value = record.value;
    record.value = Value::ForInstruction(instruction);
    KeepStoresIfAliasedToLocation(heap_values, idx);
    KeepStores(old_value);
  } else if (record.value.NeedsLoopPhi()) {
    // We do not know yet if the value is known for all back edges. Record for future processing.
    if (loads_requiring_loop_phi_.empty()) {
      loads_requiring_loop_phi_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
    }
    DCHECK_EQ(loads_requiring_loop_phi_[instruction->GetId()], nullptr);
    loads_requiring_loop_phi_[instruction->GetId()] =
        new (allocator_.Alloc<ValueRecord>(kArenaAllocLSE)) ValueRecord(record);
  } else {
    // This load can be eliminated but we may need to construct non-loop Phis.
    if (record.value.NeedsNonLoopPhi()) {
      MaterializeNonLoopPhis(record.value.GetPhiPlaceholder(), instruction->GetType());
      record.value = Replacement(record.value);
    }
    HInstruction* heap_value = FindSubstitute(record.value.GetInstruction());
    AddRemovedLoad(instruction, heap_value);
  }
}

void LSEVisitor::VisitSetLocation(HInstruction* instruction, size_t idx, HInstruction* value) {
  DCHECK_NE(idx, HeapLocationCollector::kHeapLocationNotFound);
  DCHECK(!IsStore(value)) << value->DebugName();
  if (instruction->IsFieldAccess()) {
    RecordFieldInfo(&instruction->GetFieldInfo(), idx);
  }
  // `value` may already have a substitute.
  value = FindSubstitute(value);
  HBasicBlock* block = instruction->GetBlock();
  ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
  ValueRecord& record = heap_values[idx];
  DCHECK_IMPLIES(record.value.IsInstruction(),
                 FindSubstitute(record.value.GetInstruction()) == record.value.GetInstruction());

  if (record.value.Equals(value)) {
    // Store into the heap location with the same value.
    // This store can be eliminated right away.
    block->RemoveInstruction(instruction);
    return;
  }

  if (store_records_.empty()) {
    store_records_.resize(GetGraph()->GetCurrentInstructionId(), nullptr);
  }
  DCHECK_EQ(store_records_[instruction->GetId()], nullptr);
  store_records_[instruction->GetId()] =
      new (allocator_.Alloc<StoreRecord>(kArenaAllocLSE)) StoreRecord(record, value);
  loads_and_stores_.push_back({ instruction, idx });

  // If the `record.stored_by` specified a store from this block, it shall be removed
  // at the end, except for throwing ArraySet; it cannot be marked for keeping in
  // `kept_stores_` anymore after we update the `record.stored_by` below.
  DCHECK(!record.stored_by.IsInstruction() ||
         record.stored_by.GetInstruction()->GetBlock() != block ||
         record.stored_by.GetInstruction()->CanThrow() ||
         !kept_stores_.IsBitSet(record.stored_by.GetInstruction()->GetId()));

  if (instruction->CanThrow()) {
    // Previous stores can become visible.
    HandleThrowingInstruction(instruction);
    // We cannot remove a possibly throwing store.
    // After marking it as kept, it does not matter if we track it in `stored_by` or not.
    kept_stores_.SetBit(instruction->GetId());
  }

  // Update the record.
  // Note that the `value` can be a newly created `Phi` with an id that falls outside
  // the allocated `loads_requiring_loop_phi_` range.
  DCHECK_IMPLIES(IsLoad(value) && !loads_requiring_loop_phi_.empty(),
                 static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size());
  if (static_cast<size_t>(value->GetId()) < loads_requiring_loop_phi_.size() &&
      loads_requiring_loop_phi_[value->GetId()] != nullptr) {
    // Propagate the Phi placeholder to the record.
    record.value = loads_requiring_loop_phi_[value->GetId()]->value;
    DCHECK(record.value.NeedsLoopPhi());
  } else {
    record.value = Value::ForInstruction(value);
  }
  // Track the store in the value record. If the value is loaded or needed after
  // return/deoptimization later, this store isn't really redundant.
  record.stored_by = Value::ForInstruction(instruction);

  // This store may kill values in other heap locations due to aliasing.
  for (size_t i = 0u, size = heap_values.size(); i != size; ++i) {
    if (i == idx ||
        heap_values[i].value.IsUnknown() ||
        CanValueBeKeptIfSameAsNew(heap_values[i].value, value, instruction) ||
        !heap_location_collector_.MayAlias(i, idx)) {
      continue;
    }
    // Kill heap locations that may alias and keep previous stores to these locations.
    KeepStores(heap_values[i].stored_by);
    heap_values[i].stored_by = Value::PureUnknown();
    heap_values[i].value = Value::PartialUnknown(heap_values[i].value);
  }
}

void LSEVisitor::VisitBasicBlock(HBasicBlock* block) {
  // Populate the heap_values array for this block.
  // TODO: try to reuse the heap_values array from one predecessor if possible.
  if (block->IsLoopHeader()) {
    PrepareLoopRecords(block);
  } else {
    MergePredecessorRecords(block);
  }
  // Visit non-Phi instructions.
  VisitNonPhiInstructions(block);
}

bool LSEVisitor::MayAliasOnBackEdge(HBasicBlock* loop_header, size_t idx1, size_t idx2) const {
  DCHECK_NE(idx1, idx2);
  DCHECK(loop_header->IsLoopHeader());
  if (heap_location_collector_.MayAlias(idx1, idx2)) {
    return true;
  }
  // For array locations with index defined inside the loop, include
  // all other locations in the array, even those that LSA declares
  // non-aliasing, such as `a[i]` and `a[i + 1]`, as they may actually
  // refer to the same locations for different iterations. (LSA's
  // `ComputeMayAlias()` does not consider different loop iterations.)
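  //
  // Illustrative example (Java-like pseudocode, not taken from any particular test):
  //
  //     for (int i = 1; i < n; ++i) {
  //       a[i] = a[i - 1];  // The `a[i - 1]` read in iteration `i` is exactly the
  //     }                   // `a[i]` written in iteration `i - 1`.
  //
  // Within a single iteration LSA correctly treats `a[i]` and `a[i - 1]` as distinct
  // locations, so on back-edges we must conservatively treat them as aliasing here.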
  HeapLocation* loc1 = heap_location_collector_.GetHeapLocation(idx1);
  HeapLocation* loc2 = heap_location_collector_.GetHeapLocation(idx2);
  if (loc1->IsArray() &&
      loc2->IsArray() &&
      HeapLocationCollector::CanReferencesAlias(loc1->GetReferenceInfo(),
                                                loc2->GetReferenceInfo())) {
    HLoopInformation* loop_info = loop_header->GetLoopInformation();
    if (loop_info->Contains(*loc1->GetIndex()->GetBlock()) ||
        loop_info->Contains(*loc2->GetIndex()->GetBlock())) {
      // Consider the locations aliasing. Do not optimize the case where both indexes
      // are loop invariants defined inside the loop, rely on LICM to pull them out.
      return true;
    }
  }
  return false;
}

bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithDefault(
    PhiPlaceholder phi_placeholder,
    DataType::Type type,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else if (!value.Equals(Value::Default())) {
        return false;  // Report failure.
      }
    }
    if (block->IsLoopHeader()) {
      // For back-edges we need to check all locations that write to the same array,
      // even those that LSA declares non-aliasing, such as `a[i]` and `a[i + 1]`,
      // as they may actually refer to the same locations for different iterations.
      for (size_t i = 0; i != num_heap_locations; ++i) {
        if (i == idx ||
            heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo() !=
                heap_location_collector_.GetHeapLocation(idx)->GetReferenceInfo()) {
          continue;
        }
        for (HBasicBlock* predecessor : block->GetPredecessors()) {
          // Check if there were any writes to this location.
          // Note: We could simply process the values but due to the vector operation
          // carve-out (see `IsDefaultOrPhiAllowedForLoad()`), a vector load can cause
          // the value to change and not be equal to default. To work around this and
          // allow replacing the non-vector load of loop-invariant default values
          // anyway, skip over paths that do not have any writes.
          ValueRecord record = heap_values_for_[predecessor->GetBlockId()][i];
          while (record.stored_by.NeedsLoopPhi() &&
                 blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->IsLoopHeader()) {
            HLoopInformation* loop_info =
                blocks[record.stored_by.GetPhiPlaceholder().GetBlockId()]->GetLoopInformation();
            record = heap_values_for_[loop_info->GetPreHeader()->GetBlockId()][i];
          }
          Value value = ReplacementOrValue(record.value);
          if (value.NeedsPhi()) {
            // Visit the predecessor Phi placeholder if it's not visited yet.
            if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
              visited.SetBit(PhiPlaceholderIndex(value));
              work_queue.push_back(value.GetPhiPlaceholder());
            }
          } else if (!value.Equals(Value::Default())) {
            return false;  // Report failure.
          }
        }
      }
    }
  }

  // Record replacement and report success.
  HInstruction* replacement = GetDefaultValue(type);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non vector operations to analyze the information. However, we replace
    // only non vector operations in this code path.
    if (!hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

bool LSEVisitor::TryReplacingLoopPhiPlaceholderWithSingleInput(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  HInstruction* replacement = nullptr;
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  visited.SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!visited.IsBitSet(PhiPlaceholderIndex(value))) {
          visited.SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
        }
      } else {
        if (!value.IsInstruction() ||
            (replacement != nullptr && replacement != value.GetInstruction())) {
          return false;  // Report failure.
        }
        replacement = value.GetInstruction();
      }
    }
    // While `TryReplacingLoopPhiPlaceholderWithDefault()` has special treatment
    // for back-edges, it is not needed here. When looking for a single input
    // instruction coming from before the loop, the array index must also be
    // defined before the loop and the aliasing analysis done by LSA is sufficient.
    // Any writes of a different value with an index that is not loop invariant
    // would invalidate the heap location in `VisitSetLocation()`.
  }

  // Record replacement and report success.
  DCHECK(replacement != nullptr);
  for (uint32_t phi_placeholder_index : visited.Indexes()) {
    DCHECK(phi_placeholder_replacements_[phi_placeholder_index].IsInvalid());
    PhiPlaceholder curr = GetPhiPlaceholderAt(phi_placeholder_index);
    HeapLocation* hl = heap_location_collector_.GetHeapLocation(curr.GetHeapLocation());
    // We use both vector and non vector operations to analyze the information. However, we replace
    // only vector operations in this code path.
    if (hl->IsVecOp()) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
      phi_placeholders_to_materialize->ClearBit(phi_placeholder_index);
    }
  }
  return true;
}

std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::FindLoopPhisToMaterialize(
    PhiPlaceholder phi_placeholder,
    /*inout*/ ArenaBitVector* phi_placeholders_to_materialize,
    DataType::Type type,
    bool can_use_default_or_phi) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ScopedArenaVector<PhiPlaceholder> work_queue(allocator.Adapter(kArenaAllocLSE));

  // Use depth first search to check if any non-Phi input is unknown.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  phi_placeholders_to_materialize->ClearAllBits();
  phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(phi_placeholder));
  work_queue.push_back(phi_placeholder);
  while (!work_queue.empty()) {
    PhiPlaceholder current_phi_placeholder = work_queue.back();
    work_queue.pop_back();
    if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(current_phi_placeholder))) {
      // Replaced by `TryReplacingLoopPhiPlaceholderWith{Default,SingleInput}()`.
      DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(current_phi_placeholder)].Equals(
          Value::Default()));
      continue;
    }
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    if (current_block->IsLoopHeader()) {
      // If the index is defined inside the loop, it may reference different elements of the
      // array on each iteration. Since we do not track if all elements of an array are set
      // to the same value explicitly, the only known value in pre-header can be the default
      // value from NewArray or a Phi placeholder depending on a default value from some outer
      // loop pre-header. This Phi placeholder can be replaced only by the default value.
      HInstruction* index = heap_location_collector_.GetHeapLocation(idx)->GetIndex();
      if (index != nullptr && current_block->GetLoopInformation()->Contains(*index->GetBlock())) {
        if (can_use_default_or_phi &&
            TryReplacingLoopPhiPlaceholderWithDefault(current_phi_placeholder,
                                                      type,
                                                      phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
      // A similar situation arises with the index defined outside the loop if we cannot use
      // default values or Phis, i.e. for vector loads, as we can only replace the Phi
      // placeholder with a single instruction defined before the loop.
      if (!can_use_default_or_phi) {
        DCHECK(index != nullptr);  // Vector operations are array operations.
        if (TryReplacingLoopPhiPlaceholderWithSingleInput(current_phi_placeholder,
                                                          phi_placeholders_to_materialize)) {
          continue;
        } else {
          return current_phi_placeholder;  // Report the loop Phi placeholder.
        }
      }
    }
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      Value value = ReplacementOrValue(heap_values[idx].value);
      if (value.IsUnknown()) {
        // We cannot create a Phi for this loop Phi placeholder.
        return current_phi_placeholder;  // Report the loop Phi placeholder.
      }
      // For arrays, the location may have been clobbered by writes to other locations
      // in a loop that LSA does not consider aliasing, such as `a[i]` and `a[i + 1]`.
      if (current_block->IsLoopHeader() &&
          predecessor != current_block->GetLoopInformation()->GetPreHeader() &&
          heap_location_collector_.GetHeapLocation(idx)->GetIndex() != nullptr) {
        for (size_t i = 0, size = heap_values.size(); i != size; ++i) {
          if (i != idx &&
              !heap_values[i].stored_by.IsUnknown() &&
              MayAliasOnBackEdge(current_block, idx, i)) {
            // We cannot create a Phi for this loop Phi placeholder.
            return current_phi_placeholder;
          }
        }
      }
      if (value.NeedsLoopPhi()) {
        // Visit the predecessor Phi placeholder if it's not visited yet.
        if (!phi_placeholders_to_materialize->IsBitSet(PhiPlaceholderIndex(value))) {
          phi_placeholders_to_materialize->SetBit(PhiPlaceholderIndex(value));
          work_queue.push_back(value.GetPhiPlaceholder());
          LSE_VLOG << "For materialization of " << phi_placeholder
                   << " we need to materialize " << value;
        }
      }
    }
  }

  // There are no unknown values feeding this Phi, so we can construct the Phis if needed.
  return std::nullopt;
}

bool LSEVisitor::MaterializeLoopPhis(const ScopedArenaVector<size_t>& phi_placeholder_indexes,
                                     DataType::Type type) {
  return MaterializeLoopPhis(ArrayRef<const size_t>(phi_placeholder_indexes), type);
}

bool LSEVisitor::MaterializeLoopPhis(ArrayRef<const size_t> phi_placeholder_indexes,
                                     DataType::Type type) {
  // Materialize all predecessors that do not need a loop Phi and determine if all inputs
  // other than loop Phis are the same.
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  std::optional<Value> other_value = std::nullopt;
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK_GE(block->GetPredecessors().size(), 2u);
    size_t idx = phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (value.NeedsNonLoopPhi()) {
        DCHECK(current_phase_ == Phase::kLoadElimination) << current_phase_;
        MaterializeNonLoopPhis(value.GetPhiPlaceholder(), type);
        value = Replacement(value);
      }
      if (!value.NeedsLoopPhi()) {
        if (!other_value) {
          // The first other value we found.
          other_value = value;
        } else if (!other_value->IsInvalid()) {
          // Check if the current `value` differs from the previous `other_value`.
          if (!value.Equals(*other_value)) {
            other_value = Value::Invalid();
          }
        }
      }
    }
  }
  DCHECK(other_value.has_value());

  if (!other_value->IsInvalid()) {
    HInstruction* replacement =
        (other_value->IsDefault()) ? GetDefaultValue(type) : other_value->GetInstruction();
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(replacement);
    }
    return true;
  }

  // If we're materializing only a single Phi, try to match it with an existing Phi.
  // (Matching multiple Phis would need investigation. It may be prohibitively slow.)
  // This also covers the case when after replacing a previous set of Phi placeholders,
  // we continue with a Phi placeholder that does not really need a loop Phi anymore.
  if (phi_placeholder_indexes.size() == 1u) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_indexes[0]);
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = GetGraph()->GetBlocks()[phi_placeholder.GetBlockId()];
    ArrayRef<HBasicBlock* const> predecessors(block->GetPredecessors());
    for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
      HInstruction* phi = phi_it.Current();
      DCHECK_EQ(phi->InputCount(), predecessors.size());
      ArrayRef<HUserRecord<HInstruction*>> phi_inputs = phi->GetInputRecords();
      auto cmp = [=](const HUserRecord<HInstruction*>& lhs, HBasicBlock* rhs) {
        Value value = ReplacementOrValue(heap_values_for_[rhs->GetBlockId()][idx].value);
        if (value.NeedsPhi()) {
          DCHECK(value.GetPhiPlaceholder() == phi_placeholder);
          return lhs.GetInstruction() == phi;
        } else {
          DCHECK(value.IsDefault() || value.IsInstruction());
          return value.Equals(lhs.GetInstruction());
        }
      };
      if (std::equal(phi_inputs.begin(), phi_inputs.end(), predecessors.begin(), cmp)) {
        phi_placeholder_replacements_[phi_placeholder_indexes[0]] = Value::ForInstruction(phi);
        return true;
      }
    }
  }

  if (current_phase_ == Phase::kStoreElimination) {
    // We're not creating Phis during the final store elimination phase.
    return false;
  }

  // There are different inputs to the Phi chain. Create the Phis.
  ArenaAllocator* allocator = GetGraph()->GetAllocator();
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    CHECK_GE(block->GetPredecessors().size(), 2u);
    phi_placeholder_replacements_[phi_placeholder_index] = Value::ForInstruction(
        new (allocator) HPhi(allocator, kNoRegNumber, block->GetPredecessors().size(), type));
  }
  // Fill the Phi inputs.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    size_t idx = phi_placeholder.GetHeapLocation();
    HInstruction* phi = phi_placeholder_replacements_[phi_placeholder_index].GetInstruction();
    DCHECK(DataType::IsTypeConversionImplicit(type, phi->GetType()))
        << "type=" << type << " vs phi-type=" << phi->GetType();
    for (size_t i = 0, size = block->GetPredecessors().size(); i != size; ++i) {
      HBasicBlock* predecessor = block->GetPredecessors()[i];
      Value value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      HInstruction* input = value.IsDefault() ? GetDefaultValue(type) : value.GetInstruction();
      DCHECK_NE(input->GetType(), DataType::Type::kVoid);
      phi->SetRawInputAt(i, input);
      DCHECK(DataType::IsTypeConversionImplicit(input->GetType(), phi->GetType()))
          << " input: " << input->GetType() << value << " phi: " << phi->GetType()
          << " request: " << type;
    }
  }
  // Add the Phis to their blocks.
  for (size_t phi_placeholder_index : phi_placeholder_indexes) {
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(phi_placeholder_index);
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    block->AddPhi(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction()->AsPhi());
  }
  if (type == DataType::Type::kReference) {
    ScopedArenaAllocator local_allocator(allocator_.GetArenaStack());
    ScopedArenaVector<HInstruction*> phis(local_allocator.Adapter(kArenaAllocLSE));
    for (size_t phi_placeholder_index : phi_placeholder_indexes) {
      phis.push_back(phi_placeholder_replacements_[phi_placeholder_index].GetInstruction());
    }
    // Update reference type information. Pass invalid handles, these are not used for Phis.
    ReferenceTypePropagation rtp_fixup(GetGraph(),
                                       Handle<mirror::DexCache>(),
                                       /* is_first_run= */ false);
    rtp_fixup.Visit(ArrayRef<HInstruction* const>(phis));
  }

  return true;
}

bool LSEVisitor::MaterializeLoopPhis(const ArenaBitVector& phi_placeholders_to_materialize,
                                     DataType::Type type) {
  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // We want to recognize when a subset of these loop Phis that do not need other
  // loop Phis, i.e. a transitive closure, has only one other instruction as an input,
  // i.e. that instruction can be used instead of each Phi in the set. See for example
  // Main.testLoop{5,6,7,8}() in the test 530-checker-lse. To do that, we shall
  // materialize these loop Phis from the smallest transitive closure.

  // Construct a matrix of loop phi placeholder dependencies. To reduce the memory usage,
  // assign new indexes to the Phi placeholders, making the matrix dense.
  ScopedArenaVector<size_t> matrix_indexes(num_phi_placeholders_,
                                           static_cast<size_t>(-1),  // Invalid.
                                           allocator.Adapter(kArenaAllocLSE));
  ScopedArenaVector<size_t> phi_placeholder_indexes(allocator.Adapter(kArenaAllocLSE));
  size_t num_phi_placeholders = phi_placeholders_to_materialize.NumSetBits();
  phi_placeholder_indexes.reserve(num_phi_placeholders);
  for (uint32_t marker_index : phi_placeholders_to_materialize.Indexes()) {
    matrix_indexes[marker_index] = phi_placeholder_indexes.size();
    phi_placeholder_indexes.push_back(marker_index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  ScopedArenaVector<ArenaBitVector*> dependencies(allocator.Adapter(kArenaAllocLSE));
  dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    static constexpr bool kExpandable = false;
    dependencies.push_back(
        ArenaBitVector::Create(&allocator, num_phi_placeholders, kExpandable, kArenaAllocLSE));
    ArenaBitVector* current_dependencies = dependencies.back();
    // Count the Phi placeholder as its own dependency.
    current_dependencies->SetBit(matrix_index);
    PhiPlaceholder current_phi_placeholder =
        GetPhiPlaceholderAt(phi_placeholder_indexes[matrix_index]);
    HBasicBlock* current_block = blocks[current_phi_placeholder.GetBlockId()];
    DCHECK_GE(current_block->GetPredecessors().size(), 2u);
    size_t idx = current_phi_placeholder.GetHeapLocation();
    for (HBasicBlock* predecessor : current_block->GetPredecessors()) {
      Value pred_value = ReplacementOrValue(heap_values_for_[predecessor->GetBlockId()][idx].value);
      if (pred_value.NeedsLoopPhi()) {
        size_t pred_value_index = PhiPlaceholderIndex(pred_value);
        DCHECK(phi_placeholder_replacements_[pred_value_index].IsInvalid());
        DCHECK_NE(matrix_indexes[pred_value_index], static_cast<size_t>(-1));
        current_dependencies->SetBit(matrix_indexes[pred_value_index]);
      }
    }
  }
  // Use the Floyd-Warshall algorithm to determine all transitive dependencies.
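  // (Illustrative note, not tied to any particular graph: this is the classic
  // O(n^3) transitive-closure formulation over a boolean dependency matrix;
  // e.g. if placeholder 0 depends on 1 and 1 depends on 2, the closure also
  // records the 0 -> 2 dependency. Each inner step is a cheap bit test plus a
  // bit set, and `n` is only the number of placeholders being materialized
  // here, not the total number of placeholders in the method.)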
  for (size_t k = 0; k != num_phi_placeholders; ++k) {
    for (size_t i = 0; i != num_phi_placeholders; ++i) {
      for (size_t j = 0; j != num_phi_placeholders; ++j) {
        if (dependencies[i]->IsBitSet(k) && dependencies[k]->IsBitSet(j)) {
          dependencies[i]->SetBit(j);
        }
      }
    }
  }

  // Count the number of transitive dependencies for each replaceable Phi placeholder.
  ScopedArenaVector<size_t> num_dependencies(allocator.Adapter(kArenaAllocLSE));
  num_dependencies.reserve(num_phi_placeholders);
  for (size_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
    num_dependencies.push_back(dependencies[matrix_index]->NumSetBits());
  }

  // Pick a Phi placeholder with the smallest number of transitive dependencies and
  // materialize it and its dependencies. Repeat until we have materialized all.
  ScopedArenaVector<size_t> current_subset(allocator.Adapter(kArenaAllocLSE));
  current_subset.reserve(num_phi_placeholders);
  size_t remaining_phi_placeholders = num_phi_placeholders;
  while (remaining_phi_placeholders != 0u) {
    auto it = std::min_element(num_dependencies.begin(), num_dependencies.end());
    DCHECK_LE(*it, remaining_phi_placeholders);
    size_t current_matrix_index = std::distance(num_dependencies.begin(), it);
    ArenaBitVector* current_dependencies = dependencies[current_matrix_index];
    size_t current_num_dependencies = num_dependencies[current_matrix_index];
    current_subset.clear();
    for (uint32_t matrix_index : current_dependencies->Indexes()) {
      current_subset.push_back(phi_placeholder_indexes[matrix_index]);
    }
    if (!MaterializeLoopPhis(current_subset, type)) {
      DCHECK_EQ(current_phase_, Phase::kStoreElimination);
      // This is the final store elimination phase and we shall not be able to eliminate any
      // stores that depend on the current subset, so mark these Phi placeholders unreplaceable.
      for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
        if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
          DCHECK(phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]].IsInvalid());
          phi_placeholder_replacements_[phi_placeholder_indexes[matrix_index]] =
              Value::PureUnknown();
        }
      }
      return false;
    }
    for (uint32_t matrix_index = 0; matrix_index != num_phi_placeholders; ++matrix_index) {
      if (current_dependencies->IsBitSet(matrix_index)) {
        // Mark all dependencies as done by incrementing their `num_dependencies[.]`,
        // so that they shall never be the minimum again.
        num_dependencies[matrix_index] = num_phi_placeholders;
      } else if (dependencies[matrix_index]->IsBitSet(current_matrix_index)) {
        // Remove dependencies from other Phi placeholders.
        dependencies[matrix_index]->Subtract(current_dependencies);
        num_dependencies[matrix_index] -= current_num_dependencies;
      }
    }
    remaining_phi_placeholders -= current_num_dependencies;
  }
  return true;
}

bool LSEVisitor::FullyMaterializePhi(PhiPlaceholder phi_placeholder, DataType::Type type) {
  ScopedArenaAllocator saa(GetGraph()->GetArenaStack());
  ArenaBitVector abv(&saa, num_phi_placeholders_, false, ArenaAllocKind::kArenaAllocLSE);
  auto res =
      FindLoopPhisToMaterialize(phi_placeholder, &abv, type, /* can_use_default_or_phi= */ true);
  CHECK(!res.has_value()) << *res;
  return MaterializeLoopPhis(abv, type);
}

std::optional<LSEVisitor::PhiPlaceholder> LSEVisitor::TryToMaterializeLoopPhis(
    PhiPlaceholder phi_placeholder, HInstruction* load) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());

  // Find Phi placeholders to materialize.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  DataType::Type type = load->GetType();
  bool can_use_default_or_phi = IsDefaultOrPhiAllowedForLoad(load);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, can_use_default_or_phi);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    return loop_phi_with_unknown_input;  // Return failure.
  }

  DCHECK_EQ(current_phase_, Phase::kLoadElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(success);

  // Report success.
  return std::nullopt;
}

// Re-process loads and stores in successors from the `loop_phi_with_unknown_input`. This may
// find one or more loads from `loads_requiring_loop_phi_` which cannot be replaced by Phis and
// propagate the load(s) as the new value(s) to successors; this may uncover new elimination
// opportunities. If we find no such load, we shall at least propagate an unknown value to some
// heap location that is needed by another loop Phi placeholder.
void LSEVisitor::ProcessLoopPhiWithUnknownInput(PhiPlaceholder loop_phi_with_unknown_input) {
  DCHECK(!loads_requiring_loop_phi_.empty());
  size_t loop_phi_with_unknown_input_index = PhiPlaceholderIndex(loop_phi_with_unknown_input);
  DCHECK(phi_placeholder_replacements_[loop_phi_with_unknown_input_index].IsInvalid());
  phi_placeholder_replacements_[loop_phi_with_unknown_input_index] =
      Value::MergedUnknown(loop_phi_with_unknown_input);

  uint32_t block_id = loop_phi_with_unknown_input.GetBlockId();
  const ArenaVector<HBasicBlock*>& reverse_post_order = GetGraph()->GetReversePostOrder();
  size_t reverse_post_order_index = 0;
  size_t reverse_post_order_size = reverse_post_order.size();
  size_t loads_and_stores_index = 0u;
  size_t loads_and_stores_size = loads_and_stores_.size();

  // Skip blocks and instructions before the block containing the loop phi with unknown input.
  DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
  while (reverse_post_order[reverse_post_order_index]->GetBlockId() != block_id) {
    HBasicBlock* block = reverse_post_order[reverse_post_order_index];
    while (loads_and_stores_index != loads_and_stores_size &&
           loads_and_stores_[loads_and_stores_index].load_or_store->GetBlock() == block) {
      ++loads_and_stores_index;
    }
    ++reverse_post_order_index;
    DCHECK_NE(reverse_post_order_index, reverse_post_order_size);
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Reuse one temporary vector for all remaining blocks.
  size_t num_heap_locations = heap_location_collector_.GetNumberOfHeapLocations();
  ScopedArenaVector<Value> local_heap_values(allocator.Adapter(kArenaAllocLSE));

  auto get_initial_value = [this](HBasicBlock* block, size_t idx) {
    Value value;
    if (block->IsLoopHeader()) {
      if (block->GetLoopInformation()->IsIrreducible()) {
        PhiPlaceholder placeholder = GetPhiPlaceholder(block->GetBlockId(), idx);
        value = Value::MergedUnknown(placeholder);
      } else {
        value = PrepareLoopValue(block, idx);
      }
    } else {
      value = MergePredecessorValues(block, idx);
    }
    DCHECK(value.IsUnknown() || ReplacementOrValue(value).Equals(value));
    return value;
  };

  // Process remaining blocks and instructions.
  bool found_unreplaceable_load = false;
  bool replaced_heap_value_with_unknown = false;
  for (; reverse_post_order_index != reverse_post_order_size; ++reverse_post_order_index) {
    HBasicBlock* block = reverse_post_order[reverse_post_order_index];
    if (block->IsExitBlock()) {
      continue;
    }

    // We shall reconstruct only the heap values that we need for processing loads and stores.
    local_heap_values.clear();
    local_heap_values.resize(num_heap_locations, Value::Invalid());

    for (; loads_and_stores_index != loads_and_stores_size; ++loads_and_stores_index) {
      HInstruction* load_or_store = loads_and_stores_[loads_and_stores_index].load_or_store;
      size_t idx = loads_and_stores_[loads_and_stores_index].heap_location_index;
      if (load_or_store->GetBlock() != block) {
        break;  // End of instructions from the current block.
      }
      if (IsStore(load_or_store)) {
        StoreRecord* store_record = store_records_[load_or_store->GetId()];
        DCHECK(store_record != nullptr);
        HInstruction* stored_value = store_record->stored_value;
        DCHECK(stored_value != nullptr);
        // Note that the `stored_value` can be a newly created `Phi` with an id that falls
        // outside the allocated `loads_requiring_loop_phi_` range.
        DCHECK_IMPLIES(
            IsLoad(stored_value),
            static_cast<size_t>(stored_value->GetId()) < loads_requiring_loop_phi_.size());
        if (static_cast<size_t>(stored_value->GetId()) >= loads_requiring_loop_phi_.size() ||
            loads_requiring_loop_phi_[stored_value->GetId()] == nullptr) {
          continue;  // This store never needed a loop Phi.
        }
        ValueRecord* record = loads_requiring_loop_phi_[stored_value->GetId()];
        // Process the store by updating `local_heap_values[idx]`. The last update shall
        // be propagated to the `heap_values[idx].value` if it previously needed a loop Phi
        // at the end of the block.
        Value replacement = ReplacementOrValue(record->value);
        if (replacement.NeedsLoopPhi()) {
          // No replacement yet, use the Phi placeholder from the load.
          DCHECK(record->value.NeedsLoopPhi());
          local_heap_values[idx] = record->value;
        } else {
          // If the load fetched a known value, use it, otherwise use the load.
          local_heap_values[idx] = Value::ForInstruction(
              replacement.IsUnknown() ? stored_value : replacement.GetInstruction());
        }
      } else {
        // Process the load unless it has previously been marked unreplaceable.
        DCHECK(IsLoad(load_or_store));
        ValueRecord* record = loads_requiring_loop_phi_[load_or_store->GetId()];
        if (record == nullptr) {
          continue;  // This load never needed a loop Phi.
        }
        if (record->value.NeedsLoopPhi()) {
          if (local_heap_values[idx].IsInvalid()) {
            local_heap_values[idx] = get_initial_value(block, idx);
          }
          if (local_heap_values[idx].IsUnknown()) {
            // This load cannot be replaced. Keep stores that feed the Phi placeholder
            // (no aliasing since then, otherwise the Phi placeholder would not have been
            // propagated as a value to this load) and store the load as the new heap value.
            found_unreplaceable_load = true;
            KeepStores(record->value);
            record->value = Value::MergedUnknown(record->value.GetPhiPlaceholder());
            local_heap_values[idx] = Value::ForInstruction(load_or_store);
          } else if (local_heap_values[idx].NeedsLoopPhi()) {
            // The load may still be replaced with a Phi later.
            DCHECK(local_heap_values[idx].Equals(record->value));
          } else {
            // This load can be eliminated but we may need to construct non-loop Phis.
            if (local_heap_values[idx].NeedsNonLoopPhi()) {
              MaterializeNonLoopPhis(local_heap_values[idx].GetPhiPlaceholder(),
                                     load_or_store->GetType());
              local_heap_values[idx] = Replacement(local_heap_values[idx]);
            }
            record->value = local_heap_values[idx];
            DCHECK(local_heap_values[idx].IsDefault() || local_heap_values[idx].IsInstruction())
                << "The replacement heap value can be an HIR instruction or the default value.";
            HInstruction* heap_value = local_heap_values[idx].IsDefault()
                                           ? GetDefaultValue(load_or_store->GetType())
                                           : local_heap_values[idx].GetInstruction();
            AddRemovedLoad(load_or_store, heap_value);
          }
        }
      }
    }

    // All heap values that previously needed a loop Phi at the end of the block
    // need to be updated for processing successors.
    ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[block->GetBlockId()];
    for (size_t idx = 0; idx != num_heap_locations; ++idx) {
      if (heap_values[idx].value.NeedsLoopPhi()) {
        if (local_heap_values[idx].IsValid()) {
          heap_values[idx].value = local_heap_values[idx];
        } else {
          heap_values[idx].value = get_initial_value(block, idx);
        }
        if (heap_values[idx].value.IsUnknown()) {
          replaced_heap_value_with_unknown = true;
        }
      }
    }
  }
  DCHECK(found_unreplaceable_load || replaced_heap_value_with_unknown);
}

void LSEVisitor::ProcessLoadsRequiringLoopPhis() {
  // Note: The vector operations carve-out (see `IsDefaultOrPhiAllowedForLoad()`) can possibly
  // make the result of the processing depend on the order in which we process these loads.
  // To make sure the result is deterministic, iterate over `loads_and_stores_` instead of the
  // `loads_requiring_loop_phi_` indexed by non-deterministic pointers.
  if (loads_requiring_loop_phi_.empty()) {
    return;  // No loads to process.
  }
  for (const LoadStoreRecord& load_store_record : loads_and_stores_) {
    ValueRecord* record = loads_requiring_loop_phi_[load_store_record.load_or_store->GetId()];
    if (record == nullptr) {
      continue;
    }
    HInstruction* load = load_store_record.load_or_store;
    while (record->value.NeedsLoopPhi() &&
           phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid()) {
      std::optional<PhiPlaceholder> loop_phi_with_unknown_input =
          TryToMaterializeLoopPhis(record->value.GetPhiPlaceholder(), load);
      DCHECK_EQ(loop_phi_with_unknown_input.has_value(),
                phi_placeholder_replacements_[PhiPlaceholderIndex(record->value)].IsInvalid());
      if (loop_phi_with_unknown_input) {
        DCHECK_GE(GetGraph()
                      ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                      ->GetPredecessors()
                      .size(),
                  2u);
        ProcessLoopPhiWithUnknownInput(*loop_phi_with_unknown_input);
      }
    }
    // The load could have been marked as unreplaceable (and stores marked for keeping)
    // or marked for replacement with an instruction in ProcessLoopPhiWithUnknownInput().
    DCHECK(record->value.IsUnknown() ||
           record->value.IsInstruction() ||
           record->value.NeedsLoopPhi());
    if (record->value.NeedsLoopPhi()) {
      record->value = Replacement(record->value);
      HInstruction* heap_value = record->value.GetInstruction();
      AddRemovedLoad(load, heap_value);
    }
  }
}

void LSEVisitor::SearchPhiPlaceholdersForKeptStores() {
  ScopedArenaVector<uint32_t> work_queue(allocator_.Adapter(kArenaAllocLSE));
  size_t start_size = phi_placeholders_to_search_for_kept_stores_.NumSetBits();
  work_queue.reserve(((start_size * 3u) + 1u) / 2u);  // Reserve 1.5x start size, rounded up.
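  // Seed the work queue with the initial set of placeholders; the queue can grow
  // below as new `stored_by` Phi placeholders are discovered, hence the extra
  // head-room reserved above.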
  for (uint32_t index : phi_placeholders_to_search_for_kept_stores_.Indexes()) {
    work_queue.push_back(index);
  }
  const ArenaVector<HBasicBlock*>& blocks = GetGraph()->GetBlocks();
  while (!work_queue.empty()) {
    uint32_t cur_phi_idx = work_queue.back();
    PhiPlaceholder phi_placeholder = GetPhiPlaceholderAt(cur_phi_idx);
    work_queue.pop_back();
    size_t idx = phi_placeholder.GetHeapLocation();
    HBasicBlock* block = blocks[phi_placeholder.GetBlockId()];
    DCHECK(block != nullptr) << cur_phi_idx << " phi: " << phi_placeholder
                             << " (blocks: " << blocks.size() << ")";
    for (HBasicBlock* predecessor : block->GetPredecessors()) {
      ScopedArenaVector<ValueRecord>& heap_values = heap_values_for_[predecessor->GetBlockId()];
      // For loop back-edges we must also preserve all stores to locations that
      // may alias with the location `idx`.
      // TODO: Add test cases around this.
      bool is_back_edge =
          block->IsLoopHeader() && predecessor != block->GetLoopInformation()->GetPreHeader();
      size_t start = is_back_edge ? 0u : idx;
      size_t end = is_back_edge ? heap_values.size() : idx + 1u;
      for (size_t i = start; i != end; ++i) {
        Value stored_by = heap_values[i].stored_by;
        if (!stored_by.IsUnknown() && (i == idx || MayAliasOnBackEdge(block, idx, i))) {
          if (stored_by.NeedsPhi()) {
            size_t phi_placeholder_index = PhiPlaceholderIndex(stored_by);
            if (!phi_placeholders_to_search_for_kept_stores_.IsBitSet(phi_placeholder_index)) {
              phi_placeholders_to_search_for_kept_stores_.SetBit(phi_placeholder_index);
              work_queue.push_back(phi_placeholder_index);
            }
          } else {
            DCHECK(IsStore(stored_by.GetInstruction()));
            ReferenceInfo* ri = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
            DCHECK(ri != nullptr) << "No heap value for "
                                  << stored_by.GetInstruction()->DebugName()
                                  << " id: " << stored_by.GetInstruction()->GetId() << " block: "
                                  << stored_by.GetInstruction()->GetBlock()->GetBlockId();
            kept_stores_.SetBit(stored_by.GetInstruction()->GetId());
          }
        }
      }
    }
  }
}

void LSEVisitor::UpdateValueRecordForStoreElimination(/*inout*/ ValueRecord* value_record) {
  while (value_record->stored_by.IsInstruction() &&
         !kept_stores_.IsBitSet(value_record->stored_by.GetInstruction()->GetId())) {
    StoreRecord* store_record = store_records_[value_record->stored_by.GetInstruction()->GetId()];
    DCHECK(store_record != nullptr);
    *value_record = store_record->old_value_record;
  }
  if (value_record->stored_by.NeedsPhi() &&
      !phi_placeholders_to_search_for_kept_stores_.IsBitSet(
          PhiPlaceholderIndex(value_record->stored_by))) {
    // Some stores feeding this heap location may have been eliminated. Use the `stored_by`
    // Phi placeholder to recalculate the actual value.
    value_record->value = value_record->stored_by;
  }
  value_record->value = ReplacementOrValue(value_record->value);
  if (value_record->value.NeedsNonLoopPhi()) {
    // Treat all Phi placeholders as requiring loop Phis at this point.
    // We do not want MaterializeLoopPhis() to call MaterializeNonLoopPhis().
    value_record->value = Value::ForLoopPhiPlaceholder(value_record->value.GetPhiPlaceholder());
  }
}

void LSEVisitor::FindOldValueForPhiPlaceholder(PhiPlaceholder phi_placeholder,
                                               DataType::Type type) {
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsInvalid());

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  ArenaBitVector visited(&allocator,
                         /*start_bits=*/ num_phi_placeholders_,
                         /*expandable=*/ false,
                         kArenaAllocLSE);

  // Find Phi placeholders to try and match against existing Phis or other replacement values.
  ArenaBitVector phi_placeholders_to_materialize(
      &allocator, num_phi_placeholders_, /*expandable=*/ false, kArenaAllocLSE);
  std::optional<PhiPlaceholder> loop_phi_with_unknown_input = FindLoopPhisToMaterialize(
      phi_placeholder, &phi_placeholders_to_materialize, type, /*can_use_default_or_phi=*/ true);
  if (loop_phi_with_unknown_input) {
    DCHECK_GE(GetGraph()
                  ->GetBlocks()[loop_phi_with_unknown_input->GetBlockId()]
                  ->GetPredecessors()
                  .size(),
              2u);
    // Mark the unreplaceable placeholder as well as the input Phi placeholder as unreplaceable.
    phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)] = Value::PureUnknown();
    phi_placeholder_replacements_[PhiPlaceholderIndex(*loop_phi_with_unknown_input)] =
        Value::PureUnknown();
    return;
  }

  DCHECK_EQ(current_phase_, Phase::kStoreElimination);
  bool success = MaterializeLoopPhis(phi_placeholders_to_materialize, type);
  DCHECK(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsValid());
  DCHECK_EQ(phi_placeholder_replacements_[PhiPlaceholderIndex(phi_placeholder)].IsUnknown(),
            !success);
}

struct ScopedRestoreHeapValues {
 public:
  ScopedRestoreHeapValues(ArenaStack* alloc,
                          size_t num_heap_locs,
                          ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore)
      : alloc_(alloc),
        updated_values_(alloc_.Adapter(kArenaAllocLSE)),
        to_restore_(to_restore) {
    updated_values_.reserve(num_heap_locs * to_restore_.size());
  }

  ~ScopedRestoreHeapValues() {
    for (const auto& rec : updated_values_) {
      to_restore_[rec.blk_id][rec.heap_loc].value = rec.val_;
    }
  }

  template <typename Func>
  void ForEachRecord(Func&& func) {
    for (size_t blk_id : Range(to_restore_.size())) {
      for (size_t heap_loc : Range(to_restore_[blk_id].size())) {
        LSEVisitor::ValueRecord* vr = &to_restore_[blk_id][heap_loc];
        LSEVisitor::Value initial = vr->value;
        func(vr);
        if (!vr->value.ExactEquals(initial)) {
          updated_values_.push_back({blk_id, heap_loc, initial});
        }
      }
    }
  }

 private:
  struct UpdateRecord {
    size_t blk_id;
    size_t heap_loc;
    LSEVisitor::Value val_;
  };

  ScopedArenaAllocator alloc_;
  ScopedArenaVector<UpdateRecord> updated_values_;
  ScopedArenaVector<ScopedArenaVector<LSEVisitor::ValueRecord>>& to_restore_;

  DISALLOW_COPY_AND_ASSIGN(ScopedRestoreHeapValues);
};

void LSEVisitor::FindStoresWritingOldValues() {
  // Partial LSE relies on knowing the real heap-values not the
  // store-replacement versions so we need to restore the map after removing
  // stores.
  ScopedRestoreHeapValues heap_vals(allocator_.GetArenaStack(),
                                    heap_location_collector_.GetNumberOfHeapLocations(),
                                    heap_values_for_);

  // The Phi placeholder replacements have so far been used for eliminating loads,
  // tracking values that would be stored if all stores were kept. As we want to
  // compare actual old values after removing unmarked stores, prune the Phi
  // placeholder replacements that can be fed by values we may not actually store.
  // Replacements marked as unknown can be kept as they are fed by some unknown
  // value and would end up as unknown again if we recalculated them.
  for (size_t i = 0, size = phi_placeholder_replacements_.size(); i != size; ++i) {
    if (!phi_placeholder_replacements_[i].IsUnknown() &&
        !phi_placeholders_to_search_for_kept_stores_.IsBitSet(i)) {
      phi_placeholder_replacements_[i] = Value::Invalid();
    }
  }

  // Update heap values at end of blocks.
  heap_vals.ForEachRecord([&](ValueRecord* rec) {
    UpdateValueRecordForStoreElimination(rec);
  });
  if (kIsDebugBuild) {
    heap_vals.ForEachRecord([](ValueRecord* rec) {
      DCHECK(!rec->value.NeedsNonLoopPhi()) << rec->value;
    });
  }

  // Use local allocator to reduce peak memory usage.
  ScopedArenaAllocator allocator(allocator_.GetArenaStack());
  // Mark the stores we want to eliminate in a separate bit vector.
  ArenaBitVector eliminated_stores(&allocator,
                                   /*start_bits=*/ GetGraph()->GetCurrentInstructionId(),
                                   /*expandable=*/ false,
                                   kArenaAllocLSE);

  for (uint32_t store_id : kept_stores_.Indexes()) {
    DCHECK(kept_stores_.IsBitSet(store_id));
    StoreRecord* store_record = store_records_[store_id];
    DCHECK(store_record != nullptr);
    UpdateValueRecordForStoreElimination(&store_record->old_value_record);
    if (store_record->old_value_record.value.NeedsPhi()) {
      DataType::Type type = store_record->stored_value->GetType();
      FindOldValueForPhiPlaceholder(store_record->old_value_record.value.GetPhiPlaceholder(), type);
      store_record->old_value_record.value =
          ReplacementOrValue(store_record->old_value_record.value);
    }
    DCHECK(!store_record->old_value_record.value.NeedsPhi());
    HInstruction* stored_value = FindSubstitute(store_record->stored_value);
    if (store_record->old_value_record.value.Equals(stored_value)) {
      eliminated_stores.SetBit(store_id);
    }
  }

  // Commit the stores to eliminate by removing them from `kept_stores_`.
  kept_stores_.Subtract(&eliminated_stores);
}

void LSEVisitor::Run() {
  // 0. Set HasMonitorOperations to false. If we encounter some MonitorOperations that we can't
  //    remove, we will set it to true in VisitMonitorOperation.
  GetGraph()->SetHasMonitorOperations(false);

  // 1. Process blocks and instructions in reverse post order.
  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
    VisitBasicBlock(block);
  }

  // 2. Process loads that require loop Phis, trying to find/create replacements.
  current_phase_ = Phase::kLoadElimination;
  ProcessLoadsRequiringLoopPhis();

  // 3. Determine which stores to keep and which to eliminate.
  current_phase_ = Phase::kStoreElimination;
  // Finish marking stores for keeping.
  SearchPhiPlaceholdersForKeptStores();
  // Find stores that write the same value as is already present in the location.
  FindStoresWritingOldValues();

  // 4. Replace loads and remove unnecessary stores and singleton allocations.
  FinishFullLSE();
}

void LSEVisitor::FinishFullLSE() {
  // Remove recorded load instructions that should be eliminated.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    size_t id = dchecked_integral_cast<size_t>(record.load_or_store->GetId());
    HInstruction* substitute = substitute_instructions_for_loads_[id];
    if (substitute == nullptr) {
      continue;
    }
    HInstruction* load = record.load_or_store;
    DCHECK(load != nullptr);
    DCHECK(IsLoad(load));
    DCHECK(load->GetBlock() != nullptr) << load->DebugName() << "@" << load->GetDexPc();
    // We proactively retrieve the substitute for a removed load, so
    // a load that has a substitute should not be observed as a heap
    // location value.
    DCHECK_EQ(FindSubstitute(substitute), substitute);

    load->ReplaceWith(substitute);
    load->GetBlock()->RemoveInstruction(load);
    if ((load->IsInstanceFieldGet() && load->AsInstanceFieldGet()->IsVolatile()) ||
        (load->IsStaticFieldGet() && load->AsStaticFieldGet()->IsVolatile())) {
      MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileLoad);
    }
  }

  // Remove all the stores we can.
  for (const LoadStoreRecord& record : loads_and_stores_) {
    if (IsStore(record.load_or_store) && !kept_stores_.IsBitSet(record.load_or_store->GetId())) {
      record.load_or_store->GetBlock()->RemoveInstruction(record.load_or_store);
      if ((record.load_or_store->IsInstanceFieldSet() &&
           record.load_or_store->AsInstanceFieldSet()->IsVolatile()) ||
          (record.load_or_store->IsStaticFieldSet() &&
           record.load_or_store->AsStaticFieldSet()->IsVolatile())) {
        MaybeRecordStat(stats_, MethodCompilationStat::kRemovedVolatileStore);
      }
    }
  }

  // Eliminate singleton-classified instructions:
  //   - Constructor fences (they never escape this thread).
  //   - Allocations (if they are unused).
  for (HInstruction* new_instance : singleton_new_instances_) {
    size_t removed = HConstructorFence::RemoveConstructorFences(new_instance);
    MaybeRecordStat(stats_, MethodCompilationStat::kConstructorFenceRemovedLSE, removed);

    if (!new_instance->HasNonEnvironmentUses()) {
      new_instance->RemoveEnvironmentUsers();
      new_instance->GetBlock()->RemoveInstruction(new_instance);
      MaybeRecordStat(stats_, MethodCompilationStat::kFullLSEAllocationRemoved);
    }
  }
}

// The LSEVisitor is a ValueObject (indirectly through base classes) and therefore
// cannot be directly allocated with an arena allocator, so we need to wrap it.
class LSEVisitorWrapper : public DeletableArenaObject<kArenaAllocLSE> {
 public:
  LSEVisitorWrapper(HGraph* graph,
                    const HeapLocationCollector& heap_location_collector,
                    OptimizingCompilerStats* stats)
      : lse_visitor_(graph, heap_location_collector, stats) {}

  void Run() {
    lse_visitor_.Run();
  }

 private:
  LSEVisitor lse_visitor_;
};

bool LoadStoreElimination::Run() {
  if (graph_->IsDebuggable()) {
    // Debugger may set heap values or trigger deoptimization of callers.
    // Skip this optimization.
    return false;
  }
  ScopedArenaAllocator allocator(graph_->GetArenaStack());
  LoadStoreAnalysis lsa(graph_, stats_, &allocator);
  lsa.Run();
  const HeapLocationCollector& heap_location_collector = lsa.GetHeapLocationCollector();
  if (heap_location_collector.GetNumberOfHeapLocations() == 0) {
    // No HeapLocation information from LSA, skip this optimization.
    return false;
  }

  // Currently load-store analysis can't handle predicated load/stores; specifically pairs of
  // memory operations with different predicates.
  // TODO: support predicated SIMD.
  if (graph_->HasPredicatedSIMD()) {
    return false;
  }

  std::unique_ptr<LSEVisitorWrapper> lse_visitor(
      new (&allocator) LSEVisitorWrapper(graph_, heap_location_collector, stats_));
  lse_visitor->Run();
  return true;
}

#undef LSE_VLOG

}  // namespace art