/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>

#include <atomic>
#include <map>
#include <memory>
#include <mutex>
#include <string>
#include <tuple>
#include <vector>

#include <unwindstack/Global.h>
#include <unwindstack/Maps.h>

#include "Check.h"
#include "GlobalDebugInterface.h"
#include "MemoryCache.h"
#include "MemoryRange.h"

// This implements the JIT Compilation Interface.
// See https://sourceware.org/gdb/onlinedocs/gdb/JIT-Interface.html
//
// We use it to get in-memory ELF files created by the ART compiler,
// but we also use it to get the list of DEX files used by the runtime.

namespace unwindstack {

42 // Implementation templated for ELF/DEX and for different architectures.
43 template <typename Symfile, typename Uintptr_T, typename Uint64_T>
44 class GlobalDebugImpl : public GlobalDebugInterface<Symfile>, public Global {
45  public:
46   static constexpr int kMaxRaceRetries = 16;
47   static constexpr int kMaxHeadRetries = 16;
48   static constexpr uint8_t kMagic[8] = {'A', 'n', 'd', 'r', 'o', 'i', 'd', '2'};
49 
50   struct JITCodeEntry {
51     Uintptr_T next;
52     Uintptr_T prev;
53     Uintptr_T symfile_addr;
54     Uint64_T symfile_size;
55     // Android-specific fields:
56     Uint64_T timestamp;
57     uint32_t seqlock;
58   };
59 
60   static constexpr size_t kSizeOfCodeEntryV1 = offsetof(JITCodeEntry, timestamp);
61   static constexpr size_t kSizeOfCodeEntryV2 = sizeof(JITCodeEntry);
62 
63   struct JITDescriptor {
64     uint32_t version;
65     uint32_t action_flag;
66     Uintptr_T relevant_entry;
67     Uintptr_T first_entry;
68     // Android-specific fields:
69     uint8_t magic[8];
70     uint32_t flags;
71     uint32_t sizeof_descriptor;
72     uint32_t sizeof_entry;
73     uint32_t seqlock;
74     Uint64_T timestamp;
75   };
76 
77   static constexpr size_t kSizeOfDescriptorV1 = offsetof(JITDescriptor, magic);
78   static constexpr size_t kSizeOfDescriptorV2 = sizeof(JITDescriptor);
79 
80   // This uniquely identifies entry in presence of concurrent modifications.
81   // Each (address,seqlock) pair is unique for each newly created JIT entry.
82   struct UID {
83     uint64_t address;  // Address of JITCodeEntry in memory.
84     uint32_t seqlock;  // This servers as "version" for the given address.
85 
86     bool operator<(const UID& other) const {
87       return std::tie(address, seqlock) < std::tie(other.address, other.seqlock);
88     }
89   };
90 
GlobalDebugImpl(ArchEnum arch,std::shared_ptr<Memory> & memory,std::vector<std::string> & search_libs,const char * global_variable_name)91   GlobalDebugImpl(ArchEnum arch, std::shared_ptr<Memory>& memory,
92                   std::vector<std::string>& search_libs, const char* global_variable_name)
93       : Global(memory, search_libs), global_variable_name_(global_variable_name) {
94     SetArch(arch);
95   }
96 
ReadDescriptor(uint64_t addr)97   bool ReadDescriptor(uint64_t addr) {
98     JITDescriptor desc{};
99     // Try to read the full descriptor including Android-specific fields.
100     if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV2)) {
101       // Fallback to just the minimal descriptor.
102       // This will make the magic check below fail.
103       if (!this->memory_->ReadFully(addr, &desc, kSizeOfDescriptorV1)) {
104         return false;
105       }
106     }
107 
108     if (desc.version != 1 || desc.first_entry == 0) {
109       // Either unknown version, or no jit entries.
110       return false;
111     }
112 
113     // Check if there are extra Android-specific fields.
114     if (memcmp(desc.magic, kMagic, sizeof(kMagic)) == 0) {
115       jit_entry_size_ = kSizeOfCodeEntryV2;
116       seqlock_offset_ = offsetof(JITCodeEntry, seqlock);
117     } else {
118       jit_entry_size_ = kSizeOfCodeEntryV1;
119       seqlock_offset_ = 0;
120     }
121     descriptor_addr_ = addr;
122     return true;
123   }
124 
ProcessArch()125   void ProcessArch() {}
126 
ReadVariableData(uint64_t ptr)127   bool ReadVariableData(uint64_t ptr) { return ReadDescriptor(ptr); }
128 
129   // Invoke callback for all symfiles that contain the given PC.
130   // Returns true if any callback returns true (which also aborts the iteration).
131   template <typename Callback /* (Symfile*) -> bool */>
ForEachSymfile(Maps * maps,uint64_t pc,Callback callback)132   bool ForEachSymfile(Maps* maps, uint64_t pc, Callback callback) {
133     // Use a single lock, this object should be used so infrequently that
134     // a fine grain lock is unnecessary.
135     std::lock_guard<std::mutex> guard(lock_);
136     if (descriptor_addr_ == 0) {
137       FindAndReadVariable(maps, global_variable_name_);
138       if (descriptor_addr_ == 0) {
139         return false;
140       }
141     }
142 
143     // Try to find the entry in already loaded symbol files.
144     for (auto& it : entries_) {
145       Symfile* symfile = it.second.get();
146       // Check seqlock to make sure that entry is still valid (it may be very old).
147       if (symfile->IsValidPc(pc) && CheckSeqlock(it.first) && callback(symfile)) {
148         return true;
149       }
150     }
151 
152     // Update all entries and retry.
153     ReadAllEntries(maps);
154     for (auto& it : entries_) {
155       Symfile* symfile = it.second.get();
156       // Note that the entry could become invalid since the ReadAllEntries above,
157       // but that is ok.  We don't want to fail or refresh the entries yet again.
158       // This is as if we found the entry in time and it became invalid after return.
159       // This is relevant when ART moves/packs JIT entries. That is, the entry is
160       // technically deleted, but only because it was copied into merged uber-entry.
161       // So the JIT method is still alive and the deleted data is still correct.
162       if (symfile->IsValidPc(pc) && callback(symfile)) {
163         return true;
164       }
165     }
166 
167     return false;
168   }
169 
GetFunctionName(Maps * maps,uint64_t pc,SharedString * name,uint64_t * offset)170   bool GetFunctionName(Maps* maps, uint64_t pc, SharedString* name, uint64_t* offset) {
171     // NB: If symfiles overlap in PC ranges, this will check all of them.
172     return ForEachSymfile(maps, pc, [pc, name, offset](Symfile* file) {
173       return file->GetFunctionName(pc, name, offset);
174     });
175   }
176 
Find(Maps * maps,uint64_t pc)177   Symfile* Find(Maps* maps, uint64_t pc) {
178     // NB: If symfiles overlap in PC ranges (which can happen for both ELF and DEX),
179     // this will check all of them and return one that also has a matching function.
180     Symfile* result = nullptr;
181     bool found = ForEachSymfile(maps, pc, [pc, &result](Symfile* file) {
182       result = file;
183       SharedString name;
184       uint64_t offset;
185       return file->GetFunctionName(pc, &name, &offset);
186     });
187     if (found) {
188       return result;  // Found symfile with symbol that also matches the PC.
189     }
190     // There is no matching symbol, so return any symfile for which the PC is valid.
191     // This is a useful fallback for tests, which often have symfiles with no functions.
192     return result;
193   }
194 
195   // Read all entries from the process and cache them locally.
196   // The linked list might be concurrently modified. We detect races and retry.
ReadAllEntries(Maps * maps)197   bool ReadAllEntries(Maps* maps) {
198     for (int i = 0; i < kMaxRaceRetries; i++) {
199       bool race = false;
200       if (!ReadAllEntries(maps, &race)) {
201         if (race) {
202           continue;  // Retry due to concurrent modification of the linked list.
203         }
204         return false;  // Failed to read entries.
205       }
206       return true;  // Success.
207     }
208     return false;  // Too many retries.
209   }
210 
211   // Read all JIT entries while assuming there might be concurrent modifications.
212   // If there is a race, the method will fail and the caller should retry the call.
ReadAllEntries(Maps * maps,bool * race)213   bool ReadAllEntries(Maps* maps, bool* race) {
214     // New entries might be added while we iterate over the linked list.
215     // In particular, an entry could be effectively moved from end to start due to
216     // the ART repacking algorithm, which groups smaller entries into a big one.
217     // Therefore keep reading the most recent entries until we reach a fixed point.
218     std::map<UID, std::shared_ptr<Symfile>> entries;
219     for (size_t i = 0; i < kMaxHeadRetries; i++) {
220       size_t old_size = entries.size();
221       if (!ReadNewEntries(maps, &entries, race)) {
222         return false;
223       }
224       if (entries.size() == old_size) {
225         entries_.swap(entries);
226         return true;
227       }
228     }
229     return false;  // Too many retries.
230   }
231 
232   // Read new JIT entries (head of linked list) until we find one that we have seen before.
233   // This method uses seqlocks extensively to ensure safety in case of concurrent modifications.
ReadNewEntries(Maps * maps,std::map<UID,std::shared_ptr<Symfile>> * entries,bool * race)234   bool ReadNewEntries(Maps* maps, std::map<UID, std::shared_ptr<Symfile>>* entries, bool* race) {
235     // Read the address of the head entry in the linked list.
236     UID uid;
237     if (!ReadNextField(descriptor_addr_ + offsetof(JITDescriptor, first_entry), &uid, race)) {
238       return false;
239     }
240 
241     // Follow the linked list.
242     while (uid.address != 0) {
243       // Check if we have reached an already cached entry (we restart from head repeatedly).
244       if (entries->count(uid) != 0) {
245         return true;
246       }
247 
248       // Read the entry.
249       JITCodeEntry data{};
250       if (!memory_->ReadFully(uid.address, &data, jit_entry_size_)) {
251         return false;
252       }
253       data.symfile_addr = StripAddressTag(data.symfile_addr);
254 
255       // Check the seqlock to verify the symfile_addr and symfile_size.
256       if (!CheckSeqlock(uid, race)) {
257         return false;
258       }
259 
260       // Copy and load the symfile.
261       auto it = entries_.find(uid);
262       if (it != entries_.end()) {
263         // The symfile was already loaded - just copy the reference.
264         entries->emplace(uid, it->second);
265       } else if (data.symfile_addr != 0) {
266         std::shared_ptr<Symfile> symfile;
267         bool ok = this->Load(maps, memory_, data.symfile_addr, data.symfile_size.value, symfile);
268         // Check seqlock first because load can fail due to race (so we want to trigger retry).
269         // TODO: Extract the memory copy code before the load, so that it is immune to races.
270         if (!CheckSeqlock(uid, race)) {
271           return false;  // The ELF/DEX data was removed before we loaded it.
272         }
273         // Exclude symbol files that fail to load (but continue loading other files).
274         if (ok) {
275           entries->emplace(uid, symfile);
276         }
277       }
278 
279       // Go to next entry.
280       UID next_uid;
281       if (!ReadNextField(uid.address + offsetof(JITCodeEntry, next), &next_uid, race)) {
282         return false;  // The next pointer was modified while we were reading it.
283       }
284       if (!CheckSeqlock(uid, race)) {
285         return false;  // This entry was deleted before we moved to the next one.
286       }
287       uid = next_uid;
288     }
289 
290     return true;
291   }
292 
293   // Read the address and seqlock of entry from the next field of linked list.
294   // This is non-trivial since they need to be consistent (as if we read both atomically).
295   //
296   // We're reading pointers, which can point at heap-allocated structures (the
297   // case for the __dex_debug_descriptor pointers at the time of writing).
298   // On 64 bit systems, the target process might have top-byte heap pointer
299   // tagging enabled, so we need to mask out the tag. We also know that the
300   // address must point to userspace, so the top byte of the address must be
301   // zero on both x64 and aarch64 without tagging. Therefore the masking can be
302   // done unconditionally.
ReadNextField(uint64_t next_field_addr,UID * uid,bool * race)303   bool ReadNextField(uint64_t next_field_addr, UID* uid, bool* race) {
304     Uintptr_T address[2]{0, 0};
305     uint32_t seqlock[2]{0, 0};
306     // Read all data twice: address[0], seqlock[0], address[1], seqlock[1].
307     for (int i = 0; i < 2; i++) {
308       std::atomic_thread_fence(std::memory_order_acquire);
309       if (!(memory_->ReadFully(next_field_addr, &address[i], sizeof(address[i])))) {
310         return false;
311       }
312       address[i] = StripAddressTag(address[i]);
313       if (seqlock_offset_ == 0) {
314         // There is no seqlock field.
315         *uid = UID{.address = address[0], .seqlock = 0};
316         return true;
317       }
318       if (address[i] != 0) {
319         std::atomic_thread_fence(std::memory_order_acquire);
320         if (!memory_->ReadFully(address[i] + seqlock_offset_, &seqlock[i], sizeof(seqlock[i]))) {
321           return false;
322         }
323       }
324     }
325     // Check that both reads returned identical values, and that the entry is live.
326     if (address[0] != address[1] || seqlock[0] != seqlock[1] || (seqlock[0] & 1) == 1) {
327       *race = true;
328       return false;
329     }
330     // Since address[1] is sandwiched between two seqlock reads, we know that
331     // at the time of address[1] read, the entry had the given seqlock value.
332     *uid = UID{.address = address[1], .seqlock = seqlock[1]};
333     return true;
334   }
335 
336   // Check that the given entry has not been deleted (or replaced by new entry at same address).
337   bool CheckSeqlock(UID uid, bool* race = nullptr) {
338     if (seqlock_offset_ == 0) {
339       // There is no seqlock field.
340       return true;
341     }
342     // This is required for memory synchronization if the we are working with local memory.
343     // For other types of memory (e.g. remote) this is no-op and has no significant effect.
344     std::atomic_thread_fence(std::memory_order_acquire);
345     uint32_t seen_seqlock;
346     if (!memory_->Read32(uid.address + seqlock_offset_, &seen_seqlock)) {
347       return false;
348     }
349     if (seen_seqlock != uid.seqlock) {
350       if (race != nullptr) {
351         *race = true;
352       }
353       return false;
354     }
355     return true;
356   }
357 
358   // AArch64 has Address tagging (aka Top Byte Ignore) feature, which is used by
359   // HWASAN and MTE to store metadata in the address. We need to remove the tag.
StripAddressTag(Uintptr_T addr)360   Uintptr_T StripAddressTag(Uintptr_T addr) {
361     if (arch() == ARCH_ARM64) {
362       // Make the value signed so it will be sign extended if necessary.
363       return static_cast<Uintptr_T>((static_cast<int64_t>(addr) << 8) >> 8);
364     }
365     return addr;
366   }
367 
368  private:
369   const char* global_variable_name_ = nullptr;
370   uint64_t descriptor_addr_ = 0;  // Non-zero if we have found (non-empty) descriptor.
371   uint32_t jit_entry_size_ = 0;
372   uint32_t seqlock_offset_ = 0;
373   std::map<UID, std::shared_ptr<Symfile>> entries_;  // Cached loaded entries.
374 
375   std::mutex lock_;
376 };
377 
// uint64_t values on x86 are not naturally aligned,
// but uint64_t values on ARM are naturally aligned.
// These wrappers let the JIT structs reproduce either layout.
// 64-bit value without any alignment requirement (packed), so a struct member of
// this type is placed directly after the preceding field.
struct Uint64_P {
  uint64_t value;
} __attribute__((packed));
// 64-bit value that is always 8-byte aligned, so a struct member of this type is
// padded up to the next multiple of eight.
struct Uint64_A {
  uint64_t value;
} __attribute__((aligned(8)));

387 template <typename Symfile>
CreateGlobalDebugImpl(ArchEnum arch,std::shared_ptr<Memory> & memory,std::vector<std::string> search_libs,const char * global_variable_name)388 std::unique_ptr<GlobalDebugInterface<Symfile>> CreateGlobalDebugImpl(
389     ArchEnum arch, std::shared_ptr<Memory>& memory, std::vector<std::string> search_libs,
390     const char* global_variable_name) {
391   CHECK(arch != ARCH_UNKNOWN);
392 
393   // The interface needs to see real-time changes in memory for synchronization with the
394   // concurrently running ART JIT compiler. Skip caching and read the memory directly.
395   std::shared_ptr<Memory> jit_memory;
396   MemoryCacheBase* cached_memory = memory->AsMemoryCacheBase();
397   if (cached_memory != nullptr) {
398     jit_memory = cached_memory->UnderlyingMemory();
399   } else {
400     jit_memory = memory;
401   }
402 
403   switch (arch) {
404     case ARCH_X86: {
405       using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_P>;
406       static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 12, "layout");
407       static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 28, "layout");
408       static_assert(sizeof(typename Impl::JITCodeEntry) == 32, "layout");
409       static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
410       return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
411     }
412     case ARCH_ARM: {
413       using Impl = GlobalDebugImpl<Symfile, uint32_t, Uint64_A>;
414       static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 16, "layout");
415       static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 32, "layout");
416       static_assert(sizeof(typename Impl::JITCodeEntry) == 40, "layout");
417       static_assert(sizeof(typename Impl::JITDescriptor) == 48, "layout");
418       return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
419     }
420     case ARCH_ARM64:
421     case ARCH_X86_64:
422     case ARCH_RISCV64: {
423       using Impl = GlobalDebugImpl<Symfile, uint64_t, Uint64_A>;
424       static_assert(offsetof(typename Impl::JITCodeEntry, symfile_size) == 24, "layout");
425       static_assert(offsetof(typename Impl::JITCodeEntry, seqlock) == 40, "layout");
426       static_assert(sizeof(typename Impl::JITCodeEntry) == 48, "layout");
427       static_assert(sizeof(typename Impl::JITDescriptor) == 56, "layout");
428       return std::make_unique<Impl>(arch, jit_memory, search_libs, global_variable_name);
429     }
430     default:
431       abort();
432   }
433 }
434 
}  // namespace unwindstack