1 /*
2  * Copyright (C) 2015 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
18 #define ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
19 
20 #include "dedupe_set.h"
21 
22 #include <inttypes.h>
23 
24 #include <algorithm>
25 #include <unordered_map>
26 
27 #include "android-base/stringprintf.h"
28 
29 #include "base/hash_set.h"
30 #include "base/macros.h"
31 #include "base/mutex.h"
32 #include "base/stl_util.h"
33 #include "base/time_utils.h"
34 
35 namespace art HIDDEN {
36 
37 template <typename InKey,
38           typename StoreKey,
39           typename Alloc,
40           typename HashType,
41           typename HashFunc,
42           HashType kShard>
43 struct DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Stats {
44   size_t collision_sum = 0u;
45   size_t collision_max = 0u;
46   size_t total_probe_distance = 0u;
47   size_t total_size = 0u;
48 };
49 
50 template <typename InKey,
51           typename StoreKey,
52           typename Alloc,
53           typename HashType,
54           typename HashFunc,
55           HashType kShard>
56 class DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Shard {
57  public:
58   Shard(const Alloc& alloc, const std::string& lock_name)
59       : alloc_(alloc),
60         lock_name_(lock_name),
61         lock_(lock_name_.c_str()),
62         keys_() {
63   }
64 
65   ~Shard() {
66     for (const HashedKey<StoreKey>& key : keys_) {
67       DCHECK(key.Key() != nullptr);
68       alloc_.Destroy(key.Key());
69     }
70   }
71 
72   const StoreKey* Add(Thread* self, size_t hash, const InKey& in_key) REQUIRES(!lock_) {
73     MutexLock lock(self, lock_);
74     HashedKey<InKey> hashed_in_key(hash, &in_key);
75     auto it = keys_.find(hashed_in_key);
76     if (it != keys_.end()) {
77       DCHECK(it->Key() != nullptr);
78       return it->Key();
79     }
80     const StoreKey* store_key = alloc_.Copy(in_key);
81     keys_.insert(HashedKey<StoreKey> { hash, store_key });
82     return store_key;
83   }
84 
85   size_t Size(Thread* self) {
86     MutexLock lock(self, lock_);
87     return keys_.size();
88   }
89 
90   void UpdateStats(Thread* self, Stats* global_stats) REQUIRES(!lock_) {
91     // HashSet<> doesn't keep entries ordered by hash, so we actually allocate memory
92     // for bookkeeping while collecting the stats.
93     std::unordered_map<HashType, size_t> stats;
94     {
95       MutexLock lock(self, lock_);
96       // Note: The total_probe_distance will be updated with the current state.
97       // It may have been higher before a re-hash.
98       global_stats->total_probe_distance += keys_.TotalProbeDistance();
99       global_stats->total_size += keys_.size();
100       for (const HashedKey<StoreKey>& key : keys_) {
101         auto it = stats.find(key.Hash());
102         if (it == stats.end()) {
103           stats.insert({key.Hash(), 1u});
104         } else {
105           ++it->second;
106         }
107       }
108     }
109     for (const auto& entry : stats) {
110       size_t number_of_entries = entry.second;
111       if (number_of_entries > 1u) {
112         global_stats->collision_sum += number_of_entries - 1u;
113         global_stats->collision_max = std::max(global_stats->collision_max, number_of_entries);
114       }
115     }
116   }
117 
118  private:
119   template <typename T>
120   class HashedKey {
121    public:
122     HashedKey() : hash_(0u), key_(nullptr) { }
123     HashedKey(size_t hash, const T* key) : hash_(hash), key_(key) { }
124 
125     size_t Hash() const {
126       return hash_;
127     }
128 
129     const T* Key() const {
130       return key_;
131     }
132 
133     bool IsEmpty() const {
134       return Key() == nullptr;
135     }
136 
137     void MakeEmpty() {
138       key_ = nullptr;
139     }
140 
141    private:
142     size_t hash_;
143     const T* key_;
144   };
145 
146   class ShardEmptyFn {
147    public:
148     bool IsEmpty(const HashedKey<StoreKey>& key) const {
149       return key.IsEmpty();
150     }
151 
152     void MakeEmpty(HashedKey<StoreKey>& key) {
153       key.MakeEmpty();
154     }
155   };
156 
157   struct ShardHashFn {
158     template <typename T>
159     size_t operator()(const HashedKey<T>& key) const {
160       return key.Hash();
161     }
162   };
163 
164   struct ShardPred {
165     typename std::enable_if<!std::is_same<StoreKey, InKey>::value, bool>::type
166     operator()(const HashedKey<StoreKey>& lhs, const HashedKey<StoreKey>& rhs) const {
167       DCHECK(lhs.Key() != nullptr);
168       DCHECK(rhs.Key() != nullptr);
169       // Rehashing: stored keys are already deduplicated, so we can simply compare key pointers.
170       return lhs.Key() == rhs.Key();
171     }
172 
173     template <typename LeftT, typename RightT>
174     bool operator()(const HashedKey<LeftT>& lhs, const HashedKey<RightT>& rhs) const {
175       DCHECK(lhs.Key() != nullptr);
176       DCHECK(rhs.Key() != nullptr);
177       return lhs.Hash() == rhs.Hash() &&
178           lhs.Key()->size() == rhs.Key()->size() &&
179           std::equal(lhs.Key()->begin(), lhs.Key()->end(), rhs.Key()->begin());
180     }
181   };
182 
183   Alloc alloc_;
184   const std::string lock_name_;
185   Mutex lock_;
186   HashSet<HashedKey<StoreKey>, ShardEmptyFn, ShardHashFn, ShardPred> keys_ GUARDED_BY(lock_);
187 };
188 
189 template <typename InKey,
190           typename StoreKey,
191           typename Alloc,
192           typename HashType,
193           typename HashFunc,
194           HashType kShard>
195 const StoreKey* DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Add(
196     Thread* self, const InKey& key) {
197   uint64_t hash_start;
198   if (kIsDebugBuild) {
199     hash_start = NanoTime();
200   }
201   HashType raw_hash = HashFunc()(key);
202   if (kIsDebugBuild) {
203     uint64_t hash_end = NanoTime();
204     hash_time_ += hash_end - hash_start;
205   }
206   HashType shard_hash = raw_hash / kShard;
207   HashType shard_bin = raw_hash % kShard;
208   return shards_[shard_bin]->Add(self, shard_hash, key);
209 }
210 
211 template <typename InKey,
212           typename StoreKey,
213           typename Alloc,
214           typename HashType,
215           typename HashFunc,
216           HashType kShard>
217 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DedupeSet(const char* set_name,
218                                                                          const Alloc& alloc)
219     : hash_time_(0) {
220   for (HashType i = 0; i < kShard; ++i) {
221     std::ostringstream oss;
222     oss << set_name << " lock " << i;
223     shards_[i].reset(new Shard(alloc, oss.str()));
224   }
225 }
226 
227 template <typename InKey,
228           typename StoreKey,
229           typename Alloc,
230           typename HashType,
231           typename HashFunc,
232           HashType kShard>
233 DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::~DedupeSet() {
234   // Everything done by member destructors.
235 }
236 
237 template <typename InKey,
238           typename StoreKey,
239           typename Alloc,
240           typename HashType,
241           typename HashFunc,
242           HashType kShard>
243 size_t DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::Size(Thread* self) const {
244   size_t result = 0u;
245   for (const auto& shard : shards_) {
246     result += shard->Size(self);
247   }
248   return result;
249 }
250 
251 template <typename InKey,
252           typename StoreKey,
253           typename Alloc,
254           typename HashType,
255           typename HashFunc,
256           HashType kShard>
257 std::string DedupeSet<InKey, StoreKey, Alloc, HashType, HashFunc, kShard>::DumpStats(
258     Thread* self) const {
259   Stats stats;
260   for (HashType shard = 0; shard < kShard; ++shard) {
261     shards_[shard]->UpdateStats(self, &stats);
262   }
263   return android::base::StringPrintf("%zu collisions, %zu max hash collisions, "
264                                      "%zu/%zu probe distance, %" PRIu64 " ns hash time",
265                                      stats.collision_sum,
266                                      stats.collision_max,
267                                      stats.total_probe_distance,
268                                      stats.total_size,
269                                      hash_time_);
270 }
271 
272 
273 }  // namespace art
274 
275 #endif  // ART_COMPILER_UTILS_DEDUPE_SET_INL_H_
276