/*
 * Copyright (C) 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | #ifndef ART_COMPILER_UTILS_DEDUPE_SET_H_ |
| 18 | #define ART_COMPILER_UTILS_DEDUPE_SET_H_ |
| 19 | |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 20 | #include <algorithm> |
| 21 | #include <inttypes.h> |
| 22 | #include <memory> |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 23 | #include <set> |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 24 | #include <string> |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 25 | |
| 26 | #include "base/mutex.h" |
| 27 | #include "base/stl_util.h" |
Brian Carlstrom | ba150c3 | 2013-08-27 17:31:03 -0700 | [diff] [blame] | 28 | #include "base/stringprintf.h" |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 29 | #include "utils/swap_space.h" |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 30 | |
| 31 | namespace art { |
| 32 | |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 33 | // A set of Keys that support a HashFunc returning HashType. Used to find duplicates of Key in the |
| 34 | // Add method. The data-structure is thread-safe through the use of internal locks, it also |
| 35 | // supports the lock being sharded. |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 36 | template <typename InKey, typename StoreKey, typename HashType, typename HashFunc, |
| 37 | HashType kShard = 1> |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 38 | class DedupeSet { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 39 | typedef std::pair<HashType, const InKey*> HashedInKey; |
| 40 | struct HashedKey { |
| 41 | StoreKey* store_ptr; |
| 42 | union { |
Mathieu Chartier | 2cebb24 | 2015-04-21 16:50:40 -0700 | [diff] [blame] | 43 | HashType store_hash; // Valid if store_ptr != null. |
| 44 | const HashedInKey* in_key; // Valid if store_ptr == null. |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 45 | }; |
| 46 | }; |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 47 | |
| 48 | class Comparator { |
| 49 | public: |
| 50 | bool operator()(const HashedKey& a, const HashedKey& b) const { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 51 | HashType a_hash = (a.store_ptr != nullptr) ? a.store_hash : a.in_key->first; |
| 52 | HashType b_hash = (b.store_ptr != nullptr) ? b.store_hash : b.in_key->first; |
| 53 | if (a_hash != b_hash) { |
| 54 | return a_hash < b_hash; |
| 55 | } |
| 56 | if (a.store_ptr != nullptr && b.store_ptr != nullptr) { |
| 57 | return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(), |
| 58 | b.store_ptr->begin(), b.store_ptr->end()); |
| 59 | } else if (a.store_ptr != nullptr && b.store_ptr == nullptr) { |
| 60 | return std::lexicographical_compare(a.store_ptr->begin(), a.store_ptr->end(), |
| 61 | b.in_key->second->begin(), b.in_key->second->end()); |
| 62 | } else if (a.store_ptr == nullptr && b.store_ptr != nullptr) { |
| 63 | return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(), |
| 64 | b.store_ptr->begin(), b.store_ptr->end()); |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 65 | } else { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 66 | return std::lexicographical_compare(a.in_key->second->begin(), a.in_key->second->end(), |
| 67 | b.in_key->second->begin(), b.in_key->second->end()); |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 68 | } |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 69 | } |
| 70 | }; |
| 71 | |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 72 | public: |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 73 | StoreKey* Add(Thread* self, const InKey& key) { |
| 74 | uint64_t hash_start; |
| 75 | if (kIsDebugBuild) { |
| 76 | hash_start = NanoTime(); |
| 77 | } |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 78 | HashType raw_hash = HashFunc()(key); |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 79 | if (kIsDebugBuild) { |
| 80 | uint64_t hash_end = NanoTime(); |
| 81 | hash_time_ += hash_end - hash_start; |
| 82 | } |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 83 | HashType shard_hash = raw_hash / kShard; |
| 84 | HashType shard_bin = raw_hash % kShard; |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 85 | HashedInKey hashed_in_key(shard_hash, &key); |
| 86 | HashedKey hashed_key; |
| 87 | hashed_key.store_ptr = nullptr; |
| 88 | hashed_key.in_key = &hashed_in_key; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 89 | MutexLock lock(self, *lock_[shard_bin]); |
| 90 | auto it = keys_[shard_bin].find(hashed_key); |
| 91 | if (it != keys_[shard_bin].end()) { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 92 | DCHECK(it->store_ptr != nullptr); |
| 93 | return it->store_ptr; |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 94 | } |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 95 | hashed_key.store_ptr = CreateStoreKey(key); |
| 96 | hashed_key.store_hash = shard_hash; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 97 | keys_[shard_bin].insert(hashed_key); |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 98 | return hashed_key.store_ptr; |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 99 | } |
| 100 | |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 101 | explicit DedupeSet(const char* set_name, SwapAllocator<void>& alloc) |
| 102 | : allocator_(alloc), hash_time_(0) { |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 103 | for (HashType i = 0; i < kShard; ++i) { |
Ian Rogers | ef7d42f | 2014-01-06 12:55:46 -0800 | [diff] [blame] | 104 | std::ostringstream oss; |
| 105 | oss << set_name << " lock " << i; |
| 106 | lock_name_[i] = oss.str(); |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 107 | lock_[i].reset(new Mutex(lock_name_[i].c_str())); |
| 108 | } |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 109 | } |
| 110 | |
| 111 | ~DedupeSet() { |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 112 | // Have to manually free all pointers. |
| 113 | for (auto& shard : keys_) { |
| 114 | for (const auto& hashed_key : shard) { |
| 115 | DCHECK(hashed_key.store_ptr != nullptr); |
| 116 | DeleteStoreKey(hashed_key.store_ptr); |
| 117 | } |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 118 | } |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 119 | } |
| 120 | |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 121 | std::string DumpStats() const { |
| 122 | size_t collision_sum = 0; |
| 123 | size_t collision_max = 0; |
| 124 | for (HashType shard = 0; shard < kShard; ++shard) { |
| 125 | HashType last_hash = 0; |
| 126 | size_t collision_cur_max = 0; |
| 127 | for (const HashedKey& key : keys_[shard]) { |
| 128 | DCHECK(key.store_ptr != nullptr); |
| 129 | if (key.store_hash == last_hash) { |
| 130 | collision_cur_max++; |
| 131 | if (collision_cur_max > 1) { |
| 132 | collision_sum++; |
| 133 | if (collision_cur_max > collision_max) { |
| 134 | collision_max = collision_cur_max; |
| 135 | } |
| 136 | } |
| 137 | } else { |
| 138 | collision_cur_max = 1; |
| 139 | last_hash = key.store_hash; |
| 140 | } |
| 141 | } |
| 142 | } |
| 143 | return StringPrintf("%zu collisions, %zu max bucket size, %" PRIu64 " ns hash time", |
| 144 | collision_sum, collision_max, hash_time_); |
| 145 | } |
| 146 | |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 147 | private: |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 148 | StoreKey* CreateStoreKey(const InKey& key) { |
| 149 | StoreKey* ret = allocator_.allocate(1); |
| 150 | allocator_.construct(ret, key.begin(), key.end(), allocator_); |
| 151 | return ret; |
| 152 | } |
| 153 | |
| 154 | void DeleteStoreKey(StoreKey* key) { |
| 155 | SwapAllocator<StoreKey> alloc(allocator_); |
| 156 | alloc.destroy(key); |
| 157 | alloc.deallocate(key, 1); |
| 158 | } |
| 159 | |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 160 | std::string lock_name_[kShard]; |
Ian Rogers | 700a402 | 2014-05-19 16:49:03 -0700 | [diff] [blame] | 161 | std::unique_ptr<Mutex> lock_[kShard]; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 162 | std::set<HashedKey, Comparator> keys_[kShard]; |
Andreas Gampe | e21dc3d | 2014-12-08 16:59:43 -0800 | [diff] [blame] | 163 | SwapAllocator<StoreKey> allocator_; |
| 164 | uint64_t hash_time_; |
Ian Rogers | d133b97 | 2013-09-05 11:01:30 -0700 | [diff] [blame] | 165 | |
Mathieu Chartier | 193bad9 | 2013-08-29 18:46:00 -0700 | [diff] [blame] | 166 | DISALLOW_COPY_AND_ASSIGN(DedupeSet); |
| 167 | }; |
| 168 | |
| 169 | } // namespace art |
| 170 | |
| 171 | #endif // ART_COMPILER_UTILS_DEDUPE_SET_H_ |