/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_BASIC_HASHTABLE_H
#define ANDROID_BASIC_HASHTABLE_H

#include <stdint.h>
#include <sys/types.h>
#include <utils/SharedBuffer.h>
#include <utils/TypeHelpers.h>

namespace android {

/* Implementation type.  Nothing to see here. */
// Non-template core of BasicHashtable: open-addressed bucket storage with
// copy-on-write sharing via SharedBuffer.  Entry type specifics (copy,
// compare, destroy) are supplied by the derived template through the pure
// virtual hooks below.  Most operations are defined out-of-line in the .cpp.
class BasicHashtableImpl {
protected:
    struct Bucket {
        // The collision flag indicates that the bucket is part of a collision chain
        // such that at least two entries both hash to this bucket.  When true, we
        // may need to seek further along the chain to find the entry.
        static const uint32_t COLLISION = 0x80000000UL;

        // The present flag indicates that the bucket contains an initialized entry value.
        static const uint32_t PRESENT = 0x40000000UL;

        // Mask for 30 bits worth of the hash code that are stored within the bucket to
        // speed up lookups and rehashing by eliminating the need to recalculate the
        // hash code of the entry's key.
        static const uint32_t HASH_MASK = 0x3fffffffUL;

        // Combined value that stores the collision and present flags as well as
        // a 30 bit hash code.
        uint32_t cookie;

        // Storage for the entry begins here.  Zero-length trailing array: the
        // actual entry bytes follow the cookie, so a bucket occupies
        // sizeof(Bucket) + entry size (see mBucketSize).
        char entry[0];
    };

    BasicHashtableImpl(size_t entrySize, bool hasTrivialDestructor,
            size_t minimumInitialCapacity, float loadFactor);
    BasicHashtableImpl(const BasicHashtableImpl& other);

    // Releases this table's reference to the bucket storage.  Called by the
    // derived destructor (see BasicHashtable::~BasicHashtable); this class
    // deliberately has no virtual destructor of its own.
    void dispose();

    // Copy-on-write barrier: if the bucket storage is shared with another
    // table, clone it so this table exclusively owns its buckets before any
    // mutation through a non-const reference.
    inline void edit() {
        if (mBuckets && !SharedBuffer::bufferFromData(mBuckets)->onlyOwner()) {
            clone();
        }
    }

    // Makes this table a copy of 'other' (storage is shared copy-on-write).
    void setTo(const BasicHashtableImpl& other);
    // Destroys all entries immediately.
    void clear();

    // Iteration and lookup primitives; -1 begins iteration / search, -1 result
    // means "none".  See the documented wrappers in BasicHashtable below.
    ssize_t next(ssize_t index) const;
    ssize_t find(ssize_t index, hash_t hash, const void* __restrict__ key) const;
    size_t add(hash_t hash, const void* __restrict__ entry);
    void removeAt(size_t index);
    void rehash(size_t minimumCapacity, float loadFactor);

    const size_t mBucketSize; // number of bytes per bucket including the entry
    const bool mHasTrivialDestructor; // true if the entry type does not require destruction
    size_t mCapacity;         // number of buckets that can be filled before exceeding load factor
    float mLoadFactor;        // load factor
    size_t mSize;             // number of elements actually in the table
    size_t mFilledBuckets;    // number of buckets for which collision or present is true
    size_t mBucketCount;      // number of slots in the mBuckets array
    void* mBuckets;           // array of buckets, as a SharedBuffer

    // Returns the bucket at 'index' within 'buckets'.  Buckets are laid out at
    // mBucketSize strides, so indexing is done in raw bytes.
    inline const Bucket& bucketAt(const void* __restrict__ buckets, size_t index) const {
        return *reinterpret_cast<const Bucket*>(
                static_cast<const uint8_t*>(buckets) + index * mBucketSize);
    }

    inline Bucket& bucketAt(void* __restrict__ buckets, size_t index) const {
        return *reinterpret_cast<Bucket*>(static_cast<uint8_t*>(buckets) + index * mBucketSize);
    }

    // Type-specific hooks implemented by the derived template class.
    virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const = 0;
    virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const = 0;
    virtual void destroyBucketEntry(Bucket& bucket) const = 0;

private:
    // Deep-copies the shared bucket storage so this table becomes sole owner.
    void clone();

    // Allocates a bucket array as a SharedBuffer.
    void* allocateBuckets(size_t count) const;

    // Releases a bucket array's associated SharedBuffer.
    void releaseBuckets(void* __restrict__ buckets, size_t count) const;

    // Destroys the contents of buckets (invokes destroyBucketEntry for each
    // populated bucket if needed).
    void destroyBuckets(void* __restrict__ buckets, size_t count) const;

    // Copies the content of buckets (copies the cookie and invokes copyBucketEntry
    // for each populated bucket if needed).
    void copyBuckets(const void* __restrict__ fromBuckets,
            void* __restrict__ toBuckets, size_t count) const;

    // Determines the appropriate size of a bucket array to store a certain minimum
    // number of entries and returns its effective capacity.
    static void determineCapacity(size_t minimumCapacity, float loadFactor,
            size_t* __restrict__ outBucketCount, size_t* __restrict__ outCapacity);

    // Trim a hash code to 30 bits to match what we store in the bucket's cookie.
    // The top two bits are folded in (XOR) rather than discarded so they still
    // influence the stored hash.
    inline static hash_t trimHash(hash_t hash) {
        return (hash & Bucket::HASH_MASK) ^ (hash >> 30);
    }

    // Returns the index of the first bucket that is in the collision chain
    // for the specified hash code, given the total number of buckets.
    // (Primary hash)
    inline static size_t chainStart(hash_t hash, size_t count) {
        return hash % count;
    }

    // Returns the increment to add to a bucket index to seek to the next bucket
    // in the collision chain for the specified hash code, given the total number of buckets.
    // (Secondary hash)  The "% (count - 1) + 1" keeps the step in [1, count-1],
    // so the probe never stalls on a zero increment.
    inline static size_t chainIncrement(hash_t hash, size_t count) {
        return ((hash >> 7) | (hash << 25)) % (count - 1) + 1;
    }

    // Returns the index of the next bucket that is in the collision chain
    // that is defined by the specified increment, given the total number of buckets.
    inline static size_t chainSeek(size_t index, size_t increment, size_t count) {
        return (index + increment) % count;
    }
};

/*
 * A BasicHashtable stores entries that are indexed by hash code in place
 * within an array.  The basic operations are finding entries by key,
 * adding new entries and removing existing entries.
 *
 * This class provides a very limited set of operations with simple semantics.
 * It is intended to be used as a building block to construct more complex
 * and interesting data structures such as HashMap.  Think very hard before
 * adding anything extra to BasicHashtable, it probably belongs at a
 * higher level of abstraction.
 *
 * TKey: The key type.
 * TEntry: The entry type which is what is actually stored in the array.
 *
 * TKey must support the following contract:
 *     bool operator==(const TKey& other) const;  // return true if equal
 *     bool operator!=(const TKey& other) const;  // return true if unequal
 *
 * TEntry must support the following contract:
 *     const TKey& getKey() const;  // get the key from the entry
 *
 * This class supports storing entries with duplicate keys.  Of course, it can't
 * tell them apart during removal so only the first entry will be removed.
 * We do this because it means that operations like add() can't fail.
 */
| 169 | template <typename TKey, typename TEntry> |
| 170 | class BasicHashtable : private BasicHashtableImpl { |
| 171 | public: |
| 172 | /* Creates a hashtable with the specified minimum initial capacity. |
| 173 | * The underlying array will be created when the first entry is added. |
| 174 | * |
| 175 | * minimumInitialCapacity: The minimum initial capacity for the hashtable. |
| 176 | * Default is 0. |
| 177 | * loadFactor: The desired load factor for the hashtable, between 0 and 1. |
| 178 | * Default is 0.75. |
| 179 | */ |
| 180 | BasicHashtable(size_t minimumInitialCapacity = 0, float loadFactor = 0.75f); |
| 181 | |
| 182 | /* Copies a hashtable. |
| 183 | * The underlying storage is shared copy-on-write. |
| 184 | */ |
| 185 | BasicHashtable(const BasicHashtable& other); |
| 186 | |
| 187 | /* Clears and destroys the hashtable. |
| 188 | */ |
| 189 | virtual ~BasicHashtable(); |
| 190 | |
| 191 | /* Making this hashtable a copy of the other hashtable. |
| 192 | * The underlying storage is shared copy-on-write. |
| 193 | * |
| 194 | * other: The hashtable to copy. |
| 195 | */ |
| 196 | inline BasicHashtable<TKey, TEntry>& operator =(const BasicHashtable<TKey, TEntry> & other) { |
| 197 | setTo(other); |
| 198 | return *this; |
| 199 | } |
| 200 | |
| 201 | /* Returns the number of entries in the hashtable. |
| 202 | */ |
| 203 | inline size_t size() const { |
| 204 | return mSize; |
| 205 | } |
| 206 | |
| 207 | /* Returns the capacity of the hashtable, which is the number of elements that can |
| 208 | * added to the hashtable without requiring it to be grown. |
| 209 | */ |
| 210 | inline size_t capacity() const { |
| 211 | return mCapacity; |
| 212 | } |
| 213 | |
| 214 | /* Returns the number of buckets that the hashtable has, which is the size of its |
| 215 | * underlying array. |
| 216 | */ |
| 217 | inline size_t bucketCount() const { |
| 218 | return mBucketCount; |
| 219 | } |
| 220 | |
| 221 | /* Returns the load factor of the hashtable. */ |
| 222 | inline float loadFactor() const { |
| 223 | return mLoadFactor; |
| 224 | }; |
| 225 | |
| 226 | /* Returns a const reference to the entry at the specified index. |
| 227 | * |
| 228 | * index: The index of the entry to retrieve. Must be a valid index within |
| 229 | * the bounds of the hashtable. |
| 230 | */ |
| 231 | inline const TEntry& entryAt(size_t index) const { |
| 232 | return entryFor(bucketAt(mBuckets, index)); |
| 233 | } |
| 234 | |
| 235 | /* Returns a non-const reference to the entry at the specified index. |
| 236 | * |
| 237 | * index: The index of the entry to edit. Must be a valid index within |
| 238 | * the bounds of the hashtable. |
| 239 | */ |
| 240 | inline TEntry& editEntryAt(size_t index) { |
| 241 | edit(); |
| 242 | return entryFor(bucketAt(mBuckets, index)); |
| 243 | } |
| 244 | |
| 245 | /* Clears the hashtable. |
| 246 | * All entries in the hashtable are destroyed immediately. |
| 247 | * If you need to do something special with the entries in the hashtable then iterate |
| 248 | * over them and do what you need before clearing the hashtable. |
| 249 | */ |
| 250 | inline void clear() { |
| 251 | BasicHashtableImpl::clear(); |
| 252 | } |
| 253 | |
| 254 | /* Returns the index of the next entry in the hashtable given the index of a previous entry. |
| 255 | * If the given index is -1, then returns the index of the first entry in the hashtable, |
| 256 | * if there is one, or -1 otherwise. |
| 257 | * If the given index is not -1, then returns the index of the next entry in the hashtable, |
| 258 | * in strictly increasing order, or -1 if there are none left. |
| 259 | * |
| 260 | * index: The index of the previous entry that was iterated, or -1 to begin |
| 261 | * iteration at the beginning of the hashtable. |
| 262 | */ |
| 263 | inline ssize_t next(ssize_t index) const { |
| 264 | return BasicHashtableImpl::next(index); |
| 265 | } |
| 266 | |
| 267 | /* Finds the index of an entry with the specified key. |
| 268 | * If the given index is -1, then returns the index of the first matching entry, |
| 269 | * otherwise returns the index of the next matching entry. |
| 270 | * If the hashtable contains multiple entries with keys that match the requested |
| 271 | * key, then the sequence of entries returned is arbitrary. |
| 272 | * Returns -1 if no entry was found. |
| 273 | * |
| 274 | * index: The index of the previous entry with the specified key, or -1 to |
| 275 | * find the first matching entry. |
| 276 | * hash: The hashcode of the key. |
| 277 | * key: The key. |
| 278 | */ |
| 279 | inline ssize_t find(ssize_t index, hash_t hash, const TKey& key) const { |
| 280 | return BasicHashtableImpl::find(index, hash, &key); |
| 281 | } |
| 282 | |
| 283 | /* Adds the entry to the hashtable. |
| 284 | * Returns the index of the newly added entry. |
| 285 | * If an entry with the same key already exists, then a duplicate entry is added. |
| 286 | * If the entry will not fit, then the hashtable's capacity is increased and |
| 287 | * its contents are rehashed. See rehash(). |
| 288 | * |
| 289 | * hash: The hashcode of the key. |
| 290 | * entry: The entry to add. |
| 291 | */ |
| 292 | inline size_t add(hash_t hash, const TEntry& entry) { |
| 293 | return BasicHashtableImpl::add(hash, &entry); |
| 294 | } |
| 295 | |
| 296 | /* Removes the entry with the specified index from the hashtable. |
| 297 | * The entry is destroyed immediately. |
| 298 | * The index must be valid. |
| 299 | * |
| 300 | * The hashtable is not compacted after an item is removed, so it is legal |
| 301 | * to continue iterating over the hashtable using next() or find(). |
| 302 | * |
| 303 | * index: The index of the entry to remove. Must be a valid index within the |
| 304 | * bounds of the hashtable, and it must refer to an existing entry. |
| 305 | */ |
| 306 | inline void removeAt(size_t index) { |
| 307 | BasicHashtableImpl::removeAt(index); |
| 308 | } |
| 309 | |
| 310 | /* Rehashes the contents of the hashtable. |
| 311 | * Grows the hashtable to at least the specified minimum capacity or the |
| 312 | * current number of elements, whichever is larger. |
| 313 | * |
| 314 | * Rehashing causes all entries to be copied and the entry indices may change. |
| 315 | * Although the hash codes are cached by the hashtable, rehashing can be an |
| 316 | * expensive operation and should be avoided unless the hashtable's size |
| 317 | * needs to be changed. |
| 318 | * |
| 319 | * Rehashing is the only way to change the capacity or load factor of the |
| 320 | * hashtable once it has been created. It can be used to compact the |
| 321 | * hashtable by choosing a minimum capacity that is smaller than the current |
| 322 | * capacity (such as 0). |
| 323 | * |
| 324 | * minimumCapacity: The desired minimum capacity after rehashing. |
| 325 | * loadFactor: The desired load factor after rehashing. |
| 326 | */ |
| 327 | inline void rehash(size_t minimumCapacity, float loadFactor) { |
| 328 | BasicHashtableImpl::rehash(minimumCapacity, loadFactor); |
| 329 | } |
| 330 | |
| 331 | protected: |
| 332 | static inline const TEntry& entryFor(const Bucket& bucket) { |
| 333 | return reinterpret_cast<const TEntry&>(bucket.entry); |
| 334 | } |
| 335 | |
| 336 | static inline TEntry& entryFor(Bucket& bucket) { |
| 337 | return reinterpret_cast<TEntry&>(bucket.entry); |
| 338 | } |
| 339 | |
| 340 | virtual bool compareBucketKey(const Bucket& bucket, const void* __restrict__ key) const; |
| 341 | virtual void initializeBucketEntry(Bucket& bucket, const void* __restrict__ entry) const; |
| 342 | virtual void destroyBucketEntry(Bucket& bucket) const; |
| 343 | |
| 344 | private: |
| 345 | // For dumping the raw contents of a hashtable during testing. |
| 346 | friend class BasicHashtableTest; |
| 347 | inline uint32_t cookieAt(size_t index) const { |
| 348 | return bucketAt(mBuckets, index).cookie; |
| 349 | } |
| 350 | }; |
| 351 | |
// Constructs an empty hashtable.  The entry size and triviality of TEntry's
// destructor are captured once here and handed to the non-template impl.
template <typename TKey, typename TEntry>
BasicHashtable<TKey, TEntry>::BasicHashtable(size_t minimumInitialCapacity, float loadFactor) :
        BasicHashtableImpl(sizeof(TEntry), traits<TEntry>::has_trivial_dtor,
                minimumInitialCapacity, loadFactor) {
}

// Copy constructor: bucket storage is shared copy-on-write by the impl.
template <typename TKey, typename TEntry>
BasicHashtable<TKey, TEntry>::BasicHashtable(const BasicHashtable<TKey, TEntry>& other) :
        BasicHashtableImpl(other) {
}

// Destroys the hashtable, releasing the bucket storage.  Must run here in the
// derived class because dispose() invokes the virtual destroyBucketEntry hook.
template <typename TKey, typename TEntry>
BasicHashtable<TKey, TEntry>::~BasicHashtable() {
    dispose();
}

| 368 | template <typename TKey, typename TEntry> |
| 369 | bool BasicHashtable<TKey, TEntry>::compareBucketKey(const Bucket& bucket, |
| 370 | const void* __restrict__ key) const { |
| 371 | return entryFor(bucket).getKey() == *static_cast<const TKey*>(key); |
| 372 | } |
| 373 | |
| 374 | template <typename TKey, typename TEntry> |
| 375 | void BasicHashtable<TKey, TEntry>::initializeBucketEntry(Bucket& bucket, |
| 376 | const void* __restrict__ entry) const { |
| 377 | if (!traits<TEntry>::has_trivial_copy) { |
| 378 | new (&entryFor(bucket)) TEntry(*(static_cast<const TEntry*>(entry))); |
| 379 | } else { |
| 380 | memcpy(&entryFor(bucket), entry, sizeof(TEntry)); |
| 381 | } |
| 382 | } |
| 383 | |
| 384 | template <typename TKey, typename TEntry> |
| 385 | void BasicHashtable<TKey, TEntry>::destroyBucketEntry(Bucket& bucket) const { |
| 386 | if (!traits<TEntry>::has_trivial_dtor) { |
| 387 | entryFor(bucket).~TEntry(); |
| 388 | } |
| 389 | } |
| 390 | |
}; // namespace android

#endif // ANDROID_BASIC_HASHTABLE_H