Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2014 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "hash_set.h" |
| 18 | |
| 19 | #include <map> |
Richard Uhler | cf7792d | 2015-08-27 09:04:18 -0700 | [diff] [blame] | 20 | #include <forward_list> |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 21 | #include <sstream> |
| 22 | #include <string> |
| 23 | #include <unordered_set> |
Richard Uhler | cf7792d | 2015-08-27 09:04:18 -0700 | [diff] [blame] | 24 | #include <vector> |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 25 | |
Mathieu Chartier | 47f867a | 2015-03-18 10:39:00 -0700 | [diff] [blame] | 26 | #include <gtest/gtest.h> |
Mathieu Chartier | e7c9a8c | 2014-11-06 16:35:45 -0800 | [diff] [blame] | 27 | #include "hash_map.h" |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 28 | |
| 29 | namespace art { |
| 30 | |
Mathieu Chartier | e7c9a8c | 2014-11-06 16:35:45 -0800 | [diff] [blame] | 31 | struct IsEmptyFnString { |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 32 | void MakeEmpty(std::string& item) const { |
| 33 | item.clear(); |
| 34 | } |
| 35 | bool IsEmpty(const std::string& item) const { |
| 36 | return item.empty(); |
| 37 | } |
| 38 | }; |
| 39 | |
Mathieu Chartier | 47f867a | 2015-03-18 10:39:00 -0700 | [diff] [blame] | 40 | class HashSetTest : public testing::Test { |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 41 | public: |
| 42 | HashSetTest() : seed_(97421), unique_number_(0) { |
| 43 | } |
| 44 | std::string RandomString(size_t len) { |
| 45 | std::ostringstream oss; |
| 46 | for (size_t i = 0; i < len; ++i) { |
| 47 | oss << static_cast<char>('A' + PRand() % 64); |
| 48 | } |
| 49 | static_assert(' ' < 'A', "space must be less than a"); |
| 50 | oss << " " << unique_number_++; // Relies on ' ' < 'A' |
| 51 | return oss.str(); |
| 52 | } |
| 53 | void SetSeed(size_t seed) { |
| 54 | seed_ = seed; |
| 55 | } |
| 56 | size_t PRand() { // Pseudo random. |
| 57 | seed_ = seed_ * 1103515245 + 12345; |
| 58 | return seed_; |
| 59 | } |
| 60 | |
| 61 | private: |
| 62 | size_t seed_; |
| 63 | size_t unique_number_; |
| 64 | }; |
| 65 | |
| 66 | TEST_F(HashSetTest, TestSmoke) { |
| 67 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 68 | const std::string test_string = "hello world 1234"; |
| 69 | ASSERT_TRUE(hash_set.Empty()); |
| 70 | ASSERT_EQ(hash_set.Size(), 0U); |
| 71 | hash_set.Insert(test_string); |
| 72 | auto it = hash_set.Find(test_string); |
| 73 | ASSERT_EQ(*it, test_string); |
| 74 | auto after_it = hash_set.Erase(it); |
| 75 | ASSERT_TRUE(after_it == hash_set.end()); |
| 76 | ASSERT_TRUE(hash_set.Empty()); |
| 77 | ASSERT_EQ(hash_set.Size(), 0U); |
| 78 | it = hash_set.Find(test_string); |
| 79 | ASSERT_TRUE(it == hash_set.end()); |
| 80 | } |
| 81 | |
| 82 | TEST_F(HashSetTest, TestInsertAndErase) { |
| 83 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 84 | static constexpr size_t count = 1000; |
| 85 | std::vector<std::string> strings; |
| 86 | for (size_t i = 0; i < count; ++i) { |
| 87 | // Insert a bunch of elements and make sure we can find them. |
| 88 | strings.push_back(RandomString(10)); |
| 89 | hash_set.Insert(strings[i]); |
| 90 | auto it = hash_set.Find(strings[i]); |
| 91 | ASSERT_TRUE(it != hash_set.end()); |
| 92 | ASSERT_EQ(*it, strings[i]); |
| 93 | } |
| 94 | ASSERT_EQ(strings.size(), hash_set.Size()); |
| 95 | // Try to erase the odd strings. |
| 96 | for (size_t i = 1; i < count; i += 2) { |
| 97 | auto it = hash_set.Find(strings[i]); |
| 98 | ASSERT_TRUE(it != hash_set.end()); |
| 99 | ASSERT_EQ(*it, strings[i]); |
| 100 | hash_set.Erase(it); |
| 101 | } |
| 102 | // Test removed. |
| 103 | for (size_t i = 1; i < count; i += 2) { |
| 104 | auto it = hash_set.Find(strings[i]); |
| 105 | ASSERT_TRUE(it == hash_set.end()); |
| 106 | } |
| 107 | for (size_t i = 0; i < count; i += 2) { |
| 108 | auto it = hash_set.Find(strings[i]); |
| 109 | ASSERT_TRUE(it != hash_set.end()); |
| 110 | ASSERT_EQ(*it, strings[i]); |
| 111 | } |
| 112 | } |
| 113 | |
| 114 | TEST_F(HashSetTest, TestIterator) { |
| 115 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 116 | ASSERT_TRUE(hash_set.begin() == hash_set.end()); |
| 117 | static constexpr size_t count = 1000; |
| 118 | std::vector<std::string> strings; |
| 119 | for (size_t i = 0; i < count; ++i) { |
| 120 | // Insert a bunch of elements and make sure we can find them. |
| 121 | strings.push_back(RandomString(10)); |
| 122 | hash_set.Insert(strings[i]); |
| 123 | } |
| 124 | // Make sure we visit each string exactly once. |
| 125 | std::map<std::string, size_t> found_count; |
| 126 | for (const std::string& s : hash_set) { |
| 127 | ++found_count[s]; |
| 128 | } |
| 129 | for (size_t i = 0; i < count; ++i) { |
| 130 | ASSERT_EQ(found_count[strings[i]], 1U); |
| 131 | } |
| 132 | found_count.clear(); |
| 133 | // Remove all the elements with iterator erase. |
| 134 | for (auto it = hash_set.begin(); it != hash_set.end();) { |
| 135 | ++found_count[*it]; |
| 136 | it = hash_set.Erase(it); |
| 137 | ASSERT_EQ(hash_set.Verify(), 0U); |
| 138 | } |
| 139 | for (size_t i = 0; i < count; ++i) { |
| 140 | ASSERT_EQ(found_count[strings[i]], 1U); |
| 141 | } |
| 142 | } |
| 143 | |
| 144 | TEST_F(HashSetTest, TestSwap) { |
| 145 | HashSet<std::string, IsEmptyFnString> hash_seta, hash_setb; |
| 146 | std::vector<std::string> strings; |
| 147 | static constexpr size_t count = 1000; |
| 148 | for (size_t i = 0; i < count; ++i) { |
| 149 | strings.push_back(RandomString(10)); |
| 150 | hash_seta.Insert(strings[i]); |
| 151 | } |
| 152 | std::swap(hash_seta, hash_setb); |
| 153 | hash_seta.Insert("TEST"); |
| 154 | hash_setb.Insert("TEST2"); |
| 155 | for (size_t i = 0; i < count; ++i) { |
| 156 | strings.push_back(RandomString(10)); |
| 157 | hash_seta.Insert(strings[i]); |
| 158 | } |
| 159 | } |
| 160 | |
Igor Murashkin | 3552d96 | 2015-06-22 15:57:38 -0700 | [diff] [blame] | 161 | TEST_F(HashSetTest, TestShrink) { |
| 162 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 163 | std::vector<std::string> strings = {"a", "b", "c", "d", "e", "f", "g"}; |
| 164 | for (size_t i = 0; i < strings.size(); ++i) { |
| 165 | // Insert some strings into the beginning of our hash set to establish an initial size |
| 166 | hash_set.Insert(strings[i]); |
| 167 | } |
| 168 | |
| 169 | hash_set.ShrinkToMaximumLoad(); |
| 170 | const double initial_load = hash_set.CalculateLoadFactor(); |
| 171 | |
| 172 | // Insert a bunch of random strings to guarantee that we grow the capacity. |
| 173 | std::vector<std::string> random_strings; |
| 174 | static constexpr size_t count = 1000; |
| 175 | for (size_t i = 0; i < count; ++i) { |
| 176 | random_strings.push_back(RandomString(10)); |
| 177 | hash_set.Insert(random_strings[i]); |
| 178 | } |
| 179 | |
| 180 | // Erase all the extra strings which guarantees that our load factor will be really bad. |
| 181 | for (size_t i = 0; i < count; ++i) { |
| 182 | hash_set.Erase(hash_set.Find(random_strings[i])); |
| 183 | } |
| 184 | |
| 185 | const double bad_load = hash_set.CalculateLoadFactor(); |
| 186 | EXPECT_GT(initial_load, bad_load); |
| 187 | |
| 188 | // Shrink again, the load factor should be good again. |
| 189 | hash_set.ShrinkToMaximumLoad(); |
| 190 | EXPECT_DOUBLE_EQ(initial_load, hash_set.CalculateLoadFactor()); |
Igor Murashkin | e2facc5 | 2015-07-10 13:49:08 -0700 | [diff] [blame] | 191 | |
| 192 | // Make sure all the initial elements we had are still there |
| 193 | for (const std::string& initial_string : strings) { |
| 194 | EXPECT_NE(hash_set.end(), hash_set.Find(initial_string)) |
| 195 | << "expected to find " << initial_string; |
| 196 | } |
Igor Murashkin | 3552d96 | 2015-06-22 15:57:38 -0700 | [diff] [blame] | 197 | } |
| 198 | |
Mathieu Chartier | 32cc9ee | 2015-10-15 09:19:15 -0700 | [diff] [blame] | 199 | TEST_F(HashSetTest, TestLoadFactor) { |
| 200 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 201 | static constexpr size_t kStringCount = 1000; |
| 202 | static constexpr double kEpsilon = 0.01; |
| 203 | for (size_t i = 0; i < kStringCount; ++i) { |
| 204 | hash_set.Insert(RandomString(i % 10 + 1)); |
| 205 | } |
| 206 | // Check that changing the load factor resizes the table to be within the target range. |
| 207 | EXPECT_GE(hash_set.CalculateLoadFactor() + kEpsilon, hash_set.GetMinLoadFactor()); |
| 208 | EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); |
| 209 | hash_set.SetLoadFactor(0.1, 0.3); |
| 210 | EXPECT_DOUBLE_EQ(0.1, hash_set.GetMinLoadFactor()); |
| 211 | EXPECT_DOUBLE_EQ(0.3, hash_set.GetMaxLoadFactor()); |
| 212 | EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); |
| 213 | hash_set.SetLoadFactor(0.6, 0.8); |
| 214 | EXPECT_LE(hash_set.CalculateLoadFactor() - kEpsilon, hash_set.GetMaxLoadFactor()); |
| 215 | } |
| 216 | |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 217 | TEST_F(HashSetTest, TestStress) { |
| 218 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 219 | std::unordered_multiset<std::string> std_set; |
| 220 | std::vector<std::string> strings; |
| 221 | static constexpr size_t string_count = 2000; |
| 222 | static constexpr size_t operations = 100000; |
| 223 | static constexpr size_t target_size = 5000; |
| 224 | for (size_t i = 0; i < string_count; ++i) { |
| 225 | strings.push_back(RandomString(i % 10 + 1)); |
| 226 | } |
| 227 | const size_t seed = time(nullptr); |
| 228 | SetSeed(seed); |
| 229 | LOG(INFO) << "Starting stress test with seed " << seed; |
| 230 | for (size_t i = 0; i < operations; ++i) { |
| 231 | ASSERT_EQ(hash_set.Size(), std_set.size()); |
| 232 | size_t delta = std::abs(static_cast<ssize_t>(target_size) - |
| 233 | static_cast<ssize_t>(hash_set.Size())); |
| 234 | size_t n = PRand(); |
| 235 | if (n % target_size == 0) { |
| 236 | hash_set.Clear(); |
| 237 | std_set.clear(); |
| 238 | ASSERT_TRUE(hash_set.Empty()); |
| 239 | ASSERT_TRUE(std_set.empty()); |
| 240 | } else if (n % target_size < delta) { |
| 241 | // Skew towards adding elements until we are at the desired size. |
| 242 | const std::string& s = strings[PRand() % string_count]; |
| 243 | hash_set.Insert(s); |
| 244 | std_set.insert(s); |
| 245 | ASSERT_EQ(*hash_set.Find(s), *std_set.find(s)); |
| 246 | } else { |
| 247 | const std::string& s = strings[PRand() % string_count]; |
| 248 | auto it1 = hash_set.Find(s); |
| 249 | auto it2 = std_set.find(s); |
| 250 | ASSERT_EQ(it1 == hash_set.end(), it2 == std_set.end()); |
| 251 | if (it1 != hash_set.end()) { |
| 252 | ASSERT_EQ(*it1, *it2); |
| 253 | hash_set.Erase(it1); |
| 254 | std_set.erase(it2); |
| 255 | } |
| 256 | } |
| 257 | } |
| 258 | } |
| 259 | |
Mathieu Chartier | e7c9a8c | 2014-11-06 16:35:45 -0800 | [diff] [blame] | 260 | struct IsEmptyStringPair { |
| 261 | void MakeEmpty(std::pair<std::string, int>& pair) const { |
| 262 | pair.first.clear(); |
| 263 | } |
| 264 | bool IsEmpty(const std::pair<std::string, int>& pair) const { |
| 265 | return pair.first.empty(); |
| 266 | } |
| 267 | }; |
| 268 | |
| 269 | TEST_F(HashSetTest, TestHashMap) { |
| 270 | HashMap<std::string, int, IsEmptyStringPair> hash_map; |
| 271 | hash_map.Insert(std::make_pair(std::string("abcd"), 123)); |
| 272 | hash_map.Insert(std::make_pair(std::string("abcd"), 124)); |
| 273 | hash_map.Insert(std::make_pair(std::string("bags"), 444)); |
| 274 | auto it = hash_map.Find(std::string("abcd")); |
| 275 | ASSERT_EQ(it->second, 123); |
| 276 | hash_map.Erase(it); |
| 277 | it = hash_map.Find(std::string("abcd")); |
| 278 | ASSERT_EQ(it->second, 124); |
| 279 | } |
| 280 | |
Richard Uhler | cf7792d | 2015-08-27 09:04:18 -0700 | [diff] [blame] | 281 | struct IsEmptyFnVectorInt { |
| 282 | void MakeEmpty(std::vector<int>& item) const { |
| 283 | item.clear(); |
| 284 | } |
| 285 | bool IsEmpty(const std::vector<int>& item) const { |
| 286 | return item.empty(); |
| 287 | } |
| 288 | }; |
| 289 | |
| 290 | template <typename T> |
| 291 | size_t HashIntSequence(T begin, T end) { |
| 292 | size_t hash = 0; |
| 293 | for (auto iter = begin; iter != end; ++iter) { |
| 294 | hash = hash * 2 + *iter; |
| 295 | } |
| 296 | return hash; |
| 297 | }; |
| 298 | |
| 299 | struct VectorIntHashEquals { |
| 300 | std::size_t operator()(const std::vector<int>& item) const { |
| 301 | return HashIntSequence(item.begin(), item.end()); |
| 302 | } |
| 303 | |
| 304 | std::size_t operator()(const std::forward_list<int>& item) const { |
| 305 | return HashIntSequence(item.begin(), item.end()); |
| 306 | } |
| 307 | |
| 308 | bool operator()(const std::vector<int>& a, const std::vector<int>& b) const { |
| 309 | return a == b; |
| 310 | } |
| 311 | |
| 312 | bool operator()(const std::vector<int>& a, const std::forward_list<int>& b) const { |
| 313 | auto aiter = a.begin(); |
| 314 | auto biter = b.begin(); |
| 315 | while (aiter != a.end() && biter != b.end()) { |
| 316 | if (*aiter != *biter) { |
| 317 | return false; |
| 318 | } |
| 319 | aiter++; |
| 320 | biter++; |
| 321 | } |
| 322 | return (aiter == a.end() && biter == b.end()); |
| 323 | } |
| 324 | }; |
| 325 | |
| 326 | TEST_F(HashSetTest, TestLookupByAlternateKeyType) { |
| 327 | HashSet<std::vector<int>, IsEmptyFnVectorInt, VectorIntHashEquals, VectorIntHashEquals> hash_set; |
| 328 | hash_set.Insert(std::vector<int>({1, 2, 3, 4})); |
| 329 | hash_set.Insert(std::vector<int>({4, 2})); |
| 330 | ASSERT_EQ(hash_set.end(), hash_set.Find(std::vector<int>({1, 1, 1, 1}))); |
| 331 | ASSERT_NE(hash_set.end(), hash_set.Find(std::vector<int>({1, 2, 3, 4}))); |
| 332 | ASSERT_EQ(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 1, 1, 1}))); |
| 333 | ASSERT_NE(hash_set.end(), hash_set.Find(std::forward_list<int>({1, 2, 3, 4}))); |
| 334 | } |
| 335 | |
Mathieu Chartier | c482d38 | 2015-10-26 11:20:18 -0700 | [diff] [blame] | 336 | TEST_F(HashSetTest, TestReserve) { |
| 337 | HashSet<std::string, IsEmptyFnString> hash_set; |
| 338 | std::vector<size_t> sizes = {1, 10, 25, 55, 128, 1024, 4096}; |
| 339 | for (size_t size : sizes) { |
| 340 | hash_set.Reserve(size); |
| 341 | const size_t buckets_before = hash_set.NumBuckets(); |
| 342 | // Check that we expanded enough. |
| 343 | CHECK_GE(hash_set.ElementsUntilExpand(), size); |
| 344 | // Try inserting elements until we are at our reserve size and ensure the hash set did not |
| 345 | // expand. |
| 346 | while (hash_set.Size() < size) { |
| 347 | hash_set.Insert(std::to_string(hash_set.Size())); |
| 348 | } |
| 349 | CHECK_EQ(hash_set.NumBuckets(), buckets_before); |
| 350 | } |
| 351 | // Check the behaviour for shrinking, it does not necessarily resize down. |
| 352 | constexpr size_t size = 100; |
| 353 | hash_set.Reserve(size); |
| 354 | CHECK_GE(hash_set.ElementsUntilExpand(), size); |
| 355 | } |
| 356 | |
Mathieu Chartier | c2e2062 | 2014-11-03 11:41:47 -0800 | [diff] [blame] | 357 | } // namespace art |