ares_htable.c
/* MIT License
 *
 * Copyright (c) 2023 Brad House
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * SPDX-License-Identifier: MIT
 */
#include "ares_private.h"
#include "ares_llist.h"
#include "ares_htable.h"

#define ARES__HTABLE_MAX_BUCKETS    (1U << 24)
#define ARES__HTABLE_MIN_BUCKETS    (1U << 4)
#define ARES__HTABLE_EXPAND_PERCENT 75
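
/* Worked example (illustrative, not load-bearing): with
 * ARES__HTABLE_EXPAND_PERCENT at 75 and ARES__HTABLE_MIN_BUCKETS at 16, the
 * table first doubles when a 13th distinct key is inserted, since
 * 13 > (16 * 75) / 100 == 12.  Replacements of existing keys don't trigger
 * expansion.  See ares_htable_insert() below. */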

struct ares_htable {
  ares_htable_hashfunc_t    hash;
  ares_htable_bucket_key_t  bucket_key;
  ares_htable_bucket_free_t bucket_free;
  ares_htable_key_eq_t      key_eq;
  unsigned int              seed;
  unsigned int              size;
  size_t                    num_keys;
  size_t                    num_collisions;
  /* NOTE: if we converted buckets into ares_slist_t we could guarantee
   *       O(log n) worst-case insert and search performance on hash
   *       collisions.  (We'd also need to make key_eq into a key_cmp to
   *       support sorting.)  That said, the risk with a random hash seed is
   *       near zero, and ares_slist_t is heavier weight, so I think using
   *       ares_llist_t is an overall win. */
  ares_llist_t            **buckets;
};

static unsigned int ares_htable_generate_seed(ares_htable_t *htable)
{
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
  /* Seed needs to be static for fuzzing */
  return 0;
#else
  unsigned int seed = 0;
  time_t       t    = time(NULL);

  /* Mix a heap address, a stack address, and the current time to generate a
   * seed.  It doesn't have to be cryptographically secure, just quick; with
   * this small amount of effort the likelihood of a hash collision attack is
   * very low. */
  seed |= (unsigned int)((size_t)htable & 0xFFFFFFFF);
  seed |= (unsigned int)((size_t)&seed & 0xFFFFFFFF);
  seed |= (unsigned int)(((ares_uint64_t)t) & 0xFFFFFFFF);
  return seed;
#endif
}

static void ares_htable_buckets_destroy(ares_llist_t **buckets,
                                        unsigned int size,
                                        ares_bool_t destroy_vals)
{
  unsigned int i;

  if (buckets == NULL) {
    return;
  }

  for (i = 0; i < size; i++) {
    if (buckets[i] == NULL) {
      continue;
    }

    if (!destroy_vals) {
      ares_llist_replace_destructor(buckets[i], NULL);
    }

    ares_llist_destroy(buckets[i]);
  }

  ares_free(buckets);
}

void ares_htable_destroy(ares_htable_t *htable)
{
  if (htable == NULL) {
    return;
  }
  ares_htable_buckets_destroy(htable->buckets, htable->size, ARES_TRUE);
  ares_free(htable);
}

ares_htable_t *ares_htable_create(ares_htable_hashfunc_t hash_func,
                                  ares_htable_bucket_key_t bucket_key,
                                  ares_htable_bucket_free_t bucket_free,
                                  ares_htable_key_eq_t key_eq)
{
  ares_htable_t *htable = NULL;

  if (hash_func == NULL || bucket_key == NULL || bucket_free == NULL ||
      key_eq == NULL) {
    goto fail;
  }

  htable = ares_malloc_zero(sizeof(*htable));
  if (htable == NULL) {
    goto fail;
  }

  htable->hash        = hash_func;
  htable->bucket_key  = bucket_key;
  htable->bucket_free = bucket_free;
  htable->key_eq      = key_eq;
  htable->seed        = ares_htable_generate_seed(htable);
  htable->size        = ARES__HTABLE_MIN_BUCKETS;
  htable->buckets     = ares_malloc_zero(sizeof(*htable->buckets) * htable->size);

  if (htable->buckets == NULL) {
    goto fail;
  }

  return htable;

fail:
  ares_htable_destroy(htable);
  return NULL;
}

const void **ares_htable_all_buckets(const ares_htable_t *htable, size_t *num)
{
  const void **out = NULL;
  size_t       cnt = 0;
  size_t       i;

  if (htable == NULL || num == NULL) {
    return NULL; /* LCOV_EXCL_LINE */
  }

  *num = 0;

  out = ares_malloc_zero(sizeof(*out) * htable->num_keys);
  if (out == NULL) {
    return NULL; /* LCOV_EXCL_LINE */
  }

  for (i = 0; i < htable->size; i++) {
    ares_llist_node_t *node;
    for (node = ares_llist_node_first(htable->buckets[i]); node != NULL;
         node = ares_llist_node_next(node)) {
      out[cnt++] = ares_llist_node_val(node);
    }
  }

  *num = cnt;
  return out;
}

/*! Computes the hashtable index for a key: the hash of the key reduced to
 *  the size of the bucket list.  We use "hash & (size - 1)" since size is
 *  guaranteed to be a power of 2.  This is equivalent to "hash % size", but
 *  should be more efficient. */
#define HASH_IDX(h, key) ((h)->hash((key), (h)->seed) & ((h)->size - 1))
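
/* Worked example (illustrative): with size == 16, a hash of 0x5A yields
 * index 0x5A & 0xF == 10, the same result as 0x5A % 16 == 10. */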

static ares_llist_node_t *ares_htable_find(const ares_htable_t *htable,
                                           unsigned int idx, const void *key)
{
  ares_llist_node_t *node = NULL;

  for (node = ares_llist_node_first(htable->buckets[idx]); node != NULL;
       node = ares_llist_node_next(node)) {
    if (htable->key_eq(key, htable->bucket_key(ares_llist_node_val(node)))) {
      break;
    }
  }

  return node;
}

static ares_bool_t ares_htable_expand(ares_htable_t *htable)
{
  ares_llist_t **buckets            = NULL;
  unsigned int   old_size           = htable->size;
  size_t         i;
  ares_llist_t **prealloc_llist     = NULL;
  size_t         prealloc_llist_len = 0;
  ares_bool_t    rv                 = ARES_FALSE;

  /* Not a failure, just won't expand */
  if (old_size == ARES__HTABLE_MAX_BUCKETS) {
    return ARES_TRUE; /* LCOV_EXCL_LINE */
  }

  htable->size <<= 1;

  /* We must pre-allocate all memory we'll need before moving entries to the
   * new hash array.  Otherwise, if a memory allocation failed in the middle,
   * we wouldn't be able to recover. */
  buckets = ares_malloc_zero(sizeof(*buckets) * htable->size);
  if (buckets == NULL) {
    goto done; /* LCOV_EXCL_LINE */
  }

  /* The maximum number of new llists we'll need is the number of collisions
   * that were recorded */
  prealloc_llist_len = htable->num_collisions;
  if (prealloc_llist_len) {
    prealloc_llist =
      ares_malloc_zero(sizeof(*prealloc_llist) * prealloc_llist_len);
    if (prealloc_llist == NULL) {
      goto done; /* LCOV_EXCL_LINE */
    }
  }
  for (i = 0; i < prealloc_llist_len; i++) {
    prealloc_llist[i] = ares_llist_create(htable->bucket_free);
    if (prealloc_llist[i] == NULL) {
      goto done;
    }
  }

  /* Iterate across all buckets and move the entries to the new buckets */
  htable->num_collisions = 0;
  for (i = 0; i < old_size; i++) {
    ares_llist_node_t *node;

    /* Nothing in this bucket */
    if (htable->buckets[i] == NULL) {
      continue;
    }

    /* Fast path optimization (most likely case): there is probably only a
     * single entry in both the source and destination; check to confirm,
     * and if so just move the bucket over */
    if (ares_llist_len(htable->buckets[i]) == 1) {
      const void *val = ares_llist_first_val(htable->buckets[i]);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      if (buckets[idx] == NULL) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        continue;
      }
    }

    /* Slow path, collisions */
    while ((node = ares_llist_node_first(htable->buckets[i])) != NULL) {
      const void *val = ares_llist_node_val(node);
      size_t      idx = HASH_IDX(htable, htable->bucket_key(val));

      /* Try the fast path again; maybe we popped one collision off and can
       * now reuse the llist parent for the next entry */
      if (buckets[idx] == NULL && ares_llist_len(htable->buckets[i]) == 1) {
        /* Swap! */
        buckets[idx]       = htable->buckets[i];
        htable->buckets[i] = NULL;
        break;
      }

      /* Grab one off our preallocated list */
      if (buckets[idx] == NULL) {
        /* Silence static analysis; this isn't possible but it doesn't know
         * that */
        if (prealloc_llist == NULL || prealloc_llist_len == 0) {
          goto done; /* LCOV_EXCL_LINE */
        }
        buckets[idx] = prealloc_llist[prealloc_llist_len - 1];
        prealloc_llist_len--;
      } else {
        /* Collision occurred since the bucket wasn't empty */
        htable->num_collisions++;
      }

      ares_llist_node_mvparent_first(node, buckets[idx]);
    }

    /* Abandoned bucket, destroy */
    if (htable->buckets[i] != NULL) {
      ares_llist_destroy(htable->buckets[i]);
      htable->buckets[i] = NULL;
    }
  }

  /* We have guaranteed all the buckets have either been moved or destroyed,
   * so we just call ares_free() on the array and swap out the pointer */
  ares_free(htable->buckets);
  htable->buckets = buckets;
  buckets         = NULL;
  rv              = ARES_TRUE;

done:
  ares_free(buckets);
  /* Destroy any unused preallocated buckets */
  ares_htable_buckets_destroy(prealloc_llist, (unsigned int)prealloc_llist_len,
                              ARES_FALSE);

  /* On failure, we need to restore the htable size */
  if (rv != ARES_TRUE) {
    htable->size = old_size; /* LCOV_EXCL_LINE */
  }

  return rv;
}
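
/* Note on the rehash above (illustrative): because the table always doubles
 * and HASH_IDX() masks the same hash value, every entry in old bucket i can
 * land only in new bucket i or new bucket i + old_size, depending on the
 * single additional hash bit the wider mask exposes.  An old bucket's
 * entries therefore scatter across at most two new buckets. */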

ares_bool_t ares_htable_insert(ares_htable_t *htable, void *bucket)
{
  unsigned int       idx  = 0;
  ares_llist_node_t *node = NULL;
  const void        *key  = NULL;

  if (htable == NULL || bucket == NULL) {
    return ARES_FALSE;
  }

  key = htable->bucket_key(bucket);
  idx = HASH_IDX(htable, key);

  /* See if we have a matching bucket already; if so, replace it */
  node = ares_htable_find(htable, idx, key);
  if (node != NULL) {
    ares_llist_node_replace(node, bucket);
    return ARES_TRUE;
  }

  /* Check to see if we should rehash because the likelihood of collisions
   * has increased beyond our threshold */
  if (htable->num_keys + 1 >
      (htable->size * ARES__HTABLE_EXPAND_PERCENT) / 100) {
    if (!ares_htable_expand(htable)) {
      return ARES_FALSE; /* LCOV_EXCL_LINE */
    }
    /* If we expanded, we need to calculate a new index */
    idx = HASH_IDX(htable, key);
  }

  /* We lazily allocate the linked list */
  if (htable->buckets[idx] == NULL) {
    htable->buckets[idx] = ares_llist_create(htable->bucket_free);
    if (htable->buckets[idx] == NULL) {
      return ARES_FALSE;
    }
  }

  node = ares_llist_insert_first(htable->buckets[idx], bucket);
  if (node == NULL) {
    return ARES_FALSE;
  }

  /* Track collisions for rehash stability */
  if (ares_llist_len(htable->buckets[idx]) > 1) {
    htable->num_collisions++;
  }

  htable->num_keys++;

  return ARES_TRUE;
}

void *ares_htable_get(const ares_htable_t *htable, const void *key)
{
  unsigned int idx;

  if (htable == NULL || key == NULL) {
    return NULL;
  }

  idx = HASH_IDX(htable, key);

  return ares_llist_node_val(ares_htable_find(htable, idx, key));
}

ares_bool_t ares_htable_remove(ares_htable_t *htable, const void *key)
{
  ares_llist_node_t *node;
  unsigned int       idx;

  if (htable == NULL || key == NULL) {
    return ARES_FALSE;
  }

  idx  = HASH_IDX(htable, key);
  node = ares_htable_find(htable, idx, key);
  if (node == NULL) {
    return ARES_FALSE;
  }

  htable->num_keys--;

  /* Reduce collision count if the key shared a bucket */
  if (ares_llist_len(ares_llist_node_parent(node)) > 1) {
    htable->num_collisions--;
  }

  ares_llist_node_destroy(node);
  return ARES_TRUE;
}
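
/* Hypothetical usage sketch (illustrative only; the record type and
 * callbacks below are invented for the example, with signatures inferred
 * from how this file invokes the stored function pointers):
 *
 *   struct rec {
 *     char *name;   -- owned key
 *     int   value;
 *   };
 *
 *   static unsigned int rec_hash(const void *key, unsigned int seed)
 *   {
 *     return ares_htable_hash_FNV1a((const unsigned char *)key,
 *                                   strlen((const char *)key), seed);
 *   }
 *
 *   static const void *rec_key(const void *bucket)
 *   {
 *     return ((const struct rec *)bucket)->name;
 *   }
 *
 *   static void rec_free(void *bucket)
 *   {
 *     struct rec *r = bucket;
 *     ares_free(r->name);
 *     ares_free(r);
 *   }
 *
 *   static ares_bool_t rec_eq(const void *key1, const void *key2)
 *   {
 *     return strcmp(key1, key2) == 0 ? ARES_TRUE : ARES_FALSE;
 *   }
 *
 * Then, given a heap-allocated struct rec *r:
 *
 *   ares_htable_t *h = ares_htable_create(rec_hash, rec_key, rec_free,
 *                                         rec_eq);
 *   ares_htable_insert(h, r);    -- on success the table owns r; rec_free
 *                                   runs when r is removed or h destroyed
 *   struct rec *r2 = ares_htable_get(h, "example");
 *   ares_htable_destroy(h);
 */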

size_t ares_htable_num_keys(const ares_htable_t *htable)
{
  if (htable == NULL) {
    return 0;
  }
  return htable->num_keys;
}

unsigned int ares_htable_hash_FNV1a(const unsigned char *key, size_t key_len,
                                    unsigned int seed)
{
  unsigned int hv = seed ^ 2166136261U;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)key[i];
    /* hv *= 16777619 (0x01000193) */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}

/* Case-insensitive version, meant for ASCII strings */
unsigned int ares_htable_hash_FNV1a_casecmp(const unsigned char *key,
                                            size_t key_len, unsigned int seed)
{
  unsigned int hv = seed ^ 2166136261U;
  size_t       i;

  for (i = 0; i < key_len; i++) {
    hv ^= (unsigned int)ares_tolower(key[i]);
    /* hv *= 16777619 (0x01000193) */
    hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24);
  }

  return hv;
}
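
/* Note on the hashes above (illustrative): the shift-add chain computes the
 * FNV-1a multiply without a multiplication instruction.  The 32-bit FNV
 * prime decomposes over powers of two as
 *   16777619 = 2^24 + 2^8 + 2^7 + 2^4 + 2^1 + 2^0
 * so hv += (hv << 1) + (hv << 4) + (hv << 7) + (hv << 8) + (hv << 24)
 * is exactly hv *= 16777619 in 32-bit arithmetic.  2166136261U is the
 * standard 32-bit FNV offset basis; XOR-ing it with the per-table seed keys
 * the hash against collision attacks. */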