640-mem-hash-map.patch 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494
  1. From 5a842672e79a7a5f6be837c483be4f9901a4ecc0 Mon Sep 17 00:00:00 2001
  2. From: Bruno Haible <[email protected]>
  3. Date: Wed, 30 Apr 2025 03:19:10 +0200
  4. Subject: [PATCH] New module mem-hash-map.
  5. * lib/mem-hash-map.h: New file, from GNU gettext.
  6. * lib/mem-hash-map.c: New file, from GNU gettext.
  7. * modules/mem-hash-map: New file, from GNU gettext.
  8. ---
  9. ChangeLog | 7 +
  10. lib/mem-hash-map.c | 352 +++++++++++++++++++++++++++++++++++++++++++
  11. lib/mem-hash-map.h | 90 +++++++++++
  12. modules/mem-hash-map | 25 +++
  13. 4 files changed, 474 insertions(+)
  14. create mode 100644 lib/mem-hash-map.c
  15. create mode 100644 lib/mem-hash-map.h
  16. create mode 100644 modules/mem-hash-map
  17. --- /dev/null
  18. +++ b/lib/mem-hash-map.c
  19. @@ -0,0 +1,352 @@
  20. +/* Simple hash table (no removals) where the keys are memory blocks.
  21. + Copyright (C) 1994-2025 Free Software Foundation, Inc.
  22. + Written by Ulrich Drepper <[email protected]>, October 1994.
  23. +
  24. + This file is free software: you can redistribute it and/or modify
  25. + it under the terms of the GNU General Public License as published
  26. + by the Free Software Foundation, either version 3 of the License,
  27. + or (at your option) any later version.
  28. +
  29. + This file is distributed in the hope that it will be useful,
  30. + but WITHOUT ANY WARRANTY; without even the implied warranty of
  31. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  32. + GNU General Public License for more details.
  33. +
  34. + You should have received a copy of the GNU General Public License
  35. + along with this program. If not, see <https://www.gnu.org/licenses/>. */
  36. +
  37. +#include <config.h>
  38. +
  39. +/* Specification. */
  40. +#include "mem-hash-map.h"
  41. +
  42. +#include <stdlib.h>
  43. +#include <string.h>
  44. +#include <stdio.h>
  45. +#include <limits.h>
  46. +#include <sys/types.h>
  47. +
  48. +#include "next-prime.h"
  49. +
  50. +/* Since this simple implementation of hash tables allows only insertion, no
  51. + removal of entries, the right data structure for the memory holding all keys
  52. + is an obstack. */
  53. +#include "obstack.h"
  54. +
  55. +/* Use checked memory allocation. */
  56. +#include "xalloc.h"
  57. +
  58. +#define obstack_chunk_alloc xmalloc
  59. +#define obstack_chunk_free free
  60. +
  61. +
  62. +typedef struct hash_entry /* One slot of the hash table's entry array. */
  63. +{
  64. + size_t used; /* Hash code of the key (never 0), or 0 for an unused entry. */
  65. + const void *key; /* Start of the key bytes (a copy held in the table's memory pool). */
  66. + size_t keylen; /* Length of the key, in bytes. */
  67. + void *data; /* Value: an opaque pointer that is stored, never dereferenced. */
  68. + struct hash_entry *next; /* Successor in the circular list of used entries. */
  69. +}
  70. +hash_entry;
  71. +
  72. +
  73. +/* Initialize a hash table. INIT_SIZE > 1 is the initial number of available
  74. + entries; it is passed through next_prime, and the table grows on demand.
  75. + Return 0 always. */
  76. +int
  77. +hash_init (hash_table *htab, size_t init_size)
  78. +{
  79. + /* We need the size to be a prime, so that every probe step used by lookup() cycles through all entries. */
  80. + init_size = next_prime (init_size);
  81. +
  82. + /* Initialize the data structure: no entries yet, empty iteration list. */
  83. + htab->size = init_size;
  84. + htab->filled = 0;
  85. + htab->first = NULL;
  86. + htab->table = XCALLOC (init_size + 1, hash_entry); /* Slot 0 stays unused: lookup() only produces indices 1..size. */
  87. +
  88. + obstack_init (&htab->mem_pool); /* Pool from which the key copies are allocated. */
  89. +
  90. + return 0;
  91. +}
  92. +
  93. +
  94. +/* Delete a hash table's contents: the array of entries and the pool holding the keys.
  95. + HTAB itself is not freed, and its fields are not reset. Return 0 always. */
  96. +int
  97. +hash_destroy (hash_table *htab)
  98. +{
  99. + free (htab->table);
  100. + obstack_free (&htab->mem_pool, NULL); /* NULL asks the obstack to release everything, i.e. all key copies. */
  101. + return 0;
  102. +}
  103. +
  104. +
  105. +/* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY
  106. + in memory. The result is never 0, because 0 marks an unused table entry. */
  107. +static size_t
  108. +compute_hashval (const void *key, size_t keylen)
  109. +{
  110. + size_t cnt;
  111. + size_t hval;
  112. +
  113. + /* Compute the hash value for the given string. The algorithm
  114. + is taken from [Aho,Sethi,Ullman], fixed according to
  115. + https://haible.de/bruno/hashfunc.html. */
  116. + cnt = 0;
  117. + hval = keylen; /* Seed with the length. */
  118. + while (cnt < keylen)
  119. + {
  120. + hval = (hval << 9) | (hval >> (sizeof (size_t) * CHAR_BIT - 9)); /* Rotate left by 9 bits. */
  121. + hval += (size_t) *(((const char *) key) + cnt++); /* Add the next byte. NOTE(review): read as plain char, so bytes >= 0x80 are sign-extended where char is signed. */
  122. + }
  123. + return hval != 0 ? hval : ~((size_t) 0); /* Map 0 to all-ones: 0 is reserved for unused entries. */
  124. +}
  125. +
  126. +
  127. +/* References:
  128. + [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
  129. + [Knuth] The Art of Computer Programming, Volume 3, Section 6.4 */
  130. +
  131. +/* Look up a given key in the hash table. HVAL is the hash code of the key.
  132. + Return the index of the entry, if present, or otherwise the index of a free
  133. + entry where it could be inserted. The result is in the range 1..htab->size. */
  134. +static size_t
  135. +lookup (const hash_table *htab,
  136. + const void *key, size_t keylen,
  137. + size_t hval)
  138. +{
  139. + size_t hash;
  140. + size_t idx;
  141. + hash_entry *table = htab->table;
  142. +
  143. + /* First hash function: simply take the modulus but prevent zero. */
  144. + hash = 1 + hval % htab->size;
  145. +
  146. + idx = hash;
  147. +
  148. + if (table[idx].used)
  149. + {
  150. + if (table[idx].used == hval && table[idx].keylen == keylen
  151. + && memcmp (table[idx].key, key, keylen) == 0)
  152. + return idx;
  153. +
  154. + /* Second hash function as suggested in [Knuth]: the probe step, in the range 1..size-2. */
  155. + hash = 1 + hval % (htab->size - 2);
  156. +
  157. + do
  158. + {
  159. + if (idx <= hash) /* Step backwards by HASH, wrapping around so that IDX stays in 1..size. */
  160. + idx = htab->size + idx - hash;
  161. + else
  162. + idx -= hash;
  163. +
  164. + /* If entry is found use it. The cheap hash code and length comparisons come before memcmp. */
  165. + if (table[idx].used == hval && table[idx].keylen == keylen
  166. + && memcmp (table[idx].key, key, keylen) == 0)
  167. + return idx;
  168. + }
  169. + while (table[idx].used); /* Stop at the first unused entry. */
  170. + }
  171. + return idx; /* Here table[idx] is unused: the key is not in the table. */
  172. +}
  173. +
  174. +
  175. +/* Look up the value of a key in the given table.
  176. + If found, return 0 and set *RESULT to it. Otherwise return -1 and leave *RESULT untouched. */
  177. +int
  178. +hash_find_entry (const hash_table *htab, const void *key, size_t keylen,
  179. + void **result)
  180. +{
  181. + hash_entry *table = htab->table;
  182. + size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen));
  183. +
  184. + if (table[idx].used == 0) /* lookup() ended on a free entry: the key is not present. */
  185. + return -1;
  186. +
  187. + *result = table[idx].data;
  188. + return 0;
  189. +}
  190. +
  191. +
  192. +/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX.
  193. + HVAL is the key's hash code. IDX depends on it. The table entry at index
  194. + IDX is known to be unused. KEY is stored as a pointer; the bytes are not copied here. */
  195. +static void
  196. +insert_entry_2 (hash_table *htab,
  197. + const void *key, size_t keylen,
  198. + size_t hval, size_t idx, void *data)
  199. +{
  200. + hash_entry *table = htab->table;
  201. +
  202. + table[idx].used = hval; /* A nonzero hash code also marks the entry as used. */
  203. + table[idx].key = key;
  204. + table[idx].keylen = keylen;
  205. + table[idx].data = data;
  206. +
  207. + /* Append the new entry to the circular list: htab->first is the newest entry, htab->first->next the oldest. */
  208. + if (htab->first == NULL)
  209. + {
  210. + table[idx].next = &table[idx]; /* A single entry is its own successor. */
  211. + htab->first = &table[idx];
  212. + }
  213. + else
  214. + {
  215. + table[idx].next = htab->first->next; /* The new entry links back to the oldest one. */
  216. + htab->first->next = &table[idx];
  217. + htab->first = &table[idx];
  218. + }
  219. +
  220. + ++htab->filled;
  221. +}
  222. +
  223. +
  224. +/* Grow the hash table: allocate a larger array of entries and re-insert all used entries of the old one. */
  225. +static void
  226. +resize (hash_table *htab)
  227. +{
  228. + size_t old_size = htab->size;
  229. + hash_entry *table = htab->table; /* The old array; htab->table is replaced below. */
  230. + size_t idx;
  231. +
  232. + htab->size = next_prime (htab->size * 2); /* New size: next_prime of twice the old size. */
  233. + htab->filled = 0; /* The count and the list are rebuilt by insert_entry_2 below. */
  234. + htab->first = NULL;
  235. + htab->table = XCALLOC (1 + htab->size, hash_entry);
  236. +
  237. + for (idx = 1; idx <= old_size; ++idx) /* Old-array index order; the iteration list is rebuilt in this order too. */
  238. + if (table[idx].used)
  239. + insert_entry_2 (htab, table[idx].key, table[idx].keylen,
  240. + table[idx].used, /* The stored hash code is reused, not recomputed. */
  241. + lookup (htab, table[idx].key, table[idx].keylen,
  242. + table[idx].used),
  243. + table[idx].data);
  244. +
  245. + free (table); /* Only the old array is released; the key copies stay in the memory pool. */
  246. +}
  247. +
  248. +
  249. +/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table.
  250. + Return non-NULL (more precisely, the address of the KEY inside the table's
  251. + memory pool) if successful, or NULL if there is already an entry with the
  252. + given key; in that case the existing entry is left unchanged. */
  253. +const void *
  254. +hash_insert_entry (hash_table *htab,
  255. + const void *key, size_t keylen,
  256. + void *data)
  257. +{
  258. + size_t hval = compute_hashval (key, keylen);
  259. + hash_entry *table = htab->table;
  260. + size_t idx = lookup (htab, key, keylen, hval);
  261. +
  262. + if (table[idx].used)
  263. + /* We don't want to overwrite the old value. */
  264. + return NULL;
  265. + else
  266. + {
  267. + /* An empty bucket has been found. Store a private copy of the key bytes. */
  268. + void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
  269. + insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
  270. + if (100 * htab->filled > 75 * htab->size)
  271. + /* Table is filled more than 75%. Resize the table. */
  272. + resize (htab);
  273. + return keycopy; /* Still valid after resize(), which reuses the key copies. */
  274. + }
  275. +}
  276. +
  277. +
  278. +/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. If there is already an entry
  279. + with the given key, only its value is replaced by DATA. Return 0. */
  280. +int
  281. +hash_set_value (hash_table *htab,
  282. + const void *key, size_t keylen,
  283. + void *data)
  284. +{
  285. + size_t hval = compute_hashval (key, keylen);
  286. + hash_entry *table = htab->table;
  287. + size_t idx = lookup (htab, key, keylen, hval);
  288. +
  289. + if (table[idx].used)
  290. + {
  291. + /* Overwrite the old value. The stored key and the list position stay as they are. */
  292. + table[idx].data = data;
  293. + return 0;
  294. + }
  295. + else
  296. + {
  297. + /* An empty bucket has been found. Store a private copy of the key bytes. */
  298. + void *keycopy = obstack_copy (&htab->mem_pool, key, keylen);
  299. + insert_entry_2 (htab, keycopy, keylen, hval, idx, data);
  300. + if (100 * htab->filled > 75 * htab->size)
  301. + /* Table is filled more than 75%. Resize the table. */
  302. + resize (htab);
  303. + return 0;
  304. + }
  305. +}
  306. +
  307. +
  308. +/* Steps *PTR forward to the next used entry in the given hash table. *PTR
  309. + should be initially set to NULL. Store information about the next entry
  310. + in *KEY, *KEYLEN, *DATA. *PTR points into the array of entries, which is replaced when the table grows.
  311. + Return 0 normally, -1 when the whole hash table has been traversed. */
  312. +int
  313. +hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen,
  314. + void **data)
  315. +{
  316. + hash_entry *curr;
  317. +
  318. + if (*ptr == NULL)
  319. + {
  320. + if (htab->first == NULL) /* Empty table. */
  321. + return -1;
  322. + curr = htab->first; /* Start at the newest entry; its successor, taken below, is the head of the list. */
  323. + }
  324. + else
  325. + {
  326. + if (*ptr == htab->first) /* The previous call delivered the last entry of the list. */
  327. + return -1;
  328. + curr = (hash_entry *) *ptr;
  329. + }
  330. + curr = curr->next; /* Advance, and remember the position for the next call. */
  331. + *ptr = (void *) curr;
  332. +
  333. + *key = curr->key;
  334. + *keylen = curr->keylen;
  335. + *data = curr->data;
  336. + return 0;
  337. +}
  338. +
  339. +
  340. +/* Steps *PTR forward to the next used entry in the given hash table. *PTR
  341. + should be initially set to NULL. Store information about the next entry
  342. + in *KEY, *KEYLEN, *DATAP. *DATAP is set to point to the storage of the
  343. + value; modifying **DATAP will modify the value of the entry. *PTR and *DATAP point into the array of entries, which is replaced when the table grows.
  344. + Return 0 normally, -1 when the whole hash table has been traversed. */
  345. +int
  346. +hash_iterate_modify (hash_table *htab, void **ptr,
  347. + const void **key, size_t *keylen,
  348. + void ***datap)
  349. +{
  350. + hash_entry *curr;
  351. +
  352. + if (*ptr == NULL)
  353. + {
  354. + if (htab->first == NULL) /* Empty table. */
  355. + return -1;
  356. + curr = htab->first; /* Start at the newest entry; its successor, taken below, is the head of the list. */
  357. + }
  358. + else
  359. + {
  360. + if (*ptr == htab->first) /* The previous call delivered the last entry of the list. */
  361. + return -1;
  362. + curr = (hash_entry *) *ptr;
  363. + }
  364. + curr = curr->next; /* Advance, and remember the position for the next call. */
  365. + *ptr = (void *) curr;
  366. +
  367. + *key = curr->key;
  368. + *keylen = curr->keylen;
  369. + *datap = &curr->data; /* Address of the value field inside the entry, not a copy of the value. */
  370. + return 0;
  371. +}
  372. --- /dev/null
  373. +++ b/lib/mem-hash-map.h
  374. @@ -0,0 +1,90 @@
  375. +/* Simple hash table (no removals) where the keys are memory blocks.
  376. + Copyright (C) 1995-2025 Free Software Foundation, Inc.
  377. +
  378. + This file is free software: you can redistribute it and/or modify
  379. + it under the terms of the GNU General Public License as published
  380. + by the Free Software Foundation, either version 3 of the License,
  381. + or (at your option) any later version.
  382. +
  383. + This file is distributed in the hope that it will be useful,
  384. + but WITHOUT ANY WARRANTY; without even the implied warranty of
  385. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  386. + GNU General Public License for more details.
  387. +
  388. + You should have received a copy of the GNU General Public License
  389. + along with this program. If not, see <https://www.gnu.org/licenses/>. */
  390. +
  391. +#ifndef _GL_MEM_HASH_MAP_H
  392. +#define _GL_MEM_HASH_MAP_H
  393. +
  394. +#include <stddef.h>
  395. +
  396. +#include "obstack.h"
  397. +
  398. +#ifdef __cplusplus
  399. +extern "C" {
  400. +#endif
  401. +
  402. +struct hash_entry;
  403. +
  404. +typedef struct hash_table /* State of one hash table; set up by hash_init, released by hash_destroy. */
  405. +{
  406. + size_t size; /* Number of allocated entries (the array has one more slot; slot 0 is unused). */
  407. + size_t filled; /* Number of used entries. */
  408. + struct hash_entry *first; /* Most recently inserted entry of the circular list of entries, or NULL if there are none. */
  409. + struct hash_entry *table; /* Pointer to array of entries. */
  410. + struct obstack mem_pool; /* Memory pool holding the copies of the keys. */
  411. +}
  412. +hash_table;
  413. +
  414. +/* Initialize a hash table. INIT_SIZE > 1 is the initial number of available
  415. + entries; it is passed through next_prime, and the table grows on demand.
  416. + Return 0 always. */
  417. +extern int hash_init (hash_table *htab, size_t init_size);
  418. +
  419. +/* Delete a hash table's contents: the array of entries and the pool holding the keys.
  420. + HTAB itself is not freed. Return 0 always. */
  421. +extern int hash_destroy (hash_table *htab);
  422. +
  423. +/* Look up the value of a key in the given table.
  424. + If found, return 0 and set *RESULT to it. Otherwise return -1 and leave *RESULT untouched. */
  425. +extern int hash_find_entry (const hash_table *htab,
  426. + const void *key, size_t keylen,
  427. + void **result);
  428. +
  429. +/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. The key bytes are copied.
  430. + Return non-NULL (more precisely, the address of the KEY inside the table's
  431. + memory pool) if successful, or NULL if there is already an entry with the
  432. + given key; in that case the existing entry is left unchanged. */
  433. +extern const void * hash_insert_entry (hash_table *htab,
  434. + const void *key, size_t keylen,
  435. + void *data);
  436. +
  437. +/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. If there is already an entry
  438. + with the given key, only its value is replaced by DATA. Return 0. */
  439. +extern int hash_set_value (hash_table *htab,
  440. + const void *key, size_t keylen,
  441. + void *data);
  442. +
  443. +/* Steps *PTR forward to the next used entry in the given hash table. *PTR
  444. + should be initially set to NULL. Store information about the next entry
  445. + in *KEY, *KEYLEN, *DATA. *PTR points into the array of entries, which is replaced when the table grows.
  446. + Return 0 normally, -1 when the whole hash table has been traversed. */
  447. +extern int hash_iterate (hash_table *htab, void **ptr,
  448. + const void **key, size_t *keylen,
  449. + void **data);
  450. +
  451. +/* Steps *PTR forward to the next used entry in the given hash table. *PTR
  452. + should be initially set to NULL. Store information about the next entry
  453. + in *KEY, *KEYLEN, *DATAP. *DATAP is set to point to the storage of the
  454. + value; modifying **DATAP will modify the value of the entry. *PTR and *DATAP point into the array of entries, which is replaced when the table grows.
  455. + Return 0 normally, -1 when the whole hash table has been traversed. */
  456. +extern int hash_iterate_modify (hash_table *htab, void **ptr,
  457. + const void **key, size_t *keylen,
  458. + void ***datap);
  459. +
  460. +#ifdef __cplusplus
  461. +}
  462. +#endif
  463. +
  464. +#endif /* not _GL_MEM_HASH_MAP_H */
  465. --- /dev/null
  466. +++ b/modules/mem-hash-map
  467. @@ -0,0 +1,25 @@
  468. +Description:
  469. +Simple hash table (no removals) where the keys are memory blocks.
  470. +
  471. +Files:
  472. +lib/mem-hash-map.h
  473. +lib/mem-hash-map.c
  474. +
  475. +Depends-on:
  476. +next-prime
  477. +obstack
  478. +xalloc
  479. +
  480. +configure.ac:
  481. +
  482. +Makefile.am:
  483. +lib_SOURCES += mem-hash-map.h mem-hash-map.c
  484. +
  485. +Include:
  486. +"mem-hash-map.h"
  487. +
  488. +License:
  489. +GPL
  490. +
  491. +Maintainer:
  492. +Bruno Haible