HashTable.h 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. //
  2. // HashTable.h
  3. //
  4. // $Id: //poco/Main/Foundation/include/Poco/HashTable.h#9 $
  5. //
  6. // Library: Foundation
  7. // Package: Hashing
  8. // Module: HashTable
  9. //
  10. // Definition of the HashTable class.
  11. //
  12. // Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
  13. // and Contributors.
  14. //
  15. // Permission is hereby granted, free of charge, to any person or organization
  16. // obtaining a copy of the software and accompanying documentation covered by
  17. // this license (the "Software") to use, reproduce, display, distribute,
  18. // execute, and transmit the Software, and to prepare derivative works of the
  19. // Software, and to permit third-parties to whom the Software is furnished to
  20. // do so, all subject to the following:
  21. //
  22. // The copyright notices in the Software and this entire statement, including
  23. // the above license grant, this restriction and the following disclaimer,
  24. // must be included in all copies of the Software, in whole or in part, and
  25. // all derivative works of the Software, unless such copies or derivative
  26. // works are solely in the form of machine-executable object code generated by
  27. // a source language processor.
  28. //
  29. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  30. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  31. // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
  32. // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
  33. // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
  34. // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  35. // DEALINGS IN THE SOFTWARE.
  36. //
  37. #ifndef Foundation_HashTable_INCLUDED
  38. #define Foundation_HashTable_INCLUDED
  39. #include "Poco/Foundation.h"
  40. #include "Poco/Exception.h"
  41. #include "Poco/HashFunction.h"
  42. #include "Poco/HashStatistic.h"
  43. #include <vector>
  44. #include <map>
  45. #include <cstddef>
  46. namespace Poco {
  47. //@ deprecated
  48. template <class Key, class Value, class KeyHashFunction = HashFunction<Key> >
  49. class HashTable
  50. /// A HashTable stores a key value pair that can be looked up via a hashed key.
  51. ///
  52. /// Collision handling is done via overflow maps(!). With small hash tables performance of this
  53. /// data struct will be closer to that a map than a hash table, i.e. slower. On the plus side,
  54. /// this class offers remove operations. Also HashTable full errors are not possible. If a fast
  55. /// HashTable implementation is needed and the remove operation is not required, use SimpleHashTable
  56. /// instead.
  57. ///
  58. /// This class is NOT thread safe.
  59. {
  60. public:
  61. typedef std::map<Key, Value> HashEntryMap;
  62. typedef HashEntryMap** HashTableVector;
  63. typedef typename HashEntryMap::const_iterator ConstIterator;
  64. typedef typename HashEntryMap::iterator Iterator;
  65. HashTable(UInt32 initialSize = 251):
  66. _entries(0),
  67. _size(0),
  68. _maxCapacity(initialSize)
  69. /// Creates the HashTable.
  70. {
  71. _entries = new HashEntryMap*[initialSize];
  72. memset(_entries, '\0', sizeof(HashEntryMap*)*initialSize);
  73. }
  74. HashTable(const HashTable& ht):
  75. _entries(new HashEntryMap*[ht._maxCapacity]),
  76. _size(ht._size),
  77. _maxCapacity(ht._maxCapacity)
  78. {
  79. for (UInt32 i = 0; i < _maxCapacity; ++i)
  80. {
  81. if (ht._entries[i])
  82. _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end());
  83. else
  84. _entries[i] = 0;
  85. }
  86. }
  87. ~HashTable()
  88. /// Destroys the HashTable.
  89. {
  90. clear();
  91. }
  92. HashTable& operator = (const HashTable& ht)
  93. {
  94. if (this != &ht)
  95. {
  96. clear();
  97. _maxCapacity = ht._maxCapacity;
  98. poco_assert_dbg (_entries == 0);
  99. _entries = new HashEntryMap*[_maxCapacity];
  100. _size = ht._size;
  101. for (UInt32 i = 0; i < _maxCapacity; ++i)
  102. {
  103. if (ht._entries[i])
  104. _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end());
  105. else
  106. _entries[i] = 0;
  107. }
  108. }
  109. return *this;
  110. }
  111. void clear()
  112. {
  113. if (!_entries)
  114. return;
  115. for (UInt32 i = 0; i < _maxCapacity; ++i)
  116. {
  117. if (_entries[i])
  118. delete _entries[i];
  119. }
  120. delete[] _entries;
  121. _entries = 0;
  122. _size = 0;
  123. _maxCapacity = 0;
  124. }
  125. UInt32 insert(const Key& key, const Value& value)
  126. /// Returns the hash value of the inserted item.
  127. /// Throws an exception if the entry was already inserted
  128. {
  129. UInt32 hsh = hash(key);
  130. insertRaw(key, hsh, value);
  131. return hsh;
  132. }
  133. Value& insertRaw(const Key& key, UInt32 hsh, const Value& value)
  134. /// Returns the hash value of the inserted item.
  135. /// Throws an exception if the entry was already inserted
  136. {
  137. if (!_entries[hsh])
  138. _entries[hsh] = new HashEntryMap();
  139. std::pair<typename HashEntryMap::iterator, bool> res(_entries[hsh]->insert(std::make_pair(key, value)));
  140. if (!res.second)
  141. throw InvalidArgumentException("HashTable::insert, key already exists.");
  142. _size++;
  143. return res.first->second;
  144. }
  145. UInt32 update(const Key& key, const Value& value)
  146. /// Returns the hash value of the inserted item.
  147. /// Replaces an existing entry if it finds one
  148. {
  149. UInt32 hsh = hash(key);
  150. updateRaw(key, hsh, value);
  151. return hsh;
  152. }
  153. void updateRaw(const Key& key, UInt32 hsh, const Value& value)
  154. /// Returns the hash value of the inserted item.
  155. /// Replaces an existing entry if it finds one
  156. {
  157. if (!_entries[hsh])
  158. _entries[hsh] = new HashEntryMap();
  159. std::pair<Iterator, bool> res = _entries[hsh]->insert(make_pair(key, value));
  160. if (res.second == false)
  161. res.first->second = value;
  162. else
  163. _size++;
  164. }
  165. void remove(const Key& key)
  166. {
  167. UInt32 hsh = hash(key);
  168. removeRaw(key, hsh);
  169. }
  170. void removeRaw(const Key& key, UInt32 hsh)
  171. /// Performance version, allows to specify the hash value
  172. {
  173. if (_entries[hsh])
  174. {
  175. _size -= _entries[hsh]->erase(key);
  176. }
  177. }
  178. UInt32 hash(const Key& key) const
  179. {
  180. return _hash(key, _maxCapacity);
  181. }
  182. const Value& get(const Key& key) const
  183. /// Throws an exception if the value does not exist
  184. {
  185. UInt32 hsh = hash(key);
  186. return getRaw(key, hsh);
  187. }
  188. const Value& getRaw(const Key& key, UInt32 hsh) const
  189. /// Throws an exception if the value does not exist
  190. {
  191. if (!_entries[hsh])
  192. throw InvalidArgumentException("key not found");
  193. ConstIterator it = _entries[hsh]->find(key);
  194. if (it == _entries[hsh]->end())
  195. throw InvalidArgumentException("key not found");
  196. return it->second;
  197. }
  198. Value& get(const Key& key)
  199. /// Throws an exception if the value does not exist
  200. {
  201. UInt32 hsh = hash(key);
  202. return const_cast<Value&>(getRaw(key, hsh));
  203. }
  204. const Value& operator [] (const Key& key) const
  205. {
  206. return get(key);
  207. }
  208. Value& operator [] (const Key& key)
  209. {
  210. UInt32 hsh = hash(key);
  211. if (!_entries[hsh])
  212. return insertRaw(key, hsh, Value());
  213. ConstIterator it = _entries[hsh]->find(key);
  214. if (it == _entries[hsh]->end())
  215. return insertRaw(key, hsh, Value());
  216. return it->second;
  217. }
  218. const Key& getKeyRaw(const Key& key, UInt32 hsh)
  219. /// Throws an exception if the key does not exist. returns a reference to the internally
  220. /// stored key. Useful when someone does an insert and wants for performance reason only to store
  221. /// a pointer to the key in another collection
  222. {
  223. if (!_entries[hsh])
  224. throw InvalidArgumentException("key not found");
  225. ConstIterator it = _entries[hsh]->find(key);
  226. if (it == _entries[hsh]->end())
  227. throw InvalidArgumentException("key not found");
  228. return it->first;
  229. }
  230. bool get(const Key& key, Value& v) const
  231. /// Sets v to the found value, returns false if no value was found
  232. {
  233. UInt32 hsh = hash(key);
  234. return getRaw(key, hsh, v);
  235. }
  236. bool getRaw(const Key& key, UInt32 hsh, Value& v) const
  237. /// Sets v to the found value, returns false if no value was found
  238. {
  239. if (!_entries[hsh])
  240. return false;
  241. ConstIterator it = _entries[hsh]->find(key);
  242. if (it == _entries[hsh]->end())
  243. return false;
  244. v = it->second;
  245. return true;
  246. }
  247. bool exists(const Key& key)
  248. {
  249. UInt32 hsh = hash(key);
  250. return existsRaw(key, hsh);
  251. }
  252. bool existsRaw(const Key& key, UInt32 hsh)
  253. {
  254. return _entries[hsh] && (_entries[hsh]->end() != _entries[hsh]->find(key));
  255. }
  256. std::size_t size() const
  257. /// Returns the number of elements already inserted into the HashTable
  258. {
  259. return _size;
  260. }
  261. UInt32 maxCapacity() const
  262. {
  263. return _maxCapacity;
  264. }
  265. void resize(UInt32 newSize)
  266. /// Resizes the hashtable, rehashes all existing entries. Expensive!
  267. {
  268. if (_maxCapacity != newSize)
  269. {
  270. HashTableVector cpy = _entries;
  271. _entries = 0;
  272. UInt32 oldSize = _maxCapacity;
  273. _maxCapacity = newSize;
  274. _entries = new HashEntryMap*[_maxCapacity];
  275. memset(_entries, '\0', sizeof(HashEntryMap*)*_maxCapacity);
  276. if (_size == 0)
  277. {
  278. // no data was yet inserted
  279. delete[] cpy;
  280. return;
  281. }
  282. _size = 0;
  283. for (UInt32 i = 0; i < oldSize; ++i)
  284. {
  285. if (cpy[i])
  286. {
  287. ConstIterator it = cpy[i]->begin();
  288. ConstIterator itEnd = cpy[i]->end();
  289. for (; it != itEnd; ++it)
  290. {
  291. insert(it->first, it->second);
  292. }
  293. delete cpy[i];
  294. }
  295. }
  296. delete[] cpy;
  297. }
  298. }
  299. HashStatistic currentState(bool details = false) const
  300. /// Returns the current internal state
  301. {
  302. UInt32 numberOfEntries = (UInt32)_size;
  303. UInt32 numZeroEntries = 0;
  304. UInt32 maxEntriesPerHash = 0;
  305. std::vector<UInt32> detailedEntriesPerHash;
  306. #ifdef DEBUG
  307. UInt32 totalSize = 0;
  308. #endif
  309. for (UInt32 i = 0; i < _maxCapacity; ++i)
  310. {
  311. if (_entries[i])
  312. {
  313. UInt32 size = (UInt32)_entries[i]->size();
  314. poco_assert_dbg(size != 0);
  315. if (size > maxEntriesPerHash)
  316. maxEntriesPerHash = size;
  317. if (details)
  318. detailedEntriesPerHash.push_back(size);
  319. #ifdef DEBUG
  320. totalSize += size;
  321. #endif
  322. }
  323. else
  324. {
  325. numZeroEntries++;
  326. if (details)
  327. detailedEntriesPerHash.push_back(0);
  328. }
  329. }
  330. #ifdef DEBUG
  331. poco_assert_dbg(totalSize == numberOfEntries);
  332. #endif
  333. return HashStatistic(_maxCapacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash);
  334. }
  335. private:
  336. HashTableVector _entries;
  337. std::size_t _size;
  338. UInt32 _maxCapacity;
  339. KeyHashFunction _hash;
  340. };
  341. } // namespace Poco
  342. #endif // Foundation_HashTable_INCLUDED