HashTable.h 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. //
  2. // HashTable.h
  3. //
  4. // $Id: //poco/svn/Foundation/include/Poco/HashTable.h#2 $
  5. //
  6. // Library: Foundation
  7. // Package: Hashing
  8. // Module: HashTable
  9. //
  10. // Definition of the HashTable class.
  11. //
  12. // Copyright (c) 2006, Applied Informatics Software Engineering GmbH.
  13. // and Contributors.
  14. //
  15. // Permission is hereby granted, free of charge, to any person or organization
  16. // obtaining a copy of the software and accompanying documentation covered by
  17. // this license (the "Software") to use, reproduce, display, distribute,
  18. // execute, and transmit the Software, and to prepare derivative works of the
  19. // Software, and to permit third-parties to whom the Software is furnished to
  20. // do so, all subject to the following:
  21. //
  22. // The copyright notices in the Software and this entire statement, including
  23. // the above license grant, this restriction and the following disclaimer,
  24. // must be included in all copies of the Software, in whole or in part, and
  25. // all derivative works of the Software, unless such copies or derivative
  26. // works are solely in the form of machine-executable object code generated by
  27. // a source language processor.
  28. //
  29. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  30. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  31. // FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
  32. // SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
  33. // FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
  34. // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  35. // DEALINGS IN THE SOFTWARE.
  36. //
  37. #ifndef Foundation_HashTable_INCLUDED
  38. #define Foundation_HashTable_INCLUDED
  39. #include "Poco/Foundation.h"
  40. #include "Poco/Exception.h"
  41. #include "Poco/HashFunction.h"
  42. #include "Poco/HashStatistic.h"
  43. #include <vector>
  44. #include <map>
  45. #include <cstddef>
  46. #include <cstring>
  47. namespace Poco {
  48. //@ deprecated
  49. template <class Key, class Value, class KeyHashFunction = HashFunction<Key> >
  50. class HashTable
  51. /// A HashTable stores a key value pair that can be looked up via a hashed key.
  52. ///
  53. /// Collision handling is done via overflow maps(!). With small hash tables performance of this
  54. /// data struct will be closer to that a map than a hash table, i.e. slower. On the plus side,
  55. /// this class offers remove operations. Also HashTable full errors are not possible. If a fast
  56. /// HashTable implementation is needed and the remove operation is not required, use SimpleHashTable
  57. /// instead.
  58. ///
  59. /// This class is NOT thread safe.
  60. {
  61. public:
  62. typedef std::map<Key, Value> HashEntryMap;
  63. typedef HashEntryMap** HashTableVector;
  64. typedef typename HashEntryMap::const_iterator ConstIterator;
  65. typedef typename HashEntryMap::iterator Iterator;
  66. HashTable(UInt32 initialSize = 251):
  67. _entries(0),
  68. _size(0),
  69. _maxCapacity(initialSize)
  70. /// Creates the HashTable.
  71. {
  72. _entries = new HashEntryMap*[initialSize];
  73. std::memset(_entries, '\0', sizeof(HashEntryMap*)*initialSize);
  74. }
  75. HashTable(const HashTable& ht):
  76. _entries(new HashEntryMap*[ht._maxCapacity]),
  77. _size(ht._size),
  78. _maxCapacity(ht._maxCapacity)
  79. {
  80. for (UInt32 i = 0; i < _maxCapacity; ++i)
  81. {
  82. if (ht._entries[i])
  83. _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end());
  84. else
  85. _entries[i] = 0;
  86. }
  87. }
  88. ~HashTable()
  89. /// Destroys the HashTable.
  90. {
  91. clear();
  92. }
  93. HashTable& operator = (const HashTable& ht)
  94. {
  95. if (this != &ht)
  96. {
  97. clear();
  98. _maxCapacity = ht._maxCapacity;
  99. poco_assert_dbg (_entries == 0);
  100. _entries = new HashEntryMap*[_maxCapacity];
  101. _size = ht._size;
  102. for (UInt32 i = 0; i < _maxCapacity; ++i)
  103. {
  104. if (ht._entries[i])
  105. _entries[i] = new HashEntryMap(ht._entries[i]->begin(), ht._entries[i]->end());
  106. else
  107. _entries[i] = 0;
  108. }
  109. }
  110. return *this;
  111. }
  112. void clear()
  113. {
  114. if (!_entries)
  115. return;
  116. for (UInt32 i = 0; i < _maxCapacity; ++i)
  117. {
  118. if (_entries[i])
  119. delete _entries[i];
  120. }
  121. delete[] _entries;
  122. _entries = 0;
  123. _size = 0;
  124. _maxCapacity = 0;
  125. }
  126. UInt32 insert(const Key& key, const Value& value)
  127. /// Returns the hash value of the inserted item.
  128. /// Throws an exception if the entry was already inserted
  129. {
  130. UInt32 hsh = hash(key);
  131. insertRaw(key, hsh, value);
  132. return hsh;
  133. }
  134. Value& insertRaw(const Key& key, UInt32 hsh, const Value& value)
  135. /// Returns the hash value of the inserted item.
  136. /// Throws an exception if the entry was already inserted
  137. {
  138. if (!_entries[hsh])
  139. _entries[hsh] = new HashEntryMap();
  140. std::pair<typename HashEntryMap::iterator, bool> res(_entries[hsh]->insert(std::make_pair(key, value)));
  141. if (!res.second)
  142. throw InvalidArgumentException("HashTable::insert, key already exists.");
  143. _size++;
  144. return res.first->second;
  145. }
  146. UInt32 update(const Key& key, const Value& value)
  147. /// Returns the hash value of the inserted item.
  148. /// Replaces an existing entry if it finds one
  149. {
  150. UInt32 hsh = hash(key);
  151. updateRaw(key, hsh, value);
  152. return hsh;
  153. }
  154. void updateRaw(const Key& key, UInt32 hsh, const Value& value)
  155. /// Returns the hash value of the inserted item.
  156. /// Replaces an existing entry if it finds one
  157. {
  158. if (!_entries[hsh])
  159. _entries[hsh] = new HashEntryMap();
  160. std::pair<Iterator, bool> res = _entries[hsh]->insert(make_pair(key, value));
  161. if (res.second == false)
  162. res.first->second = value;
  163. else
  164. _size++;
  165. }
  166. void remove(const Key& key)
  167. {
  168. UInt32 hsh = hash(key);
  169. removeRaw(key, hsh);
  170. }
  171. void removeRaw(const Key& key, UInt32 hsh)
  172. /// Performance version, allows to specify the hash value
  173. {
  174. if (_entries[hsh])
  175. {
  176. _size -= _entries[hsh]->erase(key);
  177. }
  178. }
  179. UInt32 hash(const Key& key) const
  180. {
  181. return _hash(key, _maxCapacity);
  182. }
  183. const Value& get(const Key& key) const
  184. /// Throws an exception if the value does not exist
  185. {
  186. UInt32 hsh = hash(key);
  187. return getRaw(key, hsh);
  188. }
  189. const Value& getRaw(const Key& key, UInt32 hsh) const
  190. /// Throws an exception if the value does not exist
  191. {
  192. if (!_entries[hsh])
  193. throw InvalidArgumentException("key not found");
  194. ConstIterator it = _entries[hsh]->find(key);
  195. if (it == _entries[hsh]->end())
  196. throw InvalidArgumentException("key not found");
  197. return it->second;
  198. }
  199. Value& get(const Key& key)
  200. /// Throws an exception if the value does not exist
  201. {
  202. UInt32 hsh = hash(key);
  203. return const_cast<Value&>(getRaw(key, hsh));
  204. }
  205. const Value& operator [] (const Key& key) const
  206. {
  207. return get(key);
  208. }
  209. Value& operator [] (const Key& key)
  210. {
  211. UInt32 hsh = hash(key);
  212. if (!_entries[hsh])
  213. return insertRaw(key, hsh, Value());
  214. ConstIterator it = _entries[hsh]->find(key);
  215. if (it == _entries[hsh]->end())
  216. return insertRaw(key, hsh, Value());
  217. return it->second;
  218. }
  219. const Key& getKeyRaw(const Key& key, UInt32 hsh)
  220. /// Throws an exception if the key does not exist. returns a reference to the internally
  221. /// stored key. Useful when someone does an insert and wants for performance reason only to store
  222. /// a pointer to the key in another collection
  223. {
  224. if (!_entries[hsh])
  225. throw InvalidArgumentException("key not found");
  226. ConstIterator it = _entries[hsh]->find(key);
  227. if (it == _entries[hsh]->end())
  228. throw InvalidArgumentException("key not found");
  229. return it->first;
  230. }
  231. bool get(const Key& key, Value& v) const
  232. /// Sets v to the found value, returns false if no value was found
  233. {
  234. UInt32 hsh = hash(key);
  235. return getRaw(key, hsh, v);
  236. }
  237. bool getRaw(const Key& key, UInt32 hsh, Value& v) const
  238. /// Sets v to the found value, returns false if no value was found
  239. {
  240. if (!_entries[hsh])
  241. return false;
  242. ConstIterator it = _entries[hsh]->find(key);
  243. if (it == _entries[hsh]->end())
  244. return false;
  245. v = it->second;
  246. return true;
  247. }
  248. bool exists(const Key& key)
  249. {
  250. UInt32 hsh = hash(key);
  251. return existsRaw(key, hsh);
  252. }
  253. bool existsRaw(const Key& key, UInt32 hsh)
  254. {
  255. return _entries[hsh] && (_entries[hsh]->end() != _entries[hsh]->find(key));
  256. }
  257. std::size_t size() const
  258. /// Returns the number of elements already inserted into the HashTable
  259. {
  260. return _size;
  261. }
  262. UInt32 maxCapacity() const
  263. {
  264. return _maxCapacity;
  265. }
  266. void resize(UInt32 newSize)
  267. /// Resizes the hashtable, rehashes all existing entries. Expensive!
  268. {
  269. if (_maxCapacity != newSize)
  270. {
  271. HashTableVector cpy = _entries;
  272. _entries = 0;
  273. UInt32 oldSize = _maxCapacity;
  274. _maxCapacity = newSize;
  275. _entries = new HashEntryMap*[_maxCapacity];
  276. std::memset(_entries, '\0', sizeof(HashEntryMap*)*_maxCapacity);
  277. if (_size == 0)
  278. {
  279. // no data was yet inserted
  280. delete[] cpy;
  281. return;
  282. }
  283. _size = 0;
  284. for (UInt32 i = 0; i < oldSize; ++i)
  285. {
  286. if (cpy[i])
  287. {
  288. ConstIterator it = cpy[i]->begin();
  289. ConstIterator itEnd = cpy[i]->end();
  290. for (; it != itEnd; ++it)
  291. {
  292. insert(it->first, it->second);
  293. }
  294. delete cpy[i];
  295. }
  296. }
  297. delete[] cpy;
  298. }
  299. }
  300. HashStatistic currentState(bool details = false) const
  301. /// Returns the current internal state
  302. {
  303. UInt32 numberOfEntries = (UInt32)_size;
  304. UInt32 numZeroEntries = 0;
  305. UInt32 maxEntriesPerHash = 0;
  306. std::vector<UInt32> detailedEntriesPerHash;
  307. #ifdef DEBUG
  308. UInt32 totalSize = 0;
  309. #endif
  310. for (UInt32 i = 0; i < _maxCapacity; ++i)
  311. {
  312. if (_entries[i])
  313. {
  314. UInt32 size = (UInt32)_entries[i]->size();
  315. poco_assert_dbg(size != 0);
  316. if (size > maxEntriesPerHash)
  317. maxEntriesPerHash = size;
  318. if (details)
  319. detailedEntriesPerHash.push_back(size);
  320. #ifdef DEBUG
  321. totalSize += size;
  322. #endif
  323. }
  324. else
  325. {
  326. numZeroEntries++;
  327. if (details)
  328. detailedEntriesPerHash.push_back(0);
  329. }
  330. }
  331. #ifdef DEBUG
  332. poco_assert_dbg(totalSize == numberOfEntries);
  333. #endif
  334. return HashStatistic(_maxCapacity, numberOfEntries, numZeroEntries, maxEntriesPerHash, detailedEntriesPerHash);
  335. }
  336. private:
  337. HashTableVector _entries;
  338. std::size_t _size;
  339. UInt32 _maxCapacity;
  340. KeyHashFunction _hash;
  341. };
  342. } // namespace Poco
  343. #endif // Foundation_HashTable_INCLUDED