summaryrefslogtreecommitdiff
path: root/src/hash.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/hash.h')
-rw-r--r--src/hash.h745
1 files changed, 745 insertions, 0 deletions
diff --git a/src/hash.h b/src/hash.h
new file mode 100644
index 0000000..9e498f1
--- /dev/null
+++ b/src/hash.h
@@ -0,0 +1,745 @@
1#ifndef HASH_H
2#define HASH_H
3
4#include <stddef.h>
5#include <string.h>
6#include <memory>
7#include <iostream>
8#include <list>
9#include <utility>
10#include "exceptionbase.h"
11#include "archable.h"
12#include "archive.h"
13
14#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0))
15
16namespace Bu
17{
18 subExceptionDecl( HashException )
19
20 enum eHashException
21 {
22 excodeNotFilled
23 };
24
25 template<typename T>
26 uint32_t __calcHashCode( const T &k );
27
28 template<typename T>
29 bool __cmpHashKeys( const T &a, const T &b );
30
31 struct __calcNextTSize_fast
32 {
33 uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const
34 {
35 if( nDeleted >= nCapacity/2 )
36 return nCapacity;
37 return nCapacity*2+1;
38 }
39 };
40
41 template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> >
42 class Hash;
43
44 template< typename key, typename _value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<_value>, typename challoc = std::allocator<uint32_t> >
45 struct HashProxy
46 {
47 friend class Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc>;
48 private:
49 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, key *k, uint32_t nPos, uint32_t hash ) :
50 hsh( h ),
51 pKey( k ),
52 nPos( nPos ),
53 hash( hash ),
54 bFilled( false )
55 {
56 }
57
58 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) :
59 hsh( h ),
60 nPos( nPos ),
61 pValue( pValue ),
62 bFilled( true )
63 {
64 }
65
66 Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
67 key *pKey;
68 uint32_t nPos;
69 _value *pValue;
70 uint32_t hash;
71 bool bFilled;
72
73 public:
74 operator _value &()
75 {
76 if( bFilled == false )
77 throw HashException(
78 excodeNotFilled,
79 "No data assosiated with that key."
80 );
81 return *pValue;
82 }
83
84 _value &value()
85 {
86 if( bFilled == false )
87 throw HashException(
88 excodeNotFilled,
89 "No data assosiated with that key."
90 );
91 return *pValue;
92 }
93
94 bool isFilled()
95 {
96 return bFilled;
97 }
98
99 void erase()
100 {
101 if( bFilled )
102 {
103 hsh._erase( nPos );
104 hsh.onDelete();
105 }
106 }
107
108 _value operator=( _value nval )
109 {
110 if( bFilled )
111 {
112 hsh.va.destroy( pValue );
113 hsh.va.construct( pValue, nval );
114 hsh.onUpdate();
115 }
116 else
117 {
118 hsh.fill( nPos, *pKey, nval, hash );
119 hsh.onInsert();
120 }
121
122 return nval;
123 }
124
125 _value *operator->()
126 {
127 if( bFilled == false )
128 throw HashException(
129 excodeNotFilled,
130 "No data assosiated with that key."
131 );
132 return pValue;
133 }
134 };
135
136 template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
137 class Hash
138 {
139 friend struct HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>;
140 public:
141 Hash() :
142 nCapacity( 11 ),
143 nFilled( 0 ),
144 nDeleted( 0 ),
145 bFilled( NULL ),
146 bDeleted( NULL ),
147 aKeys( NULL ),
148 aValues( NULL ),
149 aHashCodes( NULL )
150 {
151 nKeysSize = bitsToBytes( nCapacity );
152 bFilled = ca.allocate( nKeysSize );
153 bDeleted = ca.allocate( nKeysSize );
154 clearBits();
155
156 aHashCodes = ca.allocate( nCapacity );
157 aKeys = ka.allocate( nCapacity );
158 aValues = va.allocate( nCapacity );
159 }
160
161 Hash( const Hash &src ) :
162 nCapacity( src.nCapacity ),
163 nFilled( 0 ),
164 nDeleted( 0 ),
165 bFilled( NULL ),
166 bDeleted( NULL ),
167 aKeys( NULL ),
168 aValues( NULL ),
169 aHashCodes( NULL )
170 {
171 nKeysSize = bitsToBytes( nCapacity );
172 bFilled = ca.allocate( nKeysSize );
173 bDeleted = ca.allocate( nKeysSize );
174 clearBits();
175
176 aHashCodes = ca.allocate( nCapacity );
177 aKeys = ka.allocate( nCapacity );
178 aValues = va.allocate( nCapacity );
179
180 for( uint32_t j = 0; j < src.nCapacity; j++ )
181 {
182 if( src.isFilled( j ) )
183 {
184 insert( src.aKeys[j], src.aValues[j] );
185 }
186 }
187 }
188
189 Hash &operator=( const Hash &src )
190 {
191 for( uint32_t j = 0; j < nCapacity; j++ )
192 {
193 if( isFilled( j ) )
194 if( !isDeleted( j ) )
195 {
196 va.destroy( &aValues[j] );
197 ka.destroy( &aKeys[j] );
198 }
199 }
200 va.deallocate( aValues, nCapacity );
201 ka.deallocate( aKeys, nCapacity );
202 ca.deallocate( bFilled, nKeysSize );
203 ca.deallocate( bDeleted, nKeysSize );
204 ca.deallocate( aHashCodes, nCapacity );
205
206 nFilled = 0;
207 nDeleted = 0;
208 nCapacity = src.nCapacity;
209 nKeysSize = bitsToBytes( nCapacity );
210 bFilled = ca.allocate( nKeysSize );
211 bDeleted = ca.allocate( nKeysSize );
212 clearBits();
213
214 aHashCodes = ca.allocate( nCapacity );
215 aKeys = ka.allocate( nCapacity );
216 aValues = va.allocate( nCapacity );
217
218 for( uint32_t j = 0; j < src.nCapacity; j++ )
219 {
220 if( src.isFilled( j ) )
221 {
222 insert( src.aKeys[j], src.aValues[j] );
223 }
224 }
225
226 return *this;
227 }
228
229 virtual ~Hash()
230 {
231 for( uint32_t j = 0; j < nCapacity; j++ )
232 {
233 if( isFilled( j ) )
234 if( !isDeleted( j ) )
235 {
236 va.destroy( &aValues[j] );
237 ka.destroy( &aKeys[j] );
238 }
239 }
240 va.deallocate( aValues, nCapacity );
241 ka.deallocate( aKeys, nCapacity );
242 ca.deallocate( bFilled, nKeysSize );
243 ca.deallocate( bDeleted, nKeysSize );
244 ca.deallocate( aHashCodes, nCapacity );
245 }
246
247 uint32_t getCapacity()
248 {
249 return nCapacity;
250 }
251
252 uint32_t getFill()
253 {
254 return nFilled;
255 }
256
257 uint32_t size()
258 {
259 return nFilled-nDeleted;
260 }
261
262 uint32_t getDeleted()
263 {
264 return nDeleted;
265 }
266
267 virtual HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k )
268 {
269 uint32_t hash = __calcHashCode( k );
270 bool bFill;
271 uint32_t nPos = probe( hash, k, bFill );
272
273 if( bFill )
274 {
275 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] );
276 }
277 else
278 {
279 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &k, nPos, hash );
280 }
281 }
282
283 virtual void insert( key k, value v )
284 {
285 uint32_t hash = __calcHashCode( k );
286 bool bFill;
287 uint32_t nPos = probe( hash, k, bFill );
288
289 if( bFill )
290 {
291 va.destroy( &aValues[nPos] );
292 va.construct( &aValues[nPos], v );
293 onUpdate();
294 }
295 else
296 {
297 fill( nPos, k, v, hash );
298 onInsert();
299 }
300 }
301
302 virtual void erase( key k )
303 {
304 uint32_t hash = __calcHashCode( k );
305 bool bFill;
306 uint32_t nPos = probe( hash, k, bFill );
307
308 if( bFill )
309 {
310 _erase( nPos );
311 onDelete();
312 }
313 }
314
315 struct iterator;
316 virtual void erase( struct iterator &i )
317 {
318 if( this != &i.hsh )
319 throw HashException("This iterator didn't come from this Hash.");
320 if( isFilled( i.nPos ) && !isDeleted( i.nPos ) )
321 {
322 _erase( i.nPos );
323 onDelete();
324 }
325 }
326
327 virtual void clear()
328 {
329 for( uint32_t j = 0; j < nCapacity; j++ )
330 {
331 if( isFilled( j ) )
332 if( !isDeleted( j ) )
333 {
334 va.destroy( &aValues[j] );
335 ka.destroy( &aKeys[j] );
336 onDelete();
337 }
338 }
339
340 clearBits();
341 }
342
343 virtual value &get( key k )
344 {
345 uint32_t hash = __calcHashCode( k );
346 bool bFill;
347 uint32_t nPos = probe( hash, k, bFill );
348
349 if( bFill )
350 {
351 return aValues[nPos];
352 }
353 else
354 {
355 throw HashException(
356 excodeNotFilled,
357 "No data assosiated with that key."
358 );
359 }
360 }
361
362 virtual bool has( key k )
363 {
364 bool bFill;
365 probe( __calcHashCode( k ), k, bFill, false );
366
367 return bFill;
368 }
369
370 typedef struct iterator
371 {
372 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
373 private:
374 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) :
375 hsh( hsh ),
376 nPos( 0 ),
377 bFinished( false )
378 {
379 nPos = hsh.getFirstPos( bFinished );
380 }
381
382 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) :
383 hsh( hsh ),
384 nPos( 0 ),
385 bFinished( bDone )
386 {
387 }
388
389 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
390 uint32_t nPos;
391 bool bFinished;
392
393 public:
394 iterator operator++( int )
395 {
396 if( bFinished == false )
397 nPos = hsh.getNextPos( nPos, bFinished );
398
399 return *this;
400 }
401
402 iterator operator++()
403 {
404 if( bFinished == false )
405 nPos = hsh.getNextPos( nPos, bFinished );
406
407 return *this;
408 }
409
410 bool operator==( const iterator &oth )
411 {
412 if( bFinished != oth.bFinished )
413 return false;
414 if( bFinished == true )
415 {
416 return true;
417 }
418 else
419 {
420 if( oth.nPos == nPos )
421 return true;
422 return false;
423 }
424 }
425
426 bool operator!=( const iterator &oth )
427 {
428 return !(*this == oth );
429 }
430
431 iterator operator=( const iterator &oth )
432 {
433 if( &hsh != &oth.hsh )
434 throw HashException(
435 "Cannot mix iterators from different hash objects.");
436 nPos = oth.nPos;
437 bFinished = oth.bFinished;
438 }
439
440 std::pair<key,value> operator *()
441 {
442 return hsh.getAtPos( nPos );
443 }
444
445 key &getKey()
446 {
447 return hsh.getKeyAtPos( nPos );
448 }
449
450 value &getValue()
451 {
452 return hsh.getValueAtPos( nPos );
453 }
454 };
455
456 iterator begin()
457 {
458 return iterator( *this );
459 }
460
461 iterator end()
462 {
463 return iterator( *this, true );
464 }
465
466 std::list<key> getKeys()
467 {
468 std::list<key> lKeys;
469
470 for( uint32_t j = 0; j < nCapacity; j++ )
471 {
472 if( isFilled( j ) )
473 {
474 if( !isDeleted( j ) )
475 {
476 lKeys.push_back( aKeys[j] );
477 }
478 }
479 }
480
481 return lKeys;
482 }
483
484 protected:
485 virtual void onInsert() {}
486 virtual void onUpdate() {}
487 virtual void onDelete() {}
488 virtual void onReHash() {}
489
490 virtual void clearBits()
491 {
492 for( uint32_t j = 0; j < nKeysSize; j++ )
493 {
494 bFilled[j] = bDeleted[j] = 0;
495 }
496 }
497
498 virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash )
499 {
500 bFilled[loc/32] |= (1<<(loc%32));
501 va.construct( &aValues[loc], v );
502 ka.construct( &aKeys[loc], k );
503 aHashCodes[loc] = hash;
504 nFilled++;
505 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
506 // nFilled, nDeleted, nCapacity );
507 }
508
509 virtual void _erase( uint32_t loc )
510 {
511 bDeleted[loc/32] |= (1<<(loc%32));
512 va.destroy( &aValues[loc] );
513 ka.destroy( &aKeys[loc] );
514 nDeleted++;
515 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
516 // nFilled, nDeleted, nCapacity );
517 }
518
519 virtual std::pair<key,value> getAtPos( uint32_t nPos )
520 {
521 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
522 }
523
524 virtual key &getKeyAtPos( uint32_t nPos )
525 {
526 return aKeys[nPos];
527 }
528
529 virtual value &getValueAtPos( uint32_t nPos )
530 {
531 return aValues[nPos];
532 }
533
534 virtual uint32_t getFirstPos( bool &bFinished )
535 {
536 for( uint32_t j = 0; j < nCapacity; j++ )
537 {
538 if( isFilled( j ) )
539 if( !isDeleted( j ) )
540 return j;
541 }
542
543 bFinished = true;
544 return 0;
545 }
546
547 virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished )
548 {
549 for( uint32_t j = nPos+1; j < nCapacity; j++ )
550 {
551 if( isFilled( j ) )
552 if( !isDeleted( j ) )
553 return j;
554 }
555
556 bFinished = true;
557 return 0;
558 }
559
560 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
561 {
562 uint32_t nCur = hash%nCapacity;
563
564 // First we scan to see if the key is already there, abort if we
565 // run out of probing room, or we find a non-filled entry
566 for( int8_t j = 0;
567 isFilled( nCur ) && j < 32;
568 nCur = (nCur + (1<<j))%nCapacity, j++
569 )
570 {
571 // Is this the same hash code we were looking for?
572 if( hash == aHashCodes[nCur] )
573 {
574 // Skip over deleted entries. Deleted entries are also filled,
575 // so we only have to do this check here.
576 if( isDeleted( nCur ) )
577 continue;
578
579 // Is it really the same key? (for safety)
580 if( __cmpHashKeys( aKeys[nCur], k ) == true )
581 {
582 bFill = true;
583 return nCur;
584 }
585 }
586 }
587
588 // This is our insurance, if the table is full, then go ahead and
589 // rehash, then try again.
590 if( isFilled( nCur ) && rehash == true )
591 {
592 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
593
594 // This is potentially dangerous, and could cause an infinite loop.
595 // Be careful writing probe, eh?
596 return probe( hash, k, bFill );
597 }
598
599 bFill = false;
600 return nCur;
601 }
602
603 void reHash( uint32_t nNewSize )
604 {
605 //printf("---REHASH---");
606 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
607 // nFilled, nDeleted, nCapacity );
608
609 // Save all the old data
610 uint32_t nOldCapacity = nCapacity;
611 uint32_t *bOldFilled = bFilled;
612 uint32_t *aOldHashCodes = aHashCodes;
613 uint32_t nOldKeysSize = nKeysSize;
614 uint32_t *bOldDeleted = bDeleted;
615 value *aOldValues = aValues;
616 key *aOldKeys = aKeys;
617
618 // Calculate new sizes
619 nCapacity = nNewSize;
620 nKeysSize = bitsToBytes( nCapacity );
621
622 // Allocate new memory + prep
623 bFilled = ca.allocate( nKeysSize );
624 bDeleted = ca.allocate( nKeysSize );
625 clearBits();
626
627 aHashCodes = ca.allocate( nCapacity );
628 aKeys = ka.allocate( nCapacity );
629 aValues = va.allocate( nCapacity );
630
631 nDeleted = nFilled = 0;
632
633 // Re-insert all of the old data (except deleted items)
634 for( uint32_t j = 0; j < nOldCapacity; j++ )
635 {
636 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 &&
637 (bOldDeleted[j/32]&(1<<(j%32)))==0 )
638 {
639 insert( aOldKeys[j], aOldValues[j] );
640 }
641 }
642
643 // Delete all of the old data
644 for( uint32_t j = 0; j < nOldCapacity; j++ )
645 {
646 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
647 {
648 va.destroy( &aOldValues[j] );
649 ka.destroy( &aOldKeys[j] );
650 }
651 }
652 va.deallocate( aOldValues, nOldCapacity );
653 ka.deallocate( aOldKeys, nOldCapacity );
654 ca.deallocate( bOldFilled, nOldKeysSize );
655 ca.deallocate( bOldDeleted, nOldKeysSize );
656 ca.deallocate( aOldHashCodes, nOldCapacity );
657 }
658
659 virtual bool isFilled( uint32_t loc ) const
660 {
661 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
662 }
663
664 virtual bool isDeleted( uint32_t loc )
665 {
666 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
667 }
668
669 protected:
670 uint32_t nCapacity;
671 uint32_t nFilled;
672 uint32_t nDeleted;
673 uint32_t *bFilled;
674 uint32_t *bDeleted;
675 uint32_t nKeysSize;
676 key *aKeys;
677 value *aValues;
678 uint32_t *aHashCodes;
679 valuealloc va;
680 keyalloc ka;
681 challoc ca;
682 sizecalc szCalc;
683 };
684
685 template<> uint32_t __calcHashCode<int>( const int &k );
686 template<> bool __cmpHashKeys<int>( const int &a, const int &b );
687
688 template<> uint32_t __calcHashCode<unsigned int>( const unsigned int &k );
689 template<> bool __cmpHashKeys<unsigned int>( const unsigned int &a, const unsigned int &b );
690
691 template<> uint32_t __calcHashCode<const char *>( const char * const &k );
692 template<> bool __cmpHashKeys<const char *>( const char * const &a, const char * const &b );
693
694 template<> uint32_t __calcHashCode<char *>( char * const &k );
695 template<> bool __cmpHashKeys<char *>( char * const &a, char * const &b );
696
697 template<> uint32_t __calcHashCode<std::string>( const std::string &k );
698 template<> bool __cmpHashKeys<std::string>( const std::string &a, const std::string &b );
699
700 template<typename key, typename value>
701 Archive &operator<<( Archive &ar, Hash<key,value> &h )
702 {
703 ar << h.size();
704 for( typename Hash<key,value>::iterator i = h.begin(); i != h.end(); i++ )
705 {
706 std::pair<key,value> p = *i;
707 ar << p.first << p.second;
708 }
709
710 return ar;
711 }
712
713 template<typename key, typename value>
714 Archive &operator>>( Archive &ar, Hash<key,value> &h )
715 {
716 h.clear();
717 uint32_t nSize;
718 ar >> nSize;
719
720 for( uint32_t j = 0; j < nSize; j++ )
721 {
722 key k; value v;
723 ar >> k >> v;
724 h.insert( k, v );
725 }
726
727 return ar;
728 }
729
730 /*
731 template<typename key, typename value>
732 Serializer &operator&&( Serializer &ar, Hash<key,value> &h )
733 {
734 if( ar.isLoading() )
735 {
736 return ar >> h;
737 }
738 else
739 {
740 return ar << h;
741 }
742 }*/
743}
744
745#endif