summaryrefslogtreecommitdiff
path: root/src/hash.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2007-04-03 04:50:36 +0000
committerMike Buland <eichlan@xagasoft.com>2007-04-03 04:50:36 +0000
commitda89e6d30e57bd6dbb10b4d36b093ce9bbf5c666 (patch)
tree0c8d31d13521011dc52a3fbadbf9e7e27929308f /src/hash.h
parentf4c20290509d7ed3a8fd5304577e7a4cc0b9d974 (diff)
downloadlibbu++-da89e6d30e57bd6dbb10b4d36b093ce9bbf5c666.tar.gz
libbu++-da89e6d30e57bd6dbb10b4d36b093ce9bbf5c666.tar.bz2
libbu++-da89e6d30e57bd6dbb10b4d36b093ce9bbf5c666.tar.xz
libbu++-da89e6d30e57bd6dbb10b4d36b093ce9bbf5c666.zip
The first batch seem to have made it alright. Unfortunately the Archive class
isn't done yet, I'm going to make it rely on streams, so those will be next, then we can make it work all sortsa' well.
Diffstat (limited to 'src/hash.h')
-rw-r--r--src/hash.h745
1 files changed, 745 insertions, 0 deletions
diff --git a/src/hash.h b/src/hash.h
new file mode 100644
index 0000000..9e498f1
--- /dev/null
+++ b/src/hash.h
@@ -0,0 +1,745 @@
1#ifndef HASH_H
2#define HASH_H
3
4#include <stddef.h>
5#include <string.h>
6#include <memory>
7#include <iostream>
8#include <list>
9#include <utility>
10#include "exceptionbase.h"
11#include "archable.h"
12#include "archive.h"
13
14#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0))
15
16namespace Bu
17{
18 subExceptionDecl( HashException )
19
20 enum eHashException
21 {
22 excodeNotFilled
23 };
24
25 template<typename T>
26 uint32_t __calcHashCode( const T &k );
27
28 template<typename T>
29 bool __cmpHashKeys( const T &a, const T &b );
30
31 struct __calcNextTSize_fast
32 {
33 uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const
34 {
35 if( nDeleted >= nCapacity/2 )
36 return nCapacity;
37 return nCapacity*2+1;
38 }
39 };
40
41 template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> >
42 class Hash;
43
44 template< typename key, typename _value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<_value>, typename challoc = std::allocator<uint32_t> >
45 struct HashProxy
46 {
47 friend class Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc>;
48 private:
49 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, key *k, uint32_t nPos, uint32_t hash ) :
50 hsh( h ),
51 pKey( k ),
52 nPos( nPos ),
53 hash( hash ),
54 bFilled( false )
55 {
56 }
57
58 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) :
59 hsh( h ),
60 nPos( nPos ),
61 pValue( pValue ),
62 bFilled( true )
63 {
64 }
65
66 Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
67 key *pKey;
68 uint32_t nPos;
69 _value *pValue;
70 uint32_t hash;
71 bool bFilled;
72
73 public:
74 operator _value &()
75 {
76 if( bFilled == false )
77 throw HashException(
78 excodeNotFilled,
79 "No data assosiated with that key."
80 );
81 return *pValue;
82 }
83
84 _value &value()
85 {
86 if( bFilled == false )
87 throw HashException(
88 excodeNotFilled,
89 "No data assosiated with that key."
90 );
91 return *pValue;
92 }
93
94 bool isFilled()
95 {
96 return bFilled;
97 }
98
99 void erase()
100 {
101 if( bFilled )
102 {
103 hsh._erase( nPos );
104 hsh.onDelete();
105 }
106 }
107
108 _value operator=( _value nval )
109 {
110 if( bFilled )
111 {
112 hsh.va.destroy( pValue );
113 hsh.va.construct( pValue, nval );
114 hsh.onUpdate();
115 }
116 else
117 {
118 hsh.fill( nPos, *pKey, nval, hash );
119 hsh.onInsert();
120 }
121
122 return nval;
123 }
124
125 _value *operator->()
126 {
127 if( bFilled == false )
128 throw HashException(
129 excodeNotFilled,
130 "No data assosiated with that key."
131 );
132 return pValue;
133 }
134 };
135
136 template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
137 class Hash
138 {
139 friend struct HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>;
140 public:
141 Hash() :
142 nCapacity( 11 ),
143 nFilled( 0 ),
144 nDeleted( 0 ),
145 bFilled( NULL ),
146 bDeleted( NULL ),
147 aKeys( NULL ),
148 aValues( NULL ),
149 aHashCodes( NULL )
150 {
151 nKeysSize = bitsToBytes( nCapacity );
152 bFilled = ca.allocate( nKeysSize );
153 bDeleted = ca.allocate( nKeysSize );
154 clearBits();
155
156 aHashCodes = ca.allocate( nCapacity );
157 aKeys = ka.allocate( nCapacity );
158 aValues = va.allocate( nCapacity );
159 }
160
161 Hash( const Hash &src ) :
162 nCapacity( src.nCapacity ),
163 nFilled( 0 ),
164 nDeleted( 0 ),
165 bFilled( NULL ),
166 bDeleted( NULL ),
167 aKeys( NULL ),
168 aValues( NULL ),
169 aHashCodes( NULL )
170 {
171 nKeysSize = bitsToBytes( nCapacity );
172 bFilled = ca.allocate( nKeysSize );
173 bDeleted = ca.allocate( nKeysSize );
174 clearBits();
175
176 aHashCodes = ca.allocate( nCapacity );
177 aKeys = ka.allocate( nCapacity );
178 aValues = va.allocate( nCapacity );
179
180 for( uint32_t j = 0; j < src.nCapacity; j++ )
181 {
182 if( src.isFilled( j ) )
183 {
184 insert( src.aKeys[j], src.aValues[j] );
185 }
186 }
187 }
188
189 Hash &operator=( const Hash &src )
190 {
191 for( uint32_t j = 0; j < nCapacity; j++ )
192 {
193 if( isFilled( j ) )
194 if( !isDeleted( j ) )
195 {
196 va.destroy( &aValues[j] );
197 ka.destroy( &aKeys[j] );
198 }
199 }
200 va.deallocate( aValues, nCapacity );
201 ka.deallocate( aKeys, nCapacity );
202 ca.deallocate( bFilled, nKeysSize );
203 ca.deallocate( bDeleted, nKeysSize );
204 ca.deallocate( aHashCodes, nCapacity );
205
206 nFilled = 0;
207 nDeleted = 0;
208 nCapacity = src.nCapacity;
209 nKeysSize = bitsToBytes( nCapacity );
210 bFilled = ca.allocate( nKeysSize );
211 bDeleted = ca.allocate( nKeysSize );
212 clearBits();
213
214 aHashCodes = ca.allocate( nCapacity );
215 aKeys = ka.allocate( nCapacity );
216 aValues = va.allocate( nCapacity );
217
218 for( uint32_t j = 0; j < src.nCapacity; j++ )
219 {
220 if( src.isFilled( j ) )
221 {
222 insert( src.aKeys[j], src.aValues[j] );
223 }
224 }
225
226 return *this;
227 }
228
229 virtual ~Hash()
230 {
231 for( uint32_t j = 0; j < nCapacity; j++ )
232 {
233 if( isFilled( j ) )
234 if( !isDeleted( j ) )
235 {
236 va.destroy( &aValues[j] );
237 ka.destroy( &aKeys[j] );
238 }
239 }
240 va.deallocate( aValues, nCapacity );
241 ka.deallocate( aKeys, nCapacity );
242 ca.deallocate( bFilled, nKeysSize );
243 ca.deallocate( bDeleted, nKeysSize );
244 ca.deallocate( aHashCodes, nCapacity );
245 }
246
247 uint32_t getCapacity()
248 {
249 return nCapacity;
250 }
251
252 uint32_t getFill()
253 {
254 return nFilled;
255 }
256
257 uint32_t size()
258 {
259 return nFilled-nDeleted;
260 }
261
262 uint32_t getDeleted()
263 {
264 return nDeleted;
265 }
266
267 virtual HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k )
268 {
269 uint32_t hash = __calcHashCode( k );
270 bool bFill;
271 uint32_t nPos = probe( hash, k, bFill );
272
273 if( bFill )
274 {
275 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] );
276 }
277 else
278 {
279 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &k, nPos, hash );
280 }
281 }
282
283 virtual void insert( key k, value v )
284 {
285 uint32_t hash = __calcHashCode( k );
286 bool bFill;
287 uint32_t nPos = probe( hash, k, bFill );
288
289 if( bFill )
290 {
291 va.destroy( &aValues[nPos] );
292 va.construct( &aValues[nPos], v );
293 onUpdate();
294 }
295 else
296 {
297 fill( nPos, k, v, hash );
298 onInsert();
299 }
300 }
301
302 virtual void erase( key k )
303 {
304 uint32_t hash = __calcHashCode( k );
305 bool bFill;
306 uint32_t nPos = probe( hash, k, bFill );
307
308 if( bFill )
309 {
310 _erase( nPos );
311 onDelete();
312 }
313 }
314
315 struct iterator;
316 virtual void erase( struct iterator &i )
317 {
318 if( this != &i.hsh )
319 throw HashException("This iterator didn't come from this Hash.");
320 if( isFilled( i.nPos ) && !isDeleted( i.nPos ) )
321 {
322 _erase( i.nPos );
323 onDelete();
324 }
325 }
326
327 virtual void clear()
328 {
329 for( uint32_t j = 0; j < nCapacity; j++ )
330 {
331 if( isFilled( j ) )
332 if( !isDeleted( j ) )
333 {
334 va.destroy( &aValues[j] );
335 ka.destroy( &aKeys[j] );
336 onDelete();
337 }
338 }
339
340 clearBits();
341 }
342
343 virtual value &get( key k )
344 {
345 uint32_t hash = __calcHashCode( k );
346 bool bFill;
347 uint32_t nPos = probe( hash, k, bFill );
348
349 if( bFill )
350 {
351 return aValues[nPos];
352 }
353 else
354 {
355 throw HashException(
356 excodeNotFilled,
357 "No data assosiated with that key."
358 );
359 }
360 }
361
362 virtual bool has( key k )
363 {
364 bool bFill;
365 probe( __calcHashCode( k ), k, bFill, false );
366
367 return bFill;
368 }
369
370 typedef struct iterator
371 {
372 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
373 private:
374 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) :
375 hsh( hsh ),
376 nPos( 0 ),
377 bFinished( false )
378 {
379 nPos = hsh.getFirstPos( bFinished );
380 }
381
382 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) :
383 hsh( hsh ),
384 nPos( 0 ),
385 bFinished( bDone )
386 {
387 }
388
389 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
390 uint32_t nPos;
391 bool bFinished;
392
393 public:
394 iterator operator++( int )
395 {
396 if( bFinished == false )
397 nPos = hsh.getNextPos( nPos, bFinished );
398
399 return *this;
400 }
401
402 iterator operator++()
403 {
404 if( bFinished == false )
405 nPos = hsh.getNextPos( nPos, bFinished );
406
407 return *this;
408 }
409
410 bool operator==( const iterator &oth )
411 {
412 if( bFinished != oth.bFinished )
413 return false;
414 if( bFinished == true )
415 {
416 return true;
417 }
418 else
419 {
420 if( oth.nPos == nPos )
421 return true;
422 return false;
423 }
424 }
425
426 bool operator!=( const iterator &oth )
427 {
428 return !(*this == oth );
429 }
430
431 iterator operator=( const iterator &oth )
432 {
433 if( &hsh != &oth.hsh )
434 throw HashException(
435 "Cannot mix iterators from different hash objects.");
436 nPos = oth.nPos;
437 bFinished = oth.bFinished;
438 }
439
440 std::pair<key,value> operator *()
441 {
442 return hsh.getAtPos( nPos );
443 }
444
445 key &getKey()
446 {
447 return hsh.getKeyAtPos( nPos );
448 }
449
450 value &getValue()
451 {
452 return hsh.getValueAtPos( nPos );
453 }
454 };
455
456 iterator begin()
457 {
458 return iterator( *this );
459 }
460
461 iterator end()
462 {
463 return iterator( *this, true );
464 }
465
466 std::list<key> getKeys()
467 {
468 std::list<key> lKeys;
469
470 for( uint32_t j = 0; j < nCapacity; j++ )
471 {
472 if( isFilled( j ) )
473 {
474 if( !isDeleted( j ) )
475 {
476 lKeys.push_back( aKeys[j] );
477 }
478 }
479 }
480
481 return lKeys;
482 }
483
484 protected:
485 virtual void onInsert() {}
486 virtual void onUpdate() {}
487 virtual void onDelete() {}
488 virtual void onReHash() {}
489
490 virtual void clearBits()
491 {
492 for( uint32_t j = 0; j < nKeysSize; j++ )
493 {
494 bFilled[j] = bDeleted[j] = 0;
495 }
496 }
497
498 virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash )
499 {
500 bFilled[loc/32] |= (1<<(loc%32));
501 va.construct( &aValues[loc], v );
502 ka.construct( &aKeys[loc], k );
503 aHashCodes[loc] = hash;
504 nFilled++;
505 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
506 // nFilled, nDeleted, nCapacity );
507 }
508
509 virtual void _erase( uint32_t loc )
510 {
511 bDeleted[loc/32] |= (1<<(loc%32));
512 va.destroy( &aValues[loc] );
513 ka.destroy( &aKeys[loc] );
514 nDeleted++;
515 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
516 // nFilled, nDeleted, nCapacity );
517 }
518
519 virtual std::pair<key,value> getAtPos( uint32_t nPos )
520 {
521 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
522 }
523
524 virtual key &getKeyAtPos( uint32_t nPos )
525 {
526 return aKeys[nPos];
527 }
528
529 virtual value &getValueAtPos( uint32_t nPos )
530 {
531 return aValues[nPos];
532 }
533
534 virtual uint32_t getFirstPos( bool &bFinished )
535 {
536 for( uint32_t j = 0; j < nCapacity; j++ )
537 {
538 if( isFilled( j ) )
539 if( !isDeleted( j ) )
540 return j;
541 }
542
543 bFinished = true;
544 return 0;
545 }
546
547 virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished )
548 {
549 for( uint32_t j = nPos+1; j < nCapacity; j++ )
550 {
551 if( isFilled( j ) )
552 if( !isDeleted( j ) )
553 return j;
554 }
555
556 bFinished = true;
557 return 0;
558 }
559
560 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
561 {
562 uint32_t nCur = hash%nCapacity;
563
564 // First we scan to see if the key is already there, abort if we
565 // run out of probing room, or we find a non-filled entry
566 for( int8_t j = 0;
567 isFilled( nCur ) && j < 32;
568 nCur = (nCur + (1<<j))%nCapacity, j++
569 )
570 {
571 // Is this the same hash code we were looking for?
572 if( hash == aHashCodes[nCur] )
573 {
574 // Skip over deleted entries. Deleted entries are also filled,
575 // so we only have to do this check here.
576 if( isDeleted( nCur ) )
577 continue;
578
579 // Is it really the same key? (for safety)
580 if( __cmpHashKeys( aKeys[nCur], k ) == true )
581 {
582 bFill = true;
583 return nCur;
584 }
585 }
586 }
587
588 // This is our insurance, if the table is full, then go ahead and
589 // rehash, then try again.
590 if( isFilled( nCur ) && rehash == true )
591 {
592 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
593
594 // This is potentially dangerous, and could cause an infinite loop.
595 // Be careful writing probe, eh?
596 return probe( hash, k, bFill );
597 }
598
599 bFill = false;
600 return nCur;
601 }
602
603 void reHash( uint32_t nNewSize )
604 {
605 //printf("---REHASH---");
606 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
607 // nFilled, nDeleted, nCapacity );
608
609 // Save all the old data
610 uint32_t nOldCapacity = nCapacity;
611 uint32_t *bOldFilled = bFilled;
612 uint32_t *aOldHashCodes = aHashCodes;
613 uint32_t nOldKeysSize = nKeysSize;
614 uint32_t *bOldDeleted = bDeleted;
615 value *aOldValues = aValues;
616 key *aOldKeys = aKeys;
617
618 // Calculate new sizes
619 nCapacity = nNewSize;
620 nKeysSize = bitsToBytes( nCapacity );
621
622 // Allocate new memory + prep
623 bFilled = ca.allocate( nKeysSize );
624 bDeleted = ca.allocate( nKeysSize );
625 clearBits();
626
627 aHashCodes = ca.allocate( nCapacity );
628 aKeys = ka.allocate( nCapacity );
629 aValues = va.allocate( nCapacity );
630
631 nDeleted = nFilled = 0;
632
633 // Re-insert all of the old data (except deleted items)
634 for( uint32_t j = 0; j < nOldCapacity; j++ )
635 {
636 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 &&
637 (bOldDeleted[j/32]&(1<<(j%32)))==0 )
638 {
639 insert( aOldKeys[j], aOldValues[j] );
640 }
641 }
642
643 // Delete all of the old data
644 for( uint32_t j = 0; j < nOldCapacity; j++ )
645 {
646 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
647 {
648 va.destroy( &aOldValues[j] );
649 ka.destroy( &aOldKeys[j] );
650 }
651 }
652 va.deallocate( aOldValues, nOldCapacity );
653 ka.deallocate( aOldKeys, nOldCapacity );
654 ca.deallocate( bOldFilled, nOldKeysSize );
655 ca.deallocate( bOldDeleted, nOldKeysSize );
656 ca.deallocate( aOldHashCodes, nOldCapacity );
657 }
658
659 virtual bool isFilled( uint32_t loc ) const
660 {
661 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
662 }
663
664 virtual bool isDeleted( uint32_t loc )
665 {
666 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
667 }
668
669 protected:
670 uint32_t nCapacity;
671 uint32_t nFilled;
672 uint32_t nDeleted;
673 uint32_t *bFilled;
674 uint32_t *bDeleted;
675 uint32_t nKeysSize;
676 key *aKeys;
677 value *aValues;
678 uint32_t *aHashCodes;
679 valuealloc va;
680 keyalloc ka;
681 challoc ca;
682 sizecalc szCalc;
683 };
684
685 template<> uint32_t __calcHashCode<int>( const int &k );
686 template<> bool __cmpHashKeys<int>( const int &a, const int &b );
687
688 template<> uint32_t __calcHashCode<unsigned int>( const unsigned int &k );
689 template<> bool __cmpHashKeys<unsigned int>( const unsigned int &a, const unsigned int &b );
690
691 template<> uint32_t __calcHashCode<const char *>( const char * const &k );
692 template<> bool __cmpHashKeys<const char *>( const char * const &a, const char * const &b );
693
694 template<> uint32_t __calcHashCode<char *>( char * const &k );
695 template<> bool __cmpHashKeys<char *>( char * const &a, char * const &b );
696
697 template<> uint32_t __calcHashCode<std::string>( const std::string &k );
698 template<> bool __cmpHashKeys<std::string>( const std::string &a, const std::string &b );
699
700 template<typename key, typename value>
701 Archive &operator<<( Archive &ar, Hash<key,value> &h )
702 {
703 ar << h.size();
704 for( typename Hash<key,value>::iterator i = h.begin(); i != h.end(); i++ )
705 {
706 std::pair<key,value> p = *i;
707 ar << p.first << p.second;
708 }
709
710 return ar;
711 }
712
713 template<typename key, typename value>
714 Archive &operator>>( Archive &ar, Hash<key,value> &h )
715 {
716 h.clear();
717 uint32_t nSize;
718 ar >> nSize;
719
720 for( uint32_t j = 0; j < nSize; j++ )
721 {
722 key k; value v;
723 ar >> k >> v;
724 h.insert( k, v );
725 }
726
727 return ar;
728 }
729
730 /*
731 template<typename key, typename value>
732 Serializer &operator&&( Serializer &ar, Hash<key,value> &h )
733 {
734 if( ar.isLoading() )
735 {
736 return ar >> h;
737 }
738 else
739 {
740 return ar << h;
741 }
742 }*/
743}
744
745#endif