summaryrefslogtreecommitdiff
path: root/src/hash.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2007-04-03 03:49:53 +0000
committerMike Buland <eichlan@xagasoft.com>2007-04-03 03:49:53 +0000
commitf4c20290509d7ed3a8fd5304577e7a4cc0b9d974 (patch)
tree13cdf64f7cf134f397a7165b7a3fe0807e37026b /src/hash.h
parent74d4c8cd27334fc7204d5a8773deb3d424565778 (diff)
downloadlibbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.gz
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.bz2
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.xz
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.zip
Ok, no code is left in src, it's all in src/old. We'll gradually move code back
into src as it's fixed and re-org'd. This includes tests, which, I may write a unit test system into libbu++ just to make my life easier.
Diffstat (limited to 'src/hash.h')
-rw-r--r--src/hash.h744
1 files changed, 0 insertions, 744 deletions
diff --git a/src/hash.h b/src/hash.h
deleted file mode 100644
index e819379..0000000
--- a/src/hash.h
+++ /dev/null
@@ -1,744 +0,0 @@
1#ifndef HASH_H
2#define HASH_H
3
4#include <stddef.h>
5#include <string.h>
6#include <memory>
7#include <iostream>
8#include <list>
9#include "exceptionbase.h"
10#include "hashable.h"
11#include "serializable.h"
12#include "serializer.h"
13
14#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0))
15
16subExceptionDecl( HashException )
17
18enum eHashException
19{
20 excodeNotFilled
21};
22
23template<typename T>
24uint32_t __calcHashCode( const T &k );
25
26template<typename T>
27bool __cmpHashKeys( const T &a, const T &b );
28
29struct __calcNextTSize_fast
30{
31 uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const
32 {
33 if( nDeleted >= nCapacity/2 )
34 return nCapacity;
35 return nCapacity*2+1;
36 }
37};
38
39template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> >
40class Hash;
41
42template< typename key, typename _value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<_value>, typename challoc = std::allocator<uint32_t> >
43struct HashProxy
44{
45 friend class Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc>;
46private:
47 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, key *k, uint32_t nPos, uint32_t hash ) :
48 hsh( h ),
49 pKey( k ),
50 nPos( nPos ),
51 hash( hash ),
52 bFilled( false )
53 {
54 }
55
56 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) :
57 hsh( h ),
58 nPos( nPos ),
59 pValue( pValue ),
60 bFilled( true )
61 {
62 }
63
64 Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
65 key *pKey;
66 uint32_t nPos;
67 _value *pValue;
68 uint32_t hash;
69 bool bFilled;
70
71public:
72 operator _value &()
73 {
74 if( bFilled == false )
75 throw HashException(
76 excodeNotFilled,
77 "No data assosiated with that key."
78 );
79 return *pValue;
80 }
81
82 _value &value()
83 {
84 if( bFilled == false )
85 throw HashException(
86 excodeNotFilled,
87 "No data assosiated with that key."
88 );
89 return *pValue;
90 }
91
92 bool isFilled()
93 {
94 return bFilled;
95 }
96
97 void erase()
98 {
99 if( bFilled )
100 {
101 hsh._erase( nPos );
102 hsh.onDelete();
103 }
104 }
105
106 _value operator=( _value nval )
107 {
108 if( bFilled )
109 {
110 hsh.va.destroy( pValue );
111 hsh.va.construct( pValue, nval );
112 hsh.onUpdate();
113 }
114 else
115 {
116 hsh.fill( nPos, *pKey, nval, hash );
117 hsh.onInsert();
118 }
119
120 return nval;
121 }
122
123 _value *operator->()
124 {
125 if( bFilled == false )
126 throw HashException(
127 excodeNotFilled,
128 "No data assosiated with that key."
129 );
130 return pValue;
131 }
132};
133
134template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
135class Hash
136{
137 friend struct HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>;
138public:
139 Hash() :
140 nCapacity( 11 ),
141 nFilled( 0 ),
142 nDeleted( 0 ),
143 bFilled( NULL ),
144 bDeleted( NULL ),
145 aKeys( NULL ),
146 aValues( NULL ),
147 aHashCodes( NULL )
148 {
149 nKeysSize = bitsToBytes( nCapacity );
150 bFilled = ca.allocate( nKeysSize );
151 bDeleted = ca.allocate( nKeysSize );
152 clearBits();
153
154 aHashCodes = ca.allocate( nCapacity );
155 aKeys = ka.allocate( nCapacity );
156 aValues = va.allocate( nCapacity );
157 }
158
159 Hash( const Hash &src ) :
160 nCapacity( src.nCapacity ),
161 nFilled( 0 ),
162 nDeleted( 0 ),
163 bFilled( NULL ),
164 bDeleted( NULL ),
165 aKeys( NULL ),
166 aValues( NULL ),
167 aHashCodes( NULL )
168 {
169 nKeysSize = bitsToBytes( nCapacity );
170 bFilled = ca.allocate( nKeysSize );
171 bDeleted = ca.allocate( nKeysSize );
172 clearBits();
173
174 aHashCodes = ca.allocate( nCapacity );
175 aKeys = ka.allocate( nCapacity );
176 aValues = va.allocate( nCapacity );
177
178 for( uint32_t j = 0; j < src.nCapacity; j++ )
179 {
180 if( src.isFilled( j ) )
181 {
182 insert( src.aKeys[j], src.aValues[j] );
183 }
184 }
185 }
186
187 Hash &operator=( const Hash &src )
188 {
189 for( uint32_t j = 0; j < nCapacity; j++ )
190 {
191 if( isFilled( j ) )
192 if( !isDeleted( j ) )
193 {
194 va.destroy( &aValues[j] );
195 ka.destroy( &aKeys[j] );
196 }
197 }
198 va.deallocate( aValues, nCapacity );
199 ka.deallocate( aKeys, nCapacity );
200 ca.deallocate( bFilled, nKeysSize );
201 ca.deallocate( bDeleted, nKeysSize );
202 ca.deallocate( aHashCodes, nCapacity );
203
204 nFilled = 0;
205 nDeleted = 0;
206 nCapacity = src.nCapacity;
207 nKeysSize = bitsToBytes( nCapacity );
208 bFilled = ca.allocate( nKeysSize );
209 bDeleted = ca.allocate( nKeysSize );
210 clearBits();
211
212 aHashCodes = ca.allocate( nCapacity );
213 aKeys = ka.allocate( nCapacity );
214 aValues = va.allocate( nCapacity );
215
216 for( uint32_t j = 0; j < src.nCapacity; j++ )
217 {
218 if( src.isFilled( j ) )
219 {
220 insert( src.aKeys[j], src.aValues[j] );
221 }
222 }
223
224 return *this;
225 }
226
227 virtual ~Hash()
228 {
229 for( uint32_t j = 0; j < nCapacity; j++ )
230 {
231 if( isFilled( j ) )
232 if( !isDeleted( j ) )
233 {
234 va.destroy( &aValues[j] );
235 ka.destroy( &aKeys[j] );
236 }
237 }
238 va.deallocate( aValues, nCapacity );
239 ka.deallocate( aKeys, nCapacity );
240 ca.deallocate( bFilled, nKeysSize );
241 ca.deallocate( bDeleted, nKeysSize );
242 ca.deallocate( aHashCodes, nCapacity );
243 }
244
245 uint32_t getCapacity()
246 {
247 return nCapacity;
248 }
249
250 uint32_t getFill()
251 {
252 return nFilled;
253 }
254
255 uint32_t size()
256 {
257 return nFilled-nDeleted;
258 }
259
260 uint32_t getDeleted()
261 {
262 return nDeleted;
263 }
264
265 virtual HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k )
266 {
267 uint32_t hash = __calcHashCode( k );
268 bool bFill;
269 uint32_t nPos = probe( hash, k, bFill );
270
271 if( bFill )
272 {
273 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] );
274 }
275 else
276 {
277 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &k, nPos, hash );
278 }
279 }
280
281 virtual void insert( key k, value v )
282 {
283 uint32_t hash = __calcHashCode( k );
284 bool bFill;
285 uint32_t nPos = probe( hash, k, bFill );
286
287 if( bFill )
288 {
289 va.destroy( &aValues[nPos] );
290 va.construct( &aValues[nPos], v );
291 onUpdate();
292 }
293 else
294 {
295 fill( nPos, k, v, hash );
296 onInsert();
297 }
298 }
299
300 virtual void erase( key k )
301 {
302 uint32_t hash = __calcHashCode( k );
303 bool bFill;
304 uint32_t nPos = probe( hash, k, bFill );
305
306 if( bFill )
307 {
308 _erase( nPos );
309 onDelete();
310 }
311 }
312
313 struct iterator;
314 virtual void erase( struct iterator &i )
315 {
316 if( this != &i.hsh )
317 throw HashException("This iterator didn't come from this Hash.");
318 if( isFilled( i.nPos ) && !isDeleted( i.nPos ) )
319 {
320 _erase( i.nPos );
321 onDelete();
322 }
323 }
324
325 virtual void clear()
326 {
327 for( uint32_t j = 0; j < nCapacity; j++ )
328 {
329 if( isFilled( j ) )
330 if( !isDeleted( j ) )
331 {
332 va.destroy( &aValues[j] );
333 ka.destroy( &aKeys[j] );
334 onDelete();
335 }
336 }
337
338 clearBits();
339 }
340
341 virtual value &get( key k )
342 {
343 uint32_t hash = __calcHashCode( k );
344 bool bFill;
345 uint32_t nPos = probe( hash, k, bFill );
346
347 if( bFill )
348 {
349 return aValues[nPos];
350 }
351 else
352 {
353 throw HashException(
354 excodeNotFilled,
355 "No data assosiated with that key."
356 );
357 }
358 }
359
360 virtual bool has( key k )
361 {
362 bool bFill;
363 probe( __calcHashCode( k ), k, bFill, false );
364
365 return bFill;
366 }
367
368 typedef struct iterator
369 {
370 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
371 private:
372 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) :
373 hsh( hsh ),
374 nPos( 0 ),
375 bFinished( false )
376 {
377 nPos = hsh.getFirstPos( bFinished );
378 }
379
380 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) :
381 hsh( hsh ),
382 nPos( 0 ),
383 bFinished( bDone )
384 {
385 }
386
387 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
388 uint32_t nPos;
389 bool bFinished;
390
391 public:
392 iterator operator++( int )
393 {
394 if( bFinished == false )
395 nPos = hsh.getNextPos( nPos, bFinished );
396
397 return *this;
398 }
399
400 iterator operator++()
401 {
402 if( bFinished == false )
403 nPos = hsh.getNextPos( nPos, bFinished );
404
405 return *this;
406 }
407
408 bool operator==( const iterator &oth )
409 {
410 if( bFinished != oth.bFinished )
411 return false;
412 if( bFinished == true )
413 {
414 return true;
415 }
416 else
417 {
418 if( oth.nPos == nPos )
419 return true;
420 return false;
421 }
422 }
423
424 bool operator!=( const iterator &oth )
425 {
426 return !(*this == oth );
427 }
428
429 iterator operator=( const iterator &oth )
430 {
431 if( &hsh != &oth.hsh )
432 throw HashException(
433 "Cannot mix iterators from different hash objects.");
434 nPos = oth.nPos;
435 bFinished = oth.bFinished;
436 }
437
438 std::pair<key,value> operator *()
439 {
440 return hsh.getAtPos( nPos );
441 }
442
443 key &getKey()
444 {
445 return hsh.getKeyAtPos( nPos );
446 }
447
448 value &getValue()
449 {
450 return hsh.getValueAtPos( nPos );
451 }
452 };
453
454 iterator begin()
455 {
456 return iterator( *this );
457 }
458
459 iterator end()
460 {
461 return iterator( *this, true );
462 }
463
464 std::list<key> getKeys()
465 {
466 std::list<key> lKeys;
467
468 for( uint32_t j = 0; j < nCapacity; j++ )
469 {
470 if( isFilled( j ) )
471 {
472 if( !isDeleted( j ) )
473 {
474 lKeys.push_back( aKeys[j] );
475 }
476 }
477 }
478
479 return lKeys;
480 }
481
482protected:
483 virtual void onInsert() {}
484 virtual void onUpdate() {}
485 virtual void onDelete() {}
486 virtual void onReHash() {}
487
488 virtual void clearBits()
489 {
490 for( uint32_t j = 0; j < nKeysSize; j++ )
491 {
492 bFilled[j] = bDeleted[j] = 0;
493 }
494 }
495
496 virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash )
497 {
498 bFilled[loc/32] |= (1<<(loc%32));
499 va.construct( &aValues[loc], v );
500 ka.construct( &aKeys[loc], k );
501 aHashCodes[loc] = hash;
502 nFilled++;
503 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
504 // nFilled, nDeleted, nCapacity );
505 }
506
507 virtual void _erase( uint32_t loc )
508 {
509 bDeleted[loc/32] |= (1<<(loc%32));
510 va.destroy( &aValues[loc] );
511 ka.destroy( &aKeys[loc] );
512 nDeleted++;
513 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
514 // nFilled, nDeleted, nCapacity );
515 }
516
517 virtual std::pair<key,value> getAtPos( uint32_t nPos )
518 {
519 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
520 }
521
522 virtual key &getKeyAtPos( uint32_t nPos )
523 {
524 return aKeys[nPos];
525 }
526
527 virtual value &getValueAtPos( uint32_t nPos )
528 {
529 return aValues[nPos];
530 }
531
532 virtual uint32_t getFirstPos( bool &bFinished )
533 {
534 for( uint32_t j = 0; j < nCapacity; j++ )
535 {
536 if( isFilled( j ) )
537 if( !isDeleted( j ) )
538 return j;
539 }
540
541 bFinished = true;
542 return 0;
543 }
544
545 virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished )
546 {
547 for( uint32_t j = nPos+1; j < nCapacity; j++ )
548 {
549 if( isFilled( j ) )
550 if( !isDeleted( j ) )
551 return j;
552 }
553
554 bFinished = true;
555 return 0;
556 }
557
558 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
559 {
560 uint32_t nCur = hash%nCapacity;
561
562 // First we scan to see if the key is already there, abort if we
563 // run out of probing room, or we find a non-filled entry
564 for( int8_t j = 0;
565 isFilled( nCur ) && j < 32;
566 nCur = (nCur + (1<<j))%nCapacity, j++
567 )
568 {
569 // Is this the same hash code we were looking for?
570 if( hash == aHashCodes[nCur] )
571 {
572 // Skip over deleted entries. Deleted entries are also filled,
573 // so we only have to do this check here.
574 if( isDeleted( nCur ) )
575 continue;
576
577 // Is it really the same key? (for safety)
578 if( __cmpHashKeys( aKeys[nCur], k ) == true )
579 {
580 bFill = true;
581 return nCur;
582 }
583 }
584 }
585
586 // This is our insurance, if the table is full, then go ahead and
587 // rehash, then try again.
588 if( isFilled( nCur ) && rehash == true )
589 {
590 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
591
592 // This is potentially dangerous, and could cause an infinite loop.
593 // Be careful writing probe, eh?
594 return probe( hash, k, bFill );
595 }
596
597 bFill = false;
598 return nCur;
599 }
600
601 void reHash( uint32_t nNewSize )
602 {
603 //printf("---REHASH---");
604 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
605 // nFilled, nDeleted, nCapacity );
606
607 // Save all the old data
608 uint32_t nOldCapacity = nCapacity;
609 uint32_t *bOldFilled = bFilled;
610 uint32_t *aOldHashCodes = aHashCodes;
611 uint32_t nOldKeysSize = nKeysSize;
612 uint32_t *bOldDeleted = bDeleted;
613 value *aOldValues = aValues;
614 key *aOldKeys = aKeys;
615
616 // Calculate new sizes
617 nCapacity = nNewSize;
618 nKeysSize = bitsToBytes( nCapacity );
619
620 // Allocate new memory + prep
621 bFilled = ca.allocate( nKeysSize );
622 bDeleted = ca.allocate( nKeysSize );
623 clearBits();
624
625 aHashCodes = ca.allocate( nCapacity );
626 aKeys = ka.allocate( nCapacity );
627 aValues = va.allocate( nCapacity );
628
629 nDeleted = nFilled = 0;
630
631 // Re-insert all of the old data (except deleted items)
632 for( uint32_t j = 0; j < nOldCapacity; j++ )
633 {
634 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 &&
635 (bOldDeleted[j/32]&(1<<(j%32)))==0 )
636 {
637 insert( aOldKeys[j], aOldValues[j] );
638 }
639 }
640
641 // Delete all of the old data
642 for( uint32_t j = 0; j < nOldCapacity; j++ )
643 {
644 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
645 {
646 va.destroy( &aOldValues[j] );
647 ka.destroy( &aOldKeys[j] );
648 }
649 }
650 va.deallocate( aOldValues, nOldCapacity );
651 ka.deallocate( aOldKeys, nOldCapacity );
652 ca.deallocate( bOldFilled, nOldKeysSize );
653 ca.deallocate( bOldDeleted, nOldKeysSize );
654 ca.deallocate( aOldHashCodes, nOldCapacity );
655 }
656
657 virtual bool isFilled( uint32_t loc ) const
658 {
659 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
660 }
661
662 virtual bool isDeleted( uint32_t loc )
663 {
664 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
665 }
666
667protected:
668 uint32_t nCapacity;
669 uint32_t nFilled;
670 uint32_t nDeleted;
671 uint32_t *bFilled;
672 uint32_t *bDeleted;
673 uint32_t nKeysSize;
674 key *aKeys;
675 value *aValues;
676 uint32_t *aHashCodes;
677 valuealloc va;
678 keyalloc ka;
679 challoc ca;
680 sizecalc szCalc;
681};
682
683template<> uint32_t __calcHashCode<int>( const int &k );
684template<> bool __cmpHashKeys<int>( const int &a, const int &b );
685
686template<> uint32_t __calcHashCode<unsigned int>( const unsigned int &k );
687template<> bool __cmpHashKeys<unsigned int>( const unsigned int &a, const unsigned int &b );
688
689template<> uint32_t __calcHashCode<const char *>( const char * const &k );
690template<> bool __cmpHashKeys<const char *>( const char * const &a, const char * const &b );
691
692template<> uint32_t __calcHashCode<char *>( char * const &k );
693template<> bool __cmpHashKeys<char *>( char * const &a, char * const &b );
694
695template<> uint32_t __calcHashCode<std::string>( const std::string &k );
696template<> bool __cmpHashKeys<std::string>( const std::string &a, const std::string &b );
697
698template<> uint32_t __calcHashCode<Hashable>( const Hashable &k );
699template<> bool __cmpHashKeys<Hashable>( const Hashable &a, const Hashable &b );
700
701template<typename key, typename value>
702Serializer &operator<<( Serializer &ar, Hash<key,value> &h )
703{
704 ar << h.size();
705 for( typename Hash<key,value>::iterator i = h.begin(); i != h.end(); i++ )
706 {
707 std::pair<key,value> p = *i;
708 ar << p.first << p.second;
709 }
710
711 return ar;
712}
713
714template<typename key, typename value>
715Serializer &operator>>( Serializer &ar, Hash<key,value> &h )
716{
717 h.clear();
718 uint32_t nSize;
719 ar >> nSize;
720
721 for( uint32_t j = 0; j < nSize; j++ )
722 {
723 key k; value v;
724 ar >> k >> v;
725 h.insert( k, v );
726 }
727
728 return ar;
729}
730
731template<typename key, typename value>
732Serializer &operator&&( Serializer &ar, Hash<key,value> &h )
733{
734 if( ar.isLoading() )
735 {
736 return ar >> h;
737 }
738 else
739 {
740 return ar << h;
741 }
742}
743
744#endif