summaryrefslogtreecommitdiff
path: root/src/set.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/set.h760
1 files changed, 3 insertions, 757 deletions
diff --git a/src/set.h b/src/set.h
index 6a2abce..047ff7f 100644
--- a/src/set.h
+++ b/src/set.h
@@ -24,17 +24,10 @@ namespace Bu
24{ 24{
25 subExceptionDecl( SetException ) 25 subExceptionDecl( SetException )
26 26
27 template<typename T>
28 uint32_t __calcHashCode( const T &k );
29
30 template<typename T>
31 bool __cmpHashKeys( const T &a, const T &b );
32
33 template<typename key, typename sizecalc = struct __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename challoc = std::allocator<uint32_t> >
34 class Set;
35
36 /** 27 /**
37 * Libbu Template Set 28 *@todo Set should be rewritten, possibly using a b-tree as ordered storage
29 * in the backend. It should use either a b-tree or array for storage and
30 * allow set intersections, unions, etc.
38 *@param key (typename) The datatype of the hashtable keys 31 *@param key (typename) The datatype of the hashtable keys
39 *@param sizecalc (typename) Functor to compute new table size on rehash 32 *@param sizecalc (typename) Functor to compute new table size on rehash
40 *@param keyalloc (typename) Memory allocator for hashtable keys 33 *@param keyalloc (typename) Memory allocator for hashtable keys
@@ -45,754 +38,7 @@ namespace Bu
45 class Set 38 class Set
46 { 39 {
47 public: 40 public:
48 Set() :
49 nCapacity( 11 ),
50 nFilled( 0 ),
51 nDeleted( 0 ),
52 bFilled( NULL ),
53 bDeleted( NULL ),
54 aKeys( NULL ),
55 aHashCodes( NULL )
56 {
57 nKeysSize = bitsToBytes( nCapacity );
58 bFilled = ca.allocate( nKeysSize );
59 bDeleted = ca.allocate( nKeysSize );
60 clearBits();
61
62 aHashCodes = ca.allocate( nCapacity );
63 aKeys = ka.allocate( nCapacity );
64 }
65
66 Set( const Set &src ) :
67 nCapacity( src.nCapacity ),
68 nFilled( 0 ),
69 nDeleted( 0 ),
70 bFilled( NULL ),
71 bDeleted( NULL ),
72 aKeys( NULL ),
73 aHashCodes( NULL )
74 {
75 nKeysSize = bitsToBytes( nCapacity );
76 bFilled = ca.allocate( nKeysSize );
77 bDeleted = ca.allocate( nKeysSize );
78 clearBits();
79
80 aHashCodes = ca.allocate( nCapacity );
81 aKeys = ka.allocate( nCapacity );
82
83 for( uint32_t j = 0; j < src.nCapacity; j++ )
84 {
85 if( src.isFilled( j ) )
86 {
87 insert( src.aKeys[j] );
88 }
89 }
90 }
91
92 /**
93 * Set assignment operator. Clears this hashtable and
94 * copies RH into it.
95 */
96 Set &operator=( const Set &src )
97 {
98 for( uint32_t j = 0; j < nCapacity; j++ )
99 {
100 if( isFilled( j ) )
101 if( !isDeleted( j ) )
102 {
103 ka.destroy( &aKeys[j] );
104 }
105 }
106 ka.deallocate( aKeys, nCapacity );
107 ca.deallocate( bFilled, nKeysSize );
108 ca.deallocate( bDeleted, nKeysSize );
109 ca.deallocate( aHashCodes, nCapacity );
110
111 nFilled = 0;
112 nDeleted = 0;
113 nCapacity = src.nCapacity;
114 nKeysSize = bitsToBytes( nCapacity );
115 bFilled = ca.allocate( nKeysSize );
116 bDeleted = ca.allocate( nKeysSize );
117 clearBits();
118
119 aHashCodes = ca.allocate( nCapacity );
120 aKeys = ka.allocate( nCapacity );
121
122 for( uint32_t j = 0; j < src.nCapacity; j++ )
123 {
124 if( src.isFilled( j ) )
125 {
126 insert( src.aKeys[j] );
127 }
128 }
129
130 return *this;
131 }
132
133 virtual ~Set()
134 {
135 for( uint32_t j = 0; j < nCapacity; j++ )
136 {
137 if( isFilled( j ) )
138 if( !isDeleted( j ) )
139 {
140 ka.destroy( &aKeys[j] );
141 }
142 }
143 ka.deallocate( aKeys, nCapacity );
144 ca.deallocate( bFilled, nKeysSize );
145 ca.deallocate( bDeleted, nKeysSize );
146 ca.deallocate( aHashCodes, nCapacity );
147 }
148
149 /**
150 * Get the current hash table capacity. (Changes at re-hash)
151 *@returns (uint32_t) The current capacity.
152 */
153 uint32_t getCapacity()
154 {
155 return nCapacity;
156 }
157
158 /**
159 * Get the number of hash locations spoken for. (Including
160 * not-yet-cleaned-up deleted items.)
161 *@returns (uint32_t) The current fill state.
162 */
163 uint32_t getFill()
164 {
165 return nFilled;
166 }
167
168 /**
169 * Get the number of items stored in the hash table.
170 *@returns (uint32_t) The number of items stored in the hash table.
171 */
172 uint32_t getSize()
173 {
174 return nFilled-nDeleted;
175 }
176
177 /**
178 * Get the number of items which have been deleted, but not yet
179 * cleaned up.
180 *@returns (uint32_t) The number of deleted items.
181 */
182 uint32_t getDeleted()
183 {
184 return nDeleted;
185 }
186
187 /**
188 * Insert key (k) into the set
189 *@param k (key_type) Key to list the value under.
190 */
191 virtual void insert( key k )
192 {
193 uint32_t hash = __calcHashCode( k );
194 bool bFill;
195 uint32_t nPos = probe( hash, k, bFill );
196
197 if( bFill )
198 {
199 onUpdate();
200 }
201 else
202 {
203 fill( nPos, k, hash );
204 onInsert();
205 }
206 }
207
208 /**
209 * Remove a value from the hash table.
210 *@param k (key_type) The data under this key will be erased.
211 */
212 virtual void erase( key k )
213 {
214 uint32_t hash = __calcHashCode( k );
215 bool bFill;
216 uint32_t nPos = probe( hash, k, bFill );
217
218 if( bFill )
219 {
220 _erase( nPos );
221 onDelete();
222 }
223 }
224
225 struct iterator;
226
227 /**
228 * Remove a value from the hash pointed to from an iterator.
229 *@param i (iterator &) The data to be erased.
230 */
231 virtual void erase( struct iterator &i )
232 {
233 if( this != &i.hsh )
234 throw SetException("This iterator didn't come from this Hash.");
235 if( isFilled( i.nPos ) && !isDeleted( i.nPos ) )
236 {
237 _erase( i.nPos );
238 onDelete();
239 }
240 }
241
242 /**
243 * Remove all data from the hash table.
244 */
245 virtual void clear()
246 {
247 for( uint32_t j = 0; j < nCapacity; j++ )
248 {
249 if( isFilled( j ) )
250 if( !isDeleted( j ) )
251 {
252 _erase( j );
253 onDelete();
254 }
255 }
256
257 clearBits();
258 }
259
260 /**
261 * Does the hash table contain an item under key (k).
262 *@param k (key_type) The key to check.
263 *@returns (bool) Whether there was an item in the hash under key (k).
264 */
265 virtual bool has( key k )
266 {
267 bool bFill;
268 probe( __calcHashCode( k ), k, bFill, false );
269
270 return bFill;
271 }
272
273 /**
274 * Iteration structure for iterating through the hash.
275 */
276 typedef struct iterator
277 {
278 friend class Set<key, sizecalc, keyalloc, challoc>;
279 private:
280 iterator( Set<key, sizecalc, keyalloc, challoc> &hsh ) :
281 hsh( hsh ),
282 nPos( 0 ),
283 bFinished( false )
284 {
285 nPos = hsh.getFirstPos( bFinished );
286 }
287
288 iterator( Set<key, sizecalc, keyalloc, challoc> &hsh, bool bDone ) :
289 hsh( hsh ),
290 nPos( 0 ),
291 bFinished( bDone )
292 {
293 }
294
295 Set<key, sizecalc, keyalloc, challoc> &hsh;
296 uint32_t nPos;
297 bool bFinished;
298
299 public:
300 /**
301 * Iterator incrementation operator. Move the iterator forward.
302 */
303 iterator operator++( int )
304 {
305 if( bFinished == false )
306 nPos = hsh.getNextPos( nPos, bFinished );
307
308 return *this;
309 }
310
311 /**
312 * Iterator incrementation operator. Move the iterator forward.
313 */
314 iterator operator++()
315 {
316 if( bFinished == false )
317 nPos = hsh.getNextPos( nPos, bFinished );
318
319 return *this;
320 }
321
322 /**
323 * Iterator equality comparison operator. Iterators the same?
324 */
325 bool operator==( const iterator &oth ) const
326 {
327 if( bFinished != oth.bFinished )
328 return false;
329 if( bFinished == true )
330 {
331 return true;
332 }
333 else
334 {
335 if( oth.nPos == nPos )
336 return true;
337 return false;
338 }
339 }
340
341 /**
342 * Iterator not equality comparison operator. Not the same?
343 */
344 bool operator!=( const iterator &oth ) const
345 {
346 return !(*this == oth );
347 }
348
349 /**
350 * Iterator assignment operator.
351 */
352 iterator operator=( const iterator &oth )
353 {
354 if( &hsh != &oth.hsh )
355 throw SetException(
356 "Cannot mix iterators from different set objects.");
357 nPos = oth.nPos;
358 bFinished = oth.bFinished;
359 }
360
361 /**
362 * Iterator dereference operator... err.. get the value
363 *@returns (value_type &) The value behind this iterator.
364 */
365 key &operator *()
366 {
367 return hsh.getKeyAtPos( nPos );
368 }
369
370 const key &operator *() const
371 {
372 return hsh.getKeyAtPos( nPos );
373 }
374
375 bool isValid() const
376 {
377 return !bFinished;
378 }
379
380 operator bool() const
381 {
382 return !bFinished;
383 }
384 } iterator;
385
386 /**
387 * Iteration structure for iterating through the set (const).
388 */
389 typedef struct const_iterator
390 {
391 friend class Set<key, sizecalc, keyalloc, challoc>;
392 private:
393 const_iterator( const Set<key, sizecalc, keyalloc, challoc> &hsh ) :
394 hsh( hsh ),
395 nPos( 0 ),
396 bFinished( false )
397 {
398 nPos = hsh.getFirstPos( bFinished );
399 }
400
401 const_iterator( const Set<key, sizecalc, keyalloc, challoc> &hsh, bool bDone ) :
402 hsh( hsh ),
403 nPos( 0 ),
404 bFinished( bDone )
405 {
406 }
407
408 const Set<key, sizecalc, keyalloc, challoc> &hsh;
409 uint32_t nPos;
410 bool bFinished;
411
412 public:
413 /**
414 * Iterator incrementation operator. Move the iterator forward.
415 */
416 const_iterator operator++( int )
417 {
418 if( bFinished == false )
419 nPos = hsh.getNextPos( nPos, bFinished );
420
421 return *this;
422 }
423
424 /**
425 * Iterator incrementation operator. Move the iterator forward.
426 */
427 const_iterator operator++()
428 {
429 if( bFinished == false )
430 nPos = hsh.getNextPos( nPos, bFinished );
431
432 return *this;
433 }
434
435 /**
436 * Iterator equality comparison operator. Iterators the same?
437 */
438 bool operator==( const const_iterator &oth ) const
439 {
440 if( bFinished != oth.bFinished )
441 return false;
442 if( bFinished == true )
443 {
444 return true;
445 }
446 else
447 {
448 if( oth.nPos == nPos )
449 return true;
450 return false;
451 }
452 }
453
454 /**
455 * Iterator not equality comparison operator. Not the same?
456 */
457 bool operator!=( const const_iterator &oth ) const
458 {
459 return !(*this == oth );
460 }
461
462 /**
463 * Iterator assignment operator.
464 */
465 const_iterator operator=( const const_iterator &oth )
466 {
467 if( &hsh != &oth.hsh )
468 throw SetException(
469 "Cannot mix iterators from different hash objects.");
470 nPos = oth.nPos;
471 bFinished = oth.bFinished;
472 }
473
474 /**
475 * Iterator dereference operator... err.. get the value
476 *@returns (value_type &) The value behind this iterator.
477 */
478 const key &operator *() const
479 {
480 return hsh.getKeyAtPos( nPos );
481 }
482
483 bool isValid() const
484 {
485 return !bFinished;
486 }
487
488 operator bool() const
489 {
490 return !bFinished;
491 }
492 } const_iterator;
493
494 /**
495 * Get an iterator pointing to the first item in the hash table.
496 *@returns (iterator) An iterator pointing to the first item in the
497 * hash table.
498 */
499 iterator begin()
500 {
501 return iterator( *this );
502 }
503
504 const_iterator begin() const
505 {
506 return const_iterator( *this );
507 }
508
509 /**
510 * Get an iterator pointing to a point just past the last item in the
511 * hash table.
512 *@returns (iterator) An iterator pointing to a point just past the
513 * last item in the hash table.
514 */
515 iterator end()
516 {
517 return iterator( *this, true );
518 }
519
520 const_iterator end() const
521 {
522 return const_iterator( *this, true );
523 }
524
525 /**
526 * Get a list of all the keys in the hash table.
527 *@returns (std::list<key_type>) The list of keys in the hash table.
528 */
529 Bu::List<key> getKeys() const
530 {
531 Bu::List<key> lKeys;
532
533 for( uint32_t j = 0; j < nCapacity; j++ )
534 {
535 if( isFilled( j ) )
536 {
537 if( !isDeleted( j ) )
538 {
539 lKeys.append( aKeys[j] );
540 }
541 }
542 }
543
544 return lKeys;
545 }
546
547 protected:
548 virtual void onInsert() {}
549 virtual void onUpdate() {}
550 virtual void onDelete() {}
551 virtual void onReHash() {}
552
553 virtual void clearBits()
554 {
555 for( uint32_t j = 0; j < nKeysSize; j++ )
556 {
557 bFilled[j] = bDeleted[j] = 0;
558 }
559 }
560
561 virtual void fill( uint32_t loc, key &k, uint32_t hash )
562 {
563 bFilled[loc/32] |= (1<<(loc%32));
564 ka.construct( &aKeys[loc], k );
565 aHashCodes[loc] = hash;
566 nFilled++;
567 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
568 // nFilled, nDeleted, nCapacity );
569 }
570
571 virtual void _erase( uint32_t loc )
572 {
573 bDeleted[loc/32] |= (1<<(loc%32));
574 ka.destroy( &aKeys[loc] );
575 nDeleted++;
576 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
577 // nFilled, nDeleted, nCapacity );
578 }
579
580 virtual key &getKeyAtPos( uint32_t nPos )
581 {
582 return aKeys[nPos];
583 }
584
585 virtual const key &getKeyAtPos( uint32_t nPos ) const
586 {
587 return aKeys[nPos];
588 }
589
590 virtual uint32_t getFirstPos( bool &bFinished ) const
591 {
592 for( uint32_t j = 0; j < nCapacity; j++ )
593 {
594 if( isFilled( j ) )
595 if( !isDeleted( j ) )
596 return j;
597 }
598
599 bFinished = true;
600 return 0;
601 }
602
603 virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) const
604 {
605 for( uint32_t j = nPos+1; j < nCapacity; j++ )
606 {
607 if( isFilled( j ) )
608 if( !isDeleted( j ) )
609 return j;
610 }
611
612 bFinished = true;
613 return 0;
614 }
615
616 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
617 {
618 uint32_t nCur = hash%nCapacity;
619
620 // First we scan to see if the key is already there, abort if we
621 // run out of probing room, or we find a non-filled entry
622 int8_t j;
623 for( j = 0;
624 isFilled( nCur ) && j < 32;
625 nCur = (nCur + (1<<j))%nCapacity, j++
626 )
627 {
628 // Is this the same hash code we were looking for?
629 if( hash == aHashCodes[nCur] )
630 {
631 // Skip over deleted entries. Deleted entries are also filled,
632 // so we only have to do this check here.
633 if( isDeleted( nCur ) )
634 continue;
635
636 // Is it really the same key? (for safety)
637 if( __cmpHashKeys( aKeys[nCur], k ) == true )
638 {
639 bFill = true;
640 return nCur;
641 }
642 }
643 }
644
645 // This is our insurance, if the table is full, then go ahead and
646 // rehash, then try again.
647 if( (isFilled( nCur ) || j == 32) && rehash == true )
648 {
649 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
650
651 // This is potentially dangerous, and could cause an infinite loop.
652 // Be careful writing probe, eh?
653 return probe( hash, k, bFill );
654 }
655
656 bFill = false;
657 return nCur;
658 }
659
660 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) const
661 {
662 uint32_t nCur = hash%nCapacity;
663
664 // First we scan to see if the key is already there, abort if we
665 // run out of probing room, or we find a non-filled entry
666 for( int8_t j = 0;
667 isFilled( nCur ) && j < 32;
668 nCur = (nCur + (1<<j))%nCapacity, j++
669 )
670 {
671 // Is this the same hash code we were looking for?
672 if( hash == aHashCodes[nCur] )
673 {
674 // Skip over deleted entries. Deleted entries are also filled,
675 // so we only have to do this check here.
676 if( isDeleted( nCur ) )
677 continue;
678
679 // Is it really the same key? (for safety)
680 if( __cmpHashKeys( aKeys[nCur], k ) == true )
681 {
682 bFill = true;
683 return nCur;
684 }
685 }
686 }
687
688 bFill = false;
689 return nCur;
690 }
691
692 void reHash( uint32_t nNewSize )
693 {
694 //printf("---REHASH---");
695 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
696 // nFilled, nDeleted, nCapacity );
697
698 // Save all the old data
699 uint32_t nOldCapacity = nCapacity;
700 uint32_t *bOldFilled = bFilled;
701 uint32_t *aOldHashCodes = aHashCodes;
702 uint32_t nOldKeysSize = nKeysSize;
703 uint32_t *bOldDeleted = bDeleted;
704 key *aOldKeys = aKeys;
705
706 // Calculate new sizes
707 nCapacity = nNewSize;
708 nKeysSize = bitsToBytes( nCapacity );
709
710 // Allocate new memory + prep
711 bFilled = ca.allocate( nKeysSize );
712 bDeleted = ca.allocate( nKeysSize );
713 clearBits();
714
715 aHashCodes = ca.allocate( nCapacity );
716 aKeys = ka.allocate( nCapacity );
717
718 nDeleted = nFilled = 0;
719
720 // Re-insert all of the old data (except deleted items)
721 for( uint32_t j = 0; j < nOldCapacity; j++ )
722 {
723 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 &&
724 (bOldDeleted[j/32]&(1<<(j%32)))==0 )
725 {
726 insert( aOldKeys[j] );
727 }
728 }
729
730 // Delete all of the old data
731 for( uint32_t j = 0; j < nOldCapacity; j++ )
732 {
733 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
734 {
735 ka.destroy( &aOldKeys[j] );
736 }
737 }
738 ka.deallocate( aOldKeys, nOldCapacity );
739 ca.deallocate( bOldFilled, nOldKeysSize );
740 ca.deallocate( bOldDeleted, nOldKeysSize );
741 ca.deallocate( aOldHashCodes, nOldCapacity );
742 }
743
744 virtual bool isFilled( uint32_t loc ) const
745 {
746 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
747 }
748
749 virtual bool isDeleted( uint32_t loc ) const
750 {
751 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
752 }
753
754 protected:
755 uint32_t nCapacity;
756 uint32_t nFilled;
757 uint32_t nDeleted;
758 uint32_t *bFilled;
759 uint32_t *bDeleted;
760 uint32_t nKeysSize;
761 key *aKeys;
762 uint32_t *aHashCodes;
763 keyalloc ka;
764 challoc ca;
765 sizecalc szCalc;
766 }; 41 };
767
768 template<typename key, typename b, typename c, typename d>
769 Archive &operator<<( Archive &ar, const Set<key, b, c, d> &h )
770 {
771 ar << h.getSize();
772 for( typename Set<key, b, c, d>::const_iterator i = h.begin(); i != h.end(); i++ )
773 {
774 ar << (*i);
775 }
776
777 return ar;
778 }
779
780 template<typename key, typename b, typename c, typename d>
781 Archive &operator>>( Archive &ar, Set<key, b, c, d> &h )
782 {
783 h.clear();
784 long nSize;
785 ar >> nSize;
786
787 for( long j = 0; j < nSize; j++ )
788 {
789 key v;
790 ar >> v;
791 h.insert( v );
792 }
793
794 return ar;
795 }
796} 42}
797 43
798#endif 44#endif