aboutsummaryrefslogtreecommitdiff
path: root/src/set.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2010-10-14 23:26:25 +0000
committerMike Buland <eichlan@xagasoft.com>2010-10-14 23:26:25 +0000
commit867dae89929a11a421ec21af71d494ad0ecc1963 (patch)
tree85d4330d5cdc0567194f1bfb2f9b1bda4e95b444 /src/set.h
parent13fa07280dd9ed2c62ad2be0848f88b0d85fe322 (diff)
downloadlibbu++-867dae89929a11a421ec21af71d494ad0ecc1963.tar.gz
libbu++-867dae89929a11a421ec21af71d494ad0ecc1963.tar.bz2
libbu++-867dae89929a11a421ec21af71d494ad0ecc1963.tar.xz
libbu++-867dae89929a11a421ec21af71d494ad0ecc1963.zip
SharedCore has more features now, which is cool, including a test to see if
another object of the parent type has the same core, and another to clone the parent object. That one is pretty cool, it means you can now get a real copy when you want to, great for multi-threaded stuff. Also, two more classes are now SharedCore: Hash and Heap!
Diffstat (limited to 'src/set.h')
-rw-r--r--src/set.h760
1 files changed, 3 insertions, 757 deletions
diff --git a/src/set.h b/src/set.h
index 6a2abce..047ff7f 100644
--- a/src/set.h
+++ b/src/set.h
@@ -24,17 +24,10 @@ namespace Bu
24{ 24{
25 subExceptionDecl( SetException ) 25 subExceptionDecl( SetException )
26 26
27 template<typename T>
28 uint32_t __calcHashCode( const T &k );
29
30 template<typename T>
31 bool __cmpHashKeys( const T &a, const T &b );
32
33 template<typename key, typename sizecalc = struct __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename challoc = std::allocator<uint32_t> >
34 class Set;
35
36 /** 27 /**
37 * Libbu Template Set 28 *@todo Set should be rewritten, possibly using a b-tree as ordered storage
29 * in the backend. It should use either a b-tree or array for storage and
30 * allow set intersections, unions, etc.
38 *@param key (typename) The datatype of the hashtable keys 31 *@param key (typename) The datatype of the hashtable keys
39 *@param sizecalc (typename) Functor to compute new table size on rehash 32 *@param sizecalc (typename) Functor to compute new table size on rehash
40 *@param keyalloc (typename) Memory allocator for hashtable keys 33 *@param keyalloc (typename) Memory allocator for hashtable keys
@@ -45,754 +38,7 @@ namespace Bu
45 class Set 38 class Set
46 { 39 {
47 public: 40 public:
48 Set() :
49 nCapacity( 11 ),
50 nFilled( 0 ),
51 nDeleted( 0 ),
52 bFilled( NULL ),
53 bDeleted( NULL ),
54 aKeys( NULL ),
55 aHashCodes( NULL )
56 {
57 nKeysSize = bitsToBytes( nCapacity );
58 bFilled = ca.allocate( nKeysSize );
59 bDeleted = ca.allocate( nKeysSize );
60 clearBits();
61
62 aHashCodes = ca.allocate( nCapacity );
63 aKeys = ka.allocate( nCapacity );
64 }
65
66 Set( const Set &src ) :
67 nCapacity( src.nCapacity ),
68 nFilled( 0 ),
69 nDeleted( 0 ),
70 bFilled( NULL ),
71 bDeleted( NULL ),
72 aKeys( NULL ),
73 aHashCodes( NULL )
74 {
75 nKeysSize = bitsToBytes( nCapacity );
76 bFilled = ca.allocate( nKeysSize );
77 bDeleted = ca.allocate( nKeysSize );
78 clearBits();
79
80 aHashCodes = ca.allocate( nCapacity );
81 aKeys = ka.allocate( nCapacity );
82
83 for( uint32_t j = 0; j < src.nCapacity; j++ )
84 {
85 if( src.isFilled( j ) )
86 {
87 insert( src.aKeys[j] );
88 }
89 }
90 }
91
92 /**
93 * Set assignment operator. Clears this hashtable and
94 * copies RH into it.
95 */
96 Set &operator=( const Set &src )
97 {
98 for( uint32_t j = 0; j < nCapacity; j++ )
99 {
100 if( isFilled( j ) )
101 if( !isDeleted( j ) )
102 {
103 ka.destroy( &aKeys[j] );
104 }
105 }
106 ka.deallocate( aKeys, nCapacity );
107 ca.deallocate( bFilled, nKeysSize );
108 ca.deallocate( bDeleted, nKeysSize );
109 ca.deallocate( aHashCodes, nCapacity );
110
111 nFilled = 0;
112 nDeleted = 0;
113 nCapacity = src.nCapacity;
114 nKeysSize = bitsToBytes( nCapacity );
115 bFilled = ca.allocate( nKeysSize );
116 bDeleted = ca.allocate( nKeysSize );
117 clearBits();
118
119 aHashCodes = ca.allocate( nCapacity );
120 aKeys = ka.allocate( nCapacity );
121
122 for( uint32_t j = 0; j < src.nCapacity; j++ )
123 {
124 if( src.isFilled( j ) )
125 {
126 insert( src.aKeys[j] );
127 }
128 }
129
130 return *this;
131 }
132
133 virtual ~Set()
134 {
135 for( uint32_t j = 0; j < nCapacity; j++ )
136 {
137 if( isFilled( j ) )
138 if( !isDeleted( j ) )
139 {
140 ka.destroy( &aKeys[j] );
141 }
142 }
143 ka.deallocate( aKeys, nCapacity );
144 ca.deallocate( bFilled, nKeysSize );
145 ca.deallocate( bDeleted, nKeysSize );
146 ca.deallocate( aHashCodes, nCapacity );
147 }
148
149 /**
150 * Get the current hash table capacity. (Changes at re-hash)
151 *@returns (uint32_t) The current capacity.
152 */
153 uint32_t getCapacity()
154 {
155 return nCapacity;
156 }
157
158 /**
159 * Get the number of hash locations spoken for. (Including
160 * not-yet-cleaned-up deleted items.)
161 *@returns (uint32_t) The current fill state.
162 */
163 uint32_t getFill()
164 {
165 return nFilled;
166 }
167
168 /**
169 * Get the number of items stored in the hash table.
170 *@returns (uint32_t) The number of items stored in the hash table.
171 */
172 uint32_t getSize()
173 {
174 return nFilled-nDeleted;
175 }
176
177 /**
178 * Get the number of items which have been deleted, but not yet
179 * cleaned up.
180 *@returns (uint32_t) The number of deleted items.
181 */
182 uint32_t getDeleted()
183 {
184 return nDeleted;
185 }
186
187 /**
188 * Insert key (k) into the set
189 *@param k (key_type) Key to list the value under.
190 */
191 virtual void insert( key k )
192 {
193 uint32_t hash = __calcHashCode( k );
194 bool bFill;
195 uint32_t nPos = probe( hash, k, bFill );
196
197 if( bFill )
198 {
199 onUpdate();
200 }
201 else
202 {
203 fill( nPos, k, hash );
204 onInsert();
205 }
206 }
207
208 /**
209 * Remove a value from the hash table.
210 *@param k (key_type) The data under this key will be erased.
211 */
212 virtual void erase( key k )
213 {
214 uint32_t hash = __calcHashCode( k );
215 bool bFill;
216 uint32_t nPos = probe( hash, k, bFill );
217
218 if( bFill )
219 {
220 _erase( nPos );
221 onDelete();
222 }
223 }
224
225 struct iterator;
226
227 /**
228 * Remove a value from the hash pointed to from an iterator.
229 *@param i (iterator &) The data to be erased.
230 */
231 virtual void erase( struct iterator &i )
232 {
233 if( this != &i.hsh )
234 throw SetException("This iterator didn't come from this Hash.");
235 if( isFilled( i.nPos ) && !isDeleted( i.nPos ) )
236 {
237 _erase( i.nPos );
238 onDelete();
239 }
240 }
241
242 /**
243 * Remove all data from the hash table.
244 */
245 virtual void clear()
246 {
247 for( uint32_t j = 0; j < nCapacity; j++ )
248 {
249 if( isFilled( j ) )
250 if( !isDeleted( j ) )
251 {
252 _erase( j );
253 onDelete();
254 }
255 }
256
257 clearBits();
258 }
259
260 /**
261 * Does the hash table contain an item under key (k).
262 *@param k (key_type) The key to check.
263 *@returns (bool) Whether there was an item in the hash under key (k).
264 */
265 virtual bool has( key k )
266 {
267 bool bFill;
268 probe( __calcHashCode( k ), k, bFill, false );
269
270 return bFill;
271 }
272
273 /**
274 * Iteration structure for iterating through the hash.
275 */
276 typedef struct iterator
277 {
278 friend class Set<key, sizecalc, keyalloc, challoc>;
279 private:
280 iterator( Set<key, sizecalc, keyalloc, challoc> &hsh ) :
281 hsh( hsh ),
282 nPos( 0 ),
283 bFinished( false )
284 {
285 nPos = hsh.getFirstPos( bFinished );
286 }
287
288 iterator( Set<key, sizecalc, keyalloc, challoc> &hsh, bool bDone ) :
289 hsh( hsh ),
290 nPos( 0 ),
291 bFinished( bDone )
292 {
293 }
294
295 Set<key, sizecalc, keyalloc, challoc> &hsh;
296 uint32_t nPos;
297 bool bFinished;
298
299 public:
300 /**
301 * Iterator incrementation operator. Move the iterator forward.
302 */
303 iterator operator++( int )
304 {
305 if( bFinished == false )
306 nPos = hsh.getNextPos( nPos, bFinished );
307
308 return *this;
309 }
310
311 /**
312 * Iterator incrementation operator. Move the iterator forward.
313 */
314 iterator operator++()
315 {
316 if( bFinished == false )
317 nPos = hsh.getNextPos( nPos, bFinished );
318
319 return *this;
320 }
321
322 /**
323 * Iterator equality comparison operator. Iterators the same?
324 */
325 bool operator==( const iterator &oth ) const
326 {
327 if( bFinished != oth.bFinished )
328 return false;
329 if( bFinished == true )
330 {
331 return true;
332 }
333 else
334 {
335 if( oth.nPos == nPos )
336 return true;
337 return false;
338 }
339 }
340
341 /**
342 * Iterator not equality comparison operator. Not the same?
343 */
344 bool operator!=( const iterator &oth ) const
345 {
346 return !(*this == oth );
347 }
348
349 /**
350 * Iterator assignment operator.
351 */
352 iterator operator=( const iterator &oth )
353 {
354 if( &hsh != &oth.hsh )
355 throw SetException(
356 "Cannot mix iterators from different set objects.");
357 nPos = oth.nPos;
358 bFinished = oth.bFinished;
359 }
360
361 /**
362 * Iterator dereference operator... err.. get the value
363 *@returns (value_type &) The value behind this iterator.
364 */
365 key &operator *()
366 {
367 return hsh.getKeyAtPos( nPos );
368 }
369
370 const key &operator *() const
371 {
372 return hsh.getKeyAtPos( nPos );
373 }
374
375 bool isValid() const
376 {
377 return !bFinished;
378 }
379
380 operator bool() const
381 {
382 return !bFinished;
383 }
384 } iterator;
385
386 /**
387 * Iteration structure for iterating through the set (const).
388 */
389 typedef struct const_iterator
390 {
391 friend class Set<key, sizecalc, keyalloc, challoc>;
392 private:
393 const_iterator( const Set<key, sizecalc, keyalloc, challoc> &hsh ) :
394 hsh( hsh ),
395 nPos( 0 ),
396 bFinished( false )
397 {
398 nPos = hsh.getFirstPos( bFinished );
399 }
400
401 const_iterator( const Set<key, sizecalc, keyalloc, challoc> &hsh, bool bDone ) :
402 hsh( hsh ),
403 nPos( 0 ),
404 bFinished( bDone )
405 {
406 }
407
408 const Set<key, sizecalc, keyalloc, challoc> &hsh;
409 uint32_t nPos;
410 bool bFinished;
411
412 public:
413 /**
414 * Iterator incrementation operator. Move the iterator forward.
415 */
416 const_iterator operator++( int )
417 {
418 if( bFinished == false )
419 nPos = hsh.getNextPos( nPos, bFinished );
420
421 return *this;
422 }
423
424 /**
425 * Iterator incrementation operator. Move the iterator forward.
426 */
427 const_iterator operator++()
428 {
429 if( bFinished == false )
430 nPos = hsh.getNextPos( nPos, bFinished );
431
432 return *this;
433 }
434
435 /**
436 * Iterator equality comparison operator. Iterators the same?
437 */
438 bool operator==( const const_iterator &oth ) const
439 {
440 if( bFinished != oth.bFinished )
441 return false;
442 if( bFinished == true )
443 {
444 return true;
445 }
446 else
447 {
448 if( oth.nPos == nPos )
449 return true;
450 return false;
451 }
452 }
453
454 /**
455 * Iterator not equality comparison operator. Not the same?
456 */
457 bool operator!=( const const_iterator &oth ) const
458 {
459 return !(*this == oth );
460 }
461
462 /**
463 * Iterator assignment operator.
464 */
465 const_iterator operator=( const const_iterator &oth )
466 {
467 if( &hsh != &oth.hsh )
468 throw SetException(
469 "Cannot mix iterators from different hash objects.");
470 nPos = oth.nPos;
471 bFinished = oth.bFinished;
472 }
473
474 /**
475 * Iterator dereference operator... err.. get the value
476 *@returns (value_type &) The value behind this iterator.
477 */
478 const key &operator *() const
479 {
480 return hsh.getKeyAtPos( nPos );
481 }
482
483 bool isValid() const
484 {
485 return !bFinished;
486 }
487
488 operator bool() const
489 {
490 return !bFinished;
491 }
492 } const_iterator;
493
494 /**
495 * Get an iterator pointing to the first item in the hash table.
496 *@returns (iterator) An iterator pointing to the first item in the
497 * hash table.
498 */
499 iterator begin()
500 {
501 return iterator( *this );
502 }
503
504 const_iterator begin() const
505 {
506 return const_iterator( *this );
507 }
508
509 /**
510 * Get an iterator pointing to a point just past the last item in the
511 * hash table.
512 *@returns (iterator) An iterator pointing to a point just past the
513 * last item in the hash table.
514 */
515 iterator end()
516 {
517 return iterator( *this, true );
518 }
519
520 const_iterator end() const
521 {
522 return const_iterator( *this, true );
523 }
524
525 /**
526 * Get a list of all the keys in the hash table.
527 *@returns (std::list<key_type>) The list of keys in the hash table.
528 */
529 Bu::List<key> getKeys() const
530 {
531 Bu::List<key> lKeys;
532
533 for( uint32_t j = 0; j < nCapacity; j++ )
534 {
535 if( isFilled( j ) )
536 {
537 if( !isDeleted( j ) )
538 {
539 lKeys.append( aKeys[j] );
540 }
541 }
542 }
543
544 return lKeys;
545 }
546
547 protected:
548 virtual void onInsert() {}
549 virtual void onUpdate() {}
550 virtual void onDelete() {}
551 virtual void onReHash() {}
552
553 virtual void clearBits()
554 {
555 for( uint32_t j = 0; j < nKeysSize; j++ )
556 {
557 bFilled[j] = bDeleted[j] = 0;
558 }
559 }
560
561 virtual void fill( uint32_t loc, key &k, uint32_t hash )
562 {
563 bFilled[loc/32] |= (1<<(loc%32));
564 ka.construct( &aKeys[loc], k );
565 aHashCodes[loc] = hash;
566 nFilled++;
567 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
568 // nFilled, nDeleted, nCapacity );
569 }
570
571 virtual void _erase( uint32_t loc )
572 {
573 bDeleted[loc/32] |= (1<<(loc%32));
574 ka.destroy( &aKeys[loc] );
575 nDeleted++;
576 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
577 // nFilled, nDeleted, nCapacity );
578 }
579
580 virtual key &getKeyAtPos( uint32_t nPos )
581 {
582 return aKeys[nPos];
583 }
584
585 virtual const key &getKeyAtPos( uint32_t nPos ) const
586 {
587 return aKeys[nPos];
588 }
589
590 virtual uint32_t getFirstPos( bool &bFinished ) const
591 {
592 for( uint32_t j = 0; j < nCapacity; j++ )
593 {
594 if( isFilled( j ) )
595 if( !isDeleted( j ) )
596 return j;
597 }
598
599 bFinished = true;
600 return 0;
601 }
602
603 virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) const
604 {
605 for( uint32_t j = nPos+1; j < nCapacity; j++ )
606 {
607 if( isFilled( j ) )
608 if( !isDeleted( j ) )
609 return j;
610 }
611
612 bFinished = true;
613 return 0;
614 }
615
616 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
617 {
618 uint32_t nCur = hash%nCapacity;
619
620 // First we scan to see if the key is already there, abort if we
621 // run out of probing room, or we find a non-filled entry
622 int8_t j;
623 for( j = 0;
624 isFilled( nCur ) && j < 32;
625 nCur = (nCur + (1<<j))%nCapacity, j++
626 )
627 {
628 // Is this the same hash code we were looking for?
629 if( hash == aHashCodes[nCur] )
630 {
631 // Skip over deleted entries. Deleted entries are also filled,
632 // so we only have to do this check here.
633 if( isDeleted( nCur ) )
634 continue;
635
636 // Is it really the same key? (for safety)
637 if( __cmpHashKeys( aKeys[nCur], k ) == true )
638 {
639 bFill = true;
640 return nCur;
641 }
642 }
643 }
644
645 // This is our insurance, if the table is full, then go ahead and
646 // rehash, then try again.
647 if( (isFilled( nCur ) || j == 32) && rehash == true )
648 {
649 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
650
651 // This is potentially dangerous, and could cause an infinite loop.
652 // Be careful writing probe, eh?
653 return probe( hash, k, bFill );
654 }
655
656 bFill = false;
657 return nCur;
658 }
659
660 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) const
661 {
662 uint32_t nCur = hash%nCapacity;
663
664 // First we scan to see if the key is already there, abort if we
665 // run out of probing room, or we find a non-filled entry
666 for( int8_t j = 0;
667 isFilled( nCur ) && j < 32;
668 nCur = (nCur + (1<<j))%nCapacity, j++
669 )
670 {
671 // Is this the same hash code we were looking for?
672 if( hash == aHashCodes[nCur] )
673 {
674 // Skip over deleted entries. Deleted entries are also filled,
675 // so we only have to do this check here.
676 if( isDeleted( nCur ) )
677 continue;
678
679 // Is it really the same key? (for safety)
680 if( __cmpHashKeys( aKeys[nCur], k ) == true )
681 {
682 bFill = true;
683 return nCur;
684 }
685 }
686 }
687
688 bFill = false;
689 return nCur;
690 }
691
692 void reHash( uint32_t nNewSize )
693 {
694 //printf("---REHASH---");
695 //printf("Filled: %d, Deleted: %d, Capacity: %d\n",
696 // nFilled, nDeleted, nCapacity );
697
698 // Save all the old data
699 uint32_t nOldCapacity = nCapacity;
700 uint32_t *bOldFilled = bFilled;
701 uint32_t *aOldHashCodes = aHashCodes;
702 uint32_t nOldKeysSize = nKeysSize;
703 uint32_t *bOldDeleted = bDeleted;
704 key *aOldKeys = aKeys;
705
706 // Calculate new sizes
707 nCapacity = nNewSize;
708 nKeysSize = bitsToBytes( nCapacity );
709
710 // Allocate new memory + prep
711 bFilled = ca.allocate( nKeysSize );
712 bDeleted = ca.allocate( nKeysSize );
713 clearBits();
714
715 aHashCodes = ca.allocate( nCapacity );
716 aKeys = ka.allocate( nCapacity );
717
718 nDeleted = nFilled = 0;
719
720 // Re-insert all of the old data (except deleted items)
721 for( uint32_t j = 0; j < nOldCapacity; j++ )
722 {
723 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 &&
724 (bOldDeleted[j/32]&(1<<(j%32)))==0 )
725 {
726 insert( aOldKeys[j] );
727 }
728 }
729
730 // Delete all of the old data
731 for( uint32_t j = 0; j < nOldCapacity; j++ )
732 {
733 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
734 {
735 ka.destroy( &aOldKeys[j] );
736 }
737 }
738 ka.deallocate( aOldKeys, nOldCapacity );
739 ca.deallocate( bOldFilled, nOldKeysSize );
740 ca.deallocate( bOldDeleted, nOldKeysSize );
741 ca.deallocate( aOldHashCodes, nOldCapacity );
742 }
743
744 virtual bool isFilled( uint32_t loc ) const
745 {
746 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
747 }
748
749 virtual bool isDeleted( uint32_t loc ) const
750 {
751 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
752 }
753
754 protected:
755 uint32_t nCapacity;
756 uint32_t nFilled;
757 uint32_t nDeleted;
758 uint32_t *bFilled;
759 uint32_t *bDeleted;
760 uint32_t nKeysSize;
761 key *aKeys;
762 uint32_t *aHashCodes;
763 keyalloc ka;
764 challoc ca;
765 sizecalc szCalc;
766 }; 41 };
767
768 template<typename key, typename b, typename c, typename d>
769 Archive &operator<<( Archive &ar, const Set<key, b, c, d> &h )
770 {
771 ar << h.getSize();
772 for( typename Set<key, b, c, d>::const_iterator i = h.begin(); i != h.end(); i++ )
773 {
774 ar << (*i);
775 }
776
777 return ar;
778 }
779
780 template<typename key, typename b, typename c, typename d>
781 Archive &operator>>( Archive &ar, Set<key, b, c, d> &h )
782 {
783 h.clear();
784 long nSize;
785 ar >> nSize;
786
787 for( long j = 0; j < nSize; j++ )
788 {
789 key v;
790 ar >> v;
791 h.insert( v );
792 }
793
794 return ar;
795 }
796} 42}
797 43
798#endif 44#endif