aboutsummaryrefslogtreecommitdiff
path: root/src/hash.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2006-11-21 12:23:13 +0000
committerMike Buland <eichlan@xagasoft.com>2006-11-21 12:23:13 +0000
commit4b8bad3d711f39db84f094edf83c5496a3f02cd6 (patch)
treebbaf654af3e82e67544ae17f07b7fbe7d02ce0ec /src/hash.h
parent737b1aee54da9ff45a4fb6eb7e636eff9019128e (diff)
downloadlibbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.tar.gz
libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.tar.bz2
libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.tar.xz
libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.zip
Wow, craziness. Part way through working on the confpair system I got some
sudden insperation and completely redid Hash. Now everything but delete is implemented, including typesafe iterators and more. It's really cool, and everyone should check it out and start using it right away!
Diffstat (limited to 'src/hash.h')
-rw-r--r--src/hash.h447
1 files changed, 386 insertions, 61 deletions
diff --git a/src/hash.h b/src/hash.h
index 8385bb9..b8ced99 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -1,106 +1,431 @@
1#ifndef HASH_H 1#ifndef HASH_H
2#define HASH_H 2#define HASH_H
3/* 3
4#include <stddef.h> 4#include <stddef.h>
5#include <string.h> 5#include <string.h>
6#include <memory>
7#include <iostream>
6#include "hashable.h" 8#include "hashable.h"
7 9
8#define bitsToBytes( n ) (n/8+(n%8>0 ? 1 : 0)) 10#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0))
11
12template<typename T>
13uint32_t __calcHashCode( T k );
14
15template<typename T>
16bool __cmpHashKeys( T a, T b );
17
18struct __calcNextTSize_fast
19{
20 uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const
21 {
22 return nCapacity*2+1;
23 }
24};
25
26template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> >
27class Hash;
28
29template< typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
30struct HashProxy
31{
32 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
33private:
34 HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k, value v ) :
35 hsh( h ),
36 tKey( k ),
37 tValue( v ),
38 bFilled( true )
39 {
40 }
41
42 HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k ) :
43 hsh( h ),
44 tKey( k ),
45 bFilled( false )
46 {
47 }
48
49 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
50 key tKey;
51 value tValue;
52 bool bFilled;
53
54public:
55 operator value()
56 {
57 if( bFilled == false )
58 throw "Nope, no data there";
59 return tValue;
60 }
9 61
10template<class key, class value> 62 value operator=( value nval )
63 {
64 hsh.insert( tKey, nval );
65 return nval;
66 }
67
68};
69
70template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
11class Hash 71class Hash
12{ 72{
13 class iterator 73public:
74 Hash() :
75 nCapacity( 11 ),
76 nFilled( 0 ),
77 nDeleted( 0 ),
78 bFilled( NULL ),
79 aKeys( NULL ),
80 aValues( NULL ),
81 aHashCodes( NULL )
14 { 82 {
15 friend class Hash<key, value>; 83 nKeysSize = bitsToBytes( nCapacity );
16 private: 84 bFilled = ca.allocate( nKeysSize );
17 iterator( Hash<key, value> *hsh, int nIndex, key keyval, bool bInsert ) : 85 bDeleted = ca.allocate( nKeysSize );
18 hsh( hsh ), 86 clearBits();
19 nIndex( nIndex ), 87
20 keyval( keyval ), 88 aHashCodes = ca.allocate( nCapacity );
21 bInsert( bInsert ) 89 aKeys = ka.allocate( nCapacity );
90 aValues = va.allocate( nCapacity );
91 }
92
93 virtual ~Hash()
94 {
95 for( uint32_t j = 0; j < nCapacity; j++ )
22 { 96 {
97 if( isFilled( j ) )
98 {
99 va.destroy( &aValues[j] );
100 ka.destroy( &aKeys[j] );
101 }
23 } 102 }
103 va.deallocate( aValues, nCapacity );
104 ka.deallocate( aKeys, nCapacity );
105 ca.deallocate( bFilled, nKeysSize );
106 ca.deallocate( bDeleted, nKeysSize );
107 ca.deallocate( aHashCodes, nCapacity );
108 }
24 109
25 public: 110 void clearBits()
26 iterator() : 111 {
27 hsh( NULL ), 112 for( uint32_t j = 0; j < nKeysSize; j++ )
28 nIndex( -1 )
29 { 113 {
114 bFilled[j] = bDeleted[j] = 0;
30 } 115 }
116 }
117
118 int hasKey( key keyval )
119 {
120 printf("%s\n", keyval );
121 }
122
123 uint32_t getCapacity()
124 {
125 return nCapacity;
126 }
127
128 uint32_t getFill()
129 {
130 return nFilled;
131 }
132
133 uint32_t getDeleted()
134 {
135 return nDeleted;
136 }
137
138 HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k )
139 {
140 uint32_t hash = __calcHashCode( k );
141 bool bFill;
142 uint32_t nPos = probe( hash, k, bFill );
31 143
32 iterator &operator=( iterator &src ) 144 if( bFill )
33 { 145 {
34 this->hsh = src.hsh; 146 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, aKeys[nPos], aValues[nPos] );
35 this->nIndex = src.nIndex;
36 } 147 }
148 else
149 {
150 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, k );
151 }
152 }
153
154 void insert( key k, value v )
155 {
156 uint32_t hash = __calcHashCode( k );
157 bool bFill;
158 uint32_t nPos = probe( hash, k, bFill );
37 159
38 iterator &operator=( value &src ) 160 if( bFill )
39 { 161 {
40 if( bInsert ) 162 va.destroy( &aValues[nPos] );
41 printf("You wanted to insert %d\n", src ); 163 va.construct( &aValues[nPos], v );
42 else 164 }
43 printf("You wanted to insert %d\n", src ); 165 else
166 {
167 va.construct( &aValues[nPos], v );
168 ka.construct( &aKeys[nPos], k );
169 fill( nPos );
170 aHashCodes[nPos] = hash;
171 nFilled++;
44 } 172 }
173 }
45 174
46 private: 175 value get( key k )
47 Hash<key, value> *hsh; 176 {
48 int nIndex; 177 uint32_t hash = __calcHashCode( k );
49 bool bInsert; 178 bool bFill;
50 key keyval; 179 uint32_t nPos = probe( hash, k, bFill );
51 }; 180
181 if( bFill )
182 {
183 return aValues[nPos];
184 }
185 else
186 {
187 throw "Hey, no such thing...";
188 }
189 }
190
191 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
192 {
193 uint32_t nCur = hash%nCapacity;
194
195 // First we scan to see if the key is already there, abort if we
196 // run out of probing room, or we find a non-filled entry
197 for( int8_t j = 0;
198 isFilled( nCur ) && j < 32;
199 nCur = (nCur + (1<<j))%nCapacity, j++
200 )
201 {
202 // Is this the same hash code we were looking for?
203 if( hash == aHashCodes[nCur] )
204 {
205 // Is it really the same key? (for safety)
206 if( __cmpHashKeys( aKeys[nCur], k ) == true &&
207 isDeleted( nCur ) == false )
208 {
209 bFill = true;
210 return nCur;
211 }
212 }
213 }
214
215 // This is our insurance, if the table is full, then go ahead and
216 // rehash, then try again.
217 if( isFilled( nCur ) && rehash == true )
218 {
219 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
220
221 // This is potentially dangerous, and could cause an infinite loop.
222 // Be careful writing probe, eh?
223 return probe( hash, k, bFill );
224 }
225
226 bFill = false;
227 return nCur;
228 }
229
230 void reHash( uint32_t nNewSize )
231 {
232 // Save all the old data
233 uint32_t nOldCapacity = nCapacity;
234 uint32_t *bOldFilled = bFilled;
235 uint32_t *aOldHashCodes = aHashCodes;
236 uint32_t nOldKeysSize = nKeysSize;
237 uint32_t *bOldDeleted = bDeleted;
238 value *aOldValues = aValues;
239 key *aOldKeys = aKeys;
240
241 // Calculate new sizes
242 nCapacity = nNewSize;
243 nKeysSize = bitsToBytes( nCapacity );
244
245 // Allocate new memory + prep
246 bFilled = ca.allocate( nKeysSize );
247 bDeleted = ca.allocate( nKeysSize );
248 clearBits();
249
250 aHashCodes = ca.allocate( nCapacity );
251 aKeys = ka.allocate( nCapacity );
252 aValues = va.allocate( nCapacity );
253
254 // Re-insert all of the old data (except deleted items)
255 for( uint32_t j = 0; j < nOldCapacity; j++ )
256 {
257 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
258 {
259 insert( aOldKeys[j], aOldValues[j] );
260 }
261 }
262
263 // Delete all of the old data
264 for( uint32_t j = 0; j < nOldCapacity; j++ )
265 {
266 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
267 {
268 va.destroy( &aOldValues[j] );
269 ka.destroy( &aOldKeys[j] );
270 }
271 }
272 va.deallocate( aOldValues, nOldCapacity );
273 ka.deallocate( aOldKeys, nOldCapacity );
274 ca.deallocate( bOldFilled, nOldKeysSize );
275 ca.deallocate( bOldDeleted, nOldKeysSize );
276 ca.deallocate( aOldHashCodes, nOldCapacity );
277 }
52 278
53 template<class refval> 279 void fill( uint32_t loc )
54 class VRef
55 { 280 {
281 bFilled[loc/32] |= (1<<(loc%32));
282 }
283
284 bool isFilled( uint32_t loc )
285 {
286 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
287 }
288
289 bool isDeleted( uint32_t loc )
290 {
291 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
292 }
293
294 typedef struct iterator
295 {
296 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
297 private:
298 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) :
299 hsh( hsh ),
300 bFinished( false ),
301 nPos( 0 )
302 {
303 nPos = hsh.getFirstPos( bFinished );
304 }
305
306 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) :
307 hsh( hsh ),
308 nPos( 0 ),
309 bFinished( bDone )
310 {
311 }
312
313 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
314
315 uint32_t nPos;
316 bool bFinished;
317
56 public: 318 public:
57 VRef( refval &data ) : 319 iterator operator++( int )
58 data( data )
59 { 320 {
321 if( bFinished == false )
322 nPos = hsh.getNextPos( nPos, bFinished );
323
324 return *this;
325 }
326
327 iterator operator++()
328 {
329 if( bFinished == false )
330 nPos = hsh.getNextPos( nPos, bFinished );
331
332 return *this;
333 }
334
335 bool operator==( const iterator &oth )
336 {
337 if( bFinished != oth.bFinished )
338 return false;
339 if( bFinished == true )
340 {
341 return true;
342 }
343 else
344 {
345 if( oth.nPos == nPos )
346 return true;
347 return false;
348 }
349 }
350
351 bool operator!=( const iterator &oth )
352 {
353 return !(*this == oth );
354 }
355
356 std::pair<key,value> operator *()
357 {
358 return hsh.getAtPos( nPos );
60 } 359 }
61 refval &data;
62 }; 360 };
63 361
64public: 362 iterator begin()
65 Hash() :
66 nCapacity( 11 ),
67 nFilled( 0 ),
68 bFilled( NULL ),
69 aKeys( NULL ),
70 aValues( NULL ),
71 aHashCodes( NULL )
72 { 363 {
73 int nKeysSize = bitsToBytes( nCapacity ); 364 return iterator( *this );
74 bFilled = new unsigned char[ nKeysSize ]; 365 }
75 memset( bFilled, 0, nKeysSize ); 366
76 367 iterator end()
77 aKeys = new VRef<key>*[nCapacity]; 368 {
78 aValues = new value[nCapacity]; 369 return iterator( *this, true );
79 } 370 }
80 371
81 virtual ~Hash() 372private:
373 std::pair<key,value> getAtPos( uint32_t nPos )
82 { 374 {
375 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
83 } 376 }
84 377
85 iterator operator[]( key keyval ) 378 uint32_t getFirstPos( bool &bFinished )
86 { 379 {
87 //iterator i( this, 4, keyval, true ); 380 for( uint32_t j = 0; j < nCapacity; j++ )
88 //return i; 381 {
89 printf("%s\n", keyval.getString() ); 382 if( isFilled( j ) )
383 return j;
384 }
385
386 bFinished = true;
90 } 387 }
91 388
92 int hasKey( key keyval ) 389 uint32_t getNextPos( uint32_t nPos, bool &bFinished )
93 { 390 {
94 printf("%s\n", keyval.getString() ); 391 for( uint32_t j = nPos+1; j < nCapacity; j++ )
392 {
393 if( isFilled( j ) )
394 return j;
395 }
396
397 bFinished = true;
95 } 398 }
96 399
97private: 400private:
98 int nCapacity; 401 uint32_t nCapacity;
99 int nFilled; 402 uint32_t nFilled;
100 unsigned char *bFilled; 403 uint32_t nDeleted;
101 VRef<key> **aKeys; 404 uint32_t *bFilled;
102 unsigned long int *aHashCodes; 405 uint32_t *bDeleted;
406 uint32_t *aHashCodes;
407 uint32_t nKeysSize;
103 value *aValues; 408 value *aValues;
409 key *aKeys;
410 valuealloc va;
411 keyalloc ka;
412 challoc ca;
413 sizecalc szCalc;
104}; 414};
105*/ 415
416template<> uint32_t __calcHashCode<const char *>( const char *k );
417template<> bool __cmpHashKeys<const char *>( const char *a, const char *b );
418
419template<> uint32_t __calcHashCode<char *>( char *k );
420template<> bool __cmpHashKeys<char *>( char *a, char *b );
421
422template<> uint32_t __calcHashCode<const std::string>( const std::string k );
423template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b );
424
425template<> uint32_t __calcHashCode<std::string>( std::string k );
426template<> bool __cmpHashKeys<std::string>( std::string a, std::string b );
427
428template<> uint32_t __calcHashCode<Hashable &>( Hashable &k );
429template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b );
430
106#endif 431#endif