aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2006-11-21 16:01:57 +0000
committerMike Buland <eichlan@xagasoft.com>2006-11-21 16:01:57 +0000
commit6f6bb9f9309a6d5e471579ec565d56e4d083dafb (patch)
tree5ab79eded3cdf59053e90cf24d977dca31fed495
parent525b50abe6b5a6e06c2b4ba327c9490de5277a5b (diff)
downloadlibbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.gz
libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.bz2
libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.xz
libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.zip
Added erase functionality, and specializations for using ints as hash keys, so
really it does everything the old one did, does it better, easier, and possibly faster.
-rw-r--r--src/hash.cpp20
-rw-r--r--src/hash.h247
-rw-r--r--src/tests/hash.cpp26
3 files changed, 184 insertions, 109 deletions
diff --git a/src/hash.cpp b/src/hash.cpp
index 14be208..0241753 100644
--- a/src/hash.cpp
+++ b/src/hash.cpp
@@ -1,5 +1,25 @@
1#include "hash.h" 1#include "hash.h"
2 2
3template<> uint32_t __calcHashCode<const int>( const int k )
4{
5 return k;
6}
7
8template<> bool __cmpHashKeys<const int>( const int a, const int b )
9{
10 return a == b;
11}
12
13template<> uint32_t __calcHashCode<int>( int k )
14{
15 return k;
16}
17
18template<> bool __cmpHashKeys<int>( int a, int b )
19{
20 return a == b;
21}
22
3template<> 23template<>
4uint32_t __calcHashCode<const char *>( const char * k ) 24uint32_t __calcHashCode<const char *>( const char * k )
5{ 25{
diff --git a/src/hash.h b/src/hash.h
index c4f4b43..ff3bafd 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -40,8 +40,9 @@ private:
40 { 40 {
41 } 41 }
42 42
43 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, _value *pValue ) : 43 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) :
44 hsh( h ), 44 hsh( h ),
45 nPos( nPos ),
45 pValue( pValue ), 46 pValue( pValue ),
46 bFilled( true ) 47 bFilled( true )
47 { 48 {
@@ -74,6 +75,12 @@ public:
74 return bFilled; 75 return bFilled;
75 } 76 }
76 77
78 void erase()
79 {
80 if( bFilled )
81 hsh._erase( nPos );
82 }
83
77 _value operator=( _value nval ) 84 _value operator=( _value nval )
78 { 85 {
79 if( bFilled ) 86 if( bFilled )
@@ -119,10 +126,11 @@ public:
119 for( uint32_t j = 0; j < nCapacity; j++ ) 126 for( uint32_t j = 0; j < nCapacity; j++ )
120 { 127 {
121 if( isFilled( j ) ) 128 if( isFilled( j ) )
122 { 129 if( !isDeleted( j ) )
123 va.destroy( &aValues[j] ); 130 {
124 ka.destroy( &aKeys[j] ); 131 va.destroy( &aValues[j] );
125 } 132 ka.destroy( &aKeys[j] );
133 }
126 } 134 }
127 va.deallocate( aValues, nCapacity ); 135 va.deallocate( aValues, nCapacity );
128 ka.deallocate( aKeys, nCapacity ); 136 ka.deallocate( aKeys, nCapacity );
@@ -167,7 +175,7 @@ public:
167 175
168 if( bFill ) 176 if( bFill )
169 { 177 {
170 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &aValues[nPos] ); 178 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] );
171 } 179 }
172 else 180 else
173 { 181 {
@@ -192,7 +200,7 @@ public:
192 } 200 }
193 } 201 }
194 202
195 value get( key k ) 203 void erase( key k )
196 { 204 {
197 uint32_t hash = __calcHashCode( k ); 205 uint32_t hash = __calcHashCode( k );
198 bool bFill; 206 bool bFill;
@@ -200,110 +208,24 @@ public:
200 208
201 if( bFill ) 209 if( bFill )
202 { 210 {
203 return aValues[nPos]; 211 _erase( nPos );
204 }
205 else
206 {
207 throw "Hey, no such thing...";
208 }
209 }
210
211 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
212 {
213 uint32_t nCur = hash%nCapacity;
214
215 // First we scan to see if the key is already there, abort if we
216 // run out of probing room, or we find a non-filled entry
217 for( int8_t j = 0;
218 isFilled( nCur ) && j < 32;
219 nCur = (nCur + (1<<j))%nCapacity, j++
220 )
221 {
222 // Is this the same hash code we were looking for?
223 if( hash == aHashCodes[nCur] )
224 {
225 // Is it really the same key? (for safety)
226 if( __cmpHashKeys( aKeys[nCur], k ) == true &&
227 isDeleted( nCur ) == false )
228 {
229 bFill = true;
230 return nCur;
231 }
232 }
233 } 212 }
234
235 // This is our insurance, if the table is full, then go ahead and
236 // rehash, then try again.
237 if( isFilled( nCur ) && rehash == true )
238 {
239 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
240
241 // This is potentially dangerous, and could cause an infinite loop.
242 // Be careful writing probe, eh?
243 return probe( hash, k, bFill );
244 }
245
246 bFill = false;
247 return nCur;
248 } 213 }
249 214
250 void reHash( uint32_t nNewSize ) 215 value get( key k )
251 { 216 {
252 // Save all the old data 217 uint32_t hash = __calcHashCode( k );
253 uint32_t nOldCapacity = nCapacity; 218 bool bFill;
254 uint32_t *bOldFilled = bFilled; 219 uint32_t nPos = probe( hash, k, bFill );
255 uint32_t *aOldHashCodes = aHashCodes;
256 uint32_t nOldKeysSize = nKeysSize;
257 uint32_t *bOldDeleted = bDeleted;
258 value *aOldValues = aValues;
259 key *aOldKeys = aKeys;
260
261 // Calculate new sizes
262 nCapacity = nNewSize;
263 nKeysSize = bitsToBytes( nCapacity );
264
265 // Allocate new memory + prep
266 bFilled = ca.allocate( nKeysSize );
267 bDeleted = ca.allocate( nKeysSize );
268 clearBits();
269
270 aHashCodes = ca.allocate( nCapacity );
271 aKeys = ka.allocate( nCapacity );
272 aValues = va.allocate( nCapacity );
273 220
274 // Re-insert all of the old data (except deleted items) 221 if( bFill )
275 for( uint32_t j = 0; j < nOldCapacity; j++ )
276 { 222 {
277 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) 223 return aValues[nPos];
278 {
279 insert( aOldKeys[j], aOldValues[j] );
280 }
281 } 224 }
282 225 else
283 // Delete all of the old data
284 for( uint32_t j = 0; j < nOldCapacity; j++ )
285 { 226 {
286 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) 227 throw "Hey, no such thing...";
287 {
288 va.destroy( &aOldValues[j] );
289 ka.destroy( &aOldKeys[j] );
290 }
291 } 228 }
292 va.deallocate( aOldValues, nOldCapacity );
293 ka.deallocate( aOldKeys, nOldCapacity );
294 ca.deallocate( bOldFilled, nOldKeysSize );
295 ca.deallocate( bOldDeleted, nOldKeysSize );
296 ca.deallocate( aOldHashCodes, nOldCapacity );
297 }
298
299 bool isFilled( uint32_t loc )
300 {
301 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
302 }
303
304 bool isDeleted( uint32_t loc )
305 {
306 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
307 } 229 }
308 230
309 typedef struct iterator 231 typedef struct iterator
@@ -394,6 +316,13 @@ private:
394 nFilled++; 316 nFilled++;
395 } 317 }
396 318
319 void _erase( uint32_t loc )
320 {
321 bDeleted[loc/32] |= (1<<(loc%32));
322 va.destroy( &aValues[loc] );
323 ka.destroy( &aKeys[loc] );
324 }
325
397 std::pair<key,value> getAtPos( uint32_t nPos ) 326 std::pair<key,value> getAtPos( uint32_t nPos )
398 { 327 {
399 return std::pair<key,value>(aKeys[nPos],aValues[nPos]); 328 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
@@ -404,7 +333,8 @@ private:
404 for( uint32_t j = 0; j < nCapacity; j++ ) 333 for( uint32_t j = 0; j < nCapacity; j++ )
405 { 334 {
406 if( isFilled( j ) ) 335 if( isFilled( j ) )
407 return j; 336 if( !isDeleted( j ) )
337 return j;
408 } 338 }
409 339
410 bFinished = true; 340 bFinished = true;
@@ -415,12 +345,115 @@ private:
415 for( uint32_t j = nPos+1; j < nCapacity; j++ ) 345 for( uint32_t j = nPos+1; j < nCapacity; j++ )
416 { 346 {
417 if( isFilled( j ) ) 347 if( isFilled( j ) )
418 return j; 348 if( !isDeleted( j ) )
349 return j;
419 } 350 }
420 351
421 bFinished = true; 352 bFinished = true;
422 } 353 }
423 354
355 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
356 {
357 uint32_t nCur = hash%nCapacity;
358
359 // First we scan to see if the key is already there, abort if we
360 // run out of probing room, or we find a non-filled entry
361 for( int8_t j = 0;
362 isFilled( nCur ) && j < 32;
363 nCur = (nCur + (1<<j))%nCapacity, j++
364 )
365 {
366 // Is this the same hash code we were looking for?
367 if( hash == aHashCodes[nCur] )
368 {
369 // Skip over deleted entries. Deleted entries are also filled,
370 // so we only have to do this check here.
371 if( isDeleted( nCur ) )
372 continue;
373
374 // Is it really the same key? (for safety)
375 if( __cmpHashKeys( aKeys[nCur], k ) == true )
376 {
377 bFill = true;
378 return nCur;
379 }
380 }
381 }
382
383 // This is our insurance, if the table is full, then go ahead and
384 // rehash, then try again.
385 if( isFilled( nCur ) && rehash == true )
386 {
387 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
388
389 // This is potentially dangerous, and could cause an infinite loop.
390 // Be careful writing probe, eh?
391 return probe( hash, k, bFill );
392 }
393
394 bFill = false;
395 return nCur;
396 }
397
398 void reHash( uint32_t nNewSize )
399 {
400 // Save all the old data
401 uint32_t nOldCapacity = nCapacity;
402 uint32_t *bOldFilled = bFilled;
403 uint32_t *aOldHashCodes = aHashCodes;
404 uint32_t nOldKeysSize = nKeysSize;
405 uint32_t *bOldDeleted = bDeleted;
406 value *aOldValues = aValues;
407 key *aOldKeys = aKeys;
408
409 // Calculate new sizes
410 nCapacity = nNewSize;
411 nKeysSize = bitsToBytes( nCapacity );
412
413 // Allocate new memory + prep
414 bFilled = ca.allocate( nKeysSize );
415 bDeleted = ca.allocate( nKeysSize );
416 clearBits();
417
418 aHashCodes = ca.allocate( nCapacity );
419 aKeys = ka.allocate( nCapacity );
420 aValues = va.allocate( nCapacity );
421
422 // Re-insert all of the old data (except deleted items)
423 for( uint32_t j = 0; j < nOldCapacity; j++ )
424 {
425 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
426 {
427 insert( aOldKeys[j], aOldValues[j] );
428 }
429 }
430
431 // Delete all of the old data
432 for( uint32_t j = 0; j < nOldCapacity; j++ )
433 {
434 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
435 {
436 va.destroy( &aOldValues[j] );
437 ka.destroy( &aOldKeys[j] );
438 }
439 }
440 va.deallocate( aOldValues, nOldCapacity );
441 ka.deallocate( aOldKeys, nOldCapacity );
442 ca.deallocate( bOldFilled, nOldKeysSize );
443 ca.deallocate( bOldDeleted, nOldKeysSize );
444 ca.deallocate( aOldHashCodes, nOldCapacity );
445 }
446
447 bool isFilled( uint32_t loc )
448 {
449 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
450 }
451
452 bool isDeleted( uint32_t loc )
453 {
454 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
455 }
456
424private: 457private:
425 uint32_t nCapacity; 458 uint32_t nCapacity;
426 uint32_t nFilled; 459 uint32_t nFilled;
@@ -437,6 +470,12 @@ private:
437 sizecalc szCalc; 470 sizecalc szCalc;
438}; 471};
439 472
473template<> uint32_t __calcHashCode<const int>( const int k );
474template<> bool __cmpHashKeys<const int>( const int a, const int b );
475
476template<> uint32_t __calcHashCode<int>( int k );
477template<> bool __cmpHashKeys<int>( int a, int b );
478
440template<> uint32_t __calcHashCode<const char *>( const char *k ); 479template<> uint32_t __calcHashCode<const char *>( const char *k );
441template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); 480template<> bool __cmpHashKeys<const char *>( const char *a, const char *b );
442 481
diff --git a/src/tests/hash.cpp b/src/tests/hash.cpp
index 8406439..f9a8f12 100644
--- a/src/tests/hash.cpp
+++ b/src/tests/hash.cpp
@@ -67,11 +67,9 @@ int main()
67 } 67 }
68 68
69 printf("Getting\n-------------------\n\n"); 69 printf("Getting\n-------------------\n\n");
70 printf("%d\n", sTest.get( names[10] ) ); 70
71 printf("%d\n", (int)sTest[names[10]] ); 71 sTest.erase("Homer the Great");
72 sTest[names[10]] = 22; 72 sTest["Bart's Comet"].erase();
73 sTest["That one guy"] = 135;
74 printf("%d\n", (int)sTest[names[10]] );
75 73
76 for( Hash<std::string, int>::iterator i = sTest.begin(); 74 for( Hash<std::string, int>::iterator i = sTest.begin();
77 i != sTest.end(); i++ ) 75 i != sTest.end(); i++ )
@@ -79,5 +77,23 @@ int main()
79 Hash<std::string, int>::iterator j = i; 77 Hash<std::string, int>::iterator j = i;
80 printf("%d: %s\n", (*j).second, (*j).first.c_str() ); 78 printf("%d: %s\n", (*j).second, (*j).first.c_str() );
81 } 79 }
80
81 for( int j = 0; j < 33; j++ )
82 {
83 if( sTest[names[j]].isFilled() )
84 {
85 if( sTest[names[j]] != j )
86 {
87 printf("'%s' should be %d, is %d\n",
88 names[j], j,
89 sTest[names[j]].value()
90 );
91 }
92 }
93 else
94 {
95 printf("Missing element %d, '%s'\n", j, names[j] );
96 }
97 }
82} 98}
83 99