aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2006-11-21 16:01:57 +0000
committerMike Buland <eichlan@xagasoft.com>2006-11-21 16:01:57 +0000
commit6f6bb9f9309a6d5e471579ec565d56e4d083dafb (patch)
tree5ab79eded3cdf59053e90cf24d977dca31fed495 /src
parent525b50abe6b5a6e06c2b4ba327c9490de5277a5b (diff)
downloadlibbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.gz
libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.bz2
libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.xz
libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.zip
Added erase functionality, and specializations for using ints as hash keys, so
really it does everything the old one did, does it better, easier, and possibly faster.
Diffstat (limited to 'src')
-rw-r--r--src/hash.cpp20
-rw-r--r--src/hash.h247
-rw-r--r--src/tests/hash.cpp26
3 files changed, 184 insertions, 109 deletions
diff --git a/src/hash.cpp b/src/hash.cpp
index 14be208..0241753 100644
--- a/src/hash.cpp
+++ b/src/hash.cpp
@@ -1,5 +1,25 @@
1#include "hash.h" 1#include "hash.h"
2 2
3template<> uint32_t __calcHashCode<const int>( const int k )
4{
5 return k;
6}
7
8template<> bool __cmpHashKeys<const int>( const int a, const int b )
9{
10 return a == b;
11}
12
13template<> uint32_t __calcHashCode<int>( int k )
14{
15 return k;
16}
17
18template<> bool __cmpHashKeys<int>( int a, int b )
19{
20 return a == b;
21}
22
3template<> 23template<>
4uint32_t __calcHashCode<const char *>( const char * k ) 24uint32_t __calcHashCode<const char *>( const char * k )
5{ 25{
diff --git a/src/hash.h b/src/hash.h
index c4f4b43..ff3bafd 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -40,8 +40,9 @@ private:
40 { 40 {
41 } 41 }
42 42
43 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, _value *pValue ) : 43 HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) :
44 hsh( h ), 44 hsh( h ),
45 nPos( nPos ),
45 pValue( pValue ), 46 pValue( pValue ),
46 bFilled( true ) 47 bFilled( true )
47 { 48 {
@@ -74,6 +75,12 @@ public:
74 return bFilled; 75 return bFilled;
75 } 76 }
76 77
78 void erase()
79 {
80 if( bFilled )
81 hsh._erase( nPos );
82 }
83
77 _value operator=( _value nval ) 84 _value operator=( _value nval )
78 { 85 {
79 if( bFilled ) 86 if( bFilled )
@@ -119,10 +126,11 @@ public:
119 for( uint32_t j = 0; j < nCapacity; j++ ) 126 for( uint32_t j = 0; j < nCapacity; j++ )
120 { 127 {
121 if( isFilled( j ) ) 128 if( isFilled( j ) )
122 { 129 if( !isDeleted( j ) )
123 va.destroy( &aValues[j] ); 130 {
124 ka.destroy( &aKeys[j] ); 131 va.destroy( &aValues[j] );
125 } 132 ka.destroy( &aKeys[j] );
133 }
126 } 134 }
127 va.deallocate( aValues, nCapacity ); 135 va.deallocate( aValues, nCapacity );
128 ka.deallocate( aKeys, nCapacity ); 136 ka.deallocate( aKeys, nCapacity );
@@ -167,7 +175,7 @@ public:
167 175
168 if( bFill ) 176 if( bFill )
169 { 177 {
170 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &aValues[nPos] ); 178 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] );
171 } 179 }
172 else 180 else
173 { 181 {
@@ -192,7 +200,7 @@ public:
192 } 200 }
193 } 201 }
194 202
195 value get( key k ) 203 void erase( key k )
196 { 204 {
197 uint32_t hash = __calcHashCode( k ); 205 uint32_t hash = __calcHashCode( k );
198 bool bFill; 206 bool bFill;
@@ -200,110 +208,24 @@ public:
200 208
201 if( bFill ) 209 if( bFill )
202 { 210 {
203 return aValues[nPos]; 211 _erase( nPos );
204 }
205 else
206 {
207 throw "Hey, no such thing...";
208 }
209 }
210
211 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
212 {
213 uint32_t nCur = hash%nCapacity;
214
215 // First we scan to see if the key is already there, abort if we
216 // run out of probing room, or we find a non-filled entry
217 for( int8_t j = 0;
218 isFilled( nCur ) && j < 32;
219 nCur = (nCur + (1<<j))%nCapacity, j++
220 )
221 {
222 // Is this the same hash code we were looking for?
223 if( hash == aHashCodes[nCur] )
224 {
225 // Is it really the same key? (for safety)
226 if( __cmpHashKeys( aKeys[nCur], k ) == true &&
227 isDeleted( nCur ) == false )
228 {
229 bFill = true;
230 return nCur;
231 }
232 }
233 } 212 }
234
235 // This is our insurance, if the table is full, then go ahead and
236 // rehash, then try again.
237 if( isFilled( nCur ) && rehash == true )
238 {
239 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
240
241 // This is potentially dangerous, and could cause an infinite loop.
242 // Be careful writing probe, eh?
243 return probe( hash, k, bFill );
244 }
245
246 bFill = false;
247 return nCur;
248 } 213 }
249 214
250 void reHash( uint32_t nNewSize ) 215 value get( key k )
251 { 216 {
252 // Save all the old data 217 uint32_t hash = __calcHashCode( k );
253 uint32_t nOldCapacity = nCapacity; 218 bool bFill;
254 uint32_t *bOldFilled = bFilled; 219 uint32_t nPos = probe( hash, k, bFill );
255 uint32_t *aOldHashCodes = aHashCodes;
256 uint32_t nOldKeysSize = nKeysSize;
257 uint32_t *bOldDeleted = bDeleted;
258 value *aOldValues = aValues;
259 key *aOldKeys = aKeys;
260
261 // Calculate new sizes
262 nCapacity = nNewSize;
263 nKeysSize = bitsToBytes( nCapacity );
264
265 // Allocate new memory + prep
266 bFilled = ca.allocate( nKeysSize );
267 bDeleted = ca.allocate( nKeysSize );
268 clearBits();
269
270 aHashCodes = ca.allocate( nCapacity );
271 aKeys = ka.allocate( nCapacity );
272 aValues = va.allocate( nCapacity );
273 220
274 // Re-insert all of the old data (except deleted items) 221 if( bFill )
275 for( uint32_t j = 0; j < nOldCapacity; j++ )
276 { 222 {
277 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) 223 return aValues[nPos];
278 {
279 insert( aOldKeys[j], aOldValues[j] );
280 }
281 } 224 }
282 225 else
283 // Delete all of the old data
284 for( uint32_t j = 0; j < nOldCapacity; j++ )
285 { 226 {
286 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) 227 throw "Hey, no such thing...";
287 {
288 va.destroy( &aOldValues[j] );
289 ka.destroy( &aOldKeys[j] );
290 }
291 } 228 }
292 va.deallocate( aOldValues, nOldCapacity );
293 ka.deallocate( aOldKeys, nOldCapacity );
294 ca.deallocate( bOldFilled, nOldKeysSize );
295 ca.deallocate( bOldDeleted, nOldKeysSize );
296 ca.deallocate( aOldHashCodes, nOldCapacity );
297 }
298
299 bool isFilled( uint32_t loc )
300 {
301 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
302 }
303
304 bool isDeleted( uint32_t loc )
305 {
306 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
307 } 229 }
308 230
309 typedef struct iterator 231 typedef struct iterator
@@ -394,6 +316,13 @@ private:
394 nFilled++; 316 nFilled++;
395 } 317 }
396 318
319 void _erase( uint32_t loc )
320 {
321 bDeleted[loc/32] |= (1<<(loc%32));
322 va.destroy( &aValues[loc] );
323 ka.destroy( &aKeys[loc] );
324 }
325
397 std::pair<key,value> getAtPos( uint32_t nPos ) 326 std::pair<key,value> getAtPos( uint32_t nPos )
398 { 327 {
399 return std::pair<key,value>(aKeys[nPos],aValues[nPos]); 328 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
@@ -404,7 +333,8 @@ private:
404 for( uint32_t j = 0; j < nCapacity; j++ ) 333 for( uint32_t j = 0; j < nCapacity; j++ )
405 { 334 {
406 if( isFilled( j ) ) 335 if( isFilled( j ) )
407 return j; 336 if( !isDeleted( j ) )
337 return j;
408 } 338 }
409 339
410 bFinished = true; 340 bFinished = true;
@@ -415,12 +345,115 @@ private:
415 for( uint32_t j = nPos+1; j < nCapacity; j++ ) 345 for( uint32_t j = nPos+1; j < nCapacity; j++ )
416 { 346 {
417 if( isFilled( j ) ) 347 if( isFilled( j ) )
418 return j; 348 if( !isDeleted( j ) )
349 return j;
419 } 350 }
420 351
421 bFinished = true; 352 bFinished = true;
422 } 353 }
423 354
355 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
356 {
357 uint32_t nCur = hash%nCapacity;
358
359 // First we scan to see if the key is already there, abort if we
360 // run out of probing room, or we find a non-filled entry
361 for( int8_t j = 0;
362 isFilled( nCur ) && j < 32;
363 nCur = (nCur + (1<<j))%nCapacity, j++
364 )
365 {
366 // Is this the same hash code we were looking for?
367 if( hash == aHashCodes[nCur] )
368 {
369 // Skip over deleted entries. Deleted entries are also filled,
370 // so we only have to do this check here.
371 if( isDeleted( nCur ) )
372 continue;
373
374 // Is it really the same key? (for safety)
375 if( __cmpHashKeys( aKeys[nCur], k ) == true )
376 {
377 bFill = true;
378 return nCur;
379 }
380 }
381 }
382
383 // This is our insurance, if the table is full, then go ahead and
384 // rehash, then try again.
385 if( isFilled( nCur ) && rehash == true )
386 {
387 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
388
389 // This is potentially dangerous, and could cause an infinite loop.
390 // Be careful writing probe, eh?
391 return probe( hash, k, bFill );
392 }
393
394 bFill = false;
395 return nCur;
396 }
397
398 void reHash( uint32_t nNewSize )
399 {
400 // Save all the old data
401 uint32_t nOldCapacity = nCapacity;
402 uint32_t *bOldFilled = bFilled;
403 uint32_t *aOldHashCodes = aHashCodes;
404 uint32_t nOldKeysSize = nKeysSize;
405 uint32_t *bOldDeleted = bDeleted;
406 value *aOldValues = aValues;
407 key *aOldKeys = aKeys;
408
409 // Calculate new sizes
410 nCapacity = nNewSize;
411 nKeysSize = bitsToBytes( nCapacity );
412
413 // Allocate new memory + prep
414 bFilled = ca.allocate( nKeysSize );
415 bDeleted = ca.allocate( nKeysSize );
416 clearBits();
417
418 aHashCodes = ca.allocate( nCapacity );
419 aKeys = ka.allocate( nCapacity );
420 aValues = va.allocate( nCapacity );
421
422 // Re-insert all of the old data (except deleted items)
423 for( uint32_t j = 0; j < nOldCapacity; j++ )
424 {
425 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
426 {
427 insert( aOldKeys[j], aOldValues[j] );
428 }
429 }
430
431 // Delete all of the old data
432 for( uint32_t j = 0; j < nOldCapacity; j++ )
433 {
434 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
435 {
436 va.destroy( &aOldValues[j] );
437 ka.destroy( &aOldKeys[j] );
438 }
439 }
440 va.deallocate( aOldValues, nOldCapacity );
441 ka.deallocate( aOldKeys, nOldCapacity );
442 ca.deallocate( bOldFilled, nOldKeysSize );
443 ca.deallocate( bOldDeleted, nOldKeysSize );
444 ca.deallocate( aOldHashCodes, nOldCapacity );
445 }
446
447 bool isFilled( uint32_t loc )
448 {
449 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
450 }
451
452 bool isDeleted( uint32_t loc )
453 {
454 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
455 }
456
424private: 457private:
425 uint32_t nCapacity; 458 uint32_t nCapacity;
426 uint32_t nFilled; 459 uint32_t nFilled;
@@ -437,6 +470,12 @@ private:
437 sizecalc szCalc; 470 sizecalc szCalc;
438}; 471};
439 472
473template<> uint32_t __calcHashCode<const int>( const int k );
474template<> bool __cmpHashKeys<const int>( const int a, const int b );
475
476template<> uint32_t __calcHashCode<int>( int k );
477template<> bool __cmpHashKeys<int>( int a, int b );
478
440template<> uint32_t __calcHashCode<const char *>( const char *k ); 479template<> uint32_t __calcHashCode<const char *>( const char *k );
441template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); 480template<> bool __cmpHashKeys<const char *>( const char *a, const char *b );
442 481
diff --git a/src/tests/hash.cpp b/src/tests/hash.cpp
index 8406439..f9a8f12 100644
--- a/src/tests/hash.cpp
+++ b/src/tests/hash.cpp
@@ -67,11 +67,9 @@ int main()
67 } 67 }
68 68
69 printf("Getting\n-------------------\n\n"); 69 printf("Getting\n-------------------\n\n");
70 printf("%d\n", sTest.get( names[10] ) ); 70
71 printf("%d\n", (int)sTest[names[10]] ); 71 sTest.erase("Homer the Great");
72 sTest[names[10]] = 22; 72 sTest["Bart's Comet"].erase();
73 sTest["That one guy"] = 135;
74 printf("%d\n", (int)sTest[names[10]] );
75 73
76 for( Hash<std::string, int>::iterator i = sTest.begin(); 74 for( Hash<std::string, int>::iterator i = sTest.begin();
77 i != sTest.end(); i++ ) 75 i != sTest.end(); i++ )
@@ -79,5 +77,23 @@ int main()
79 Hash<std::string, int>::iterator j = i; 77 Hash<std::string, int>::iterator j = i;
80 printf("%d: %s\n", (*j).second, (*j).first.c_str() ); 78 printf("%d: %s\n", (*j).second, (*j).first.c_str() );
81 } 79 }
80
81 for( int j = 0; j < 33; j++ )
82 {
83 if( sTest[names[j]].isFilled() )
84 {
85 if( sTest[names[j]] != j )
86 {
87 printf("'%s' should be %d, is %d\n",
88 names[j], j,
89 sTest[names[j]].value()
90 );
91 }
92 }
93 else
94 {
95 printf("Missing element %d, '%s'\n", j, names[j] );
96 }
97 }
82} 98}
83 99