diff options
author | Mike Buland <eichlan@xagasoft.com> | 2006-11-21 16:01:57 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2006-11-21 16:01:57 +0000 |
commit | 6f6bb9f9309a6d5e471579ec565d56e4d083dafb (patch) | |
tree | 5ab79eded3cdf59053e90cf24d977dca31fed495 | |
parent | 525b50abe6b5a6e06c2b4ba327c9490de5277a5b (diff) | |
download | libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.gz libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.bz2 libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.xz libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.zip |
Added erase functionality, and specializations for using ints as hash keys, so
really it does everything the old one did, does it better, easier, and possibly
faster.
-rw-r--r-- | src/hash.cpp | 20 | ||||
-rw-r--r-- | src/hash.h | 247 | ||||
-rw-r--r-- | src/tests/hash.cpp | 26 |
3 files changed, 184 insertions, 109 deletions
diff --git a/src/hash.cpp b/src/hash.cpp index 14be208..0241753 100644 --- a/src/hash.cpp +++ b/src/hash.cpp | |||
@@ -1,5 +1,25 @@ | |||
1 | #include "hash.h" | 1 | #include "hash.h" |
2 | 2 | ||
3 | template<> uint32_t __calcHashCode<const int>( const int k ) | ||
4 | { | ||
5 | return k; | ||
6 | } | ||
7 | |||
8 | template<> bool __cmpHashKeys<const int>( const int a, const int b ) | ||
9 | { | ||
10 | return a == b; | ||
11 | } | ||
12 | |||
13 | template<> uint32_t __calcHashCode<int>( int k ) | ||
14 | { | ||
15 | return k; | ||
16 | } | ||
17 | |||
18 | template<> bool __cmpHashKeys<int>( int a, int b ) | ||
19 | { | ||
20 | return a == b; | ||
21 | } | ||
22 | |||
3 | template<> | 23 | template<> |
4 | uint32_t __calcHashCode<const char *>( const char * k ) | 24 | uint32_t __calcHashCode<const char *>( const char * k ) |
5 | { | 25 | { |
@@ -40,8 +40,9 @@ private: | |||
40 | { | 40 | { |
41 | } | 41 | } |
42 | 42 | ||
43 | HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, _value *pValue ) : | 43 | HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) : |
44 | hsh( h ), | 44 | hsh( h ), |
45 | nPos( nPos ), | ||
45 | pValue( pValue ), | 46 | pValue( pValue ), |
46 | bFilled( true ) | 47 | bFilled( true ) |
47 | { | 48 | { |
@@ -74,6 +75,12 @@ public: | |||
74 | return bFilled; | 75 | return bFilled; |
75 | } | 76 | } |
76 | 77 | ||
78 | void erase() | ||
79 | { | ||
80 | if( bFilled ) | ||
81 | hsh._erase( nPos ); | ||
82 | } | ||
83 | |||
77 | _value operator=( _value nval ) | 84 | _value operator=( _value nval ) |
78 | { | 85 | { |
79 | if( bFilled ) | 86 | if( bFilled ) |
@@ -119,10 +126,11 @@ public: | |||
119 | for( uint32_t j = 0; j < nCapacity; j++ ) | 126 | for( uint32_t j = 0; j < nCapacity; j++ ) |
120 | { | 127 | { |
121 | if( isFilled( j ) ) | 128 | if( isFilled( j ) ) |
122 | { | 129 | if( !isDeleted( j ) ) |
123 | va.destroy( &aValues[j] ); | 130 | { |
124 | ka.destroy( &aKeys[j] ); | 131 | va.destroy( &aValues[j] ); |
125 | } | 132 | ka.destroy( &aKeys[j] ); |
133 | } | ||
126 | } | 134 | } |
127 | va.deallocate( aValues, nCapacity ); | 135 | va.deallocate( aValues, nCapacity ); |
128 | ka.deallocate( aKeys, nCapacity ); | 136 | ka.deallocate( aKeys, nCapacity ); |
@@ -167,7 +175,7 @@ public: | |||
167 | 175 | ||
168 | if( bFill ) | 176 | if( bFill ) |
169 | { | 177 | { |
170 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &aValues[nPos] ); | 178 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] ); |
171 | } | 179 | } |
172 | else | 180 | else |
173 | { | 181 | { |
@@ -192,7 +200,7 @@ public: | |||
192 | } | 200 | } |
193 | } | 201 | } |
194 | 202 | ||
195 | value get( key k ) | 203 | void erase( key k ) |
196 | { | 204 | { |
197 | uint32_t hash = __calcHashCode( k ); | 205 | uint32_t hash = __calcHashCode( k ); |
198 | bool bFill; | 206 | bool bFill; |
@@ -200,110 +208,24 @@ public: | |||
200 | 208 | ||
201 | if( bFill ) | 209 | if( bFill ) |
202 | { | 210 | { |
203 | return aValues[nPos]; | 211 | _erase( nPos ); |
204 | } | ||
205 | else | ||
206 | { | ||
207 | throw "Hey, no such thing..."; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
212 | { | ||
213 | uint32_t nCur = hash%nCapacity; | ||
214 | |||
215 | // First we scan to see if the key is already there, abort if we | ||
216 | // run out of probing room, or we find a non-filled entry | ||
217 | for( int8_t j = 0; | ||
218 | isFilled( nCur ) && j < 32; | ||
219 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
220 | ) | ||
221 | { | ||
222 | // Is this the same hash code we were looking for? | ||
223 | if( hash == aHashCodes[nCur] ) | ||
224 | { | ||
225 | // Is it really the same key? (for safety) | ||
226 | if( __cmpHashKeys( aKeys[nCur], k ) == true && | ||
227 | isDeleted( nCur ) == false ) | ||
228 | { | ||
229 | bFill = true; | ||
230 | return nCur; | ||
231 | } | ||
232 | } | ||
233 | } | 212 | } |
234 | |||
235 | // This is our insurance, if the table is full, then go ahead and | ||
236 | // rehash, then try again. | ||
237 | if( isFilled( nCur ) && rehash == true ) | ||
238 | { | ||
239 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
240 | |||
241 | // This is potentially dangerous, and could cause an infinite loop. | ||
242 | // Be careful writing probe, eh? | ||
243 | return probe( hash, k, bFill ); | ||
244 | } | ||
245 | |||
246 | bFill = false; | ||
247 | return nCur; | ||
248 | } | 213 | } |
249 | 214 | ||
250 | void reHash( uint32_t nNewSize ) | 215 | value get( key k ) |
251 | { | 216 | { |
252 | // Save all the old data | 217 | uint32_t hash = __calcHashCode( k ); |
253 | uint32_t nOldCapacity = nCapacity; | 218 | bool bFill; |
254 | uint32_t *bOldFilled = bFilled; | 219 | uint32_t nPos = probe( hash, k, bFill ); |
255 | uint32_t *aOldHashCodes = aHashCodes; | ||
256 | uint32_t nOldKeysSize = nKeysSize; | ||
257 | uint32_t *bOldDeleted = bDeleted; | ||
258 | value *aOldValues = aValues; | ||
259 | key *aOldKeys = aKeys; | ||
260 | |||
261 | // Calculate new sizes | ||
262 | nCapacity = nNewSize; | ||
263 | nKeysSize = bitsToBytes( nCapacity ); | ||
264 | |||
265 | // Allocate new memory + prep | ||
266 | bFilled = ca.allocate( nKeysSize ); | ||
267 | bDeleted = ca.allocate( nKeysSize ); | ||
268 | clearBits(); | ||
269 | |||
270 | aHashCodes = ca.allocate( nCapacity ); | ||
271 | aKeys = ka.allocate( nCapacity ); | ||
272 | aValues = va.allocate( nCapacity ); | ||
273 | 220 | ||
274 | // Re-insert all of the old data (except deleted items) | 221 | if( bFill ) |
275 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
276 | { | 222 | { |
277 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | 223 | return aValues[nPos]; |
278 | { | ||
279 | insert( aOldKeys[j], aOldValues[j] ); | ||
280 | } | ||
281 | } | 224 | } |
282 | 225 | else | |
283 | // Delete all of the old data | ||
284 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
285 | { | 226 | { |
286 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | 227 | throw "Hey, no such thing..."; |
287 | { | ||
288 | va.destroy( &aOldValues[j] ); | ||
289 | ka.destroy( &aOldKeys[j] ); | ||
290 | } | ||
291 | } | 228 | } |
292 | va.deallocate( aOldValues, nOldCapacity ); | ||
293 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
294 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
295 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
296 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
297 | } | ||
298 | |||
299 | bool isFilled( uint32_t loc ) | ||
300 | { | ||
301 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
302 | } | ||
303 | |||
304 | bool isDeleted( uint32_t loc ) | ||
305 | { | ||
306 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
307 | } | 229 | } |
308 | 230 | ||
309 | typedef struct iterator | 231 | typedef struct iterator |
@@ -394,6 +316,13 @@ private: | |||
394 | nFilled++; | 316 | nFilled++; |
395 | } | 317 | } |
396 | 318 | ||
319 | void _erase( uint32_t loc ) | ||
320 | { | ||
321 | bDeleted[loc/32] |= (1<<(loc%32)); | ||
322 | va.destroy( &aValues[loc] ); | ||
323 | ka.destroy( &aKeys[loc] ); | ||
324 | } | ||
325 | |||
397 | std::pair<key,value> getAtPos( uint32_t nPos ) | 326 | std::pair<key,value> getAtPos( uint32_t nPos ) |
398 | { | 327 | { |
399 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); | 328 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); |
@@ -404,7 +333,8 @@ private: | |||
404 | for( uint32_t j = 0; j < nCapacity; j++ ) | 333 | for( uint32_t j = 0; j < nCapacity; j++ ) |
405 | { | 334 | { |
406 | if( isFilled( j ) ) | 335 | if( isFilled( j ) ) |
407 | return j; | 336 | if( !isDeleted( j ) ) |
337 | return j; | ||
408 | } | 338 | } |
409 | 339 | ||
410 | bFinished = true; | 340 | bFinished = true; |
@@ -415,12 +345,115 @@ private: | |||
415 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) | 345 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) |
416 | { | 346 | { |
417 | if( isFilled( j ) ) | 347 | if( isFilled( j ) ) |
418 | return j; | 348 | if( !isDeleted( j ) ) |
349 | return j; | ||
419 | } | 350 | } |
420 | 351 | ||
421 | bFinished = true; | 352 | bFinished = true; |
422 | } | 353 | } |
423 | 354 | ||
355 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
356 | { | ||
357 | uint32_t nCur = hash%nCapacity; | ||
358 | |||
359 | // First we scan to see if the key is already there, abort if we | ||
360 | // run out of probing room, or we find a non-filled entry | ||
361 | for( int8_t j = 0; | ||
362 | isFilled( nCur ) && j < 32; | ||
363 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
364 | ) | ||
365 | { | ||
366 | // Is this the same hash code we were looking for? | ||
367 | if( hash == aHashCodes[nCur] ) | ||
368 | { | ||
369 | // Skip over deleted entries. Deleted entries are also filled, | ||
370 | // so we only have to do this check here. | ||
371 | if( isDeleted( nCur ) ) | ||
372 | continue; | ||
373 | |||
374 | // Is it really the same key? (for safety) | ||
375 | if( __cmpHashKeys( aKeys[nCur], k ) == true ) | ||
376 | { | ||
377 | bFill = true; | ||
378 | return nCur; | ||
379 | } | ||
380 | } | ||
381 | } | ||
382 | |||
383 | // This is our insurance, if the table is full, then go ahead and | ||
384 | // rehash, then try again. | ||
385 | if( isFilled( nCur ) && rehash == true ) | ||
386 | { | ||
387 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
388 | |||
389 | // This is potentially dangerous, and could cause an infinite loop. | ||
390 | // Be careful writing probe, eh? | ||
391 | return probe( hash, k, bFill ); | ||
392 | } | ||
393 | |||
394 | bFill = false; | ||
395 | return nCur; | ||
396 | } | ||
397 | |||
398 | void reHash( uint32_t nNewSize ) | ||
399 | { | ||
400 | // Save all the old data | ||
401 | uint32_t nOldCapacity = nCapacity; | ||
402 | uint32_t *bOldFilled = bFilled; | ||
403 | uint32_t *aOldHashCodes = aHashCodes; | ||
404 | uint32_t nOldKeysSize = nKeysSize; | ||
405 | uint32_t *bOldDeleted = bDeleted; | ||
406 | value *aOldValues = aValues; | ||
407 | key *aOldKeys = aKeys; | ||
408 | |||
409 | // Calculate new sizes | ||
410 | nCapacity = nNewSize; | ||
411 | nKeysSize = bitsToBytes( nCapacity ); | ||
412 | |||
413 | // Allocate new memory + prep | ||
414 | bFilled = ca.allocate( nKeysSize ); | ||
415 | bDeleted = ca.allocate( nKeysSize ); | ||
416 | clearBits(); | ||
417 | |||
418 | aHashCodes = ca.allocate( nCapacity ); | ||
419 | aKeys = ka.allocate( nCapacity ); | ||
420 | aValues = va.allocate( nCapacity ); | ||
421 | |||
422 | // Re-insert all of the old data (except deleted items) | ||
423 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
424 | { | ||
425 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
426 | { | ||
427 | insert( aOldKeys[j], aOldValues[j] ); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | // Delete all of the old data | ||
432 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
433 | { | ||
434 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
435 | { | ||
436 | va.destroy( &aOldValues[j] ); | ||
437 | ka.destroy( &aOldKeys[j] ); | ||
438 | } | ||
439 | } | ||
440 | va.deallocate( aOldValues, nOldCapacity ); | ||
441 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
442 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
443 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
444 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
445 | } | ||
446 | |||
447 | bool isFilled( uint32_t loc ) | ||
448 | { | ||
449 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
450 | } | ||
451 | |||
452 | bool isDeleted( uint32_t loc ) | ||
453 | { | ||
454 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
455 | } | ||
456 | |||
424 | private: | 457 | private: |
425 | uint32_t nCapacity; | 458 | uint32_t nCapacity; |
426 | uint32_t nFilled; | 459 | uint32_t nFilled; |
@@ -437,6 +470,12 @@ private: | |||
437 | sizecalc szCalc; | 470 | sizecalc szCalc; |
438 | }; | 471 | }; |
439 | 472 | ||
473 | template<> uint32_t __calcHashCode<const int>( const int k ); | ||
474 | template<> bool __cmpHashKeys<const int>( const int a, const int b ); | ||
475 | |||
476 | template<> uint32_t __calcHashCode<int>( int k ); | ||
477 | template<> bool __cmpHashKeys<int>( int a, int b ); | ||
478 | |||
440 | template<> uint32_t __calcHashCode<const char *>( const char *k ); | 479 | template<> uint32_t __calcHashCode<const char *>( const char *k ); |
441 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); | 480 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); |
442 | 481 | ||
diff --git a/src/tests/hash.cpp b/src/tests/hash.cpp index 8406439..f9a8f12 100644 --- a/src/tests/hash.cpp +++ b/src/tests/hash.cpp | |||
@@ -67,11 +67,9 @@ int main() | |||
67 | } | 67 | } |
68 | 68 | ||
69 | printf("Getting\n-------------------\n\n"); | 69 | printf("Getting\n-------------------\n\n"); |
70 | printf("%d\n", sTest.get( names[10] ) ); | 70 | |
71 | printf("%d\n", (int)sTest[names[10]] ); | 71 | sTest.erase("Homer the Great"); |
72 | sTest[names[10]] = 22; | 72 | sTest["Bart's Comet"].erase(); |
73 | sTest["That one guy"] = 135; | ||
74 | printf("%d\n", (int)sTest[names[10]] ); | ||
75 | 73 | ||
76 | for( Hash<std::string, int>::iterator i = sTest.begin(); | 74 | for( Hash<std::string, int>::iterator i = sTest.begin(); |
77 | i != sTest.end(); i++ ) | 75 | i != sTest.end(); i++ ) |
@@ -79,5 +77,23 @@ int main() | |||
79 | Hash<std::string, int>::iterator j = i; | 77 | Hash<std::string, int>::iterator j = i; |
80 | printf("%d: %s\n", (*j).second, (*j).first.c_str() ); | 78 | printf("%d: %s\n", (*j).second, (*j).first.c_str() ); |
81 | } | 79 | } |
80 | |||
81 | for( int j = 0; j < 33; j++ ) | ||
82 | { | ||
83 | if( sTest[names[j]].isFilled() ) | ||
84 | { | ||
85 | if( sTest[names[j]] != j ) | ||
86 | { | ||
87 | printf("'%s' should be %d, is %d\n", | ||
88 | names[j], j, | ||
89 | sTest[names[j]].value() | ||
90 | ); | ||
91 | } | ||
92 | } | ||
93 | else | ||
94 | { | ||
95 | printf("Missing element %d, '%s'\n", j, names[j] ); | ||
96 | } | ||
97 | } | ||
82 | } | 98 | } |
83 | 99 | ||