diff options
author | Mike Buland <eichlan@xagasoft.com> | 2006-11-21 16:01:57 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2006-11-21 16:01:57 +0000 |
commit | 6f6bb9f9309a6d5e471579ec565d56e4d083dafb (patch) | |
tree | 5ab79eded3cdf59053e90cf24d977dca31fed495 /src/hash.h | |
parent | 525b50abe6b5a6e06c2b4ba327c9490de5277a5b (diff) | |
download | libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.gz libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.bz2 libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.tar.xz libbu++-6f6bb9f9309a6d5e471579ec565d56e4d083dafb.zip |
Added erase functionality and specializations for using ints as hash keys, so
it now does everything the old one did, and does it better, more easily, and
possibly faster.
Diffstat (limited to 'src/hash.h')
-rw-r--r-- | src/hash.h | 247 |
1 file changed, 143 insertions, 104 deletions
@@ -40,8 +40,9 @@ private: | |||
40 | { | 40 | { |
41 | } | 41 | } |
42 | 42 | ||
43 | HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, _value *pValue ) : | 43 | HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) : |
44 | hsh( h ), | 44 | hsh( h ), |
45 | nPos( nPos ), | ||
45 | pValue( pValue ), | 46 | pValue( pValue ), |
46 | bFilled( true ) | 47 | bFilled( true ) |
47 | { | 48 | { |
@@ -74,6 +75,12 @@ public: | |||
74 | return bFilled; | 75 | return bFilled; |
75 | } | 76 | } |
76 | 77 | ||
78 | void erase() | ||
79 | { | ||
80 | if( bFilled ) | ||
81 | hsh._erase( nPos ); | ||
82 | } | ||
83 | |||
77 | _value operator=( _value nval ) | 84 | _value operator=( _value nval ) |
78 | { | 85 | { |
79 | if( bFilled ) | 86 | if( bFilled ) |
@@ -119,10 +126,11 @@ public: | |||
119 | for( uint32_t j = 0; j < nCapacity; j++ ) | 126 | for( uint32_t j = 0; j < nCapacity; j++ ) |
120 | { | 127 | { |
121 | if( isFilled( j ) ) | 128 | if( isFilled( j ) ) |
122 | { | 129 | if( !isDeleted( j ) ) |
123 | va.destroy( &aValues[j] ); | 130 | { |
124 | ka.destroy( &aKeys[j] ); | 131 | va.destroy( &aValues[j] ); |
125 | } | 132 | ka.destroy( &aKeys[j] ); |
133 | } | ||
126 | } | 134 | } |
127 | va.deallocate( aValues, nCapacity ); | 135 | va.deallocate( aValues, nCapacity ); |
128 | ka.deallocate( aKeys, nCapacity ); | 136 | ka.deallocate( aKeys, nCapacity ); |
@@ -167,7 +175,7 @@ public: | |||
167 | 175 | ||
168 | if( bFill ) | 176 | if( bFill ) |
169 | { | 177 | { |
170 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &aValues[nPos] ); | 178 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] ); |
171 | } | 179 | } |
172 | else | 180 | else |
173 | { | 181 | { |
@@ -192,7 +200,7 @@ public: | |||
192 | } | 200 | } |
193 | } | 201 | } |
194 | 202 | ||
195 | value get( key k ) | 203 | void erase( key k ) |
196 | { | 204 | { |
197 | uint32_t hash = __calcHashCode( k ); | 205 | uint32_t hash = __calcHashCode( k ); |
198 | bool bFill; | 206 | bool bFill; |
@@ -200,110 +208,24 @@ public: | |||
200 | 208 | ||
201 | if( bFill ) | 209 | if( bFill ) |
202 | { | 210 | { |
203 | return aValues[nPos]; | 211 | _erase( nPos ); |
204 | } | ||
205 | else | ||
206 | { | ||
207 | throw "Hey, no such thing..."; | ||
208 | } | ||
209 | } | ||
210 | |||
211 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
212 | { | ||
213 | uint32_t nCur = hash%nCapacity; | ||
214 | |||
215 | // First we scan to see if the key is already there, abort if we | ||
216 | // run out of probing room, or we find a non-filled entry | ||
217 | for( int8_t j = 0; | ||
218 | isFilled( nCur ) && j < 32; | ||
219 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
220 | ) | ||
221 | { | ||
222 | // Is this the same hash code we were looking for? | ||
223 | if( hash == aHashCodes[nCur] ) | ||
224 | { | ||
225 | // Is it really the same key? (for safety) | ||
226 | if( __cmpHashKeys( aKeys[nCur], k ) == true && | ||
227 | isDeleted( nCur ) == false ) | ||
228 | { | ||
229 | bFill = true; | ||
230 | return nCur; | ||
231 | } | ||
232 | } | ||
233 | } | 212 | } |
234 | |||
235 | // This is our insurance, if the table is full, then go ahead and | ||
236 | // rehash, then try again. | ||
237 | if( isFilled( nCur ) && rehash == true ) | ||
238 | { | ||
239 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
240 | |||
241 | // This is potentially dangerous, and could cause an infinite loop. | ||
242 | // Be careful writing probe, eh? | ||
243 | return probe( hash, k, bFill ); | ||
244 | } | ||
245 | |||
246 | bFill = false; | ||
247 | return nCur; | ||
248 | } | 213 | } |
249 | 214 | ||
250 | void reHash( uint32_t nNewSize ) | 215 | value get( key k ) |
251 | { | 216 | { |
252 | // Save all the old data | 217 | uint32_t hash = __calcHashCode( k ); |
253 | uint32_t nOldCapacity = nCapacity; | 218 | bool bFill; |
254 | uint32_t *bOldFilled = bFilled; | 219 | uint32_t nPos = probe( hash, k, bFill ); |
255 | uint32_t *aOldHashCodes = aHashCodes; | ||
256 | uint32_t nOldKeysSize = nKeysSize; | ||
257 | uint32_t *bOldDeleted = bDeleted; | ||
258 | value *aOldValues = aValues; | ||
259 | key *aOldKeys = aKeys; | ||
260 | |||
261 | // Calculate new sizes | ||
262 | nCapacity = nNewSize; | ||
263 | nKeysSize = bitsToBytes( nCapacity ); | ||
264 | |||
265 | // Allocate new memory + prep | ||
266 | bFilled = ca.allocate( nKeysSize ); | ||
267 | bDeleted = ca.allocate( nKeysSize ); | ||
268 | clearBits(); | ||
269 | |||
270 | aHashCodes = ca.allocate( nCapacity ); | ||
271 | aKeys = ka.allocate( nCapacity ); | ||
272 | aValues = va.allocate( nCapacity ); | ||
273 | 220 | ||
274 | // Re-insert all of the old data (except deleted items) | 221 | if( bFill ) |
275 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
276 | { | 222 | { |
277 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | 223 | return aValues[nPos]; |
278 | { | ||
279 | insert( aOldKeys[j], aOldValues[j] ); | ||
280 | } | ||
281 | } | 224 | } |
282 | 225 | else | |
283 | // Delete all of the old data | ||
284 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
285 | { | 226 | { |
286 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | 227 | throw "Hey, no such thing..."; |
287 | { | ||
288 | va.destroy( &aOldValues[j] ); | ||
289 | ka.destroy( &aOldKeys[j] ); | ||
290 | } | ||
291 | } | 228 | } |
292 | va.deallocate( aOldValues, nOldCapacity ); | ||
293 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
294 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
295 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
296 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
297 | } | ||
298 | |||
299 | bool isFilled( uint32_t loc ) | ||
300 | { | ||
301 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
302 | } | ||
303 | |||
304 | bool isDeleted( uint32_t loc ) | ||
305 | { | ||
306 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
307 | } | 229 | } |
308 | 230 | ||
309 | typedef struct iterator | 231 | typedef struct iterator |
@@ -394,6 +316,13 @@ private: | |||
394 | nFilled++; | 316 | nFilled++; |
395 | } | 317 | } |
396 | 318 | ||
319 | void _erase( uint32_t loc ) | ||
320 | { | ||
321 | bDeleted[loc/32] |= (1<<(loc%32)); | ||
322 | va.destroy( &aValues[loc] ); | ||
323 | ka.destroy( &aKeys[loc] ); | ||
324 | } | ||
325 | |||
397 | std::pair<key,value> getAtPos( uint32_t nPos ) | 326 | std::pair<key,value> getAtPos( uint32_t nPos ) |
398 | { | 327 | { |
399 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); | 328 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); |
@@ -404,7 +333,8 @@ private: | |||
404 | for( uint32_t j = 0; j < nCapacity; j++ ) | 333 | for( uint32_t j = 0; j < nCapacity; j++ ) |
405 | { | 334 | { |
406 | if( isFilled( j ) ) | 335 | if( isFilled( j ) ) |
407 | return j; | 336 | if( !isDeleted( j ) ) |
337 | return j; | ||
408 | } | 338 | } |
409 | 339 | ||
410 | bFinished = true; | 340 | bFinished = true; |
@@ -415,12 +345,115 @@ private: | |||
415 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) | 345 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) |
416 | { | 346 | { |
417 | if( isFilled( j ) ) | 347 | if( isFilled( j ) ) |
418 | return j; | 348 | if( !isDeleted( j ) ) |
349 | return j; | ||
419 | } | 350 | } |
420 | 351 | ||
421 | bFinished = true; | 352 | bFinished = true; |
422 | } | 353 | } |
423 | 354 | ||
355 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
356 | { | ||
357 | uint32_t nCur = hash%nCapacity; | ||
358 | |||
359 | // First we scan to see if the key is already there, abort if we | ||
360 | // run out of probing room, or we find a non-filled entry | ||
361 | for( int8_t j = 0; | ||
362 | isFilled( nCur ) && j < 32; | ||
363 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
364 | ) | ||
365 | { | ||
366 | // Is this the same hash code we were looking for? | ||
367 | if( hash == aHashCodes[nCur] ) | ||
368 | { | ||
369 | // Skip over deleted entries. Deleted entries are also filled, | ||
370 | // so we only have to do this check here. | ||
371 | if( isDeleted( nCur ) ) | ||
372 | continue; | ||
373 | |||
374 | // Is it really the same key? (for safety) | ||
375 | if( __cmpHashKeys( aKeys[nCur], k ) == true ) | ||
376 | { | ||
377 | bFill = true; | ||
378 | return nCur; | ||
379 | } | ||
380 | } | ||
381 | } | ||
382 | |||
383 | // This is our insurance, if the table is full, then go ahead and | ||
384 | // rehash, then try again. | ||
385 | if( isFilled( nCur ) && rehash == true ) | ||
386 | { | ||
387 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
388 | |||
389 | // This is potentially dangerous, and could cause an infinite loop. | ||
390 | // Be careful writing probe, eh? | ||
391 | return probe( hash, k, bFill ); | ||
392 | } | ||
393 | |||
394 | bFill = false; | ||
395 | return nCur; | ||
396 | } | ||
397 | |||
398 | void reHash( uint32_t nNewSize ) | ||
399 | { | ||
400 | // Save all the old data | ||
401 | uint32_t nOldCapacity = nCapacity; | ||
402 | uint32_t *bOldFilled = bFilled; | ||
403 | uint32_t *aOldHashCodes = aHashCodes; | ||
404 | uint32_t nOldKeysSize = nKeysSize; | ||
405 | uint32_t *bOldDeleted = bDeleted; | ||
406 | value *aOldValues = aValues; | ||
407 | key *aOldKeys = aKeys; | ||
408 | |||
409 | // Calculate new sizes | ||
410 | nCapacity = nNewSize; | ||
411 | nKeysSize = bitsToBytes( nCapacity ); | ||
412 | |||
413 | // Allocate new memory + prep | ||
414 | bFilled = ca.allocate( nKeysSize ); | ||
415 | bDeleted = ca.allocate( nKeysSize ); | ||
416 | clearBits(); | ||
417 | |||
418 | aHashCodes = ca.allocate( nCapacity ); | ||
419 | aKeys = ka.allocate( nCapacity ); | ||
420 | aValues = va.allocate( nCapacity ); | ||
421 | |||
422 | // Re-insert all of the old data (except deleted items) | ||
423 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
424 | { | ||
425 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
426 | { | ||
427 | insert( aOldKeys[j], aOldValues[j] ); | ||
428 | } | ||
429 | } | ||
430 | |||
431 | // Delete all of the old data | ||
432 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
433 | { | ||
434 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
435 | { | ||
436 | va.destroy( &aOldValues[j] ); | ||
437 | ka.destroy( &aOldKeys[j] ); | ||
438 | } | ||
439 | } | ||
440 | va.deallocate( aOldValues, nOldCapacity ); | ||
441 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
442 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
443 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
444 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
445 | } | ||
446 | |||
447 | bool isFilled( uint32_t loc ) | ||
448 | { | ||
449 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
450 | } | ||
451 | |||
452 | bool isDeleted( uint32_t loc ) | ||
453 | { | ||
454 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
455 | } | ||
456 | |||
424 | private: | 457 | private: |
425 | uint32_t nCapacity; | 458 | uint32_t nCapacity; |
426 | uint32_t nFilled; | 459 | uint32_t nFilled; |
@@ -437,6 +470,12 @@ private: | |||
437 | sizecalc szCalc; | 470 | sizecalc szCalc; |
438 | }; | 471 | }; |
439 | 472 | ||
473 | template<> uint32_t __calcHashCode<const int>( const int k ); | ||
474 | template<> bool __cmpHashKeys<const int>( const int a, const int b ); | ||
475 | |||
476 | template<> uint32_t __calcHashCode<int>( int k ); | ||
477 | template<> bool __cmpHashKeys<int>( int a, int b ); | ||
478 | |||
440 | template<> uint32_t __calcHashCode<const char *>( const char *k ); | 479 | template<> uint32_t __calcHashCode<const char *>( const char *k ); |
441 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); | 480 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); |
442 | 481 | ||