diff options
| -rw-r--r-- | src/hash.cpp | 20 | ||||
| -rw-r--r-- | src/hash.h | 247 | ||||
| -rw-r--r-- | src/tests/hash.cpp | 26 |
3 files changed, 184 insertions, 109 deletions
diff --git a/src/hash.cpp b/src/hash.cpp index 14be208..0241753 100644 --- a/src/hash.cpp +++ b/src/hash.cpp | |||
| @@ -1,5 +1,25 @@ | |||
| 1 | #include "hash.h" | 1 | #include "hash.h" |
| 2 | 2 | ||
| 3 | template<> uint32_t __calcHashCode<const int>( const int k ) | ||
| 4 | { | ||
| 5 | return k; | ||
| 6 | } | ||
| 7 | |||
| 8 | template<> bool __cmpHashKeys<const int>( const int a, const int b ) | ||
| 9 | { | ||
| 10 | return a == b; | ||
| 11 | } | ||
| 12 | |||
| 13 | template<> uint32_t __calcHashCode<int>( int k ) | ||
| 14 | { | ||
| 15 | return k; | ||
| 16 | } | ||
| 17 | |||
| 18 | template<> bool __cmpHashKeys<int>( int a, int b ) | ||
| 19 | { | ||
| 20 | return a == b; | ||
| 21 | } | ||
| 22 | |||
| 3 | template<> | 23 | template<> |
| 4 | uint32_t __calcHashCode<const char *>( const char * k ) | 24 | uint32_t __calcHashCode<const char *>( const char * k ) |
| 5 | { | 25 | { |
| @@ -40,8 +40,9 @@ private: | |||
| 40 | { | 40 | { |
| 41 | } | 41 | } |
| 42 | 42 | ||
| 43 | HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, _value *pValue ) : | 43 | HashProxy( Hash<key, _value, sizecalc, keyalloc, valuealloc, challoc> &h, uint32_t nPos, _value *pValue ) : |
| 44 | hsh( h ), | 44 | hsh( h ), |
| 45 | nPos( nPos ), | ||
| 45 | pValue( pValue ), | 46 | pValue( pValue ), |
| 46 | bFilled( true ) | 47 | bFilled( true ) |
| 47 | { | 48 | { |
| @@ -74,6 +75,12 @@ public: | |||
| 74 | return bFilled; | 75 | return bFilled; |
| 75 | } | 76 | } |
| 76 | 77 | ||
| 78 | void erase() | ||
| 79 | { | ||
| 80 | if( bFilled ) | ||
| 81 | hsh._erase( nPos ); | ||
| 82 | } | ||
| 83 | |||
| 77 | _value operator=( _value nval ) | 84 | _value operator=( _value nval ) |
| 78 | { | 85 | { |
| 79 | if( bFilled ) | 86 | if( bFilled ) |
| @@ -119,10 +126,11 @@ public: | |||
| 119 | for( uint32_t j = 0; j < nCapacity; j++ ) | 126 | for( uint32_t j = 0; j < nCapacity; j++ ) |
| 120 | { | 127 | { |
| 121 | if( isFilled( j ) ) | 128 | if( isFilled( j ) ) |
| 122 | { | 129 | if( !isDeleted( j ) ) |
| 123 | va.destroy( &aValues[j] ); | 130 | { |
| 124 | ka.destroy( &aKeys[j] ); | 131 | va.destroy( &aValues[j] ); |
| 125 | } | 132 | ka.destroy( &aKeys[j] ); |
| 133 | } | ||
| 126 | } | 134 | } |
| 127 | va.deallocate( aValues, nCapacity ); | 135 | va.deallocate( aValues, nCapacity ); |
| 128 | ka.deallocate( aKeys, nCapacity ); | 136 | ka.deallocate( aKeys, nCapacity ); |
| @@ -167,7 +175,7 @@ public: | |||
| 167 | 175 | ||
| 168 | if( bFill ) | 176 | if( bFill ) |
| 169 | { | 177 | { |
| 170 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, &aValues[nPos] ); | 178 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, nPos, &aValues[nPos] ); |
| 171 | } | 179 | } |
| 172 | else | 180 | else |
| 173 | { | 181 | { |
| @@ -192,7 +200,7 @@ public: | |||
| 192 | } | 200 | } |
| 193 | } | 201 | } |
| 194 | 202 | ||
| 195 | value get( key k ) | 203 | void erase( key k ) |
| 196 | { | 204 | { |
| 197 | uint32_t hash = __calcHashCode( k ); | 205 | uint32_t hash = __calcHashCode( k ); |
| 198 | bool bFill; | 206 | bool bFill; |
| @@ -200,110 +208,24 @@ public: | |||
| 200 | 208 | ||
| 201 | if( bFill ) | 209 | if( bFill ) |
| 202 | { | 210 | { |
| 203 | return aValues[nPos]; | 211 | _erase( nPos ); |
| 204 | } | ||
| 205 | else | ||
| 206 | { | ||
| 207 | throw "Hey, no such thing..."; | ||
| 208 | } | ||
| 209 | } | ||
| 210 | |||
| 211 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
| 212 | { | ||
| 213 | uint32_t nCur = hash%nCapacity; | ||
| 214 | |||
| 215 | // First we scan to see if the key is already there, abort if we | ||
| 216 | // run out of probing room, or we find a non-filled entry | ||
| 217 | for( int8_t j = 0; | ||
| 218 | isFilled( nCur ) && j < 32; | ||
| 219 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
| 220 | ) | ||
| 221 | { | ||
| 222 | // Is this the same hash code we were looking for? | ||
| 223 | if( hash == aHashCodes[nCur] ) | ||
| 224 | { | ||
| 225 | // Is it really the same key? (for safety) | ||
| 226 | if( __cmpHashKeys( aKeys[nCur], k ) == true && | ||
| 227 | isDeleted( nCur ) == false ) | ||
| 228 | { | ||
| 229 | bFill = true; | ||
| 230 | return nCur; | ||
| 231 | } | ||
| 232 | } | ||
| 233 | } | 212 | } |
| 234 | |||
| 235 | // This is our insurance, if the table is full, then go ahead and | ||
| 236 | // rehash, then try again. | ||
| 237 | if( isFilled( nCur ) && rehash == true ) | ||
| 238 | { | ||
| 239 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
| 240 | |||
| 241 | // This is potentially dangerous, and could cause an infinite loop. | ||
| 242 | // Be careful writing probe, eh? | ||
| 243 | return probe( hash, k, bFill ); | ||
| 244 | } | ||
| 245 | |||
| 246 | bFill = false; | ||
| 247 | return nCur; | ||
| 248 | } | 213 | } |
| 249 | 214 | ||
| 250 | void reHash( uint32_t nNewSize ) | 215 | value get( key k ) |
| 251 | { | 216 | { |
| 252 | // Save all the old data | 217 | uint32_t hash = __calcHashCode( k ); |
| 253 | uint32_t nOldCapacity = nCapacity; | 218 | bool bFill; |
| 254 | uint32_t *bOldFilled = bFilled; | 219 | uint32_t nPos = probe( hash, k, bFill ); |
| 255 | uint32_t *aOldHashCodes = aHashCodes; | ||
| 256 | uint32_t nOldKeysSize = nKeysSize; | ||
| 257 | uint32_t *bOldDeleted = bDeleted; | ||
| 258 | value *aOldValues = aValues; | ||
| 259 | key *aOldKeys = aKeys; | ||
| 260 | |||
| 261 | // Calculate new sizes | ||
| 262 | nCapacity = nNewSize; | ||
| 263 | nKeysSize = bitsToBytes( nCapacity ); | ||
| 264 | |||
| 265 | // Allocate new memory + prep | ||
| 266 | bFilled = ca.allocate( nKeysSize ); | ||
| 267 | bDeleted = ca.allocate( nKeysSize ); | ||
| 268 | clearBits(); | ||
| 269 | |||
| 270 | aHashCodes = ca.allocate( nCapacity ); | ||
| 271 | aKeys = ka.allocate( nCapacity ); | ||
| 272 | aValues = va.allocate( nCapacity ); | ||
| 273 | 220 | ||
| 274 | // Re-insert all of the old data (except deleted items) | 221 | if( bFill ) |
| 275 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
| 276 | { | 222 | { |
| 277 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | 223 | return aValues[nPos]; |
| 278 | { | ||
| 279 | insert( aOldKeys[j], aOldValues[j] ); | ||
| 280 | } | ||
| 281 | } | 224 | } |
| 282 | 225 | else | |
| 283 | // Delete all of the old data | ||
| 284 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
| 285 | { | 226 | { |
| 286 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | 227 | throw "Hey, no such thing..."; |
| 287 | { | ||
| 288 | va.destroy( &aOldValues[j] ); | ||
| 289 | ka.destroy( &aOldKeys[j] ); | ||
| 290 | } | ||
| 291 | } | 228 | } |
| 292 | va.deallocate( aOldValues, nOldCapacity ); | ||
| 293 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
| 294 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
| 295 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
| 296 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
| 297 | } | ||
| 298 | |||
| 299 | bool isFilled( uint32_t loc ) | ||
| 300 | { | ||
| 301 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
| 302 | } | ||
| 303 | |||
| 304 | bool isDeleted( uint32_t loc ) | ||
| 305 | { | ||
| 306 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
| 307 | } | 229 | } |
| 308 | 230 | ||
| 309 | typedef struct iterator | 231 | typedef struct iterator |
| @@ -394,6 +316,13 @@ private: | |||
| 394 | nFilled++; | 316 | nFilled++; |
| 395 | } | 317 | } |
| 396 | 318 | ||
| 319 | void _erase( uint32_t loc ) | ||
| 320 | { | ||
| 321 | bDeleted[loc/32] |= (1<<(loc%32)); | ||
| 322 | va.destroy( &aValues[loc] ); | ||
| 323 | ka.destroy( &aKeys[loc] ); | ||
| 324 | } | ||
| 325 | |||
| 397 | std::pair<key,value> getAtPos( uint32_t nPos ) | 326 | std::pair<key,value> getAtPos( uint32_t nPos ) |
| 398 | { | 327 | { |
| 399 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); | 328 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); |
| @@ -404,7 +333,8 @@ private: | |||
| 404 | for( uint32_t j = 0; j < nCapacity; j++ ) | 333 | for( uint32_t j = 0; j < nCapacity; j++ ) |
| 405 | { | 334 | { |
| 406 | if( isFilled( j ) ) | 335 | if( isFilled( j ) ) |
| 407 | return j; | 336 | if( !isDeleted( j ) ) |
| 337 | return j; | ||
| 408 | } | 338 | } |
| 409 | 339 | ||
| 410 | bFinished = true; | 340 | bFinished = true; |
| @@ -415,12 +345,115 @@ private: | |||
| 415 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) | 345 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) |
| 416 | { | 346 | { |
| 417 | if( isFilled( j ) ) | 347 | if( isFilled( j ) ) |
| 418 | return j; | 348 | if( !isDeleted( j ) ) |
| 349 | return j; | ||
| 419 | } | 350 | } |
| 420 | 351 | ||
| 421 | bFinished = true; | 352 | bFinished = true; |
| 422 | } | 353 | } |
| 423 | 354 | ||
| 355 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
| 356 | { | ||
| 357 | uint32_t nCur = hash%nCapacity; | ||
| 358 | |||
| 359 | // First we scan to see if the key is already there, abort if we | ||
| 360 | // run out of probing room, or we find a non-filled entry | ||
| 361 | for( int8_t j = 0; | ||
| 362 | isFilled( nCur ) && j < 32; | ||
| 363 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
| 364 | ) | ||
| 365 | { | ||
| 366 | // Is this the same hash code we were looking for? | ||
| 367 | if( hash == aHashCodes[nCur] ) | ||
| 368 | { | ||
| 369 | // Skip over deleted entries. Deleted entries are also filled, | ||
| 370 | // so we only have to do this check here. | ||
| 371 | if( isDeleted( nCur ) ) | ||
| 372 | continue; | ||
| 373 | |||
| 374 | // Is it really the same key? (for safety) | ||
| 375 | if( __cmpHashKeys( aKeys[nCur], k ) == true ) | ||
| 376 | { | ||
| 377 | bFill = true; | ||
| 378 | return nCur; | ||
| 379 | } | ||
| 380 | } | ||
| 381 | } | ||
| 382 | |||
| 383 | // This is our insurance, if the table is full, then go ahead and | ||
| 384 | // rehash, then try again. | ||
| 385 | if( isFilled( nCur ) && rehash == true ) | ||
| 386 | { | ||
| 387 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
| 388 | |||
| 389 | // This is potentially dangerous, and could cause an infinite loop. | ||
| 390 | // Be careful writing probe, eh? | ||
| 391 | return probe( hash, k, bFill ); | ||
| 392 | } | ||
| 393 | |||
| 394 | bFill = false; | ||
| 395 | return nCur; | ||
| 396 | } | ||
| 397 | |||
| 398 | void reHash( uint32_t nNewSize ) | ||
| 399 | { | ||
| 400 | // Save all the old data | ||
| 401 | uint32_t nOldCapacity = nCapacity; | ||
| 402 | uint32_t *bOldFilled = bFilled; | ||
| 403 | uint32_t *aOldHashCodes = aHashCodes; | ||
| 404 | uint32_t nOldKeysSize = nKeysSize; | ||
| 405 | uint32_t *bOldDeleted = bDeleted; | ||
| 406 | value *aOldValues = aValues; | ||
| 407 | key *aOldKeys = aKeys; | ||
| 408 | |||
| 409 | // Calculate new sizes | ||
| 410 | nCapacity = nNewSize; | ||
| 411 | nKeysSize = bitsToBytes( nCapacity ); | ||
| 412 | |||
| 413 | // Allocate new memory + prep | ||
| 414 | bFilled = ca.allocate( nKeysSize ); | ||
| 415 | bDeleted = ca.allocate( nKeysSize ); | ||
| 416 | clearBits(); | ||
| 417 | |||
| 418 | aHashCodes = ca.allocate( nCapacity ); | ||
| 419 | aKeys = ka.allocate( nCapacity ); | ||
| 420 | aValues = va.allocate( nCapacity ); | ||
| 421 | |||
| 422 | // Re-insert all of the old data (except deleted items) | ||
| 423 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
| 424 | { | ||
| 425 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
| 426 | { | ||
| 427 | insert( aOldKeys[j], aOldValues[j] ); | ||
| 428 | } | ||
| 429 | } | ||
| 430 | |||
| 431 | // Delete all of the old data | ||
| 432 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
| 433 | { | ||
| 434 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
| 435 | { | ||
| 436 | va.destroy( &aOldValues[j] ); | ||
| 437 | ka.destroy( &aOldKeys[j] ); | ||
| 438 | } | ||
| 439 | } | ||
| 440 | va.deallocate( aOldValues, nOldCapacity ); | ||
| 441 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
| 442 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
| 443 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
| 444 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
| 445 | } | ||
| 446 | |||
| 447 | bool isFilled( uint32_t loc ) | ||
| 448 | { | ||
| 449 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
| 450 | } | ||
| 451 | |||
| 452 | bool isDeleted( uint32_t loc ) | ||
| 453 | { | ||
| 454 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
| 455 | } | ||
| 456 | |||
| 424 | private: | 457 | private: |
| 425 | uint32_t nCapacity; | 458 | uint32_t nCapacity; |
| 426 | uint32_t nFilled; | 459 | uint32_t nFilled; |
| @@ -437,6 +470,12 @@ private: | |||
| 437 | sizecalc szCalc; | 470 | sizecalc szCalc; |
| 438 | }; | 471 | }; |
| 439 | 472 | ||
| 473 | template<> uint32_t __calcHashCode<const int>( const int k ); | ||
| 474 | template<> bool __cmpHashKeys<const int>( const int a, const int b ); | ||
| 475 | |||
| 476 | template<> uint32_t __calcHashCode<int>( int k ); | ||
| 477 | template<> bool __cmpHashKeys<int>( int a, int b ); | ||
| 478 | |||
| 440 | template<> uint32_t __calcHashCode<const char *>( const char *k ); | 479 | template<> uint32_t __calcHashCode<const char *>( const char *k ); |
| 441 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); | 480 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); |
| 442 | 481 | ||
diff --git a/src/tests/hash.cpp b/src/tests/hash.cpp index 8406439..f9a8f12 100644 --- a/src/tests/hash.cpp +++ b/src/tests/hash.cpp | |||
| @@ -67,11 +67,9 @@ int main() | |||
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | printf("Getting\n-------------------\n\n"); | 69 | printf("Getting\n-------------------\n\n"); |
| 70 | printf("%d\n", sTest.get( names[10] ) ); | 70 | |
| 71 | printf("%d\n", (int)sTest[names[10]] ); | 71 | sTest.erase("Homer the Great"); |
| 72 | sTest[names[10]] = 22; | 72 | sTest["Bart's Comet"].erase(); |
| 73 | sTest["That one guy"] = 135; | ||
| 74 | printf("%d\n", (int)sTest[names[10]] ); | ||
| 75 | 73 | ||
| 76 | for( Hash<std::string, int>::iterator i = sTest.begin(); | 74 | for( Hash<std::string, int>::iterator i = sTest.begin(); |
| 77 | i != sTest.end(); i++ ) | 75 | i != sTest.end(); i++ ) |
| @@ -79,5 +77,23 @@ int main() | |||
| 79 | Hash<std::string, int>::iterator j = i; | 77 | Hash<std::string, int>::iterator j = i; |
| 80 | printf("%d: %s\n", (*j).second, (*j).first.c_str() ); | 78 | printf("%d: %s\n", (*j).second, (*j).first.c_str() ); |
| 81 | } | 79 | } |
| 80 | |||
| 81 | for( int j = 0; j < 33; j++ ) | ||
| 82 | { | ||
| 83 | if( sTest[names[j]].isFilled() ) | ||
| 84 | { | ||
| 85 | if( sTest[names[j]] != j ) | ||
| 86 | { | ||
| 87 | printf("'%s' should be %d, is %d\n", | ||
| 88 | names[j], j, | ||
| 89 | sTest[names[j]].value() | ||
| 90 | ); | ||
| 91 | } | ||
| 92 | } | ||
| 93 | else | ||
| 94 | { | ||
| 95 | printf("Missing element %d, '%s'\n", j, names[j] ); | ||
| 96 | } | ||
| 97 | } | ||
| 82 | } | 98 | } |
| 83 | 99 | ||
