diff options
Diffstat (limited to 'src/hash.h')
| -rw-r--r-- | src/hash.h | 447 |
1 files changed, 386 insertions, 61 deletions
| @@ -1,106 +1,431 @@ | |||
| 1 | #ifndef HASH_H | 1 | #ifndef HASH_H |
| 2 | #define HASH_H | 2 | #define HASH_H |
| 3 | /* | 3 | |
| 4 | #include <stddef.h> | 4 | #include <stddef.h> |
| 5 | #include <string.h> | 5 | #include <string.h> |
| 6 | #include <memory> | ||
| 7 | #include <iostream> | ||
| 6 | #include "hashable.h" | 8 | #include "hashable.h" |
| 7 | 9 | ||
| 8 | #define bitsToBytes( n ) (n/8+(n%8>0 ? 1 : 0)) | 10 | #define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0)) |
| 11 | |||
| 12 | template<typename T> | ||
| 13 | uint32_t __calcHashCode( T k ); | ||
| 14 | |||
| 15 | template<typename T> | ||
| 16 | bool __cmpHashKeys( T a, T b ); | ||
| 17 | |||
| 18 | struct __calcNextTSize_fast | ||
| 19 | { | ||
| 20 | uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const | ||
| 21 | { | ||
| 22 | return nCapacity*2+1; | ||
| 23 | } | ||
| 24 | }; | ||
| 25 | |||
| 26 | template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> > | ||
| 27 | class Hash; | ||
| 28 | |||
| 29 | template< typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc > | ||
| 30 | struct HashProxy | ||
| 31 | { | ||
| 32 | friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>; | ||
| 33 | private: | ||
| 34 | HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k, value v ) : | ||
| 35 | hsh( h ), | ||
| 36 | tKey( k ), | ||
| 37 | tValue( v ), | ||
| 38 | bFilled( true ) | ||
| 39 | { | ||
| 40 | } | ||
| 41 | |||
| 42 | HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k ) : | ||
| 43 | hsh( h ), | ||
| 44 | tKey( k ), | ||
| 45 | bFilled( false ) | ||
| 46 | { | ||
| 47 | } | ||
| 48 | |||
| 49 | Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh; | ||
| 50 | key tKey; | ||
| 51 | value tValue; | ||
| 52 | bool bFilled; | ||
| 53 | |||
| 54 | public: | ||
| 55 | operator value() | ||
| 56 | { | ||
| 57 | if( bFilled == false ) | ||
| 58 | throw "Nope, no data there"; | ||
| 59 | return tValue; | ||
| 60 | } | ||
| 9 | 61 | ||
| 10 | template<class key, class value> | 62 | value operator=( value nval ) |
| 63 | { | ||
| 64 | hsh.insert( tKey, nval ); | ||
| 65 | return nval; | ||
| 66 | } | ||
| 67 | |||
| 68 | }; | ||
| 69 | |||
| 70 | template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc > | ||
| 11 | class Hash | 71 | class Hash |
| 12 | { | 72 | { |
| 13 | class iterator | 73 | public: |
| 74 | Hash() : | ||
| 75 | nCapacity( 11 ), | ||
| 76 | nFilled( 0 ), | ||
| 77 | nDeleted( 0 ), | ||
| 78 | bFilled( NULL ), | ||
| 79 | aKeys( NULL ), | ||
| 80 | aValues( NULL ), | ||
| 81 | aHashCodes( NULL ) | ||
| 14 | { | 82 | { |
| 15 | friend class Hash<key, value>; | 83 | nKeysSize = bitsToBytes( nCapacity ); |
| 16 | private: | 84 | bFilled = ca.allocate( nKeysSize ); |
| 17 | iterator( Hash<key, value> *hsh, int nIndex, key keyval, bool bInsert ) : | 85 | bDeleted = ca.allocate( nKeysSize ); |
| 18 | hsh( hsh ), | 86 | clearBits(); |
| 19 | nIndex( nIndex ), | 87 | |
| 20 | keyval( keyval ), | 88 | aHashCodes = ca.allocate( nCapacity ); |
| 21 | bInsert( bInsert ) | 89 | aKeys = ka.allocate( nCapacity ); |
| 90 | aValues = va.allocate( nCapacity ); | ||
| 91 | } | ||
| 92 | |||
| 93 | virtual ~Hash() | ||
| 94 | { | ||
| 95 | for( uint32_t j = 0; j < nCapacity; j++ ) | ||
| 22 | { | 96 | { |
| 97 | if( isFilled( j ) ) | ||
| 98 | { | ||
| 99 | va.destroy( &aValues[j] ); | ||
| 100 | ka.destroy( &aKeys[j] ); | ||
| 101 | } | ||
| 23 | } | 102 | } |
| 103 | va.deallocate( aValues, nCapacity ); | ||
| 104 | ka.deallocate( aKeys, nCapacity ); | ||
| 105 | ca.deallocate( bFilled, nKeysSize ); | ||
| 106 | ca.deallocate( bDeleted, nKeysSize ); | ||
| 107 | ca.deallocate( aHashCodes, nCapacity ); | ||
| 108 | } | ||
| 24 | 109 | ||
| 25 | public: | 110 | void clearBits() |
| 26 | iterator() : | 111 | { |
| 27 | hsh( NULL ), | 112 | for( uint32_t j = 0; j < nKeysSize; j++ ) |
| 28 | nIndex( -1 ) | ||
| 29 | { | 113 | { |
| 114 | bFilled[j] = bDeleted[j] = 0; | ||
| 30 | } | 115 | } |
| 116 | } | ||
| 117 | |||
| 118 | int hasKey( key keyval ) | ||
| 119 | { | ||
| 120 | printf("%s\n", keyval ); | ||
| 121 | } | ||
| 122 | |||
| 123 | uint32_t getCapacity() | ||
| 124 | { | ||
| 125 | return nCapacity; | ||
| 126 | } | ||
| 127 | |||
| 128 | uint32_t getFill() | ||
| 129 | { | ||
| 130 | return nFilled; | ||
| 131 | } | ||
| 132 | |||
| 133 | uint32_t getDeleted() | ||
| 134 | { | ||
| 135 | return nDeleted; | ||
| 136 | } | ||
| 137 | |||
| 138 | HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k ) | ||
| 139 | { | ||
| 140 | uint32_t hash = __calcHashCode( k ); | ||
| 141 | bool bFill; | ||
| 142 | uint32_t nPos = probe( hash, k, bFill ); | ||
| 31 | 143 | ||
| 32 | iterator &operator=( iterator &src ) | 144 | if( bFill ) |
| 33 | { | 145 | { |
| 34 | this->hsh = src.hsh; | 146 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, aKeys[nPos], aValues[nPos] ); |
| 35 | this->nIndex = src.nIndex; | ||
| 36 | } | 147 | } |
| 148 | else | ||
| 149 | { | ||
| 150 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, k ); | ||
| 151 | } | ||
| 152 | } | ||
| 153 | |||
| 154 | void insert( key k, value v ) | ||
| 155 | { | ||
| 156 | uint32_t hash = __calcHashCode( k ); | ||
| 157 | bool bFill; | ||
| 158 | uint32_t nPos = probe( hash, k, bFill ); | ||
| 37 | 159 | ||
| 38 | iterator &operator=( value &src ) | 160 | if( bFill ) |
| 39 | { | 161 | { |
| 40 | if( bInsert ) | 162 | va.destroy( &aValues[nPos] ); |
| 41 | printf("You wanted to insert %d\n", src ); | 163 | va.construct( &aValues[nPos], v ); |
| 42 | else | 164 | } |
| 43 | printf("You wanted to insert %d\n", src ); | 165 | else |
| 166 | { | ||
| 167 | va.construct( &aValues[nPos], v ); | ||
| 168 | ka.construct( &aKeys[nPos], k ); | ||
| 169 | fill( nPos ); | ||
| 170 | aHashCodes[nPos] = hash; | ||
| 171 | nFilled++; | ||
| 44 | } | 172 | } |
| 173 | } | ||
| 45 | 174 | ||
| 46 | private: | 175 | value get( key k ) |
| 47 | Hash<key, value> *hsh; | 176 | { |
| 48 | int nIndex; | 177 | uint32_t hash = __calcHashCode( k ); |
| 49 | bool bInsert; | 178 | bool bFill; |
| 50 | key keyval; | 179 | uint32_t nPos = probe( hash, k, bFill ); |
| 51 | }; | 180 | |
| 181 | if( bFill ) | ||
| 182 | { | ||
| 183 | return aValues[nPos]; | ||
| 184 | } | ||
| 185 | else | ||
| 186 | { | ||
| 187 | throw "Hey, no such thing..."; | ||
| 188 | } | ||
| 189 | } | ||
| 190 | |||
| 191 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
| 192 | { | ||
| 193 | uint32_t nCur = hash%nCapacity; | ||
| 194 | |||
| 195 | // First we scan to see if the key is already there, abort if we | ||
| 196 | // run out of probing room, or we find a non-filled entry | ||
| 197 | for( int8_t j = 0; | ||
| 198 | isFilled( nCur ) && j < 32; | ||
| 199 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
| 200 | ) | ||
| 201 | { | ||
| 202 | // Is this the same hash code we were looking for? | ||
| 203 | if( hash == aHashCodes[nCur] ) | ||
| 204 | { | ||
| 205 | // Is it really the same key? (for safety) | ||
| 206 | if( __cmpHashKeys( aKeys[nCur], k ) == true && | ||
| 207 | isDeleted( nCur ) == false ) | ||
| 208 | { | ||
| 209 | bFill = true; | ||
| 210 | return nCur; | ||
| 211 | } | ||
| 212 | } | ||
| 213 | } | ||
| 214 | |||
| 215 | // This is our insurance, if the table is full, then go ahead and | ||
| 216 | // rehash, then try again. | ||
| 217 | if( isFilled( nCur ) && rehash == true ) | ||
| 218 | { | ||
| 219 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
| 220 | |||
| 221 | // This is potentially dangerous, and could cause an infinite loop. | ||
| 222 | // Be careful writing probe, eh? | ||
| 223 | return probe( hash, k, bFill ); | ||
| 224 | } | ||
| 225 | |||
| 226 | bFill = false; | ||
| 227 | return nCur; | ||
| 228 | } | ||
| 229 | |||
| 230 | void reHash( uint32_t nNewSize ) | ||
| 231 | { | ||
| 232 | // Save all the old data | ||
| 233 | uint32_t nOldCapacity = nCapacity; | ||
| 234 | uint32_t *bOldFilled = bFilled; | ||
| 235 | uint32_t *aOldHashCodes = aHashCodes; | ||
| 236 | uint32_t nOldKeysSize = nKeysSize; | ||
| 237 | uint32_t *bOldDeleted = bDeleted; | ||
| 238 | value *aOldValues = aValues; | ||
| 239 | key *aOldKeys = aKeys; | ||
| 240 | |||
| 241 | // Calculate new sizes | ||
| 242 | nCapacity = nNewSize; | ||
| 243 | nKeysSize = bitsToBytes( nCapacity ); | ||
| 244 | |||
| 245 | // Allocate new memory + prep | ||
| 246 | bFilled = ca.allocate( nKeysSize ); | ||
| 247 | bDeleted = ca.allocate( nKeysSize ); | ||
| 248 | clearBits(); | ||
| 249 | |||
| 250 | aHashCodes = ca.allocate( nCapacity ); | ||
| 251 | aKeys = ka.allocate( nCapacity ); | ||
| 252 | aValues = va.allocate( nCapacity ); | ||
| 253 | |||
| 254 | // Re-insert all of the old data (except deleted items) | ||
| 255 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
| 256 | { | ||
| 257 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
| 258 | { | ||
| 259 | insert( aOldKeys[j], aOldValues[j] ); | ||
| 260 | } | ||
| 261 | } | ||
| 262 | |||
| 263 | // Delete all of the old data | ||
| 264 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
| 265 | { | ||
| 266 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
| 267 | { | ||
| 268 | va.destroy( &aOldValues[j] ); | ||
| 269 | ka.destroy( &aOldKeys[j] ); | ||
| 270 | } | ||
| 271 | } | ||
| 272 | va.deallocate( aOldValues, nOldCapacity ); | ||
| 273 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
| 274 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
| 275 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
| 276 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
| 277 | } | ||
| 52 | 278 | ||
| 53 | template<class refval> | 279 | void fill( uint32_t loc ) |
| 54 | class VRef | ||
| 55 | { | 280 | { |
| 281 | bFilled[loc/32] |= (1<<(loc%32)); | ||
| 282 | } | ||
| 283 | |||
| 284 | bool isFilled( uint32_t loc ) | ||
| 285 | { | ||
| 286 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
| 287 | } | ||
| 288 | |||
| 289 | bool isDeleted( uint32_t loc ) | ||
| 290 | { | ||
| 291 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
| 292 | } | ||
| 293 | |||
| 294 | typedef struct iterator | ||
| 295 | { | ||
| 296 | friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>; | ||
| 297 | private: | ||
| 298 | iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) : | ||
| 299 | hsh( hsh ), | ||
| 300 | bFinished( false ), | ||
| 301 | nPos( 0 ) | ||
| 302 | { | ||
| 303 | nPos = hsh.getFirstPos( bFinished ); | ||
| 304 | } | ||
| 305 | |||
| 306 | iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) : | ||
| 307 | hsh( hsh ), | ||
| 308 | nPos( 0 ), | ||
| 309 | bFinished( bDone ) | ||
| 310 | { | ||
| 311 | } | ||
| 312 | |||
| 313 | Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh; | ||
| 314 | |||
| 315 | uint32_t nPos; | ||
| 316 | bool bFinished; | ||
| 317 | |||
| 56 | public: | 318 | public: |
| 57 | VRef( refval &data ) : | 319 | iterator operator++( int ) |
| 58 | data( data ) | ||
| 59 | { | 320 | { |
| 321 | if( bFinished == false ) | ||
| 322 | nPos = hsh.getNextPos( nPos, bFinished ); | ||
| 323 | |||
| 324 | return *this; | ||
| 325 | } | ||
| 326 | |||
| 327 | iterator operator++() | ||
| 328 | { | ||
| 329 | if( bFinished == false ) | ||
| 330 | nPos = hsh.getNextPos( nPos, bFinished ); | ||
| 331 | |||
| 332 | return *this; | ||
| 333 | } | ||
| 334 | |||
| 335 | bool operator==( const iterator &oth ) | ||
| 336 | { | ||
| 337 | if( bFinished != oth.bFinished ) | ||
| 338 | return false; | ||
| 339 | if( bFinished == true ) | ||
| 340 | { | ||
| 341 | return true; | ||
| 342 | } | ||
| 343 | else | ||
| 344 | { | ||
| 345 | if( oth.nPos == nPos ) | ||
| 346 | return true; | ||
| 347 | return false; | ||
| 348 | } | ||
| 349 | } | ||
| 350 | |||
| 351 | bool operator!=( const iterator &oth ) | ||
| 352 | { | ||
| 353 | return !(*this == oth ); | ||
| 354 | } | ||
| 355 | |||
| 356 | std::pair<key,value> operator *() | ||
| 357 | { | ||
| 358 | return hsh.getAtPos( nPos ); | ||
| 60 | } | 359 | } |
| 61 | refval &data; | ||
| 62 | }; | 360 | }; |
| 63 | 361 | ||
| 64 | public: | 362 | iterator begin() |
| 65 | Hash() : | ||
| 66 | nCapacity( 11 ), | ||
| 67 | nFilled( 0 ), | ||
| 68 | bFilled( NULL ), | ||
| 69 | aKeys( NULL ), | ||
| 70 | aValues( NULL ), | ||
| 71 | aHashCodes( NULL ) | ||
| 72 | { | 363 | { |
| 73 | int nKeysSize = bitsToBytes( nCapacity ); | 364 | return iterator( *this ); |
| 74 | bFilled = new unsigned char[ nKeysSize ]; | 365 | } |
| 75 | memset( bFilled, 0, nKeysSize ); | 366 | |
| 76 | 367 | iterator end() | |
| 77 | aKeys = new VRef<key>*[nCapacity]; | 368 | { |
| 78 | aValues = new value[nCapacity]; | 369 | return iterator( *this, true ); |
| 79 | } | 370 | } |
| 80 | 371 | ||
| 81 | virtual ~Hash() | 372 | private: |
| 373 | std::pair<key,value> getAtPos( uint32_t nPos ) | ||
| 82 | { | 374 | { |
| 375 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); | ||
| 83 | } | 376 | } |
| 84 | 377 | ||
| 85 | iterator operator[]( key keyval ) | 378 | uint32_t getFirstPos( bool &bFinished ) |
| 86 | { | 379 | { |
| 87 | //iterator i( this, 4, keyval, true ); | 380 | for( uint32_t j = 0; j < nCapacity; j++ ) |
| 88 | //return i; | 381 | { |
| 89 | printf("%s\n", keyval.getString() ); | 382 | if( isFilled( j ) ) |
| 383 | return j; | ||
| 384 | } | ||
| 385 | |||
| 386 | bFinished = true; | ||
| 90 | } | 387 | } |
| 91 | 388 | ||
| 92 | int hasKey( key keyval ) | 389 | uint32_t getNextPos( uint32_t nPos, bool &bFinished ) |
| 93 | { | 390 | { |
| 94 | printf("%s\n", keyval.getString() ); | 391 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) |
| 392 | { | ||
| 393 | if( isFilled( j ) ) | ||
| 394 | return j; | ||
| 395 | } | ||
| 396 | |||
| 397 | bFinished = true; | ||
| 95 | } | 398 | } |
| 96 | 399 | ||
| 97 | private: | 400 | private: |
| 98 | int nCapacity; | 401 | uint32_t nCapacity; |
| 99 | int nFilled; | 402 | uint32_t nFilled; |
| 100 | unsigned char *bFilled; | 403 | uint32_t nDeleted; |
| 101 | VRef<key> **aKeys; | 404 | uint32_t *bFilled; |
| 102 | unsigned long int *aHashCodes; | 405 | uint32_t *bDeleted; |
| 406 | uint32_t *aHashCodes; | ||
| 407 | uint32_t nKeysSize; | ||
| 103 | value *aValues; | 408 | value *aValues; |
| 409 | key *aKeys; | ||
| 410 | valuealloc va; | ||
| 411 | keyalloc ka; | ||
| 412 | challoc ca; | ||
| 413 | sizecalc szCalc; | ||
| 104 | }; | 414 | }; |
| 105 | */ | 415 | |
| 416 | template<> uint32_t __calcHashCode<const char *>( const char *k ); | ||
| 417 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); | ||
| 418 | |||
| 419 | template<> uint32_t __calcHashCode<char *>( char *k ); | ||
| 420 | template<> bool __cmpHashKeys<char *>( char *a, char *b ); | ||
| 421 | |||
| 422 | template<> uint32_t __calcHashCode<const std::string>( const std::string k ); | ||
| 423 | template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b ); | ||
| 424 | |||
| 425 | template<> uint32_t __calcHashCode<std::string>( std::string k ); | ||
| 426 | template<> bool __cmpHashKeys<std::string>( std::string a, std::string b ); | ||
| 427 | |||
| 428 | template<> uint32_t __calcHashCode<Hashable &>( Hashable &k ); | ||
| 429 | template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b ); | ||
| 430 | |||
| 106 | #endif | 431 | #endif |
