diff options
Diffstat (limited to '')
-rw-r--r-- | src/hash.h | 447 |
1 files changed, 386 insertions, 61 deletions
@@ -1,106 +1,431 @@ | |||
1 | #ifndef HASH_H | 1 | #ifndef HASH_H |
2 | #define HASH_H | 2 | #define HASH_H |
3 | /* | 3 | |
4 | #include <stddef.h> | 4 | #include <stddef.h> |
5 | #include <string.h> | 5 | #include <string.h> |
6 | #include <memory> | ||
7 | #include <iostream> | ||
6 | #include "hashable.h" | 8 | #include "hashable.h" |
7 | 9 | ||
8 | #define bitsToBytes( n ) (n/8+(n%8>0 ? 1 : 0)) | 10 | #define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0)) |
11 | |||
12 | template<typename T> | ||
13 | uint32_t __calcHashCode( T k ); | ||
14 | |||
15 | template<typename T> | ||
16 | bool __cmpHashKeys( T a, T b ); | ||
17 | |||
18 | struct __calcNextTSize_fast | ||
19 | { | ||
20 | uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const | ||
21 | { | ||
22 | return nCapacity*2+1; | ||
23 | } | ||
24 | }; | ||
25 | |||
26 | template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> > | ||
27 | class Hash; | ||
28 | |||
29 | template< typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc > | ||
30 | struct HashProxy | ||
31 | { | ||
32 | friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>; | ||
33 | private: | ||
34 | HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k, value v ) : | ||
35 | hsh( h ), | ||
36 | tKey( k ), | ||
37 | tValue( v ), | ||
38 | bFilled( true ) | ||
39 | { | ||
40 | } | ||
41 | |||
42 | HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k ) : | ||
43 | hsh( h ), | ||
44 | tKey( k ), | ||
45 | bFilled( false ) | ||
46 | { | ||
47 | } | ||
48 | |||
49 | Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh; | ||
50 | key tKey; | ||
51 | value tValue; | ||
52 | bool bFilled; | ||
53 | |||
54 | public: | ||
55 | operator value() | ||
56 | { | ||
57 | if( bFilled == false ) | ||
58 | throw "Nope, no data there"; | ||
59 | return tValue; | ||
60 | } | ||
9 | 61 | ||
10 | template<class key, class value> | 62 | value operator=( value nval ) |
63 | { | ||
64 | hsh.insert( tKey, nval ); | ||
65 | return nval; | ||
66 | } | ||
67 | |||
68 | }; | ||
69 | |||
70 | template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc > | ||
11 | class Hash | 71 | class Hash |
12 | { | 72 | { |
13 | class iterator | 73 | public: |
74 | Hash() : | ||
75 | nCapacity( 11 ), | ||
76 | nFilled( 0 ), | ||
77 | nDeleted( 0 ), | ||
78 | bFilled( NULL ), | ||
79 | aKeys( NULL ), | ||
80 | aValues( NULL ), | ||
81 | aHashCodes( NULL ) | ||
14 | { | 82 | { |
15 | friend class Hash<key, value>; | 83 | nKeysSize = bitsToBytes( nCapacity ); |
16 | private: | 84 | bFilled = ca.allocate( nKeysSize ); |
17 | iterator( Hash<key, value> *hsh, int nIndex, key keyval, bool bInsert ) : | 85 | bDeleted = ca.allocate( nKeysSize ); |
18 | hsh( hsh ), | 86 | clearBits(); |
19 | nIndex( nIndex ), | 87 | |
20 | keyval( keyval ), | 88 | aHashCodes = ca.allocate( nCapacity ); |
21 | bInsert( bInsert ) | 89 | aKeys = ka.allocate( nCapacity ); |
90 | aValues = va.allocate( nCapacity ); | ||
91 | } | ||
92 | |||
93 | virtual ~Hash() | ||
94 | { | ||
95 | for( uint32_t j = 0; j < nCapacity; j++ ) | ||
22 | { | 96 | { |
97 | if( isFilled( j ) ) | ||
98 | { | ||
99 | va.destroy( &aValues[j] ); | ||
100 | ka.destroy( &aKeys[j] ); | ||
101 | } | ||
23 | } | 102 | } |
103 | va.deallocate( aValues, nCapacity ); | ||
104 | ka.deallocate( aKeys, nCapacity ); | ||
105 | ca.deallocate( bFilled, nKeysSize ); | ||
106 | ca.deallocate( bDeleted, nKeysSize ); | ||
107 | ca.deallocate( aHashCodes, nCapacity ); | ||
108 | } | ||
24 | 109 | ||
25 | public: | 110 | void clearBits() |
26 | iterator() : | 111 | { |
27 | hsh( NULL ), | 112 | for( uint32_t j = 0; j < nKeysSize; j++ ) |
28 | nIndex( -1 ) | ||
29 | { | 113 | { |
114 | bFilled[j] = bDeleted[j] = 0; | ||
30 | } | 115 | } |
116 | } | ||
117 | |||
118 | int hasKey( key keyval ) | ||
119 | { | ||
120 | printf("%s\n", keyval ); | ||
121 | } | ||
122 | |||
123 | uint32_t getCapacity() | ||
124 | { | ||
125 | return nCapacity; | ||
126 | } | ||
127 | |||
128 | uint32_t getFill() | ||
129 | { | ||
130 | return nFilled; | ||
131 | } | ||
132 | |||
133 | uint32_t getDeleted() | ||
134 | { | ||
135 | return nDeleted; | ||
136 | } | ||
137 | |||
138 | HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k ) | ||
139 | { | ||
140 | uint32_t hash = __calcHashCode( k ); | ||
141 | bool bFill; | ||
142 | uint32_t nPos = probe( hash, k, bFill ); | ||
31 | 143 | ||
32 | iterator &operator=( iterator &src ) | 144 | if( bFill ) |
33 | { | 145 | { |
34 | this->hsh = src.hsh; | 146 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, aKeys[nPos], aValues[nPos] ); |
35 | this->nIndex = src.nIndex; | ||
36 | } | 147 | } |
148 | else | ||
149 | { | ||
150 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, k ); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | void insert( key k, value v ) | ||
155 | { | ||
156 | uint32_t hash = __calcHashCode( k ); | ||
157 | bool bFill; | ||
158 | uint32_t nPos = probe( hash, k, bFill ); | ||
37 | 159 | ||
38 | iterator &operator=( value &src ) | 160 | if( bFill ) |
39 | { | 161 | { |
40 | if( bInsert ) | 162 | va.destroy( &aValues[nPos] ); |
41 | printf("You wanted to insert %d\n", src ); | 163 | va.construct( &aValues[nPos], v ); |
42 | else | 164 | } |
43 | printf("You wanted to insert %d\n", src ); | 165 | else |
166 | { | ||
167 | va.construct( &aValues[nPos], v ); | ||
168 | ka.construct( &aKeys[nPos], k ); | ||
169 | fill( nPos ); | ||
170 | aHashCodes[nPos] = hash; | ||
171 | nFilled++; | ||
44 | } | 172 | } |
173 | } | ||
45 | 174 | ||
46 | private: | 175 | value get( key k ) |
47 | Hash<key, value> *hsh; | 176 | { |
48 | int nIndex; | 177 | uint32_t hash = __calcHashCode( k ); |
49 | bool bInsert; | 178 | bool bFill; |
50 | key keyval; | 179 | uint32_t nPos = probe( hash, k, bFill ); |
51 | }; | 180 | |
181 | if( bFill ) | ||
182 | { | ||
183 | return aValues[nPos]; | ||
184 | } | ||
185 | else | ||
186 | { | ||
187 | throw "Hey, no such thing..."; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
192 | { | ||
193 | uint32_t nCur = hash%nCapacity; | ||
194 | |||
195 | // First we scan to see if the key is already there, abort if we | ||
196 | // run out of probing room, or we find a non-filled entry | ||
197 | for( int8_t j = 0; | ||
198 | isFilled( nCur ) && j < 32; | ||
199 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
200 | ) | ||
201 | { | ||
202 | // Is this the same hash code we were looking for? | ||
203 | if( hash == aHashCodes[nCur] ) | ||
204 | { | ||
205 | // Is it really the same key? (for safety) | ||
206 | if( __cmpHashKeys( aKeys[nCur], k ) == true && | ||
207 | isDeleted( nCur ) == false ) | ||
208 | { | ||
209 | bFill = true; | ||
210 | return nCur; | ||
211 | } | ||
212 | } | ||
213 | } | ||
214 | |||
215 | // This is our insurance, if the table is full, then go ahead and | ||
216 | // rehash, then try again. | ||
217 | if( isFilled( nCur ) && rehash == true ) | ||
218 | { | ||
219 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
220 | |||
221 | // This is potentially dangerous, and could cause an infinite loop. | ||
222 | // Be careful writing probe, eh? | ||
223 | return probe( hash, k, bFill ); | ||
224 | } | ||
225 | |||
226 | bFill = false; | ||
227 | return nCur; | ||
228 | } | ||
229 | |||
230 | void reHash( uint32_t nNewSize ) | ||
231 | { | ||
232 | // Save all the old data | ||
233 | uint32_t nOldCapacity = nCapacity; | ||
234 | uint32_t *bOldFilled = bFilled; | ||
235 | uint32_t *aOldHashCodes = aHashCodes; | ||
236 | uint32_t nOldKeysSize = nKeysSize; | ||
237 | uint32_t *bOldDeleted = bDeleted; | ||
238 | value *aOldValues = aValues; | ||
239 | key *aOldKeys = aKeys; | ||
240 | |||
241 | // Calculate new sizes | ||
242 | nCapacity = nNewSize; | ||
243 | nKeysSize = bitsToBytes( nCapacity ); | ||
244 | |||
245 | // Allocate new memory + prep | ||
246 | bFilled = ca.allocate( nKeysSize ); | ||
247 | bDeleted = ca.allocate( nKeysSize ); | ||
248 | clearBits(); | ||
249 | |||
250 | aHashCodes = ca.allocate( nCapacity ); | ||
251 | aKeys = ka.allocate( nCapacity ); | ||
252 | aValues = va.allocate( nCapacity ); | ||
253 | |||
254 | // Re-insert all of the old data (except deleted items) | ||
255 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
256 | { | ||
257 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
258 | { | ||
259 | insert( aOldKeys[j], aOldValues[j] ); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | // Delete all of the old data | ||
264 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
265 | { | ||
266 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
267 | { | ||
268 | va.destroy( &aOldValues[j] ); | ||
269 | ka.destroy( &aOldKeys[j] ); | ||
270 | } | ||
271 | } | ||
272 | va.deallocate( aOldValues, nOldCapacity ); | ||
273 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
274 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
275 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
276 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
277 | } | ||
52 | 278 | ||
53 | template<class refval> | 279 | void fill( uint32_t loc ) |
54 | class VRef | ||
55 | { | 280 | { |
281 | bFilled[loc/32] |= (1<<(loc%32)); | ||
282 | } | ||
283 | |||
284 | bool isFilled( uint32_t loc ) | ||
285 | { | ||
286 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
287 | } | ||
288 | |||
289 | bool isDeleted( uint32_t loc ) | ||
290 | { | ||
291 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
292 | } | ||
293 | |||
294 | typedef struct iterator | ||
295 | { | ||
296 | friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>; | ||
297 | private: | ||
298 | iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) : | ||
299 | hsh( hsh ), | ||
300 | bFinished( false ), | ||
301 | nPos( 0 ) | ||
302 | { | ||
303 | nPos = hsh.getFirstPos( bFinished ); | ||
304 | } | ||
305 | |||
306 | iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) : | ||
307 | hsh( hsh ), | ||
308 | nPos( 0 ), | ||
309 | bFinished( bDone ) | ||
310 | { | ||
311 | } | ||
312 | |||
313 | Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh; | ||
314 | |||
315 | uint32_t nPos; | ||
316 | bool bFinished; | ||
317 | |||
56 | public: | 318 | public: |
57 | VRef( refval &data ) : | 319 | iterator operator++( int ) |
58 | data( data ) | ||
59 | { | 320 | { |
321 | if( bFinished == false ) | ||
322 | nPos = hsh.getNextPos( nPos, bFinished ); | ||
323 | |||
324 | return *this; | ||
325 | } | ||
326 | |||
327 | iterator operator++() | ||
328 | { | ||
329 | if( bFinished == false ) | ||
330 | nPos = hsh.getNextPos( nPos, bFinished ); | ||
331 | |||
332 | return *this; | ||
333 | } | ||
334 | |||
335 | bool operator==( const iterator &oth ) | ||
336 | { | ||
337 | if( bFinished != oth.bFinished ) | ||
338 | return false; | ||
339 | if( bFinished == true ) | ||
340 | { | ||
341 | return true; | ||
342 | } | ||
343 | else | ||
344 | { | ||
345 | if( oth.nPos == nPos ) | ||
346 | return true; | ||
347 | return false; | ||
348 | } | ||
349 | } | ||
350 | |||
351 | bool operator!=( const iterator &oth ) | ||
352 | { | ||
353 | return !(*this == oth ); | ||
354 | } | ||
355 | |||
356 | std::pair<key,value> operator *() | ||
357 | { | ||
358 | return hsh.getAtPos( nPos ); | ||
60 | } | 359 | } |
61 | refval &data; | ||
62 | }; | 360 | }; |
63 | 361 | ||
64 | public: | 362 | iterator begin() |
65 | Hash() : | ||
66 | nCapacity( 11 ), | ||
67 | nFilled( 0 ), | ||
68 | bFilled( NULL ), | ||
69 | aKeys( NULL ), | ||
70 | aValues( NULL ), | ||
71 | aHashCodes( NULL ) | ||
72 | { | 363 | { |
73 | int nKeysSize = bitsToBytes( nCapacity ); | 364 | return iterator( *this ); |
74 | bFilled = new unsigned char[ nKeysSize ]; | 365 | } |
75 | memset( bFilled, 0, nKeysSize ); | 366 | |
76 | 367 | iterator end() | |
77 | aKeys = new VRef<key>*[nCapacity]; | 368 | { |
78 | aValues = new value[nCapacity]; | 369 | return iterator( *this, true ); |
79 | } | 370 | } |
80 | 371 | ||
81 | virtual ~Hash() | 372 | private: |
373 | std::pair<key,value> getAtPos( uint32_t nPos ) | ||
82 | { | 374 | { |
375 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); | ||
83 | } | 376 | } |
84 | 377 | ||
85 | iterator operator[]( key keyval ) | 378 | uint32_t getFirstPos( bool &bFinished ) |
86 | { | 379 | { |
87 | //iterator i( this, 4, keyval, true ); | 380 | for( uint32_t j = 0; j < nCapacity; j++ ) |
88 | //return i; | 381 | { |
89 | printf("%s\n", keyval.getString() ); | 382 | if( isFilled( j ) ) |
383 | return j; | ||
384 | } | ||
385 | |||
386 | bFinished = true; | ||
90 | } | 387 | } |
91 | 388 | ||
92 | int hasKey( key keyval ) | 389 | uint32_t getNextPos( uint32_t nPos, bool &bFinished ) |
93 | { | 390 | { |
94 | printf("%s\n", keyval.getString() ); | 391 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) |
392 | { | ||
393 | if( isFilled( j ) ) | ||
394 | return j; | ||
395 | } | ||
396 | |||
397 | bFinished = true; | ||
95 | } | 398 | } |
96 | 399 | ||
97 | private: | 400 | private: |
98 | int nCapacity; | 401 | uint32_t nCapacity; |
99 | int nFilled; | 402 | uint32_t nFilled; |
100 | unsigned char *bFilled; | 403 | uint32_t nDeleted; |
101 | VRef<key> **aKeys; | 404 | uint32_t *bFilled; |
102 | unsigned long int *aHashCodes; | 405 | uint32_t *bDeleted; |
406 | uint32_t *aHashCodes; | ||
407 | uint32_t nKeysSize; | ||
103 | value *aValues; | 408 | value *aValues; |
409 | key *aKeys; | ||
410 | valuealloc va; | ||
411 | keyalloc ka; | ||
412 | challoc ca; | ||
413 | sizecalc szCalc; | ||
104 | }; | 414 | }; |
105 | */ | 415 | |
416 | template<> uint32_t __calcHashCode<const char *>( const char *k ); | ||
417 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); | ||
418 | |||
419 | template<> uint32_t __calcHashCode<char *>( char *k ); | ||
420 | template<> bool __cmpHashKeys<char *>( char *a, char *b ); | ||
421 | |||
422 | template<> uint32_t __calcHashCode<const std::string>( const std::string k ); | ||
423 | template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b ); | ||
424 | |||
425 | template<> uint32_t __calcHashCode<std::string>( std::string k ); | ||
426 | template<> bool __cmpHashKeys<std::string>( std::string a, std::string b ); | ||
427 | |||
428 | template<> uint32_t __calcHashCode<Hashable &>( Hashable &k ); | ||
429 | template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b ); | ||
430 | |||
106 | #endif | 431 | #endif |