diff options
author | Mike Buland <eichlan@xagasoft.com> | 2006-11-21 12:23:13 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2006-11-21 12:23:13 +0000 |
commit | 4b8bad3d711f39db84f094edf83c5496a3f02cd6 (patch) | |
tree | bbaf654af3e82e67544ae17f07b7fbe7d02ce0ec /src/hash.h | |
parent | 737b1aee54da9ff45a4fb6eb7e636eff9019128e (diff) | |
download | libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.tar.gz libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.tar.bz2 libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.tar.xz libbu++-4b8bad3d711f39db84f094edf83c5496a3f02cd6.zip |
Wow, craziness. Part way through working on the confpair system I got some
sudden insperation and completely redid Hash. Now everything but delete is
implemented, including typesafe iterators and more. It's really cool, and
everyone should check it out and start using it right away!
Diffstat (limited to '')
-rw-r--r-- | src/hash.h | 447 |
1 files changed, 386 insertions, 61 deletions
@@ -1,106 +1,431 @@ | |||
1 | #ifndef HASH_H | 1 | #ifndef HASH_H |
2 | #define HASH_H | 2 | #define HASH_H |
3 | /* | 3 | |
4 | #include <stddef.h> | 4 | #include <stddef.h> |
5 | #include <string.h> | 5 | #include <string.h> |
6 | #include <memory> | ||
7 | #include <iostream> | ||
6 | #include "hashable.h" | 8 | #include "hashable.h" |
7 | 9 | ||
8 | #define bitsToBytes( n ) (n/8+(n%8>0 ? 1 : 0)) | 10 | #define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0)) |
11 | |||
12 | template<typename T> | ||
13 | uint32_t __calcHashCode( T k ); | ||
14 | |||
15 | template<typename T> | ||
16 | bool __cmpHashKeys( T a, T b ); | ||
17 | |||
18 | struct __calcNextTSize_fast | ||
19 | { | ||
20 | uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const | ||
21 | { | ||
22 | return nCapacity*2+1; | ||
23 | } | ||
24 | }; | ||
25 | |||
26 | template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> > | ||
27 | class Hash; | ||
28 | |||
29 | template< typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc > | ||
30 | struct HashProxy | ||
31 | { | ||
32 | friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>; | ||
33 | private: | ||
34 | HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k, value v ) : | ||
35 | hsh( h ), | ||
36 | tKey( k ), | ||
37 | tValue( v ), | ||
38 | bFilled( true ) | ||
39 | { | ||
40 | } | ||
41 | |||
42 | HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k ) : | ||
43 | hsh( h ), | ||
44 | tKey( k ), | ||
45 | bFilled( false ) | ||
46 | { | ||
47 | } | ||
48 | |||
49 | Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh; | ||
50 | key tKey; | ||
51 | value tValue; | ||
52 | bool bFilled; | ||
53 | |||
54 | public: | ||
55 | operator value() | ||
56 | { | ||
57 | if( bFilled == false ) | ||
58 | throw "Nope, no data there"; | ||
59 | return tValue; | ||
60 | } | ||
9 | 61 | ||
10 | template<class key, class value> | 62 | value operator=( value nval ) |
63 | { | ||
64 | hsh.insert( tKey, nval ); | ||
65 | return nval; | ||
66 | } | ||
67 | |||
68 | }; | ||
69 | |||
70 | template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc > | ||
11 | class Hash | 71 | class Hash |
12 | { | 72 | { |
13 | class iterator | 73 | public: |
74 | Hash() : | ||
75 | nCapacity( 11 ), | ||
76 | nFilled( 0 ), | ||
77 | nDeleted( 0 ), | ||
78 | bFilled( NULL ), | ||
79 | aKeys( NULL ), | ||
80 | aValues( NULL ), | ||
81 | aHashCodes( NULL ) | ||
14 | { | 82 | { |
15 | friend class Hash<key, value>; | 83 | nKeysSize = bitsToBytes( nCapacity ); |
16 | private: | 84 | bFilled = ca.allocate( nKeysSize ); |
17 | iterator( Hash<key, value> *hsh, int nIndex, key keyval, bool bInsert ) : | 85 | bDeleted = ca.allocate( nKeysSize ); |
18 | hsh( hsh ), | 86 | clearBits(); |
19 | nIndex( nIndex ), | 87 | |
20 | keyval( keyval ), | 88 | aHashCodes = ca.allocate( nCapacity ); |
21 | bInsert( bInsert ) | 89 | aKeys = ka.allocate( nCapacity ); |
90 | aValues = va.allocate( nCapacity ); | ||
91 | } | ||
92 | |||
93 | virtual ~Hash() | ||
94 | { | ||
95 | for( uint32_t j = 0; j < nCapacity; j++ ) | ||
22 | { | 96 | { |
97 | if( isFilled( j ) ) | ||
98 | { | ||
99 | va.destroy( &aValues[j] ); | ||
100 | ka.destroy( &aKeys[j] ); | ||
101 | } | ||
23 | } | 102 | } |
103 | va.deallocate( aValues, nCapacity ); | ||
104 | ka.deallocate( aKeys, nCapacity ); | ||
105 | ca.deallocate( bFilled, nKeysSize ); | ||
106 | ca.deallocate( bDeleted, nKeysSize ); | ||
107 | ca.deallocate( aHashCodes, nCapacity ); | ||
108 | } | ||
24 | 109 | ||
25 | public: | 110 | void clearBits() |
26 | iterator() : | 111 | { |
27 | hsh( NULL ), | 112 | for( uint32_t j = 0; j < nKeysSize; j++ ) |
28 | nIndex( -1 ) | ||
29 | { | 113 | { |
114 | bFilled[j] = bDeleted[j] = 0; | ||
30 | } | 115 | } |
116 | } | ||
117 | |||
118 | int hasKey( key keyval ) | ||
119 | { | ||
120 | printf("%s\n", keyval ); | ||
121 | } | ||
122 | |||
123 | uint32_t getCapacity() | ||
124 | { | ||
125 | return nCapacity; | ||
126 | } | ||
127 | |||
128 | uint32_t getFill() | ||
129 | { | ||
130 | return nFilled; | ||
131 | } | ||
132 | |||
133 | uint32_t getDeleted() | ||
134 | { | ||
135 | return nDeleted; | ||
136 | } | ||
137 | |||
138 | HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k ) | ||
139 | { | ||
140 | uint32_t hash = __calcHashCode( k ); | ||
141 | bool bFill; | ||
142 | uint32_t nPos = probe( hash, k, bFill ); | ||
31 | 143 | ||
32 | iterator &operator=( iterator &src ) | 144 | if( bFill ) |
33 | { | 145 | { |
34 | this->hsh = src.hsh; | 146 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, aKeys[nPos], aValues[nPos] ); |
35 | this->nIndex = src.nIndex; | ||
36 | } | 147 | } |
148 | else | ||
149 | { | ||
150 | return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, k ); | ||
151 | } | ||
152 | } | ||
153 | |||
154 | void insert( key k, value v ) | ||
155 | { | ||
156 | uint32_t hash = __calcHashCode( k ); | ||
157 | bool bFill; | ||
158 | uint32_t nPos = probe( hash, k, bFill ); | ||
37 | 159 | ||
38 | iterator &operator=( value &src ) | 160 | if( bFill ) |
39 | { | 161 | { |
40 | if( bInsert ) | 162 | va.destroy( &aValues[nPos] ); |
41 | printf("You wanted to insert %d\n", src ); | 163 | va.construct( &aValues[nPos], v ); |
42 | else | 164 | } |
43 | printf("You wanted to insert %d\n", src ); | 165 | else |
166 | { | ||
167 | va.construct( &aValues[nPos], v ); | ||
168 | ka.construct( &aKeys[nPos], k ); | ||
169 | fill( nPos ); | ||
170 | aHashCodes[nPos] = hash; | ||
171 | nFilled++; | ||
44 | } | 172 | } |
173 | } | ||
45 | 174 | ||
46 | private: | 175 | value get( key k ) |
47 | Hash<key, value> *hsh; | 176 | { |
48 | int nIndex; | 177 | uint32_t hash = __calcHashCode( k ); |
49 | bool bInsert; | 178 | bool bFill; |
50 | key keyval; | 179 | uint32_t nPos = probe( hash, k, bFill ); |
51 | }; | 180 | |
181 | if( bFill ) | ||
182 | { | ||
183 | return aValues[nPos]; | ||
184 | } | ||
185 | else | ||
186 | { | ||
187 | throw "Hey, no such thing..."; | ||
188 | } | ||
189 | } | ||
190 | |||
191 | uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) | ||
192 | { | ||
193 | uint32_t nCur = hash%nCapacity; | ||
194 | |||
195 | // First we scan to see if the key is already there, abort if we | ||
196 | // run out of probing room, or we find a non-filled entry | ||
197 | for( int8_t j = 0; | ||
198 | isFilled( nCur ) && j < 32; | ||
199 | nCur = (nCur + (1<<j))%nCapacity, j++ | ||
200 | ) | ||
201 | { | ||
202 | // Is this the same hash code we were looking for? | ||
203 | if( hash == aHashCodes[nCur] ) | ||
204 | { | ||
205 | // Is it really the same key? (for safety) | ||
206 | if( __cmpHashKeys( aKeys[nCur], k ) == true && | ||
207 | isDeleted( nCur ) == false ) | ||
208 | { | ||
209 | bFill = true; | ||
210 | return nCur; | ||
211 | } | ||
212 | } | ||
213 | } | ||
214 | |||
215 | // This is our insurance, if the table is full, then go ahead and | ||
216 | // rehash, then try again. | ||
217 | if( isFilled( nCur ) && rehash == true ) | ||
218 | { | ||
219 | reHash( szCalc(getCapacity(), getFill(), getDeleted()) ); | ||
220 | |||
221 | // This is potentially dangerous, and could cause an infinite loop. | ||
222 | // Be careful writing probe, eh? | ||
223 | return probe( hash, k, bFill ); | ||
224 | } | ||
225 | |||
226 | bFill = false; | ||
227 | return nCur; | ||
228 | } | ||
229 | |||
230 | void reHash( uint32_t nNewSize ) | ||
231 | { | ||
232 | // Save all the old data | ||
233 | uint32_t nOldCapacity = nCapacity; | ||
234 | uint32_t *bOldFilled = bFilled; | ||
235 | uint32_t *aOldHashCodes = aHashCodes; | ||
236 | uint32_t nOldKeysSize = nKeysSize; | ||
237 | uint32_t *bOldDeleted = bDeleted; | ||
238 | value *aOldValues = aValues; | ||
239 | key *aOldKeys = aKeys; | ||
240 | |||
241 | // Calculate new sizes | ||
242 | nCapacity = nNewSize; | ||
243 | nKeysSize = bitsToBytes( nCapacity ); | ||
244 | |||
245 | // Allocate new memory + prep | ||
246 | bFilled = ca.allocate( nKeysSize ); | ||
247 | bDeleted = ca.allocate( nKeysSize ); | ||
248 | clearBits(); | ||
249 | |||
250 | aHashCodes = ca.allocate( nCapacity ); | ||
251 | aKeys = ka.allocate( nCapacity ); | ||
252 | aValues = va.allocate( nCapacity ); | ||
253 | |||
254 | // Re-insert all of the old data (except deleted items) | ||
255 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
256 | { | ||
257 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
258 | { | ||
259 | insert( aOldKeys[j], aOldValues[j] ); | ||
260 | } | ||
261 | } | ||
262 | |||
263 | // Delete all of the old data | ||
264 | for( uint32_t j = 0; j < nOldCapacity; j++ ) | ||
265 | { | ||
266 | if( (bOldFilled[j/32]&(1<<(j%32)))!=0 ) | ||
267 | { | ||
268 | va.destroy( &aOldValues[j] ); | ||
269 | ka.destroy( &aOldKeys[j] ); | ||
270 | } | ||
271 | } | ||
272 | va.deallocate( aOldValues, nOldCapacity ); | ||
273 | ka.deallocate( aOldKeys, nOldCapacity ); | ||
274 | ca.deallocate( bOldFilled, nOldKeysSize ); | ||
275 | ca.deallocate( bOldDeleted, nOldKeysSize ); | ||
276 | ca.deallocate( aOldHashCodes, nOldCapacity ); | ||
277 | } | ||
52 | 278 | ||
53 | template<class refval> | 279 | void fill( uint32_t loc ) |
54 | class VRef | ||
55 | { | 280 | { |
281 | bFilled[loc/32] |= (1<<(loc%32)); | ||
282 | } | ||
283 | |||
284 | bool isFilled( uint32_t loc ) | ||
285 | { | ||
286 | return (bFilled[loc/32]&(1<<(loc%32)))!=0; | ||
287 | } | ||
288 | |||
289 | bool isDeleted( uint32_t loc ) | ||
290 | { | ||
291 | return (bDeleted[loc/32]&(1<<(loc%32)))!=0; | ||
292 | } | ||
293 | |||
294 | typedef struct iterator | ||
295 | { | ||
296 | friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>; | ||
297 | private: | ||
298 | iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) : | ||
299 | hsh( hsh ), | ||
300 | bFinished( false ), | ||
301 | nPos( 0 ) | ||
302 | { | ||
303 | nPos = hsh.getFirstPos( bFinished ); | ||
304 | } | ||
305 | |||
306 | iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) : | ||
307 | hsh( hsh ), | ||
308 | nPos( 0 ), | ||
309 | bFinished( bDone ) | ||
310 | { | ||
311 | } | ||
312 | |||
313 | Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh; | ||
314 | |||
315 | uint32_t nPos; | ||
316 | bool bFinished; | ||
317 | |||
56 | public: | 318 | public: |
57 | VRef( refval &data ) : | 319 | iterator operator++( int ) |
58 | data( data ) | ||
59 | { | 320 | { |
321 | if( bFinished == false ) | ||
322 | nPos = hsh.getNextPos( nPos, bFinished ); | ||
323 | |||
324 | return *this; | ||
325 | } | ||
326 | |||
327 | iterator operator++() | ||
328 | { | ||
329 | if( bFinished == false ) | ||
330 | nPos = hsh.getNextPos( nPos, bFinished ); | ||
331 | |||
332 | return *this; | ||
333 | } | ||
334 | |||
335 | bool operator==( const iterator &oth ) | ||
336 | { | ||
337 | if( bFinished != oth.bFinished ) | ||
338 | return false; | ||
339 | if( bFinished == true ) | ||
340 | { | ||
341 | return true; | ||
342 | } | ||
343 | else | ||
344 | { | ||
345 | if( oth.nPos == nPos ) | ||
346 | return true; | ||
347 | return false; | ||
348 | } | ||
349 | } | ||
350 | |||
351 | bool operator!=( const iterator &oth ) | ||
352 | { | ||
353 | return !(*this == oth ); | ||
354 | } | ||
355 | |||
356 | std::pair<key,value> operator *() | ||
357 | { | ||
358 | return hsh.getAtPos( nPos ); | ||
60 | } | 359 | } |
61 | refval &data; | ||
62 | }; | 360 | }; |
63 | 361 | ||
64 | public: | 362 | iterator begin() |
65 | Hash() : | ||
66 | nCapacity( 11 ), | ||
67 | nFilled( 0 ), | ||
68 | bFilled( NULL ), | ||
69 | aKeys( NULL ), | ||
70 | aValues( NULL ), | ||
71 | aHashCodes( NULL ) | ||
72 | { | 363 | { |
73 | int nKeysSize = bitsToBytes( nCapacity ); | 364 | return iterator( *this ); |
74 | bFilled = new unsigned char[ nKeysSize ]; | 365 | } |
75 | memset( bFilled, 0, nKeysSize ); | 366 | |
76 | 367 | iterator end() | |
77 | aKeys = new VRef<key>*[nCapacity]; | 368 | { |
78 | aValues = new value[nCapacity]; | 369 | return iterator( *this, true ); |
79 | } | 370 | } |
80 | 371 | ||
81 | virtual ~Hash() | 372 | private: |
373 | std::pair<key,value> getAtPos( uint32_t nPos ) | ||
82 | { | 374 | { |
375 | return std::pair<key,value>(aKeys[nPos],aValues[nPos]); | ||
83 | } | 376 | } |
84 | 377 | ||
85 | iterator operator[]( key keyval ) | 378 | uint32_t getFirstPos( bool &bFinished ) |
86 | { | 379 | { |
87 | //iterator i( this, 4, keyval, true ); | 380 | for( uint32_t j = 0; j < nCapacity; j++ ) |
88 | //return i; | 381 | { |
89 | printf("%s\n", keyval.getString() ); | 382 | if( isFilled( j ) ) |
383 | return j; | ||
384 | } | ||
385 | |||
386 | bFinished = true; | ||
90 | } | 387 | } |
91 | 388 | ||
92 | int hasKey( key keyval ) | 389 | uint32_t getNextPos( uint32_t nPos, bool &bFinished ) |
93 | { | 390 | { |
94 | printf("%s\n", keyval.getString() ); | 391 | for( uint32_t j = nPos+1; j < nCapacity; j++ ) |
392 | { | ||
393 | if( isFilled( j ) ) | ||
394 | return j; | ||
395 | } | ||
396 | |||
397 | bFinished = true; | ||
95 | } | 398 | } |
96 | 399 | ||
97 | private: | 400 | private: |
98 | int nCapacity; | 401 | uint32_t nCapacity; |
99 | int nFilled; | 402 | uint32_t nFilled; |
100 | unsigned char *bFilled; | 403 | uint32_t nDeleted; |
101 | VRef<key> **aKeys; | 404 | uint32_t *bFilled; |
102 | unsigned long int *aHashCodes; | 405 | uint32_t *bDeleted; |
406 | uint32_t *aHashCodes; | ||
407 | uint32_t nKeysSize; | ||
103 | value *aValues; | 408 | value *aValues; |
409 | key *aKeys; | ||
410 | valuealloc va; | ||
411 | keyalloc ka; | ||
412 | challoc ca; | ||
413 | sizecalc szCalc; | ||
104 | }; | 414 | }; |
105 | */ | 415 | |
416 | template<> uint32_t __calcHashCode<const char *>( const char *k ); | ||
417 | template<> bool __cmpHashKeys<const char *>( const char *a, const char *b ); | ||
418 | |||
419 | template<> uint32_t __calcHashCode<char *>( char *k ); | ||
420 | template<> bool __cmpHashKeys<char *>( char *a, char *b ); | ||
421 | |||
422 | template<> uint32_t __calcHashCode<const std::string>( const std::string k ); | ||
423 | template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b ); | ||
424 | |||
425 | template<> uint32_t __calcHashCode<std::string>( std::string k ); | ||
426 | template<> bool __cmpHashKeys<std::string>( std::string a, std::string b ); | ||
427 | |||
428 | template<> uint32_t __calcHashCode<Hashable &>( Hashable &k ); | ||
429 | template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b ); | ||
430 | |||
106 | #endif | 431 | #endif |