summaryrefslogtreecommitdiff
path: root/src/hash.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/hash.h')
-rw-r--r--src/hash.h447
1 files changed, 386 insertions, 61 deletions
diff --git a/src/hash.h b/src/hash.h
index 8385bb9..b8ced99 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -1,106 +1,431 @@
1#ifndef HASH_H 1#ifndef HASH_H
2#define HASH_H 2#define HASH_H
3/* 3
4#include <stddef.h> 4#include <stddef.h>
5#include <string.h> 5#include <string.h>
6#include <memory>
7#include <iostream>
6#include "hashable.h" 8#include "hashable.h"
7 9
8#define bitsToBytes( n ) (n/8+(n%8>0 ? 1 : 0)) 10#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0))
11
12template<typename T>
13uint32_t __calcHashCode( T k );
14
15template<typename T>
16bool __cmpHashKeys( T a, T b );
17
18struct __calcNextTSize_fast
19{
20 uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const
21 {
22 return nCapacity*2+1;
23 }
24};
25
26template<typename key, typename value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator<key>, typename valuealloc = std::allocator<value>, typename challoc = std::allocator<uint32_t> >
27class Hash;
28
29template< typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
30struct HashProxy
31{
32 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
33private:
34 HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k, value v ) :
35 hsh( h ),
36 tKey( k ),
37 tValue( v ),
38 bFilled( true )
39 {
40 }
41
42 HashProxy( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &h, key k ) :
43 hsh( h ),
44 tKey( k ),
45 bFilled( false )
46 {
47 }
48
49 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
50 key tKey;
51 value tValue;
52 bool bFilled;
53
54public:
55 operator value()
56 {
57 if( bFilled == false )
58 throw "Nope, no data there";
59 return tValue;
60 }
9 61
10template<class key, class value> 62 value operator=( value nval )
63 {
64 hsh.insert( tKey, nval );
65 return nval;
66 }
67
68};
69
70template<typename key, typename value, typename sizecalc, typename keyalloc, typename valuealloc, typename challoc >
11class Hash 71class Hash
12{ 72{
13 class iterator 73public:
74 Hash() :
75 nCapacity( 11 ),
76 nFilled( 0 ),
77 nDeleted( 0 ),
78 bFilled( NULL ),
79 aKeys( NULL ),
80 aValues( NULL ),
81 aHashCodes( NULL )
14 { 82 {
15 friend class Hash<key, value>; 83 nKeysSize = bitsToBytes( nCapacity );
16 private: 84 bFilled = ca.allocate( nKeysSize );
17 iterator( Hash<key, value> *hsh, int nIndex, key keyval, bool bInsert ) : 85 bDeleted = ca.allocate( nKeysSize );
18 hsh( hsh ), 86 clearBits();
19 nIndex( nIndex ), 87
20 keyval( keyval ), 88 aHashCodes = ca.allocate( nCapacity );
21 bInsert( bInsert ) 89 aKeys = ka.allocate( nCapacity );
90 aValues = va.allocate( nCapacity );
91 }
92
93 virtual ~Hash()
94 {
95 for( uint32_t j = 0; j < nCapacity; j++ )
22 { 96 {
97 if( isFilled( j ) )
98 {
99 va.destroy( &aValues[j] );
100 ka.destroy( &aKeys[j] );
101 }
23 } 102 }
103 va.deallocate( aValues, nCapacity );
104 ka.deallocate( aKeys, nCapacity );
105 ca.deallocate( bFilled, nKeysSize );
106 ca.deallocate( bDeleted, nKeysSize );
107 ca.deallocate( aHashCodes, nCapacity );
108 }
24 109
25 public: 110 void clearBits()
26 iterator() : 111 {
27 hsh( NULL ), 112 for( uint32_t j = 0; j < nKeysSize; j++ )
28 nIndex( -1 )
29 { 113 {
114 bFilled[j] = bDeleted[j] = 0;
30 } 115 }
116 }
117
118 int hasKey( key keyval )
119 {
120 printf("%s\n", keyval );
121 }
122
123 uint32_t getCapacity()
124 {
125 return nCapacity;
126 }
127
128 uint32_t getFill()
129 {
130 return nFilled;
131 }
132
133 uint32_t getDeleted()
134 {
135 return nDeleted;
136 }
137
138 HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc> operator[]( key k )
139 {
140 uint32_t hash = __calcHashCode( k );
141 bool bFill;
142 uint32_t nPos = probe( hash, k, bFill );
31 143
32 iterator &operator=( iterator &src ) 144 if( bFill )
33 { 145 {
34 this->hsh = src.hsh; 146 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, aKeys[nPos], aValues[nPos] );
35 this->nIndex = src.nIndex;
36 } 147 }
148 else
149 {
150 return HashProxy<key, value, sizecalc, keyalloc, valuealloc, challoc>( *this, k );
151 }
152 }
153
154 void insert( key k, value v )
155 {
156 uint32_t hash = __calcHashCode( k );
157 bool bFill;
158 uint32_t nPos = probe( hash, k, bFill );
37 159
38 iterator &operator=( value &src ) 160 if( bFill )
39 { 161 {
40 if( bInsert ) 162 va.destroy( &aValues[nPos] );
41 printf("You wanted to insert %d\n", src ); 163 va.construct( &aValues[nPos], v );
42 else 164 }
43 printf("You wanted to insert %d\n", src ); 165 else
166 {
167 va.construct( &aValues[nPos], v );
168 ka.construct( &aKeys[nPos], k );
169 fill( nPos );
170 aHashCodes[nPos] = hash;
171 nFilled++;
44 } 172 }
173 }
45 174
46 private: 175 value get( key k )
47 Hash<key, value> *hsh; 176 {
48 int nIndex; 177 uint32_t hash = __calcHashCode( k );
49 bool bInsert; 178 bool bFill;
50 key keyval; 179 uint32_t nPos = probe( hash, k, bFill );
51 }; 180
181 if( bFill )
182 {
183 return aValues[nPos];
184 }
185 else
186 {
187 throw "Hey, no such thing...";
188 }
189 }
190
191 uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true )
192 {
193 uint32_t nCur = hash%nCapacity;
194
195 // First we scan to see if the key is already there, abort if we
196 // run out of probing room, or we find a non-filled entry
197 for( int8_t j = 0;
198 isFilled( nCur ) && j < 32;
199 nCur = (nCur + (1<<j))%nCapacity, j++
200 )
201 {
202 // Is this the same hash code we were looking for?
203 if( hash == aHashCodes[nCur] )
204 {
205 // Is it really the same key? (for safety)
206 if( __cmpHashKeys( aKeys[nCur], k ) == true &&
207 isDeleted( nCur ) == false )
208 {
209 bFill = true;
210 return nCur;
211 }
212 }
213 }
214
215 // This is our insurance, if the table is full, then go ahead and
216 // rehash, then try again.
217 if( isFilled( nCur ) && rehash == true )
218 {
219 reHash( szCalc(getCapacity(), getFill(), getDeleted()) );
220
221 // This is potentially dangerous, and could cause an infinite loop.
222 // Be careful writing probe, eh?
223 return probe( hash, k, bFill );
224 }
225
226 bFill = false;
227 return nCur;
228 }
229
230 void reHash( uint32_t nNewSize )
231 {
232 // Save all the old data
233 uint32_t nOldCapacity = nCapacity;
234 uint32_t *bOldFilled = bFilled;
235 uint32_t *aOldHashCodes = aHashCodes;
236 uint32_t nOldKeysSize = nKeysSize;
237 uint32_t *bOldDeleted = bDeleted;
238 value *aOldValues = aValues;
239 key *aOldKeys = aKeys;
240
241 // Calculate new sizes
242 nCapacity = nNewSize;
243 nKeysSize = bitsToBytes( nCapacity );
244
245 // Allocate new memory + prep
246 bFilled = ca.allocate( nKeysSize );
247 bDeleted = ca.allocate( nKeysSize );
248 clearBits();
249
250 aHashCodes = ca.allocate( nCapacity );
251 aKeys = ka.allocate( nCapacity );
252 aValues = va.allocate( nCapacity );
253
254 // Re-insert all of the old data (except deleted items)
255 for( uint32_t j = 0; j < nOldCapacity; j++ )
256 {
257 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
258 {
259 insert( aOldKeys[j], aOldValues[j] );
260 }
261 }
262
263 // Delete all of the old data
264 for( uint32_t j = 0; j < nOldCapacity; j++ )
265 {
266 if( (bOldFilled[j/32]&(1<<(j%32)))!=0 )
267 {
268 va.destroy( &aOldValues[j] );
269 ka.destroy( &aOldKeys[j] );
270 }
271 }
272 va.deallocate( aOldValues, nOldCapacity );
273 ka.deallocate( aOldKeys, nOldCapacity );
274 ca.deallocate( bOldFilled, nOldKeysSize );
275 ca.deallocate( bOldDeleted, nOldKeysSize );
276 ca.deallocate( aOldHashCodes, nOldCapacity );
277 }
52 278
53 template<class refval> 279 void fill( uint32_t loc )
54 class VRef
55 { 280 {
281 bFilled[loc/32] |= (1<<(loc%32));
282 }
283
284 bool isFilled( uint32_t loc )
285 {
286 return (bFilled[loc/32]&(1<<(loc%32)))!=0;
287 }
288
289 bool isDeleted( uint32_t loc )
290 {
291 return (bDeleted[loc/32]&(1<<(loc%32)))!=0;
292 }
293
294 typedef struct iterator
295 {
296 friend class Hash<key, value, sizecalc, keyalloc, valuealloc, challoc>;
297 private:
298 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh ) :
299 hsh( hsh ),
300 bFinished( false ),
301 nPos( 0 )
302 {
303 nPos = hsh.getFirstPos( bFinished );
304 }
305
306 iterator( Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh, bool bDone ) :
307 hsh( hsh ),
308 nPos( 0 ),
309 bFinished( bDone )
310 {
311 }
312
313 Hash<key, value, sizecalc, keyalloc, valuealloc, challoc> &hsh;
314
315 uint32_t nPos;
316 bool bFinished;
317
56 public: 318 public:
57 VRef( refval &data ) : 319 iterator operator++( int )
58 data( data )
59 { 320 {
321 if( bFinished == false )
322 nPos = hsh.getNextPos( nPos, bFinished );
323
324 return *this;
325 }
326
327 iterator operator++()
328 {
329 if( bFinished == false )
330 nPos = hsh.getNextPos( nPos, bFinished );
331
332 return *this;
333 }
334
335 bool operator==( const iterator &oth )
336 {
337 if( bFinished != oth.bFinished )
338 return false;
339 if( bFinished == true )
340 {
341 return true;
342 }
343 else
344 {
345 if( oth.nPos == nPos )
346 return true;
347 return false;
348 }
349 }
350
351 bool operator!=( const iterator &oth )
352 {
353 return !(*this == oth );
354 }
355
356 std::pair<key,value> operator *()
357 {
358 return hsh.getAtPos( nPos );
60 } 359 }
61 refval &data;
62 }; 360 };
63 361
64public: 362 iterator begin()
65 Hash() :
66 nCapacity( 11 ),
67 nFilled( 0 ),
68 bFilled( NULL ),
69 aKeys( NULL ),
70 aValues( NULL ),
71 aHashCodes( NULL )
72 { 363 {
73 int nKeysSize = bitsToBytes( nCapacity ); 364 return iterator( *this );
74 bFilled = new unsigned char[ nKeysSize ]; 365 }
75 memset( bFilled, 0, nKeysSize ); 366
76 367 iterator end()
77 aKeys = new VRef<key>*[nCapacity]; 368 {
78 aValues = new value[nCapacity]; 369 return iterator( *this, true );
79 } 370 }
80 371
81 virtual ~Hash() 372private:
373 std::pair<key,value> getAtPos( uint32_t nPos )
82 { 374 {
375 return std::pair<key,value>(aKeys[nPos],aValues[nPos]);
83 } 376 }
84 377
85 iterator operator[]( key keyval ) 378 uint32_t getFirstPos( bool &bFinished )
86 { 379 {
87 //iterator i( this, 4, keyval, true ); 380 for( uint32_t j = 0; j < nCapacity; j++ )
88 //return i; 381 {
89 printf("%s\n", keyval.getString() ); 382 if( isFilled( j ) )
383 return j;
384 }
385
386 bFinished = true;
90 } 387 }
91 388
92 int hasKey( key keyval ) 389 uint32_t getNextPos( uint32_t nPos, bool &bFinished )
93 { 390 {
94 printf("%s\n", keyval.getString() ); 391 for( uint32_t j = nPos+1; j < nCapacity; j++ )
392 {
393 if( isFilled( j ) )
394 return j;
395 }
396
397 bFinished = true;
95 } 398 }
96 399
97private: 400private:
98 int nCapacity; 401 uint32_t nCapacity;
99 int nFilled; 402 uint32_t nFilled;
100 unsigned char *bFilled; 403 uint32_t nDeleted;
101 VRef<key> **aKeys; 404 uint32_t *bFilled;
102 unsigned long int *aHashCodes; 405 uint32_t *bDeleted;
406 uint32_t *aHashCodes;
407 uint32_t nKeysSize;
103 value *aValues; 408 value *aValues;
409 key *aKeys;
410 valuealloc va;
411 keyalloc ka;
412 challoc ca;
413 sizecalc szCalc;
104}; 414};
105*/ 415
416template<> uint32_t __calcHashCode<const char *>( const char *k );
417template<> bool __cmpHashKeys<const char *>( const char *a, const char *b );
418
419template<> uint32_t __calcHashCode<char *>( char *k );
420template<> bool __cmpHashKeys<char *>( char *a, char *b );
421
422template<> uint32_t __calcHashCode<const std::string>( const std::string k );
423template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b );
424
425template<> uint32_t __calcHashCode<std::string>( std::string k );
426template<> bool __cmpHashKeys<std::string>( std::string a, std::string b );
427
428template<> uint32_t __calcHashCode<Hashable &>( Hashable &k );
429template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b );
430
106#endif 431#endif