diff options
Diffstat (limited to 'src/hashtable.cpp')
-rw-r--r-- | src/hashtable.cpp | 345 |
1 files changed, 345 insertions, 0 deletions
diff --git a/src/hashtable.cpp b/src/hashtable.cpp new file mode 100644 index 0000000..9dfe653 --- /dev/null +++ b/src/hashtable.cpp | |||
@@ -0,0 +1,345 @@ | |||
1 | #include <string.h> | ||
2 | #include <stdio.h> | ||
3 | #include <math.h> | ||
4 | |||
5 | #include "hashtable.h" | ||
6 | |||
7 | HashTable::HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes ) | ||
8 | { | ||
9 | hFunc = hNewFunc; | ||
10 | nTableSize = nextPrime( nInitSize ); | ||
11 | aTable = new HashNode[nTableSize]; | ||
12 | //for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id || aTable[j].data || aTable[j].bDeleted ) printf("Unclean entry\n"); | ||
13 | nSize = 0; | ||
14 | nFilled = 0; | ||
15 | this->bAllowDupes = bAllowDupes; | ||
16 | } | ||
17 | |||
18 | HashTable::~HashTable() | ||
19 | { | ||
20 | delete[] aTable; | ||
21 | delete hFunc; | ||
22 | } | ||
23 | |||
24 | void HashTable::set( int j, const void *newID, const void *newData ) | ||
25 | { | ||
26 | if( newData == NULL ) | ||
27 | { | ||
28 | printf("Inserting NULL data is indestinguishable from uninserted data!\n"); | ||
29 | } | ||
30 | aTable[j].id = newID; | ||
31 | aTable[j].data = newData; | ||
32 | } | ||
33 | |||
34 | bool HashTable::isFilled( int j ) | ||
35 | { | ||
36 | return (aTable[j].id != NULL)||(aTable[j].bDeleted); | ||
37 | } | ||
38 | |||
39 | bool HashTable::reHash( unsigned long int nNewSize ) | ||
40 | { | ||
41 | HashNode *aOldTable = aTable; | ||
42 | unsigned long int oldSize = nTableSize; | ||
43 | |||
44 | // If the table can still be used if we just get rid of deleted items, don't | ||
45 | // change the size of the table, otherwise, go ahead and use the number | ||
46 | // passed in. | ||
47 | if( nSize > nTableSize>>1 ) | ||
48 | { | ||
49 | nTableSize = nextPrime( nNewSize ); | ||
50 | } | ||
51 | |||
52 | aTable = newTable( nTableSize ); | ||
53 | //for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id || aTable[j].data || aTable[j].bDeleted ) printf("Unclean entry\n"); | ||
54 | |||
55 | nSize = 0; | ||
56 | nFilled = 0; | ||
57 | |||
58 | for( unsigned long int j = 0; j < oldSize; j++ ) | ||
59 | { | ||
60 | if( aOldTable[j].id != NULL && aOldTable[j].bDeleted == false ) | ||
61 | { | ||
62 | insert( aOldTable[j].id, aOldTable[j].data ); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | delete[] aOldTable; | ||
67 | } | ||
68 | |||
69 | unsigned long int HashTable::probe( unsigned long int nStart, const void *id ) | ||
70 | { | ||
71 | int nHash = nStart; | ||
72 | nStart = nStart%nTableSize; | ||
73 | if( bAllowDupes == true ) | ||
74 | { | ||
75 | for( | ||
76 | unsigned long int j=0; | ||
77 | isFilled( nStart ) && j < 32; | ||
78 | nStart = (nStart+(1<<j))%nTableSize, j++ | ||
79 | ); | ||
80 | |||
81 | /** | ||
82 | * This is an ugly little hack. If the hash table is too full in allow- | ||
83 | * dups mode we have to fall back on a linear search, otherwise you can | ||
84 | * only get up to 32 entries with the same name. | ||
85 | */ | ||
86 | if( isFilled( nStart ) ) | ||
87 | { | ||
88 | int nOldStart = nStart; | ||
89 | for( | ||
90 | nStart++; | ||
91 | isFilled( nStart ) && nStart != nOldStart; | ||
92 | nStart = (nStart+1)%nTableSize | ||
93 | ); | ||
94 | } | ||
95 | } | ||
96 | else | ||
97 | { | ||
98 | for( | ||
99 | unsigned long int j=0; | ||
100 | isFilled( nStart ) && j < 32; | ||
101 | nStart = (nStart+(1<<j))%nTableSize, j++ | ||
102 | ) | ||
103 | { | ||
104 | if( isFilled( nStart ) ) | ||
105 | { | ||
106 | if( hFunc->cmpIDs( aTable[nStart].id, id ) == true && | ||
107 | aTable[nStart].bDeleted == false ) | ||
108 | { | ||
109 | return nStart; | ||
110 | } | ||
111 | } | ||
112 | } | ||
113 | } | ||
114 | // This is our insurance, if the table is full, then go ahead and rehash, | ||
115 | // then try again. | ||
116 | if( isFilled( nStart ) ) | ||
117 | { | ||
118 | reHash( getCapacity()*2 ); | ||
119 | return probe( nHash, id ); | ||
120 | } | ||
121 | return nStart; | ||
122 | } | ||
123 | |||
124 | HashTable::HashNode *HashTable::newTable( unsigned long int nNewSize ) | ||
125 | { | ||
126 | return new HashNode[nNewSize]; | ||
127 | } | ||
128 | |||
129 | #ifdef HASH_DEBUG_VIS | ||
130 | void HashTable::printDebugLine( const char *exData ) | ||
131 | { | ||
132 | char *buf = new char[getCapacity()+3]; | ||
133 | int j; | ||
134 | buf[0] = '['; | ||
135 | for( j = 0; j < getCapacity(); j++ ) | ||
136 | { | ||
137 | buf[j+1] = (aTable[j].bDeleted)?('X'):((isFilled( j ))?('#'):('-')); | ||
138 | } | ||
139 | buf[j+1] = ']'; | ||
140 | buf[j+2] = '\0'; | ||
141 | printf("%s %s\n", buf, exData ); | ||
142 | delete[] buf; | ||
143 | } | ||
144 | #endif | ||
145 | |||
146 | bool HashTable::insert( const void *id, const void *data ) | ||
147 | { | ||
148 | unsigned long int nPos = probe( hFunc->hash( id ), id )%nTableSize; | ||
149 | |||
150 | if( bAllowDupes == true ) | ||
151 | { | ||
152 | if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false ) | ||
153 | { | ||
154 | set( nPos, id, data ); | ||
155 | #ifdef HASH_DEBUG_VIS | ||
156 | printDebugLine( (const char *)id ); | ||
157 | #endif | ||
158 | nSize++; | ||
159 | nFilled++; | ||
160 | return true; | ||
161 | } | ||
162 | else | ||
163 | { | ||
164 | return false; | ||
165 | } | ||
166 | } | ||
167 | else | ||
168 | { | ||
169 | if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false ) | ||
170 | { | ||
171 | set( nPos, id, data ); | ||
172 | #ifdef HASH_DEBUG_VIS | ||
173 | printDebugLine( (const char *)id ); | ||
174 | #endif | ||
175 | nSize++; | ||
176 | nFilled++; | ||
177 | return true; | ||
178 | } | ||
179 | else if( hFunc->cmpIDs( aTable[nPos].id, id ) == true ) | ||
180 | { | ||
181 | set( nPos, id, data ); | ||
182 | #ifdef HASH_DEBUG_VIS | ||
183 | printDebugLine( (const char *)id ); | ||
184 | #endif | ||
185 | return true; | ||
186 | } | ||
187 | else | ||
188 | { | ||
189 | return false; | ||
190 | } | ||
191 | } | ||
192 | } | ||
193 | |||
194 | const void *HashTable::get( const void *id, unsigned long int nSkip ) | ||
195 | { | ||
196 | unsigned long int nPos = hFunc->hash( id )%nTableSize; | ||
197 | |||
198 | for( unsigned long int j=0; j < 32; nPos = (nPos+(1<<j))%nTableSize, j++ ) | ||
199 | { | ||
200 | if( !isFilled( nPos ) ) return NULL; | ||
201 | if( hFunc->cmpIDs( id, aTable[nPos].id ) && | ||
202 | aTable[nPos].bDeleted == false ) | ||
203 | { | ||
204 | if( nSkip == 0 ) | ||
205 | { | ||
206 | return aTable[nPos].data; | ||
207 | } | ||
208 | else | ||
209 | { | ||
210 | nSkip--; | ||
211 | } | ||
212 | } | ||
213 | } | ||
214 | |||
215 | return NULL; | ||
216 | } | ||
217 | |||
218 | void *HashTable::getFirstItemPos() | ||
219 | { | ||
220 | HashPos *pos = new HashPos; | ||
221 | return pos; | ||
222 | } | ||
223 | |||
224 | const void *HashTable::getItemData( void *xPos ) | ||
225 | { | ||
226 | return aTable[((HashPos *)xPos)->nPos].data; | ||
227 | } | ||
228 | |||
229 | const void *HashTable::getItemID( void *xPos ) | ||
230 | { | ||
231 | return aTable[((HashPos *)xPos)->nPos].id; | ||
232 | } | ||
233 | |||
234 | void *HashTable::getNextItemPos( void *xPos ) | ||
235 | { | ||
236 | HashPos *pos = (HashPos *)xPos; | ||
237 | if( pos->bStarted == false ) | ||
238 | { | ||
239 | pos->bStarted = true; | ||
240 | pos->nPos = 0; | ||
241 | } | ||
242 | else | ||
243 | { | ||
244 | pos->nPos++; | ||
245 | } | ||
246 | if( pos->nPos < nTableSize ) | ||
247 | { | ||
248 | for( ; pos->nPos < nTableSize; pos->nPos++ ) | ||
249 | { | ||
250 | if( isFilled( pos->nPos ) && | ||
251 | aTable[pos->nPos].bDeleted == false ) | ||
252 | { | ||
253 | return xPos; | ||
254 | } | ||
255 | } | ||
256 | } | ||
257 | |||
258 | delete pos; | ||
259 | |||
260 | return NULL; | ||
261 | } | ||
262 | |||
263 | // Big-O sqrt(n) | ||
264 | // Change this to be erethpothynies table with a storage | ||
265 | // lookup later on. | ||
266 | bool HashTable::isPrime (int num) | ||
267 | { | ||
268 | if (num == 2) // the only even prime | ||
269 | return true; | ||
270 | else if (num % 2 == 0) // other even numbers are composite | ||
271 | return false; | ||
272 | else | ||
273 | { | ||
274 | //bool prime = true; | ||
275 | int divisor = 3; | ||
276 | int upperLimit = static_cast<int>(sqrt(num) + 1); | ||
277 | while (divisor <= upperLimit) | ||
278 | { | ||
279 | if (num % divisor == 0) | ||
280 | return false; | ||
281 | // prime = false; | ||
282 | divisor +=2; | ||
283 | } | ||
284 | return true; | ||
285 | } | ||
286 | } | ||
287 | |||
288 | // Big-O n^(3/2) | ||
289 | int HashTable::nextPrime( int base ) | ||
290 | { | ||
291 | int nPrime; | ||
292 | for( nPrime = base; isPrime( nPrime ) == false; nPrime++ ); | ||
293 | return nPrime; | ||
294 | } | ||
295 | |||
296 | unsigned long int HashTable::getCapacity() | ||
297 | { | ||
298 | return nTableSize; | ||
299 | } | ||
300 | |||
301 | unsigned long int HashTable::getSize() | ||
302 | { | ||
303 | return nSize; | ||
304 | } | ||
305 | |||
306 | double HashTable::getLoad() | ||
307 | { | ||
308 | return (double)(nFilled)/(double)(nTableSize); | ||
309 | } | ||
310 | |||
311 | const void *HashTable::operator[](const void *id) | ||
312 | { | ||
313 | return get( id ); | ||
314 | } | ||
315 | |||
316 | bool HashTable::del( const void *id, int nSkip ) | ||
317 | { | ||
318 | unsigned long int nPos = hFunc->hash( id )%nTableSize; | ||
319 | |||
320 | for( unsigned long int j=0; j < 32; nPos = (nPos+(1<<j))%nTableSize, j++ ) | ||
321 | { | ||
322 | if( !isFilled( nPos ) ) return false; | ||
323 | if( hFunc->cmpIDs( id, aTable[nPos].id ) && | ||
324 | aTable[nPos].bDeleted == false ) | ||
325 | { | ||
326 | if( nSkip == 0 ) | ||
327 | { | ||
328 | aTable[nPos].bDeleted = true; | ||
329 | // aTable[nPos]. | ||
330 | nSize--; | ||
331 | #ifdef HASH_DEBUG_VIS | ||
332 | printDebugLine( (const char *)id ); | ||
333 | #endif | ||
334 | return true; | ||
335 | } | ||
336 | else | ||
337 | { | ||
338 | nSkip--; | ||
339 | } | ||
340 | } | ||
341 | } | ||
342 | |||
343 | return false; | ||
344 | } | ||
345 | |||