diff options
Diffstat (limited to 'src/hashtable.h')
-rw-r--r-- | src/hashtable.h | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..d14be71 --- /dev/null +++ b/src/hashtable.h | |||
@@ -0,0 +1,299 @@ | |||
1 | /**\hashtable.h | ||
2 | * Describes the HashFunction, HashFunctionString, and HashTable classes. It | ||
3 | * was just easier to put them all in one set of files. | ||
4 | *@author Mike Buland | ||
5 | */ | ||
6 | |||
7 | #ifndef HASH_TABLE_H | ||
8 | #define HASH_TABLE_H | ||
9 | |||
10 | //Uncomment this line to see a cool text-mode visualization of what's going on | ||
11 | //#define HASH_DEBUG_VIS 1 | ||
12 | |||
13 | #include <stdlib.h> | ||
14 | #include <string.h> | ||
15 | #include <ctype.h> | ||
16 | |||
17 | #include "hashfunction.h" | ||
18 | |||
19 | /** | ||
20 | * A simple yet flexable hash-table. This uses several tricks to help ensure | ||
21 | * that the table is always running at maximum efficiency. You no longer have | ||
22 | * to specify a "danger fill level" when more space is needed a rehash is | ||
23 | * automatically trigered. Deleting elements is fully supported, as well as | ||
24 | * duplicate elements. To work with and allow duplicates simple construct your | ||
25 | * HashTable the way you normally would, but when deleting or getting elements | ||
26 | * you can specify a skip value. This effectively allows you to treat elements | ||
27 | * with duplicate ID's as though they were in a zero-based array. The first | ||
28 | * element inserted with a given ID would be at skip zero, the next at skip 1 | ||
29 | * and so on. This allows you to quickly search for elements with duplicate | ||
30 | * names, just stop when you get a null for a skip number, i.e. | ||
31 | * <pre> | ||
32 | * for( int j = 0;; j++ ) | ||
33 | * { | ||
34 | * void *pData = hash.get( myID, j ); | ||
35 | * if( !pData ) break; | ||
36 | * // Do something interesting with pData | ||
37 | * } | ||
38 | * </pre> | ||
39 | * There are new features in this HashTable that also allow for memory saving | ||
40 | * when dealing with systems where many elements are being deleted from the | ||
41 | * table. In those cases the elements deleted cannot be simply deleted, instead | ||
42 | * they have to be marked as deleted and hidden from the user, but maintained in | ||
43 | * the table so that future hashing operations don't fail. When rehashing | ||
44 | * occurs all elements marked as deleted are quietly removed. In these cases, | ||
45 | * if the number of deleted items would free enough space in the table for the | ||
46 | * table to be used efficiently without resizing, it is left the same size and | ||
47 | * rehashing is performed effectively in place, allowing the deleted items to | ||
48 | * be removed. | ||
49 | * <br> | ||
50 | * For info on adding new hashing algorithms, please see the HashFunction class. | ||
51 | *@author Mike Buland | ||
52 | *@todo Fix probing for tables that allow duplicates, and delete an item, then | ||
53 | * insert an item with the same name. | ||
54 | */ | ||
55 | class HashTable | ||
56 | { | ||
57 | public: | ||
58 | /** Constructs a hash table. | ||
59 | *@param hNewFunc A pointer to a hashfunction class to use. If this is | ||
60 | * null the default general string type will be used. | ||
61 | *@param nInitSize The initial size of the hashtable. | ||
62 | *@param bAllowDupes Setting this value to true allows the system to | ||
63 | * insert more than one copy of any given key. This can be tricky, and | ||
64 | * will require you to use the nSkip parameter on the get function. | ||
65 | */ | ||
66 | HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false ); | ||
67 | |||
68 | /** | ||
69 | * Destroys the hashtable, cleaning up all internal storage, but not stored | ||
70 | * elements. Also deletes the HashFunction passed in in the constructor. | ||
71 | */ | ||
72 | ~HashTable(); | ||
73 | |||
74 | /** Inserts an item into the hashtable. This function will trigger a | ||
75 | * rehash if adding another item would force the table's load factor over | ||
76 | * the danger level. | ||
77 | *@param id used to find the data later. | ||
78 | *@param data The data item to insert into the table with the identifier | ||
79 | * id | ||
80 | *@returns True if insertion was successfull, and false if it failed. | ||
81 | */ | ||
82 | bool insert( const void *id, const void *data ); | ||
83 | |||
84 | /** Gets an item in the hashtable based on the id of that item. If there | ||
85 | * is more than one item with the same id you can use the nSkip parameter | ||
86 | * to access all of them. | ||
87 | *@param id The id of the item you're trying to find. | ||
88 | *@param nSkip The number of items with that id to skip before returning | ||
89 | * with the requested item. | ||
90 | *@returns A pointer to the data stored at the given id. | ||
91 | */ | ||
92 | const void *get( const void *id, unsigned long int nSkip=0 ); | ||
93 | |||
94 | /** Gets the total capacity of the hashtable. This is actually the number | ||
95 | * of total positions available inside the hashtable at the moment. This | ||
96 | * will change when the hashtable's load exceeds the danger level. | ||
97 | * Please note that this is NOT the actual amount of space available. | ||
98 | * In reality you can only access about 45-50 percent of that space. | ||
99 | *@returns The total capacity. | ||
100 | */ | ||
101 | unsigned long int getCapacity(); | ||
102 | |||
103 | /** Gets the number of filled in items in the hash table. This is roughly | ||
104 | * equivelent to the getSize function assosiated with the Lists. | ||
105 | *@returns The number of filled in items in the hash table. | ||
106 | */ | ||
107 | unsigned long int getSize(); | ||
108 | |||
109 | /** Gets the load (percentage) of filled in items in the table. This is | ||
110 | * technically the size divided by the capacity, but is definately usefull | ||
111 | * since it's required to check if it's time to rehash. | ||
112 | *@returns The table load in the range 0.0 to 1.0 | ||
113 | */ | ||
114 | double getLoad(); | ||
115 | |||
116 | /** Sets up an xPos object for use indexing the items in the table. Call | ||
117 | * this first and follow the directions for getNextItemPos below to | ||
118 | * iterate through every item in the table, while avoiding the empty | ||
119 | * spaces. | ||
120 | *@returns A pointer to a xPos object telling the hashtable where to find | ||
121 | * the item you're looking at. | ||
122 | */ | ||
123 | void *getFirstItemPos(); | ||
124 | |||
125 | /** Get the item's data that is being pointed to by xPos. This is only | ||
126 | * valid after xPos was created using getFirstItemPos and getNextItemPos | ||
127 | * was called at least once. | ||
128 | *@param xPos supplied by getFirstItemPos. | ||
129 | *@returns The key value that was used to insert the data into the table. | ||
130 | */ | ||
131 | const void *getItemData( void *xPos ); | ||
132 | |||
133 | /** Get the item's ID that is being pointed to by xPos. This is only | ||
134 | * valid after xPos was created using getFirstItemPos and getNextItemPos | ||
135 | * was called at least once. | ||
136 | *@param xPos supplied by getFirstItemPos. | ||
137 | *@returns The key value that was used to insert the data into the table. | ||
138 | */ | ||
139 | const void *getItemID( void *xPos ); | ||
140 | |||
141 | /** Used for iterating through a hash table sequentially. This will | ||
142 | * update the xPos pointer to point to the next time, all ready to | ||
143 | * be accessed with getItemID and getItemData. This must be called at | ||
144 | * least once before xPos is meaningful, and will return a NULL when it | ||
145 | * has reached the last item. | ||
146 | *@param xPos This must be an object created by a call to the function | ||
147 | * getFirstItemPos, and is only meaningful to the internal routines. | ||
148 | * Aborting a call in the middle (not running to the end of the table) | ||
149 | * may result in a memory leak at the moment. | ||
150 | *@returns xPos if still iterating through the list, otherwise it will | ||
151 | * return NULL when the end has been reached and the xPos variable has | ||
152 | * been deleted. | ||
153 | */ | ||
154 | void *getNextItemPos( void *xPos ); | ||
155 | |||
156 | /** A helpful operator to make accessing items easier. Please note that | ||
157 | * this simply returns a pointer to the data stored internally, and cannot | ||
158 | * be used like the STL operator to store new data, use insert for that. | ||
159 | *@param id The identifier used to store the requested item. | ||
160 | *@returns The data value assosiated with the given id, or NULL if it | ||
161 | * wasn't found in the table. | ||
162 | */ | ||
163 | const void *operator[](const void *id); | ||
164 | |||
165 | /** | ||
166 | * Delete the specified item from the hashtable. This actually keeps the | ||
167 | * data and marks it deleted. For all intents and purposes to the user it | ||
168 | * is deleted, except that the space is still used until a rehash is forced. | ||
169 | * This means that in hashtables where elements are being inserted and | ||
170 | * deleted frequently you may run into a higher rate of expansion. | ||
171 | *@param id The ID to delete. | ||
172 | *@param nSkip The number of similar id's to skip before deleting in a | ||
173 | * hashtable that allows duplicates. | ||
174 | *@returns True if the element was found and deleted, false otherwise. | ||
175 | */ | ||
176 | bool del( const void *id, int nSkip=0 ); | ||
177 | |||
178 | private: | ||
179 | /** | ||
180 | * Contains info related to a position in the hashtable. Used for | ||
181 | * searching through hashtables one item at a time, in order. This class | ||
182 | * should never be created by anything but a HashTable, and should never | ||
183 | * be referenced directly. Instead the hashtable returns a void pointer, | ||
184 | * which is what should be passed back in next time you use a search | ||
185 | * function. Always finish a search, since the object is deleted at the | ||
186 | * end of the search. | ||
187 | *@author Mike Buland | ||
188 | */ | ||
189 | class HashPos | ||
190 | { | ||
191 | public: | ||
192 | /** Create a blank HashPos. */ | ||
193 | HashPos() { bStarted=false; nPos = 0; }; | ||
194 | /** Has the search been started? */ | ||
195 | bool bStarted; | ||
196 | /** The position (index) into the backend storage structure. */ | ||
197 | unsigned long int nPos; | ||
198 | }; | ||
199 | |||
200 | /** | ||
201 | * All data related to a single element in the hashtable. This should | ||
202 | * really only be used and manipulated by the HashTable itself. | ||
203 | *@author Mike Buland | ||
204 | */ | ||
205 | typedef struct HashNode | ||
206 | { | ||
207 | public: | ||
208 | /** Create a new, empty HashNode. */ | ||
209 | HashNode() { id = NULL; data = NULL; bDeleted = false; }; | ||
210 | /** A pointer to the original ID that was used to key the data. */ | ||
211 | const void *id; | ||
212 | /** A pointer to the data stored along with the above ID. */ | ||
213 | const void *data; | ||
214 | /** Weather or not this data should really...exist */ | ||
215 | bool bDeleted; | ||
216 | } HashNode; | ||
217 | |||
218 | private: | ||
219 | /** | ||
220 | * Just sets the values in the element to some friendly values. | ||
221 | *@param newID The new ID to store. | ||
222 | *@param newData The new Data to store. | ||
223 | */ | ||
224 | void set( int j, const void *newID, const void *newData ); | ||
225 | /** | ||
226 | * Tells you if the node is filled or not. | ||
227 | *@returns True=an ID has been stored here, False=no ID. | ||
228 | */ | ||
229 | bool isFilled( int j ); | ||
230 | /** | ||
231 | * This actually resizes, but since every resize requires a reHash to go | ||
232 | * along with it, that's the name. This actually creates a new buffer for | ||
233 | * all of the contained data and then pulls every old element that was in | ||
234 | * the old table out and performs the hashing placement calculations again. | ||
235 | * This function skips all data that was marked as deleted, so at this | ||
236 | * point it really will be. | ||
237 | *@param nNewSize The new size to set the table to while re-hashing. | ||
238 | *@returns True if the operation was successful, false otherwise. | ||
239 | */ | ||
240 | bool reHash( unsigned long int nNewSize ); | ||
241 | |||
242 | /** | ||
243 | * Helper function to allocate a new table. Really just does the memory | ||
244 | * allocation. | ||
245 | *@param nNewSize The size of the table to generate. | ||
246 | *@returns A new, blank array of HashNode objects the size you specified. | ||
247 | */ | ||
248 | HashNode *newTable( unsigned long int nNewSize ); | ||
249 | |||
250 | /** | ||
251 | * This function is used once an actual hash code is obtained. nStart is | ||
252 | * the given hash code, which is then wrapped to the size of the table. If | ||
253 | * there is data at that location, tests are performed to see if it's the | ||
254 | * right one. If it is, then it is returned, otherwise a series of further | ||
255 | * tests based on a 2^n search pattern is performed. The position of the | ||
256 | * requested data in the back-end storage is returned if found, otherwise | ||
257 | * another less useful value is returned... | ||
258 | *@param nStart The initial hashcode of the ID testing for. | ||
259 | *@param id A pointer to the id that is being searched for. | ||
260 | *@returns The real location of the data requested. | ||
261 | */ | ||
262 | unsigned long int probe( unsigned long int nStart, const void *id ); | ||
263 | |||
264 | /** | ||
265 | * Simple helper function to determine if a number is prime or not. | ||
266 | * This function runs in sqrt(n) time. | ||
267 | *@param num Number to test for prime-hood. | ||
268 | *@returns True if the number is prime, false otherwise. | ||
269 | */ | ||
270 | bool isPrime( int num ); | ||
271 | |||
272 | /** | ||
273 | * Given any number, this function finds the first number after it that is | ||
274 | * prime. Since this number is a multiple internally it's rare that the | ||
275 | * starting number would be prime. | ||
276 | *@param base The number to start the prime search on. | ||
277 | *@returns The first prime after the number given. | ||
278 | */ | ||
279 | int nextPrime( int base ); | ||
280 | |||
281 | #ifdef HASH_DEBUG_VIS | ||
282 | void printDebugLine( const char *exData ); | ||
283 | #endif | ||
284 | |||
285 | /** A pointer to the HashFunction subclass instance to use. */ | ||
286 | HashFunction *hFunc; | ||
287 | /** The complete array of HashNode objects to store data in. */ | ||
288 | HashNode *aTable; | ||
289 | /** The actual size of the table, not how many elements are in it. */ | ||
290 | unsigned long int nTableSize; | ||
291 | /** The number of elements that are in the table. */ | ||
292 | unsigned long int nSize; | ||
293 | /** The number of elements that are unavailable now. */ | ||
294 | unsigned long int nFilled; | ||
295 | /** Allow duplicate ID's in the table. */ | ||
296 | bool bAllowDupes; | ||
297 | }; | ||
298 | |||
299 | #endif | ||