diff options
Diffstat (limited to 'src/old/hashtable.h')
-rw-r--r-- | src/old/hashtable.h | 308 |
1 files changed, 308 insertions, 0 deletions
diff --git a/src/old/hashtable.h b/src/old/hashtable.h new file mode 100644 index 0000000..179b694 --- /dev/null +++ b/src/old/hashtable.h | |||
@@ -0,0 +1,308 @@ | |||
1 | /**\hashtable.h | ||
2 | * Describes the HashFunction, HashFunctionString, and HashTable classes. It | ||
3 | * was just easier to put them all in one set of files. | ||
4 | *@author Mike Buland | ||
5 | */ | ||
6 | |||
7 | #ifndef HASH_TABLE_H | ||
8 | #define HASH_TABLE_H | ||
9 | |||
10 | //Uncomment this line to see a cool text-mode visualization of what's going on | ||
11 | //#define HASH_DEBUG_VIS 1 | ||
12 | |||
13 | #include <stdlib.h> | ||
14 | #include <string.h> | ||
15 | #include <ctype.h> | ||
16 | |||
17 | #include "hashfunction.h" | ||
18 | |||
19 | /** | ||
20 | * A simple yet flexable hash-table. This uses several tricks to help ensure | ||
21 | * that the table is always running at maximum efficiency. You no longer have | ||
22 | * to specify a "danger fill level" when more space is needed a rehash is | ||
23 | * automatically trigered. Deleting elements is fully supported, as well as | ||
24 | * duplicate elements. To work with and allow duplicates simple construct your | ||
25 | * HashTable the way you normally would, but when deleting or getting elements | ||
26 | * you can specify a skip value. This effectively allows you to treat elements | ||
27 | * with duplicate ID's as though they were in a zero-based array. The first | ||
28 | * element inserted with a given ID would be at skip zero, the next at skip 1 | ||
29 | * and so on. This allows you to quickly search for elements with duplicate | ||
30 | * names, just stop when you get a null for a skip number, i.e. | ||
31 | * <pre> | ||
32 | * for( int j = 0;; j++ ) | ||
33 | * { | ||
34 | * void *pData = hash.get( myID, j ); | ||
35 | * if( !pData ) break; | ||
36 | * // Do something interesting with pData | ||
37 | * } | ||
38 | * </pre> | ||
39 | * There are new features in this HashTable that also allow for memory saving | ||
40 | * when dealing with systems where many elements are being deleted from the | ||
41 | * table. In those cases the elements deleted cannot be simply deleted, instead | ||
42 | * they have to be marked as deleted and hidden from the user, but maintained in | ||
43 | * the table so that future hashing operations don't fail. When rehashing | ||
44 | * occurs all elements marked as deleted are quietly removed. In these cases, | ||
45 | * if the number of deleted items would free enough space in the table for the | ||
46 | * table to be used efficiently without resizing, it is left the same size and | ||
47 | * rehashing is performed effectively in place, allowing the deleted items to | ||
48 | * be removed. | ||
49 | * <br> | ||
50 | * For info on adding new hashing algorithms, please see the HashFunction class. | ||
51 | *@author Mike Buland | ||
52 | *@todo Fix probing for tables that allow duplicates, and delete an item, then | ||
53 | * insert an item with the same name. | ||
54 | */ | ||
55 | class HashTable | ||
56 | { | ||
57 | public: | ||
58 | /** Constructs a hash table. | ||
59 | *@param hNewFunc A pointer to a hashfunction class to use. If this is | ||
60 | * null the default general string type will be used. | ||
61 | *@param nInitSize The initial size of the hashtable. | ||
62 | *@param bAllowDupes Setting this value to true allows the system to | ||
63 | * insert more than one copy of any given key. This can be tricky, and | ||
64 | * will require you to use the nSkip parameter on the get function. | ||
65 | */ | ||
66 | HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false ); | ||
67 | |||
68 | /** | ||
69 | * Destroys the hashtable, cleaning up all internal storage, but not stored | ||
70 | * elements. Also deletes the HashFunction passed in in the constructor. | ||
71 | */ | ||
72 | virtual ~HashTable(); | ||
73 | |||
74 | /** Inserts an item into the hashtable. This function will trigger a | ||
75 | * rehash if adding another item would force the table's load factor over | ||
76 | * the danger level. | ||
77 | *@param id used to find the data later. | ||
78 | *@param data The data item to insert into the table with the identifier | ||
79 | * id | ||
80 | *@returns True if insertion was successfull, and false if it failed. | ||
81 | */ | ||
82 | bool insert( const void *id, const void *data ); | ||
83 | |||
84 | /** Gets an item in the hashtable based on the id of that item. If there | ||
85 | * is more than one item with the same id you can use the nSkip parameter | ||
86 | * to access all of them. | ||
87 | *@param id The id of the item you're trying to find. | ||
88 | *@param nSkip The number of items with that id to skip before returning | ||
89 | * with the requested item. | ||
90 | *@returns A pointer to the data stored at the given id. | ||
91 | */ | ||
92 | const void *get( const void *id, unsigned long int nSkip=0 ); | ||
93 | |||
94 | const void *getKey( const void *id, unsigned long int nSkip=0 ); | ||
95 | |||
96 | /** Gets the total capacity of the hashtable. This is actually the number | ||
97 | * of total positions available inside the hashtable at the moment. This | ||
98 | * will change when the hashtable's load exceeds the danger level. | ||
99 | * Please note that this is NOT the actual amount of space available. | ||
100 | * In reality you can only access about 45-50 percent of that space. | ||
101 | *@returns The total capacity. | ||
102 | */ | ||
103 | unsigned long int getCapacity(); | ||
104 | |||
105 | /** Gets the number of filled in items in the hash table. This is roughly | ||
106 | * equivelent to the getSize function assosiated with the Lists. | ||
107 | *@returns The number of filled in items in the hash table. | ||
108 | */ | ||
109 | unsigned long int getSize(); | ||
110 | |||
111 | /** Gets the load (percentage) of filled in items in the table. This is | ||
112 | * technically the size divided by the capacity, but is definately usefull | ||
113 | * since it's required to check if it's time to rehash. | ||
114 | *@returns The table load in the range 0.0 to 1.0 | ||
115 | */ | ||
116 | double getLoad(); | ||
117 | |||
118 | /** Sets up an xPos object for use indexing the items in the table. Call | ||
119 | * this first and follow the directions for getNextItemPos below to | ||
120 | * iterate through every item in the table, while avoiding the empty | ||
121 | * spaces. | ||
122 | *@returns A pointer to a xPos object telling the hashtable where to find | ||
123 | * the item you're looking at. | ||
124 | */ | ||
125 | void *getFirstItemPos(); | ||
126 | |||
127 | /** Get the item's data that is being pointed to by xPos. This is only | ||
128 | * valid after xPos was created using getFirstItemPos and getNextItemPos | ||
129 | * was called at least once. | ||
130 | *@param xPos supplied by getFirstItemPos. | ||
131 | *@returns The key value that was used to insert the data into the table. | ||
132 | */ | ||
133 | const void *getItemData( void *xPos ); | ||
134 | |||
135 | /** Get the item's ID that is being pointed to by xPos. This is only | ||
136 | * valid after xPos was created using getFirstItemPos and getNextItemPos | ||
137 | * was called at least once. | ||
138 | *@param xPos supplied by getFirstItemPos. | ||
139 | *@returns The key value that was used to insert the data into the table. | ||
140 | */ | ||
141 | const void *getItemID( void *xPos ); | ||
142 | |||
143 | /** Used for iterating through a hash table sequentially. This will | ||
144 | * update the xPos pointer to point to the next time, all ready to | ||
145 | * be accessed with getItemID and getItemData. This must be called at | ||
146 | * least once before xPos is meaningful, and will return a NULL when it | ||
147 | * has reached the last item. | ||
148 | *@param xPos This must be an object created by a call to the function | ||
149 | * getFirstItemPos, and is only meaningful to the internal routines. | ||
150 | * Aborting a call in the middle (not running to the end of the table) | ||
151 | * may result in a memory leak at the moment. | ||
152 | *@returns xPos if still iterating through the list, otherwise it will | ||
153 | * return NULL when the end has been reached and the xPos variable has | ||
154 | * been deleted. | ||
155 | */ | ||
156 | void *getNextItemPos( void *xPos ); | ||
157 | |||
158 | /** A helpful operator to make accessing items easier. Please note that | ||
159 | * this simply returns a pointer to the data stored internally, and cannot | ||
160 | * be used like the STL operator to store new data, use insert for that. | ||
161 | *@param id The identifier used to store the requested item. | ||
162 | *@returns The data value assosiated with the given id, or NULL if it | ||
163 | * wasn't found in the table. | ||
164 | */ | ||
165 | const void *operator[](const void *id); | ||
166 | |||
167 | /** | ||
168 | * Delete the specified item from the hashtable. This actually keeps the | ||
169 | * data and marks it deleted. For all intents and purposes to the user it | ||
170 | * is deleted, except that the space is still used until a rehash is forced. | ||
171 | * This means that in hashtables where elements are being inserted and | ||
172 | * deleted frequently you may run into a higher rate of expansion. | ||
173 | *@param id The ID to delete. | ||
174 | *@param nSkip The number of similar id's to skip before deleting in a | ||
175 | * hashtable that allows duplicates. | ||
176 | *@returns True if the element was found and deleted, false otherwise. | ||
177 | */ | ||
178 | bool del( const void *id, int nSkip=0 ); | ||
179 | |||
180 | /** | ||
181 | * Deletes every entry in the hash table. See the notes on del to see what | ||
182 | * this means, except that since no data is being kept, the entire table is | ||
183 | * just marked as usable space. | ||
184 | */ | ||
185 | void clear(); | ||
186 | |||
187 | private: | ||
188 | /** | ||
189 | * Contains info related to a position in the hashtable. Used for | ||
190 | * searching through hashtables one item at a time, in order. This class | ||
191 | * should never be created by anything but a HashTable, and should never | ||
192 | * be referenced directly. Instead the hashtable returns a void pointer, | ||
193 | * which is what should be passed back in next time you use a search | ||
194 | * function. Always finish a search, since the object is deleted at the | ||
195 | * end of the search. | ||
196 | *@author Mike Buland | ||
197 | */ | ||
198 | class HashPos | ||
199 | { | ||
200 | public: | ||
201 | /** Create a blank HashPos. */ | ||
202 | HashPos() { bStarted=false; nPos = 0; }; | ||
203 | /** Has the search been started? */ | ||
204 | bool bStarted; | ||
205 | /** The position (index) into the backend storage structure. */ | ||
206 | unsigned long int nPos; | ||
207 | }; | ||
208 | |||
209 | /** | ||
210 | * All data related to a single element in the hashtable. This should | ||
211 | * really only be used and manipulated by the HashTable itself. | ||
212 | *@author Mike Buland | ||
213 | */ | ||
214 | typedef struct HashNode | ||
215 | { | ||
216 | public: | ||
217 | /** Create a new, empty HashNode. */ | ||
218 | HashNode() { id = NULL; data = NULL; bDeleted = false; }; | ||
219 | /** A pointer to the original ID that was used to key the data. */ | ||
220 | const void *id; | ||
221 | /** A pointer to the data stored along with the above ID. */ | ||
222 | const void *data; | ||
223 | /** Weather or not this data should really...exist */ | ||
224 | bool bDeleted; | ||
225 | } HashNode; | ||
226 | |||
227 | private: | ||
228 | /** | ||
229 | * Just sets the values in the element to some friendly values. | ||
230 | *@param newID The new ID to store. | ||
231 | *@param newData The new Data to store. | ||
232 | */ | ||
233 | void set( int j, const void *newID, const void *newData ); | ||
234 | /** | ||
235 | * Tells you if the node is filled or not. | ||
236 | *@returns True=an ID has been stored here, False=no ID. | ||
237 | */ | ||
238 | bool isFilled( int j ); | ||
239 | /** | ||
240 | * This actually resizes, but since every resize requires a reHash to go | ||
241 | * along with it, that's the name. This actually creates a new buffer for | ||
242 | * all of the contained data and then pulls every old element that was in | ||
243 | * the old table out and performs the hashing placement calculations again. | ||
244 | * This function skips all data that was marked as deleted, so at this | ||
245 | * point it really will be. | ||
246 | *@param nNewSize The new size to set the table to while re-hashing. | ||
247 | *@returns True if the operation was successful, false otherwise. | ||
248 | */ | ||
249 | void reHash( unsigned long int nNewSize ); | ||
250 | |||
251 | /** | ||
252 | * Helper function to allocate a new table. Really just does the memory | ||
253 | * allocation. | ||
254 | *@param nNewSize The size of the table to generate. | ||
255 | *@returns A new, blank array of HashNode objects the size you specified. | ||
256 | */ | ||
257 | HashNode *newTable( unsigned long int nNewSize ); | ||
258 | |||
259 | /** | ||
260 | * This function is used once an actual hash code is obtained. nStart is | ||
261 | * the given hash code, which is then wrapped to the size of the table. If | ||
262 | * there is data at that location, tests are performed to see if it's the | ||
263 | * right one. If it is, then it is returned, otherwise a series of further | ||
264 | * tests based on a 2^n search pattern is performed. The position of the | ||
265 | * requested data in the back-end storage is returned if found, otherwise | ||
266 | * another less useful value is returned... | ||
267 | *@param nStart The initial hashcode of the ID testing for. | ||
268 | *@param id A pointer to the id that is being searched for. | ||
269 | *@returns The real location of the data requested. | ||
270 | */ | ||
271 | unsigned long int probe( unsigned long int nStart, const void *id ); | ||
272 | |||
273 | /** | ||
274 | * Simple helper function to determine if a number is prime or not. | ||
275 | * This function runs in sqrt(n) time. | ||
276 | *@param num Number to test for prime-hood. | ||
277 | *@returns True if the number is prime, false otherwise. | ||
278 | */ | ||
279 | bool isPrime( int num ); | ||
280 | |||
281 | /** | ||
282 | * Given any number, this function finds the first number after it that is | ||
283 | * prime. Since this number is a multiple internally it's rare that the | ||
284 | * starting number would be prime. | ||
285 | *@param base The number to start the prime search on. | ||
286 | *@returns The first prime after the number given. | ||
287 | */ | ||
288 | int nextPrime( int base ); | ||
289 | |||
290 | #ifdef HASH_DEBUG_VIS | ||
291 | void printDebugLine( const char *exData ); | ||
292 | #endif | ||
293 | |||
294 | /** A pointer to the HashFunction subclass instance to use. */ | ||
295 | HashFunction *hFunc; | ||
296 | /** The complete array of HashNode objects to store data in. */ | ||
297 | HashNode *aTable; | ||
298 | /** The actual size of the table, not how many elements are in it. */ | ||
299 | unsigned long int nTableSize; | ||
300 | /** The number of elements that are in the table. */ | ||
301 | unsigned long int nSize; | ||
302 | /** The number of elements that are unavailable now. */ | ||
303 | unsigned long int nFilled; | ||
304 | /** Allow duplicate ID's in the table. */ | ||
305 | bool bAllowDupes; | ||
306 | }; | ||
307 | |||
308 | #endif | ||