summaryrefslogtreecommitdiff
path: root/src/old/hashtable.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2007-04-03 03:49:53 +0000
committerMike Buland <eichlan@xagasoft.com>2007-04-03 03:49:53 +0000
commitf4c20290509d7ed3a8fd5304577e7a4cc0b9d974 (patch)
tree13cdf64f7cf134f397a7165b7a3fe0807e37026b /src/old/hashtable.h
parent74d4c8cd27334fc7204d5a8773deb3d424565778 (diff)
downloadlibbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.gz
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.bz2
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.xz
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.zip
Ok, no code is left in src, it's all in src/old. We'll gradually move code back
into src as it's fixed and re-org'd. This includes tests, which, I may write a unit test system into libbu++ just to make my life easier.
Diffstat (limited to 'src/old/hashtable.h')
-rw-r--r--src/old/hashtable.h308
1 files changed, 308 insertions, 0 deletions
diff --git a/src/old/hashtable.h b/src/old/hashtable.h
new file mode 100644
index 0000000..179b694
--- /dev/null
+++ b/src/old/hashtable.h
@@ -0,0 +1,308 @@
1/**\hashtable.h
2 * Describes the HashFunction, HashFunctionString, and HashTable classes. It
3 * was just easier to put them all in one set of files.
4 *@author Mike Buland
5 */
6
7#ifndef HASH_TABLE_H
8#define HASH_TABLE_H
9
10//Uncomment this line to see a cool text-mode visualization of what's going on
11//#define HASH_DEBUG_VIS 1
12
13#include <stdlib.h>
14#include <string.h>
15#include <ctype.h>
16
17#include "hashfunction.h"
18
19/**
20 * A simple yet flexable hash-table. This uses several tricks to help ensure
21 * that the table is always running at maximum efficiency. You no longer have
22 * to specify a "danger fill level" when more space is needed a rehash is
23 * automatically trigered. Deleting elements is fully supported, as well as
24 * duplicate elements. To work with and allow duplicates simple construct your
25 * HashTable the way you normally would, but when deleting or getting elements
26 * you can specify a skip value. This effectively allows you to treat elements
27 * with duplicate ID's as though they were in a zero-based array. The first
28 * element inserted with a given ID would be at skip zero, the next at skip 1
29 * and so on. This allows you to quickly search for elements with duplicate
30 * names, just stop when you get a null for a skip number, i.e.
31 * <pre>
32 * for( int j = 0;; j++ )
33 * {
34 * void *pData = hash.get( myID, j );
35 * if( !pData ) break;
36 * // Do something interesting with pData
37 * }
38 * </pre>
39 * There are new features in this HashTable that also allow for memory saving
40 * when dealing with systems where many elements are being deleted from the
41 * table. In those cases the elements deleted cannot be simply deleted, instead
42 * they have to be marked as deleted and hidden from the user, but maintained in
43 * the table so that future hashing operations don't fail. When rehashing
44 * occurs all elements marked as deleted are quietly removed. In these cases,
45 * if the number of deleted items would free enough space in the table for the
46 * table to be used efficiently without resizing, it is left the same size and
47 * rehashing is performed effectively in place, allowing the deleted items to
48 * be removed.
49 * <br>
50 * For info on adding new hashing algorithms, please see the HashFunction class.
51 *@author Mike Buland
52 *@todo Fix probing for tables that allow duplicates, and delete an item, then
53 * insert an item with the same name.
54 */
55class HashTable
56{
57public:
58 /** Constructs a hash table.
59 *@param hNewFunc A pointer to a hashfunction class to use. If this is
60 * null the default general string type will be used.
61 *@param nInitSize The initial size of the hashtable.
62 *@param bAllowDupes Setting this value to true allows the system to
63 * insert more than one copy of any given key. This can be tricky, and
64 * will require you to use the nSkip parameter on the get function.
65 */
66 HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false );
67
68 /**
69 * Destroys the hashtable, cleaning up all internal storage, but not stored
70 * elements. Also deletes the HashFunction passed in in the constructor.
71 */
72 virtual ~HashTable();
73
74 /** Inserts an item into the hashtable. This function will trigger a
75 * rehash if adding another item would force the table's load factor over
76 * the danger level.
77 *@param id used to find the data later.
78 *@param data The data item to insert into the table with the identifier
79 * id
80 *@returns True if insertion was successfull, and false if it failed.
81 */
82 bool insert( const void *id, const void *data );
83
84 /** Gets an item in the hashtable based on the id of that item. If there
85 * is more than one item with the same id you can use the nSkip parameter
86 * to access all of them.
87 *@param id The id of the item you're trying to find.
88 *@param nSkip The number of items with that id to skip before returning
89 * with the requested item.
90 *@returns A pointer to the data stored at the given id.
91 */
92 const void *get( const void *id, unsigned long int nSkip=0 );
93
94 const void *getKey( const void *id, unsigned long int nSkip=0 );
95
96 /** Gets the total capacity of the hashtable. This is actually the number
97 * of total positions available inside the hashtable at the moment. This
98 * will change when the hashtable's load exceeds the danger level.
99 * Please note that this is NOT the actual amount of space available.
100 * In reality you can only access about 45-50 percent of that space.
101 *@returns The total capacity.
102 */
103 unsigned long int getCapacity();
104
105 /** Gets the number of filled in items in the hash table. This is roughly
106 * equivelent to the getSize function assosiated with the Lists.
107 *@returns The number of filled in items in the hash table.
108 */
109 unsigned long int getSize();
110
111 /** Gets the load (percentage) of filled in items in the table. This is
112 * technically the size divided by the capacity, but is definately usefull
113 * since it's required to check if it's time to rehash.
114 *@returns The table load in the range 0.0 to 1.0
115 */
116 double getLoad();
117
118 /** Sets up an xPos object for use indexing the items in the table. Call
119 * this first and follow the directions for getNextItemPos below to
120 * iterate through every item in the table, while avoiding the empty
121 * spaces.
122 *@returns A pointer to a xPos object telling the hashtable where to find
123 * the item you're looking at.
124 */
125 void *getFirstItemPos();
126
127 /** Get the item's data that is being pointed to by xPos. This is only
128 * valid after xPos was created using getFirstItemPos and getNextItemPos
129 * was called at least once.
130 *@param xPos supplied by getFirstItemPos.
131 *@returns The key value that was used to insert the data into the table.
132 */
133 const void *getItemData( void *xPos );
134
135 /** Get the item's ID that is being pointed to by xPos. This is only
136 * valid after xPos was created using getFirstItemPos and getNextItemPos
137 * was called at least once.
138 *@param xPos supplied by getFirstItemPos.
139 *@returns The key value that was used to insert the data into the table.
140 */
141 const void *getItemID( void *xPos );
142
143 /** Used for iterating through a hash table sequentially. This will
144 * update the xPos pointer to point to the next time, all ready to
145 * be accessed with getItemID and getItemData. This must be called at
146 * least once before xPos is meaningful, and will return a NULL when it
147 * has reached the last item.
148 *@param xPos This must be an object created by a call to the function
149 * getFirstItemPos, and is only meaningful to the internal routines.
150 * Aborting a call in the middle (not running to the end of the table)
151 * may result in a memory leak at the moment.
152 *@returns xPos if still iterating through the list, otherwise it will
153 * return NULL when the end has been reached and the xPos variable has
154 * been deleted.
155 */
156 void *getNextItemPos( void *xPos );
157
158 /** A helpful operator to make accessing items easier. Please note that
159 * this simply returns a pointer to the data stored internally, and cannot
160 * be used like the STL operator to store new data, use insert for that.
161 *@param id The identifier used to store the requested item.
162 *@returns The data value assosiated with the given id, or NULL if it
163 * wasn't found in the table.
164 */
165 const void *operator[](const void *id);
166
167 /**
168 * Delete the specified item from the hashtable. This actually keeps the
169 * data and marks it deleted. For all intents and purposes to the user it
170 * is deleted, except that the space is still used until a rehash is forced.
171 * This means that in hashtables where elements are being inserted and
172 * deleted frequently you may run into a higher rate of expansion.
173 *@param id The ID to delete.
174 *@param nSkip The number of similar id's to skip before deleting in a
175 * hashtable that allows duplicates.
176 *@returns True if the element was found and deleted, false otherwise.
177 */
178 bool del( const void *id, int nSkip=0 );
179
180 /**
181 * Deletes every entry in the hash table. See the notes on del to see what
182 * this means, except that since no data is being kept, the entire table is
183 * just marked as usable space.
184 */
185 void clear();
186
187private:
188 /**
189 * Contains info related to a position in the hashtable. Used for
190 * searching through hashtables one item at a time, in order. This class
191 * should never be created by anything but a HashTable, and should never
192 * be referenced directly. Instead the hashtable returns a void pointer,
193 * which is what should be passed back in next time you use a search
194 * function. Always finish a search, since the object is deleted at the
195 * end of the search.
196 *@author Mike Buland
197 */
198 class HashPos
199 {
200 public:
201 /** Create a blank HashPos. */
202 HashPos() { bStarted=false; nPos = 0; };
203 /** Has the search been started? */
204 bool bStarted;
205 /** The position (index) into the backend storage structure. */
206 unsigned long int nPos;
207 };
208
209 /**
210 * All data related to a single element in the hashtable. This should
211 * really only be used and manipulated by the HashTable itself.
212 *@author Mike Buland
213 */
214 typedef struct HashNode
215 {
216 public:
217 /** Create a new, empty HashNode. */
218 HashNode() { id = NULL; data = NULL; bDeleted = false; };
219 /** A pointer to the original ID that was used to key the data. */
220 const void *id;
221 /** A pointer to the data stored along with the above ID. */
222 const void *data;
223 /** Weather or not this data should really...exist */
224 bool bDeleted;
225 } HashNode;
226
227private:
228 /**
229 * Just sets the values in the element to some friendly values.
230 *@param newID The new ID to store.
231 *@param newData The new Data to store.
232 */
233 void set( int j, const void *newID, const void *newData );
234 /**
235 * Tells you if the node is filled or not.
236 *@returns True=an ID has been stored here, False=no ID.
237 */
238 bool isFilled( int j );
239 /**
240 * This actually resizes, but since every resize requires a reHash to go
241 * along with it, that's the name. This actually creates a new buffer for
242 * all of the contained data and then pulls every old element that was in
243 * the old table out and performs the hashing placement calculations again.
244 * This function skips all data that was marked as deleted, so at this
245 * point it really will be.
246 *@param nNewSize The new size to set the table to while re-hashing.
247 *@returns True if the operation was successful, false otherwise.
248 */
249 void reHash( unsigned long int nNewSize );
250
251 /**
252 * Helper function to allocate a new table. Really just does the memory
253 * allocation.
254 *@param nNewSize The size of the table to generate.
255 *@returns A new, blank array of HashNode objects the size you specified.
256 */
257 HashNode *newTable( unsigned long int nNewSize );
258
259 /**
260 * This function is used once an actual hash code is obtained. nStart is
261 * the given hash code, which is then wrapped to the size of the table. If
262 * there is data at that location, tests are performed to see if it's the
263 * right one. If it is, then it is returned, otherwise a series of further
264 * tests based on a 2^n search pattern is performed. The position of the
265 * requested data in the back-end storage is returned if found, otherwise
266 * another less useful value is returned...
267 *@param nStart The initial hashcode of the ID testing for.
268 *@param id A pointer to the id that is being searched for.
269 *@returns The real location of the data requested.
270 */
271 unsigned long int probe( unsigned long int nStart, const void *id );
272
273 /**
274 * Simple helper function to determine if a number is prime or not.
275 * This function runs in sqrt(n) time.
276 *@param num Number to test for prime-hood.
277 *@returns True if the number is prime, false otherwise.
278 */
279 bool isPrime( int num );
280
281 /**
282 * Given any number, this function finds the first number after it that is
283 * prime. Since this number is a multiple internally it's rare that the
284 * starting number would be prime.
285 *@param base The number to start the prime search on.
286 *@returns The first prime after the number given.
287 */
288 int nextPrime( int base );
289
290#ifdef HASH_DEBUG_VIS
291 void printDebugLine( const char *exData );
292#endif
293
294 /** A pointer to the HashFunction subclass instance to use. */
295 HashFunction *hFunc;
296 /** The complete array of HashNode objects to store data in. */
297 HashNode *aTable;
298 /** The actual size of the table, not how many elements are in it. */
299 unsigned long int nTableSize;
300 /** The number of elements that are in the table. */
301 unsigned long int nSize;
302 /** The number of elements that are unavailable now. */
303 unsigned long int nFilled;
304 /** Allow duplicate ID's in the table. */
305 bool bAllowDupes;
306};
307
308#endif