diff options
| author | Mike Buland <eichlan@xagasoft.com> | 2006-05-01 17:11:04 +0000 |
|---|---|---|
| committer | Mike Buland <eichlan@xagasoft.com> | 2006-05-01 17:11:04 +0000 |
| commit | f7a9549bd6ad83f2e0bceec9cddacfa5e3f84a54 (patch) | |
| tree | 53cec4864776e07950e3c72f2a990a1017d08045 /src/hashtable.h | |
| download | libbu++-f7a9549bd6ad83f2e0bceec9cddacfa5e3f84a54.tar.gz libbu++-f7a9549bd6ad83f2e0bceec9cddacfa5e3f84a54.tar.bz2 libbu++-f7a9549bd6ad83f2e0bceec9cddacfa5e3f84a54.tar.xz libbu++-f7a9549bd6ad83f2e0bceec9cddacfa5e3f84a54.zip | |
libbu++ is finally laid out the way it should be, trunk, branches, and tags.
Diffstat (limited to '')
| -rw-r--r-- | src/hashtable.h | 299 |
1 files changed, 299 insertions, 0 deletions
diff --git a/src/hashtable.h b/src/hashtable.h new file mode 100644 index 0000000..d14be71 --- /dev/null +++ b/src/hashtable.h | |||
| @@ -0,0 +1,299 @@ | |||
| 1 | /**\hashtable.h | ||
| 2 | * Describes the HashFunction, HashFunctionString, and HashTable classes. It | ||
| 3 | * was just easier to put them all in one set of files. | ||
| 4 | *@author Mike Buland | ||
| 5 | */ | ||
| 6 | |||
| 7 | #ifndef HASH_TABLE_H | ||
| 8 | #define HASH_TABLE_H | ||
| 9 | |||
| 10 | //Uncomment this line to see a cool text-mode visualization of what's going on | ||
| 11 | //#define HASH_DEBUG_VIS 1 | ||
| 12 | |||
| 13 | #include <stdlib.h> | ||
| 14 | #include <string.h> | ||
| 15 | #include <ctype.h> | ||
| 16 | |||
| 17 | #include "hashfunction.h" | ||
| 18 | |||
| 19 | /** | ||
| 20 | * A simple yet flexable hash-table. This uses several tricks to help ensure | ||
| 21 | * that the table is always running at maximum efficiency. You no longer have | ||
| 22 | * to specify a "danger fill level" when more space is needed a rehash is | ||
| 23 | * automatically trigered. Deleting elements is fully supported, as well as | ||
| 24 | * duplicate elements. To work with and allow duplicates simple construct your | ||
| 25 | * HashTable the way you normally would, but when deleting or getting elements | ||
| 26 | * you can specify a skip value. This effectively allows you to treat elements | ||
| 27 | * with duplicate ID's as though they were in a zero-based array. The first | ||
| 28 | * element inserted with a given ID would be at skip zero, the next at skip 1 | ||
| 29 | * and so on. This allows you to quickly search for elements with duplicate | ||
| 30 | * names, just stop when you get a null for a skip number, i.e. | ||
| 31 | * <pre> | ||
| 32 | * for( int j = 0;; j++ ) | ||
| 33 | * { | ||
| 34 | * void *pData = hash.get( myID, j ); | ||
| 35 | * if( !pData ) break; | ||
| 36 | * // Do something interesting with pData | ||
| 37 | * } | ||
| 38 | * </pre> | ||
| 39 | * There are new features in this HashTable that also allow for memory saving | ||
| 40 | * when dealing with systems where many elements are being deleted from the | ||
| 41 | * table. In those cases the elements deleted cannot be simply deleted, instead | ||
| 42 | * they have to be marked as deleted and hidden from the user, but maintained in | ||
| 43 | * the table so that future hashing operations don't fail. When rehashing | ||
| 44 | * occurs all elements marked as deleted are quietly removed. In these cases, | ||
| 45 | * if the number of deleted items would free enough space in the table for the | ||
| 46 | * table to be used efficiently without resizing, it is left the same size and | ||
| 47 | * rehashing is performed effectively in place, allowing the deleted items to | ||
| 48 | * be removed. | ||
| 49 | * <br> | ||
| 50 | * For info on adding new hashing algorithms, please see the HashFunction class. | ||
| 51 | *@author Mike Buland | ||
| 52 | *@todo Fix probing for tables that allow duplicates, and delete an item, then | ||
| 53 | * insert an item with the same name. | ||
| 54 | */ | ||
| 55 | class HashTable | ||
| 56 | { | ||
| 57 | public: | ||
| 58 | /** Constructs a hash table. | ||
| 59 | *@param hNewFunc A pointer to a hashfunction class to use. If this is | ||
| 60 | * null the default general string type will be used. | ||
| 61 | *@param nInitSize The initial size of the hashtable. | ||
| 62 | *@param bAllowDupes Setting this value to true allows the system to | ||
| 63 | * insert more than one copy of any given key. This can be tricky, and | ||
| 64 | * will require you to use the nSkip parameter on the get function. | ||
| 65 | */ | ||
| 66 | HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false ); | ||
| 67 | |||
| 68 | /** | ||
| 69 | * Destroys the hashtable, cleaning up all internal storage, but not stored | ||
| 70 | * elements. Also deletes the HashFunction passed in in the constructor. | ||
| 71 | */ | ||
| 72 | ~HashTable(); | ||
| 73 | |||
| 74 | /** Inserts an item into the hashtable. This function will trigger a | ||
| 75 | * rehash if adding another item would force the table's load factor over | ||
| 76 | * the danger level. | ||
| 77 | *@param id used to find the data later. | ||
| 78 | *@param data The data item to insert into the table with the identifier | ||
| 79 | * id | ||
| 80 | *@returns True if insertion was successfull, and false if it failed. | ||
| 81 | */ | ||
| 82 | bool insert( const void *id, const void *data ); | ||
| 83 | |||
| 84 | /** Gets an item in the hashtable based on the id of that item. If there | ||
| 85 | * is more than one item with the same id you can use the nSkip parameter | ||
| 86 | * to access all of them. | ||
| 87 | *@param id The id of the item you're trying to find. | ||
| 88 | *@param nSkip The number of items with that id to skip before returning | ||
| 89 | * with the requested item. | ||
| 90 | *@returns A pointer to the data stored at the given id. | ||
| 91 | */ | ||
| 92 | const void *get( const void *id, unsigned long int nSkip=0 ); | ||
| 93 | |||
| 94 | /** Gets the total capacity of the hashtable. This is actually the number | ||
| 95 | * of total positions available inside the hashtable at the moment. This | ||
| 96 | * will change when the hashtable's load exceeds the danger level. | ||
| 97 | * Please note that this is NOT the actual amount of space available. | ||
| 98 | * In reality you can only access about 45-50 percent of that space. | ||
| 99 | *@returns The total capacity. | ||
| 100 | */ | ||
| 101 | unsigned long int getCapacity(); | ||
| 102 | |||
| 103 | /** Gets the number of filled in items in the hash table. This is roughly | ||
| 104 | * equivelent to the getSize function assosiated with the Lists. | ||
| 105 | *@returns The number of filled in items in the hash table. | ||
| 106 | */ | ||
| 107 | unsigned long int getSize(); | ||
| 108 | |||
| 109 | /** Gets the load (percentage) of filled in items in the table. This is | ||
| 110 | * technically the size divided by the capacity, but is definately usefull | ||
| 111 | * since it's required to check if it's time to rehash. | ||
| 112 | *@returns The table load in the range 0.0 to 1.0 | ||
| 113 | */ | ||
| 114 | double getLoad(); | ||
| 115 | |||
| 116 | /** Sets up an xPos object for use indexing the items in the table. Call | ||
| 117 | * this first and follow the directions for getNextItemPos below to | ||
| 118 | * iterate through every item in the table, while avoiding the empty | ||
| 119 | * spaces. | ||
| 120 | *@returns A pointer to a xPos object telling the hashtable where to find | ||
| 121 | * the item you're looking at. | ||
| 122 | */ | ||
| 123 | void *getFirstItemPos(); | ||
| 124 | |||
| 125 | /** Get the item's data that is being pointed to by xPos. This is only | ||
| 126 | * valid after xPos was created using getFirstItemPos and getNextItemPos | ||
| 127 | * was called at least once. | ||
| 128 | *@param xPos supplied by getFirstItemPos. | ||
| 129 | *@returns The key value that was used to insert the data into the table. | ||
| 130 | */ | ||
| 131 | const void *getItemData( void *xPos ); | ||
| 132 | |||
| 133 | /** Get the item's ID that is being pointed to by xPos. This is only | ||
| 134 | * valid after xPos was created using getFirstItemPos and getNextItemPos | ||
| 135 | * was called at least once. | ||
| 136 | *@param xPos supplied by getFirstItemPos. | ||
| 137 | *@returns The key value that was used to insert the data into the table. | ||
| 138 | */ | ||
| 139 | const void *getItemID( void *xPos ); | ||
| 140 | |||
| 141 | /** Used for iterating through a hash table sequentially. This will | ||
| 142 | * update the xPos pointer to point to the next time, all ready to | ||
| 143 | * be accessed with getItemID and getItemData. This must be called at | ||
| 144 | * least once before xPos is meaningful, and will return a NULL when it | ||
| 145 | * has reached the last item. | ||
| 146 | *@param xPos This must be an object created by a call to the function | ||
| 147 | * getFirstItemPos, and is only meaningful to the internal routines. | ||
| 148 | * Aborting a call in the middle (not running to the end of the table) | ||
| 149 | * may result in a memory leak at the moment. | ||
| 150 | *@returns xPos if still iterating through the list, otherwise it will | ||
| 151 | * return NULL when the end has been reached and the xPos variable has | ||
| 152 | * been deleted. | ||
| 153 | */ | ||
| 154 | void *getNextItemPos( void *xPos ); | ||
| 155 | |||
| 156 | /** A helpful operator to make accessing items easier. Please note that | ||
| 157 | * this simply returns a pointer to the data stored internally, and cannot | ||
| 158 | * be used like the STL operator to store new data, use insert for that. | ||
| 159 | *@param id The identifier used to store the requested item. | ||
| 160 | *@returns The data value assosiated with the given id, or NULL if it | ||
| 161 | * wasn't found in the table. | ||
| 162 | */ | ||
| 163 | const void *operator[](const void *id); | ||
| 164 | |||
| 165 | /** | ||
| 166 | * Delete the specified item from the hashtable. This actually keeps the | ||
| 167 | * data and marks it deleted. For all intents and purposes to the user it | ||
| 168 | * is deleted, except that the space is still used until a rehash is forced. | ||
| 169 | * This means that in hashtables where elements are being inserted and | ||
| 170 | * deleted frequently you may run into a higher rate of expansion. | ||
| 171 | *@param id The ID to delete. | ||
| 172 | *@param nSkip The number of similar id's to skip before deleting in a | ||
| 173 | * hashtable that allows duplicates. | ||
| 174 | *@returns True if the element was found and deleted, false otherwise. | ||
| 175 | */ | ||
| 176 | bool del( const void *id, int nSkip=0 ); | ||
| 177 | |||
| 178 | private: | ||
| 179 | /** | ||
| 180 | * Contains info related to a position in the hashtable. Used for | ||
| 181 | * searching through hashtables one item at a time, in order. This class | ||
| 182 | * should never be created by anything but a HashTable, and should never | ||
| 183 | * be referenced directly. Instead the hashtable returns a void pointer, | ||
| 184 | * which is what should be passed back in next time you use a search | ||
| 185 | * function. Always finish a search, since the object is deleted at the | ||
| 186 | * end of the search. | ||
| 187 | *@author Mike Buland | ||
| 188 | */ | ||
| 189 | class HashPos | ||
| 190 | { | ||
| 191 | public: | ||
| 192 | /** Create a blank HashPos. */ | ||
| 193 | HashPos() { bStarted=false; nPos = 0; }; | ||
| 194 | /** Has the search been started? */ | ||
| 195 | bool bStarted; | ||
| 196 | /** The position (index) into the backend storage structure. */ | ||
| 197 | unsigned long int nPos; | ||
| 198 | }; | ||
| 199 | |||
| 200 | /** | ||
| 201 | * All data related to a single element in the hashtable. This should | ||
| 202 | * really only be used and manipulated by the HashTable itself. | ||
| 203 | *@author Mike Buland | ||
| 204 | */ | ||
| 205 | typedef struct HashNode | ||
| 206 | { | ||
| 207 | public: | ||
| 208 | /** Create a new, empty HashNode. */ | ||
| 209 | HashNode() { id = NULL; data = NULL; bDeleted = false; }; | ||
| 210 | /** A pointer to the original ID that was used to key the data. */ | ||
| 211 | const void *id; | ||
| 212 | /** A pointer to the data stored along with the above ID. */ | ||
| 213 | const void *data; | ||
| 214 | /** Weather or not this data should really...exist */ | ||
| 215 | bool bDeleted; | ||
| 216 | } HashNode; | ||
| 217 | |||
| 218 | private: | ||
| 219 | /** | ||
| 220 | * Just sets the values in the element to some friendly values. | ||
| 221 | *@param newID The new ID to store. | ||
| 222 | *@param newData The new Data to store. | ||
| 223 | */ | ||
| 224 | void set( int j, const void *newID, const void *newData ); | ||
| 225 | /** | ||
| 226 | * Tells you if the node is filled or not. | ||
| 227 | *@returns True=an ID has been stored here, False=no ID. | ||
| 228 | */ | ||
| 229 | bool isFilled( int j ); | ||
| 230 | /** | ||
| 231 | * This actually resizes, but since every resize requires a reHash to go | ||
| 232 | * along with it, that's the name. This actually creates a new buffer for | ||
| 233 | * all of the contained data and then pulls every old element that was in | ||
| 234 | * the old table out and performs the hashing placement calculations again. | ||
| 235 | * This function skips all data that was marked as deleted, so at this | ||
| 236 | * point it really will be. | ||
| 237 | *@param nNewSize The new size to set the table to while re-hashing. | ||
| 238 | *@returns True if the operation was successful, false otherwise. | ||
| 239 | */ | ||
| 240 | bool reHash( unsigned long int nNewSize ); | ||
| 241 | |||
| 242 | /** | ||
| 243 | * Helper function to allocate a new table. Really just does the memory | ||
| 244 | * allocation. | ||
| 245 | *@param nNewSize The size of the table to generate. | ||
| 246 | *@returns A new, blank array of HashNode objects the size you specified. | ||
| 247 | */ | ||
| 248 | HashNode *newTable( unsigned long int nNewSize ); | ||
| 249 | |||
| 250 | /** | ||
| 251 | * This function is used once an actual hash code is obtained. nStart is | ||
| 252 | * the given hash code, which is then wrapped to the size of the table. If | ||
| 253 | * there is data at that location, tests are performed to see if it's the | ||
| 254 | * right one. If it is, then it is returned, otherwise a series of further | ||
| 255 | * tests based on a 2^n search pattern is performed. The position of the | ||
| 256 | * requested data in the back-end storage is returned if found, otherwise | ||
| 257 | * another less useful value is returned... | ||
| 258 | *@param nStart The initial hashcode of the ID testing for. | ||
| 259 | *@param id A pointer to the id that is being searched for. | ||
| 260 | *@returns The real location of the data requested. | ||
| 261 | */ | ||
| 262 | unsigned long int probe( unsigned long int nStart, const void *id ); | ||
| 263 | |||
| 264 | /** | ||
| 265 | * Simple helper function to determine if a number is prime or not. | ||
| 266 | * This function runs in sqrt(n) time. | ||
| 267 | *@param num Number to test for prime-hood. | ||
| 268 | *@returns True if the number is prime, false otherwise. | ||
| 269 | */ | ||
| 270 | bool isPrime( int num ); | ||
| 271 | |||
| 272 | /** | ||
| 273 | * Given any number, this function finds the first number after it that is | ||
| 274 | * prime. Since this number is a multiple internally it's rare that the | ||
| 275 | * starting number would be prime. | ||
| 276 | *@param base The number to start the prime search on. | ||
| 277 | *@returns The first prime after the number given. | ||
| 278 | */ | ||
| 279 | int nextPrime( int base ); | ||
| 280 | |||
| 281 | #ifdef HASH_DEBUG_VIS | ||
| 282 | void printDebugLine( const char *exData ); | ||
| 283 | #endif | ||
| 284 | |||
| 285 | /** A pointer to the HashFunction subclass instance to use. */ | ||
| 286 | HashFunction *hFunc; | ||
| 287 | /** The complete array of HashNode objects to store data in. */ | ||
| 288 | HashNode *aTable; | ||
| 289 | /** The actual size of the table, not how many elements are in it. */ | ||
| 290 | unsigned long int nTableSize; | ||
| 291 | /** The number of elements that are in the table. */ | ||
| 292 | unsigned long int nSize; | ||
| 293 | /** The number of elements that are unavailable now. */ | ||
| 294 | unsigned long int nFilled; | ||
| 295 | /** Allow duplicate ID's in the table. */ | ||
| 296 | bool bAllowDupes; | ||
| 297 | }; | ||
| 298 | |||
| 299 | #endif | ||
