1 files changed, 308 insertions, 0 deletions
diff --git a/src/old/hashtable.h b/src/old/hashtable.h
new file mode 100644
index 0000000..179b694
--- /dev/null
+++ b/src/old/hashtable.h
@@ -0,0 +1,308 @@
+/**\hashtable.h
+ * Describes the HashFunction, HashFunctionString, and HashTable classes.  It
+ * was just easier to put them all in one set of files.
+ *@author Mike Buland
+ */
+#ifndef HASH_TABLE_H
+#define HASH_TABLE_H
+//Uncomment this line to see a cool text-mode visualization of what's going on
+//#define HASH_DEBUG_VIS 1
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "hashfunction.h"
+/**
+ * A simple yet flexable hash-table.  This uses several tricks to help ensure
+ * that the table is always running at maximum efficiency.  You no longer have
+ * to specify a "danger fill level" when more space is needed a rehash is
+ * automatically trigered.  Deleting elements is fully supported, as well as
+ * duplicate elements.  To work with and allow duplicates simple construct your
+ * HashTable the way you normally would, but when deleting or getting elements
+ * you can specify a skip value.  This effectively allows you to treat elements
+ * with duplicate ID's as though they were in a zero-based array.  The first
+ * element inserted with a given ID would be at skip zero, the next at skip 1
+ * and so on.  This allows you to quickly search for elements with duplicate
+ * names, just stop when you get a null for a skip number, i.e.
+ * <pre>
+ *   for( int j = 0;; j++ )
+ *   {
+ *       void *pData = hash.get( myID, j );
+ *       if( !pData ) break;
+ *       // Do something interesting with pData
+ *   }
+ * </pre>
+ * There are new features in this HashTable that also allow for memory saving
+ * when dealing with systems where many elements are being deleted from the
+ * table.  In those cases the elements deleted cannot be simply deleted, instead
+ * they have to be marked as deleted and hidden from the user, but maintained in
+ * the table so that future hashing operations don't fail.  When rehashing
+ * occurs all elements marked as deleted are quietly removed.  In these cases,
+ * if the number of deleted items would free enough space in the table for the
+ * table to be used efficiently without resizing, it is left the same size and
+ * rehashing is performed effectively in place, allowing the deleted items to
+ * be removed.
+ * <br>
+ * For info on adding new hashing algorithms, please see the HashFunction class.
+ *@author Mike Buland
+ *@todo Fix probing for tables that allow duplicates, and delete an item, then
+ * insert an item with the same name.
+ */
+class HashTable
+{
+public:
+        /** Constructs a hash table.
+          *@param hNewFunc A pointer to a hashfunction class to use.  If this is
+          * null the default general string type will be used.
+          *@param nInitSize The initial size of the hashtable.
+          *@param bAllowDupes Setting this value to true allows the system to
+          * insert more than one copy of any given key.  This can be tricky, and
+          * will require you to use the nSkip parameter on the get function.
+          */
+        HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false );
+        
+        /**
+         * Destroys the hashtable, cleaning up all internal storage, but not stored
+         * elements.  Also deletes the HashFunction passed in in the constructor.
+         */
+        virtual ~HashTable();
+        /** Inserts an item into the hashtable.  This function will trigger a
+          * rehash if adding another item would force the table's load factor over
+          * the danger level.
+          *@param id used to find the data later.
+          *@param data The data item to insert into the table with the identifier
+          * id
+          *@returns True if insertion was successfull, and false if it failed.
+          */
+        bool insert( const void *id, const void *data );
+        /** Gets an item in the hashtable based on the id of that item.  If there
+          * is more than one item with the same id you can use the nSkip parameter
+          * to access all of them.
+          *@param id The id of the item you're trying to find.
+          *@param nSkip The number of items with that id to skip before returning
+          * with the requested item.
+          *@returns A pointer to the data stored at the given id.
+          */
+        const void *get( const void *id, unsigned long int nSkip=0 );
+        
+        const void *getKey( const void *id, unsigned long int nSkip=0 );
+        /** Gets the total capacity of the hashtable.  This is actually the number
+          * of total positions available inside the hashtable at the moment.  This
+          * will change when the hashtable's load exceeds the danger level.
+          * Please note that this is NOT the actual amount of space available.
+          * In reality you can only access about 45-50 percent of that space.
+          *@returns The total capacity.
+          */
+        unsigned long int getCapacity();
+        /** Gets the number of filled in items in the hash table.  This is roughly
+          * equivelent to the getSize function assosiated with the Lists.
+          *@returns The number of filled in items in the hash table.
+          */
+        unsigned long int getSize();
+        /** Gets the load (percentage) of filled in items in the table.  This is
+          * technically the size divided by the capacity, but is definately usefull
+          * since it's required to check if it's time to rehash.
+          *@returns The table load in the range 0.0 to 1.0
+          */
+        double getLoad();
+        /** Sets up an xPos object for use indexing the items in the table.  Call
+          * this first and follow the directions for getNextItemPos below to
+          * iterate through every item in the table, while avoiding the empty
+          * spaces.
+          *@returns A pointer to a xPos object telling the hashtable where to find
+          * the item you're looking at.
+          */
+        void *getFirstItemPos();
+        /** Get the item's data that is being pointed to by xPos.  This is only
+          * valid after xPos was created using getFirstItemPos and getNextItemPos
+          * was called at least once.
+          *@param xPos supplied by getFirstItemPos.
+          *@returns The key value that was used to insert the data into the table.
+          */
+        const void *getItemData( void *xPos );
+        /** Get the item's ID that is being pointed to by xPos.  This is only
+          * valid after xPos was created using getFirstItemPos and getNextItemPos
+          * was called at least once.
+          *@param xPos supplied by getFirstItemPos.
+          *@returns The key value that was used to insert the data into the table.
+          */
+        const void *getItemID( void *xPos );
+        /** Used for iterating through a hash table sequentially.  This will
+          * update the xPos pointer to point to the next time, all ready to
+          * be accessed with getItemID and getItemData.  This must be called at
+          * least once before xPos is meaningful, and will return a NULL when it
+          * has reached the last item.
+          *@param xPos This must be an object created by a call to the function
+          * getFirstItemPos, and is only meaningful to the internal routines.
+          * Aborting a call in the middle (not running to the end of the table)
+          * may result in a memory leak at the moment.
+          *@returns xPos if still iterating through the list, otherwise it will
+          * return NULL when the end has been reached and the xPos variable has
+          * been deleted.
+          */
+        void *getNextItemPos( void *xPos );
+        /** A helpful operator to make accessing items easier.  Please note that
+          * this simply returns a pointer to the data stored internally, and cannot
+          * be used like the STL operator to store new data, use insert for that.
+          *@param id The identifier used to store the requested item.
+          *@returns The data value assosiated with the given id, or NULL if it
+          * wasn't found in the table.
+          */
+        const void *operator[](const void *id);
+        /**
+         * Delete the specified item from the hashtable.  This actually keeps the
+         * data and marks it deleted.  For all intents and purposes to the user it
+         * is deleted, except that the space is still used until a rehash is forced.
+         * This means that in hashtables where elements are being inserted and
+         * deleted frequently you may run into a higher rate of expansion.
+         *@param id The ID to delete.
+         *@param nSkip The number of similar id's to skip before deleting in a
+         * hashtable that allows duplicates.
+         *@returns True if the element was found and deleted, false otherwise.
+         */
+        bool del( const void *id, int nSkip=0 );
+        /**
+         * Deletes every entry in the hash table.  See the notes on del to see what
+         * this means, except that since no data is being kept, the entire table is
+         * just marked as usable space.
+         */
+        void clear();
+private:
+        /**
+         * Contains info related to a position in the hashtable.  Used for
+         * searching through hashtables one item at a time, in order.  This class
+         * should never be created by anything but a HashTable, and should never
+         * be referenced directly.  Instead the hashtable returns a void pointer,
+         * which is what should be passed back in next time you use a search
+         * function.  Always finish a search, since the object is deleted at the
+         * end of the search.
+         *@author Mike Buland
+         */
+        class HashPos
+        {
+        public:
+                /** Create a blank HashPos. */
+                HashPos() { bStarted=false; nPos = 0; };
+                /** Has the search been started?  */
+                bool bStarted;
+                /** The position (index) into the backend storage structure. */
+                unsigned long int nPos;
+        };
+        /**
+         * All data related to a single element in the hashtable.  This should
+         * really only be used and manipulated by the HashTable itself.
+         *@author Mike Buland
+         */
+        typedef struct HashNode
+        {
+        public:
+                /** Create a new, empty HashNode. */
+                HashNode() { id = NULL; data = NULL; bDeleted = false; };
+                /** A pointer to the original ID that was used to key the data. */
+                const void *id;
+                /** A pointer to the data stored along with the above ID. */
+                const void *data;
+                /** Weather or not this data should really...exist */
+                bool bDeleted;
+        } HashNode;
+private:
+        /**
+         * Just sets the values in the element to some friendly values.
+         *@param newID The new ID to store.
+         *@param newData The new Data to store.
+         */
+        void set( int j, const void *newID, const void *newData );
+        /**
+         * Tells you if the node is filled or not.
+         *@returns True=an ID has been stored here, False=no ID.
+         */
+        bool isFilled( int j );
+        /**
+         * This actually resizes, but since every resize requires a reHash to go
+         * along with it, that's the name.  This actually creates a new buffer for
+         * all of the contained data and then pulls every old element that was in
+         * the old table out and performs the hashing placement calculations again.
+         * This function skips all data that was marked as deleted, so at this
+         * point it really will be.
+         *@param nNewSize The new size to set the table to while re-hashing.
+         *@returns True if the operation was successful, false otherwise.
+         */
+        void reHash( unsigned long int nNewSize );
+        /**
+         * Helper function to allocate a new table.  Really just does the memory
+         * allocation.
+         *@param nNewSize The size of the table to generate.
+         *@returns A new, blank array of HashNode objects the size you specified.
+         */
+        HashNode *newTable( unsigned long int nNewSize );
+        /**
+         * This function is used once an actual hash code is obtained.  nStart is
+         * the given hash code, which is then wrapped to the size of the table.  If
+         * there is data at that location, tests are performed to see if it's the
+         * right one.  If it is, then it is returned, otherwise a series of further
+         * tests based on a 2^n search pattern is performed.  The position of the
+         * requested data in the back-end storage is returned if found, otherwise
+         * another less useful value is returned...
+         *@param nStart The initial hashcode of the ID testing for.
+         *@param id A pointer to the id that is being searched for.
+         *@returns The real location of the data requested.
+         */
+        unsigned long int probe( unsigned long int nStart, const void *id );
+        /**
+         * Simple helper function to determine if a number is prime or not.
+         * This function runs in sqrt(n) time.
+         *@param num Number to test for prime-hood.
+         *@returns True if the number is prime, false otherwise.
+         */
+        bool isPrime( int num );
+        /**
+         * Given any number, this function finds the first number after it that is
+         * prime.  Since this number is a multiple internally it's rare that the
+         * starting number would be prime.
+         *@param base The number to start the prime search on.
+         *@returns The first prime after the number given.
+         */
+        int nextPrime( int base );
+        
+#ifdef HASH_DEBUG_VIS
+        void printDebugLine( const char *exData );
+#endif
+        /** A pointer to the HashFunction subclass instance to use. */
+        HashFunction *hFunc;
+        /** The complete array of HashNode objects to store data in. */
+        HashNode *aTable;
+        /** The actual size of the table, not how many elements are in it. */
+        unsigned long int nTableSize;
+        /** The number of elements that are in the table. */
+        unsigned long int nSize;
+        /** The number of elements that are unavailable now. */
+        unsigned long int nFilled;
+        /** Allow duplicate ID's in the table. */
+        bool bAllowDupes;
+};
+#endif

diff --git a/src/old/hashtable.h b/src/old/hashtable.h new file mode 100644 index 0000000..179b694 --- /dev/null +++ b/src/old/hashtable.h
@@ -0,0 +1,308 @@
	1	/**\hashtable.h
	2	* Describes the HashFunction, HashFunctionString, and HashTable classes. It
	3	* was just easier to put them all in one set of files.
	4	*@author Mike Buland
	5	*/
	6
	7	#ifndef HASH_TABLE_H
	8	#define HASH_TABLE_H
	9
	10	//Uncomment this line to see a cool text-mode visualization of what's going on
	11	//#define HASH_DEBUG_VIS 1
	12
	13	#include <stdlib.h>
	14	#include <string.h>
	15	#include <ctype.h>
	16
	17	#include "hashfunction.h"
	18
	19	/**
	20	* A simple yet flexable hash-table. This uses several tricks to help ensure
	21	* that the table is always running at maximum efficiency. You no longer have
	22	* to specify a "danger fill level" when more space is needed a rehash is
	23	* automatically trigered. Deleting elements is fully supported, as well as
	24	* duplicate elements. To work with and allow duplicates simple construct your
	25	* HashTable the way you normally would, but when deleting or getting elements
	26	* you can specify a skip value. This effectively allows you to treat elements
	27	* with duplicate ID's as though they were in a zero-based array. The first
	28	* element inserted with a given ID would be at skip zero, the next at skip 1
	29	* and so on. This allows you to quickly search for elements with duplicate
	30	* names, just stop when you get a null for a skip number, i.e.
	31	* <pre>
	32	* for( int j = 0;; j++ )
	33	* {
	34	* void *pData = hash.get( myID, j );
	35	* if( !pData ) break;
	36	* // Do something interesting with pData
	37	* }
	38	* </pre>
	39	* There are new features in this HashTable that also allow for memory saving
	40	* when dealing with systems where many elements are being deleted from the
	41	* table. In those cases the elements deleted cannot be simply deleted, instead
	42	* they have to be marked as deleted and hidden from the user, but maintained in
	43	* the table so that future hashing operations don't fail. When rehashing
	44	* occurs all elements marked as deleted are quietly removed. In these cases,
	45	* if the number of deleted items would free enough space in the table for the
	46	* table to be used efficiently without resizing, it is left the same size and
	47	* rehashing is performed effectively in place, allowing the deleted items to
	48	* be removed.
	49	* <br>
	50	* For info on adding new hashing algorithms, please see the HashFunction class.
	51	*@author Mike Buland
	52	*@todo Fix probing for tables that allow duplicates, and delete an item, then
	53	* insert an item with the same name.
	54	*/
	55	class HashTable
	56	{
	57	public:
	58	/** Constructs a hash table.
	59	*@param hNewFunc A pointer to a hashfunction class to use. If this is
	60	* null the default general string type will be used.
	61	*@param nInitSize The initial size of the hashtable.
	62	*@param bAllowDupes Setting this value to true allows the system to
	63	* insert more than one copy of any given key. This can be tricky, and
	64	* will require you to use the nSkip parameter on the get function.
	65	*/
	66	HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes=false );
	67
	68	/**
	69	* Destroys the hashtable, cleaning up all internal storage, but not stored
	70	* elements. Also deletes the HashFunction passed in in the constructor.
	71	*/
	72	virtual ~HashTable();
	73
	74	/** Inserts an item into the hashtable. This function will trigger a
	75	* rehash if adding another item would force the table's load factor over
	76	* the danger level.
	77	*@param id used to find the data later.
	78	*@param data The data item to insert into the table with the identifier
	79	* id
	80	*@returns True if insertion was successfull, and false if it failed.
	81	*/
	82	bool insert( const void id, const void data );
	83
	84	/** Gets an item in the hashtable based on the id of that item. If there
	85	* is more than one item with the same id you can use the nSkip parameter
	86	* to access all of them.
	87	*@param id The id of the item you're trying to find.
	88	*@param nSkip The number of items with that id to skip before returning
	89	* with the requested item.
	90	*@returns A pointer to the data stored at the given id.
	91	*/
	92	const void get( const void id, unsigned long int nSkip=0 );
	93
	94	const void getKey( const void id, unsigned long int nSkip=0 );
	95
	96	/** Gets the total capacity of the hashtable. This is actually the number
	97	* of total positions available inside the hashtable at the moment. This
	98	* will change when the hashtable's load exceeds the danger level.
	99	* Please note that this is NOT the actual amount of space available.
	100	* In reality you can only access about 45-50 percent of that space.
	101	*@returns The total capacity.
	102	*/
	103	unsigned long int getCapacity();
	104
	105	/** Gets the number of filled in items in the hash table. This is roughly
	106	* equivelent to the getSize function assosiated with the Lists.
	107	*@returns The number of filled in items in the hash table.
	108	*/
	109	unsigned long int getSize();
	110
	111	/** Gets the load (percentage) of filled in items in the table. This is
	112	* technically the size divided by the capacity, but is definately usefull
	113	* since it's required to check if it's time to rehash.
	114	*@returns The table load in the range 0.0 to 1.0
	115	*/
	116	double getLoad();
	117
	118	/** Sets up an xPos object for use indexing the items in the table. Call
	119	* this first and follow the directions for getNextItemPos below to
	120	* iterate through every item in the table, while avoiding the empty
	121	* spaces.
	122	*@returns A pointer to a xPos object telling the hashtable where to find
	123	* the item you're looking at.
	124	*/
	125	void *getFirstItemPos();
	126
	127	/** Get the item's data that is being pointed to by xPos. This is only
	128	* valid after xPos was created using getFirstItemPos and getNextItemPos
	129	* was called at least once.
	130	*@param xPos supplied by getFirstItemPos.
	131	*@returns The key value that was used to insert the data into the table.
	132	*/
	133	const void getItemData( void xPos );
	134
	135	/** Get the item's ID that is being pointed to by xPos. This is only
	136	* valid after xPos was created using getFirstItemPos and getNextItemPos
	137	* was called at least once.
	138	*@param xPos supplied by getFirstItemPos.
	139	*@returns The key value that was used to insert the data into the table.
	140	*/
	141	const void getItemID( void xPos );
	142
	143	/** Used for iterating through a hash table sequentially. This will
	144	* update the xPos pointer to point to the next time, all ready to
	145	* be accessed with getItemID and getItemData. This must be called at
	146	* least once before xPos is meaningful, and will return a NULL when it
	147	* has reached the last item.
	148	*@param xPos This must be an object created by a call to the function
	149	* getFirstItemPos, and is only meaningful to the internal routines.
	150	* Aborting a call in the middle (not running to the end of the table)
	151	* may result in a memory leak at the moment.
	152	*@returns xPos if still iterating through the list, otherwise it will
	153	* return NULL when the end has been reached and the xPos variable has
	154	* been deleted.
	155	*/
	156	void getNextItemPos( void xPos );
	157
	158	/** A helpful operator to make accessing items easier. Please note that
	159	* this simply returns a pointer to the data stored internally, and cannot
	160	* be used like the STL operator to store new data, use insert for that.
	161	*@param id The identifier used to store the requested item.
	162	*@returns The data value assosiated with the given id, or NULL if it
	163	* wasn't found in the table.
	164	*/
	165	const void operator[](const void id);
	166
	167	/**
	168	* Delete the specified item from the hashtable. This actually keeps the
	169	* data and marks it deleted. For all intents and purposes to the user it
	170	* is deleted, except that the space is still used until a rehash is forced.
	171	* This means that in hashtables where elements are being inserted and
	172	* deleted frequently you may run into a higher rate of expansion.
	173	*@param id The ID to delete.
	174	*@param nSkip The number of similar id's to skip before deleting in a
	175	* hashtable that allows duplicates.
	176	*@returns True if the element was found and deleted, false otherwise.
	177	*/
	178	bool del( const void *id, int nSkip=0 );
	179
	180	/**
	181	* Deletes every entry in the hash table. See the notes on del to see what
	182	* this means, except that since no data is being kept, the entire table is
	183	* just marked as usable space.
	184	*/
	185	void clear();
	186
	187	private:
	188	/**
	189	* Contains info related to a position in the hashtable. Used for
	190	* searching through hashtables one item at a time, in order. This class
	191	* should never be created by anything but a HashTable, and should never
	192	* be referenced directly. Instead the hashtable returns a void pointer,
	193	* which is what should be passed back in next time you use a search
	194	* function. Always finish a search, since the object is deleted at the
	195	* end of the search.
	196	*@author Mike Buland
	197	*/
	198	class HashPos
	199	{
	200	public:
	201	/** Create a blank HashPos. */
	202	HashPos() { bStarted=false; nPos = 0; };
	203	/** Has the search been started? */
	204	bool bStarted;
	205	/** The position (index) into the backend storage structure. */
	206	unsigned long int nPos;
	207	};
	208
	209	/**
	210	* All data related to a single element in the hashtable. This should
	211	* really only be used and manipulated by the HashTable itself.
	212	*@author Mike Buland
	213	*/
	214	typedef struct HashNode
	215	{
	216	public:
	217	/** Create a new, empty HashNode. */
	218	HashNode() { id = NULL; data = NULL; bDeleted = false; };
	219	/** A pointer to the original ID that was used to key the data. */
	220	const void *id;
	221	/** A pointer to the data stored along with the above ID. */
	222	const void *data;
	223	/** Weather or not this data should really...exist */
	224	bool bDeleted;
	225	} HashNode;
	226
	227	private:
	228	/**
	229	* Just sets the values in the element to some friendly values.
	230	*@param newID The new ID to store.
	231	*@param newData The new Data to store.
	232	*/
	233	void set( int j, const void newID, const void newData );
	234	/**
	235	* Tells you if the node is filled or not.
	236	*@returns True=an ID has been stored here, False=no ID.
	237	*/
	238	bool isFilled( int j );
	239	/**
	240	* This actually resizes, but since every resize requires a reHash to go
	241	* along with it, that's the name. This actually creates a new buffer for
	242	* all of the contained data and then pulls every old element that was in
	243	* the old table out and performs the hashing placement calculations again.
	244	* This function skips all data that was marked as deleted, so at this
	245	* point it really will be.
	246	*@param nNewSize The new size to set the table to while re-hashing.
	247	*@returns True if the operation was successful, false otherwise.
	248	*/
	249	void reHash( unsigned long int nNewSize );
	250
	251	/**
	252	* Helper function to allocate a new table. Really just does the memory
	253	* allocation.
	254	*@param nNewSize The size of the table to generate.
	255	*@returns A new, blank array of HashNode objects the size you specified.
	256	*/
	257	HashNode *newTable( unsigned long int nNewSize );
	258
	259	/**
	260	* This function is used once an actual hash code is obtained. nStart is
	261	* the given hash code, which is then wrapped to the size of the table. If
	262	* there is data at that location, tests are performed to see if it's the
	263	* right one. If it is, then it is returned, otherwise a series of further
	264	* tests based on a 2^n search pattern is performed. The position of the
	265	* requested data in the back-end storage is returned if found, otherwise
	266	* another less useful value is returned...
	267	*@param nStart The initial hashcode of the ID testing for.
	268	*@param id A pointer to the id that is being searched for.
	269	*@returns The real location of the data requested.
	270	*/
	271	unsigned long int probe( unsigned long int nStart, const void *id );
	272
	273	/**
	274	* Simple helper function to determine if a number is prime or not.
	275	* This function runs in sqrt(n) time.
	276	*@param num Number to test for prime-hood.
	277	*@returns True if the number is prime, false otherwise.
	278	*/
	279	bool isPrime( int num );
	280
	281	/**
	282	* Given any number, this function finds the first number after it that is
	283	* prime. Since this number is a multiple internally it's rare that the
	284	* starting number would be prime.
	285	*@param base The number to start the prime search on.
	286	*@returns The first prime after the number given.
	287	*/
	288	int nextPrime( int base );
	289
	290	#ifdef HASH_DEBUG_VIS
	291	void printDebugLine( const char *exData );
	292	#endif
	293
	294	/** A pointer to the HashFunction subclass instance to use. */
	295	HashFunction *hFunc;
	296	/** The complete array of HashNode objects to store data in. */
	297	HashNode *aTable;
	298	/** The actual size of the table, not how many elements are in it. */
	299	unsigned long int nTableSize;
	300	/** The number of elements that are in the table. */
	301	unsigned long int nSize;
	302	/** The number of elements that are unavailable now. */
	303	unsigned long int nFilled;
	304	/** Allow duplicate ID's in the table. */
	305	bool bAllowDupes;
	306	};
	307
	308	#endif