1 files changed, 345 insertions, 0 deletions
diff --git a/src/hashtable.cpp b/src/hashtable.cpp
new file mode 100644
index 0000000..9dfe653
--- /dev/null
+++ b/src/hashtable.cpp
@@ -0,0 +1,345 @@
+#include <string.h>
+#include <stdio.h>
+#include <math.h>
+#include "hashtable.h"
+HashTable::HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes )
+{
+        hFunc = hNewFunc;
+        nTableSize = nextPrime( nInitSize );
+        aTable = new HashNode[nTableSize];
+        //for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id || aTable[j].data || aTable[j].bDeleted ) printf("Unclean entry\n");
+        nSize = 0;
+        nFilled = 0;
+        this->bAllowDupes = bAllowDupes;
+}
+HashTable::~HashTable()
+{
+        delete[] aTable;
+        delete hFunc;
+}
+void HashTable::set( int j, const void *newID, const void *newData )
+{
+        if( newData == NULL )
+        {
+                printf("Inserting NULL data is indestinguishable from uninserted data!\n");
+        }
+        aTable[j].id = newID;
+        aTable[j].data = newData;
+}
+bool HashTable::isFilled( int j )
+{
+        return (aTable[j].id != NULL)||(aTable[j].bDeleted);
+}
+bool HashTable::reHash( unsigned long int nNewSize )
+{
+        HashNode *aOldTable = aTable;
+        unsigned long int oldSize = nTableSize;
+        // If the table can still be used if we just get rid of deleted items, don't
+        // change the size of the table, otherwise, go ahead and use the number
+        // passed in.
+        if( nSize > nTableSize>>1 )
+        {
+                nTableSize = nextPrime( nNewSize );
+        }
+        aTable = newTable( nTableSize );
+        //for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id || aTable[j].data || aTable[j].bDeleted ) printf("Unclean entry\n");
+        nSize = 0;
+        nFilled = 0;
+        for( unsigned long int j = 0; j < oldSize; j++ )
+        {
+                if( aOldTable[j].id != NULL && aOldTable[j].bDeleted == false )
+                {
+                        insert( aOldTable[j].id, aOldTable[j].data );
+                }
+        }
+        delete[] aOldTable;
+}
+unsigned long int HashTable::probe( unsigned long int nStart, const void *id )
+{
+        int nHash = nStart;
+        nStart = nStart%nTableSize;
+        if( bAllowDupes == true )
+        {
+                for(
+                        unsigned long int j=0;
+                        isFilled( nStart ) && j < 32;
+                        nStart = (nStart+(1<<j))%nTableSize, j++
+                        );
+                /**
+                 * This is an ugly little hack.  If the hash table is too full in allow-
+                 * dups mode we have to fall back on a linear search, otherwise you can
+                 * only get up to 32 entries with the same name.
+                 */
+                if( isFilled( nStart ) )
+                {
+                        int nOldStart = nStart;
+                        for(
+                                nStart++;
+                                isFilled( nStart ) && nStart != nOldStart;
+                                nStart = (nStart+1)%nTableSize
+                           );
+                }
+        }
+        else
+        {
+                for(
+                        unsigned long int j=0;
+                        isFilled( nStart ) && j < 32;
+                        nStart = (nStart+(1<<j))%nTableSize, j++
+                        )
+                {
+                        if( isFilled( nStart ) )
+                        {
+                                if( hFunc->cmpIDs( aTable[nStart].id, id ) == true &&
+                                        aTable[nStart].bDeleted == false )
+                                {
+                                        return nStart;
+                                }
+                        }
+                }
+        }
+        // This is our insurance, if the table is full, then go ahead and rehash,
+        // then try again.
+        if( isFilled( nStart ) )
+        {
+                reHash( getCapacity()*2 );
+                return probe( nHash, id );
+        }
+        return nStart;
+}
+HashTable::HashNode *HashTable::newTable( unsigned long int nNewSize )
+{
+        return new HashNode[nNewSize];
+}
+#ifdef HASH_DEBUG_VIS
+void HashTable::printDebugLine( const char *exData )
+{
+        char *buf = new char[getCapacity()+3];
+        int j;
+        buf[0] = '[';
+        for( j = 0; j < getCapacity(); j++ )
+        {
+                buf[j+1] = (aTable[j].bDeleted)?('X'):((isFilled( j ))?('#'):('-'));
+        }
+        buf[j+1] = ']';
+        buf[j+2] = '\0';
+        printf("%s %s\n", buf, exData );
+        delete[] buf;
+}
+#endif
+bool HashTable::insert( const void *id, const void *data )
+{
+        unsigned long int nPos = probe( hFunc->hash( id ), id )%nTableSize;
+        if( bAllowDupes == true )
+        {
+                if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false )
+                {
+                        set( nPos, id, data );
+#ifdef HASH_DEBUG_VIS
+                        printDebugLine( (const char *)id );
+#endif
+                        nSize++;
+                        nFilled++;
+                        return true;
+                }
+                else
+                {
+                        return false;
+                }
+        }
+        else
+        {
+                if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false )
+                {
+                        set( nPos, id, data );
+#ifdef HASH_DEBUG_VIS
+                        printDebugLine( (const char *)id );
+#endif
+                        nSize++;
+                        nFilled++;
+                        return true;
+                }
+                else if( hFunc->cmpIDs( aTable[nPos].id, id ) == true )
+                {
+                        set( nPos, id, data );
+#ifdef HASH_DEBUG_VIS
+                        printDebugLine( (const char *)id );
+#endif
+                        return true;
+                }
+                else
+                {
+                        return false;
+                }
+        }
+}
+const void *HashTable::get( const void *id, unsigned long int nSkip )
+{
+        unsigned long int nPos = hFunc->hash( id )%nTableSize;
+        for( unsigned long int j=0; j < 32; nPos = (nPos+(1<<j))%nTableSize, j++ )
+        {
+                if( !isFilled( nPos ) ) return NULL;
+                if( hFunc->cmpIDs( id, aTable[nPos].id ) &&
+                        aTable[nPos].bDeleted == false )
+                {
+                        if( nSkip == 0 )
+                        {
+                                return aTable[nPos].data;
+                        }
+                        else
+                        {
+                                nSkip--;
+                        }
+                }
+        }
+        return NULL;
+}
+void *HashTable::getFirstItemPos()
+{
+        HashPos *pos = new HashPos;
+        return pos;
+}
+const void *HashTable::getItemData( void *xPos )
+{
+        return aTable[((HashPos *)xPos)->nPos].data;
+}
+const void *HashTable::getItemID( void *xPos )
+{
+        return aTable[((HashPos *)xPos)->nPos].id;
+}
+void *HashTable::getNextItemPos( void *xPos )
+{
+        HashPos *pos = (HashPos *)xPos;
+        if( pos->bStarted == false )
+        {
+                pos->bStarted = true;
+                pos->nPos = 0;
+        }
+        else
+        {
+                pos->nPos++;
+        }
+        if( pos->nPos < nTableSize )
+        {
+                for( ; pos->nPos < nTableSize; pos->nPos++ )
+                {
+                        if( isFilled( pos->nPos ) &&
+                                aTable[pos->nPos].bDeleted == false )
+                        {
+                                return xPos;
+                        }
+                }
+        }
+        delete pos;
+        return NULL;
+}
+// Big-O sqrt(n)
+// Change this to be erethpothynies table with a storage
+// lookup later on.
+bool HashTable::isPrime (int num)
+{
+    if (num == 2)         // the only even prime
+        return true;
+    else if (num % 2 == 0)     // other even numbers are composite
+        return false;
+    else
+    {
+        //bool prime = true;
+        int divisor = 3;
+        int upperLimit = static_cast<int>(sqrt(num) + 1);
+        while (divisor <= upperLimit)
+        {
+            if (num % divisor == 0)
+                                return false;
+            //    prime = false;
+            divisor +=2;
+        }
+        return true;
+    }
+}
+// Big-O n^(3/2)
+int HashTable::nextPrime( int base )
+{
+        int nPrime;
+        for( nPrime = base; isPrime( nPrime ) == false; nPrime++ );
+        return nPrime;
+}
+unsigned long int HashTable::getCapacity()
+{
+        return nTableSize;
+}
+unsigned long int HashTable::getSize()
+{
+        return nSize;
+}
+double HashTable::getLoad()
+{
+        return (double)(nFilled)/(double)(nTableSize);
+}
+const void *HashTable::operator[](const void *id)
+{
+        return get( id );
+}
+bool HashTable::del( const void *id, int nSkip )
+{
+        unsigned long int nPos = hFunc->hash( id )%nTableSize;
+        for( unsigned long int j=0; j < 32; nPos = (nPos+(1<<j))%nTableSize, j++ )
+        {
+                if( !isFilled( nPos ) ) return false;
+                if( hFunc->cmpIDs( id, aTable[nPos].id ) &&
+                        aTable[nPos].bDeleted == false )
+                {
+                        if( nSkip == 0 )
+                        {
+                                aTable[nPos].bDeleted = true;
+//                              aTable[nPos].
+                                nSize--;
+#ifdef HASH_DEBUG_VIS
+                                printDebugLine( (const char *)id );
+#endif
+                                return true;
+                        }
+                        else
+                        {
+                                nSkip--;
+                        }
+                }
+        }
+        return false;
+}

diff --git a/src/hashtable.cpp b/src/hashtable.cpp new file mode 100644 index 0000000..9dfe653 --- /dev/null +++ b/src/hashtable.cpp
@@ -0,0 +1,345 @@
	1	#include <string.h>
	2	#include <stdio.h>
	3	#include <math.h>
	4
	5	#include "hashtable.h"
	6
	7	HashTable::HashTable( HashFunction *hNewFunc, unsigned long int nInitSize, bool bAllowDupes )
	8	{
	9	hFunc = hNewFunc;
	10	nTableSize = nextPrime( nInitSize );
	11	aTable = new HashNode[nTableSize];
	12	//for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id \|\| aTable[j].data \|\| aTable[j].bDeleted ) printf("Unclean entry\n");
	13	nSize = 0;
	14	nFilled = 0;
	15	this->bAllowDupes = bAllowDupes;
	16	}
	17
	18	HashTable::~HashTable()
	19	{
	20	delete[] aTable;
	21	delete hFunc;
	22	}
	23
	24	void HashTable::set( int j, const void newID, const void newData )
	25	{
	26	if( newData == NULL )
	27	{
	28	printf("Inserting NULL data is indestinguishable from uninserted data!\n");
	29	}
	30	aTable[j].id = newID;
	31	aTable[j].data = newData;
	32	}
	33
	34	bool HashTable::isFilled( int j )
	35	{
	36	return (aTable[j].id != NULL)\|\|(aTable[j].bDeleted);
	37	}
	38
	39	bool HashTable::reHash( unsigned long int nNewSize )
	40	{
	41	HashNode *aOldTable = aTable;
	42	unsigned long int oldSize = nTableSize;
	43
	44	// If the table can still be used if we just get rid of deleted items, don't
	45	// change the size of the table, otherwise, go ahead and use the number
	46	// passed in.
	47	if( nSize > nTableSize>>1 )
	48	{
	49	nTableSize = nextPrime( nNewSize );
	50	}
	51
	52	aTable = newTable( nTableSize );
	53	//for( int j = 0; j < nTableSize; j++ ) if( aTable[j].id \|\| aTable[j].data \|\| aTable[j].bDeleted ) printf("Unclean entry\n");
	54
	55	nSize = 0;
	56	nFilled = 0;
	57
	58	for( unsigned long int j = 0; j < oldSize; j++ )
	59	{
	60	if( aOldTable[j].id != NULL && aOldTable[j].bDeleted == false )
	61	{
	62	insert( aOldTable[j].id, aOldTable[j].data );
	63	}
	64	}
	65
	66	delete[] aOldTable;
	67	}
	68
	69	unsigned long int HashTable::probe( unsigned long int nStart, const void *id )
	70	{
	71	int nHash = nStart;
	72	nStart = nStart%nTableSize;
	73	if( bAllowDupes == true )
	74	{
	75	for(
	76	unsigned long int j=0;
	77	isFilled( nStart ) && j < 32;
	78	nStart = (nStart+(1<<j))%nTableSize, j++
	79	);
	80
	81	/**
	82	* This is an ugly little hack. If the hash table is too full in allow-
	83	* dups mode we have to fall back on a linear search, otherwise you can
	84	* only get up to 32 entries with the same name.
	85	*/
	86	if( isFilled( nStart ) )
	87	{
	88	int nOldStart = nStart;
	89	for(
	90	nStart++;
	91	isFilled( nStart ) && nStart != nOldStart;
	92	nStart = (nStart+1)%nTableSize
	93	);
	94	}
	95	}
	96	else
	97	{
	98	for(
	99	unsigned long int j=0;
	100	isFilled( nStart ) && j < 32;
	101	nStart = (nStart+(1<<j))%nTableSize, j++
	102	)
	103	{
	104	if( isFilled( nStart ) )
	105	{
	106	if( hFunc->cmpIDs( aTable[nStart].id, id ) == true &&
	107	aTable[nStart].bDeleted == false )
	108	{
	109	return nStart;
	110	}
	111	}
	112	}
	113	}
	114	// This is our insurance, if the table is full, then go ahead and rehash,
	115	// then try again.
	116	if( isFilled( nStart ) )
	117	{
	118	reHash( getCapacity()*2 );
	119	return probe( nHash, id );
	120	}
	121	return nStart;
	122	}
	123
	124	HashTable::HashNode *HashTable::newTable( unsigned long int nNewSize )
	125	{
	126	return new HashNode[nNewSize];
	127	}
	128
	129	#ifdef HASH_DEBUG_VIS
	130	void HashTable::printDebugLine( const char *exData )
	131	{
	132	char *buf = new char[getCapacity()+3];
	133	int j;
	134	buf[0] = '[';
	135	for( j = 0; j < getCapacity(); j++ )
	136	{
	137	buf[j+1] = (aTable[j].bDeleted)?('X'):((isFilled( j ))?('#'):('-'));
	138	}
	139	buf[j+1] = ']';
	140	buf[j+2] = '\0';
	141	printf("%s %s\n", buf, exData );
	142	delete[] buf;
	143	}
	144	#endif
	145
	146	bool HashTable::insert( const void id, const void data )
	147	{
	148	unsigned long int nPos = probe( hFunc->hash( id ), id )%nTableSize;
	149
	150	if( bAllowDupes == true )
	151	{
	152	if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false )
	153	{
	154	set( nPos, id, data );
	155	#ifdef HASH_DEBUG_VIS
	156	printDebugLine( (const char *)id );
	157	#endif
	158	nSize++;
	159	nFilled++;
	160	return true;
	161	}
	162	else
	163	{
	164	return false;
	165	}
	166	}
	167	else
	168	{
	169	if( aTable[nPos].id == NULL && aTable[nPos].bDeleted == false )
	170	{
	171	set( nPos, id, data );
	172	#ifdef HASH_DEBUG_VIS
	173	printDebugLine( (const char *)id );
	174	#endif
	175	nSize++;
	176	nFilled++;
	177	return true;
	178	}
	179	else if( hFunc->cmpIDs( aTable[nPos].id, id ) == true )
	180	{
	181	set( nPos, id, data );
	182	#ifdef HASH_DEBUG_VIS
	183	printDebugLine( (const char *)id );
	184	#endif
	185	return true;
	186	}
	187	else
	188	{
	189	return false;
	190	}
	191	}
	192	}
	193
	194	const void HashTable::get( const void id, unsigned long int nSkip )
	195	{
	196	unsigned long int nPos = hFunc->hash( id )%nTableSize;
	197
	198	for( unsigned long int j=0; j < 32; nPos = (nPos+(1<<j))%nTableSize, j++ )
	199	{
	200	if( !isFilled( nPos ) ) return NULL;
	201	if( hFunc->cmpIDs( id, aTable[nPos].id ) &&
	202	aTable[nPos].bDeleted == false )
	203	{
	204	if( nSkip == 0 )
	205	{
	206	return aTable[nPos].data;
	207	}
	208	else
	209	{
	210	nSkip--;
	211	}
	212	}
	213	}
	214
	215	return NULL;
	216	}
	217
	218	void *HashTable::getFirstItemPos()
	219	{
	220	HashPos *pos = new HashPos;
	221	return pos;
	222	}
	223
	224	const void HashTable::getItemData( void xPos )
	225	{
	226	return aTable[((HashPos *)xPos)->nPos].data;
	227	}
	228
	229	const void HashTable::getItemID( void xPos )
	230	{
	231	return aTable[((HashPos *)xPos)->nPos].id;
	232	}
	233
	234	void HashTable::getNextItemPos( void xPos )
	235	{
	236	HashPos pos = (HashPos )xPos;
	237	if( pos->bStarted == false )
	238	{
	239	pos->bStarted = true;
	240	pos->nPos = 0;
	241	}
	242	else
	243	{
	244	pos->nPos++;
	245	}
	246	if( pos->nPos < nTableSize )
	247	{
	248	for( ; pos->nPos < nTableSize; pos->nPos++ )
	249	{
	250	if( isFilled( pos->nPos ) &&
	251	aTable[pos->nPos].bDeleted == false )
	252	{
	253	return xPos;
	254	}
	255	}
	256	}
	257
	258	delete pos;
	259
	260	return NULL;
	261	}
	262
	263	// Big-O sqrt(n)
	264	// Change this to be erethpothynies table with a storage
	265	// lookup later on.
	266	bool HashTable::isPrime (int num)
	267	{
	268	if (num == 2) // the only even prime
	269	return true;
	270	else if (num % 2 == 0) // other even numbers are composite
	271	return false;
	272	else
	273	{
	274	//bool prime = true;
	275	int divisor = 3;
	276	int upperLimit = static_cast<int>(sqrt(num) + 1);
	277	while (divisor <= upperLimit)
	278	{
	279	if (num % divisor == 0)
	280	return false;
	281	// prime = false;
	282	divisor +=2;
	283	}
	284	return true;
	285	}
	286	}
	287
	288	// Big-O n^(3/2)
	289	int HashTable::nextPrime( int base )
	290	{
	291	int nPrime;
	292	for( nPrime = base; isPrime( nPrime ) == false; nPrime++ );
	293	return nPrime;
	294	}
	295
	296	unsigned long int HashTable::getCapacity()
	297	{
	298	return nTableSize;
	299	}
	300
	301	unsigned long int HashTable::getSize()
	302	{
	303	return nSize;
	304	}
	305
	306	double HashTable::getLoad()
	307	{
	308	return (double)(nFilled)/(double)(nTableSize);
	309	}
	310
	311	const void HashTable::operator[](const void id)
	312	{
	313	return get( id );
	314	}
	315
	316	bool HashTable::del( const void *id, int nSkip )
	317	{
	318	unsigned long int nPos = hFunc->hash( id )%nTableSize;
	319
	320	for( unsigned long int j=0; j < 32; nPos = (nPos+(1<<j))%nTableSize, j++ )
	321	{
	322	if( !isFilled( nPos ) ) return false;
	323	if( hFunc->cmpIDs( id, aTable[nPos].id ) &&
	324	aTable[nPos].bDeleted == false )
	325	{
	326	if( nSkip == 0 )
	327	{
	328	aTable[nPos].bDeleted = true;
	329	// aTable[nPos].
	330	nSize--;
	331	#ifdef HASH_DEBUG_VIS
	332	printDebugLine( (const char *)id );
	333	#endif
	334	return true;
	335	}
	336	else
	337	{
	338	nSkip--;
	339	}
	340	}
	341	}
	342
	343	return false;
	344	}
	345