From 505410a5c3b93f81deda8b9653c2237a433be5c1 Mon Sep 17 00:00:00 2001
From: Mike Buland <eichlan@xagasoft.com>
Date: Thu, 15 Mar 2007 06:57:03 +0000
Subject: This version may break hashing of strings, but at least you can hash
 FStrings... templates are confusing.

---
 src/fstring.cpp       |  11 ++++
 src/fstring.h         | 174 +++++++++++++++++++++++++++++++++-----------------
 src/hash.cpp          |  82 +++++++++++-------------
 src/hash.h            |  37 ++++-------
 src/tests/fstring.cpp |  10 +++
 5 files changed, 186 insertions(+), 128 deletions(-)

(limited to 'src')
diff --git a/src/fstring.cpp b/src/fstring.cpp
index 8fe2941..82d024d 100644
--- a/src/fstring.cpp
+++ b/src/fstring.cpp
@@ -1,2 +1,13 @@
 #include "fstring.h"
+#include "hash.h"
+
+template<> uint32_t __calcHashCode<FString>( const FString &k ) // hash an FString by its character data
+{
+	return __calcHashCode( k.c_str() ); // const c_str() yields const char *, so the C-string hash is used (NULL, i.e. empty, hashes to 0)
+}
+
+template<> bool __cmpHashKeys<FString>( const FString &a, const FString &b ) // key equality for FString keys
+{
+	return a == b; // delegates to FBasicString::operator ==( const MyType & ) const
+}
 
diff --git a/src/fstring.h b/src/fstring.h
index db54cdd..95ba382 100644
--- a/src/fstring.h
+++ b/src/fstring.h
@@ -13,7 +13,15 @@ struct FStringChunk
 };
 
 /**
- *
+ * Flexible String class.  This class was designed with string passing and
+ * generation in mind.  Like the standard string class you can specify what
+ * datatype to use for each character.  Unlike the standard string class,
+ * collection of appended and prepended terms is done lazily, making long
+ * operations that involve many appends very inexpensive.  In addition internal
+ * ref-counting means that if you pass strings around between functions there's
+ * almost no overhead in time or memory since a reference is created and no
+ * data is actually copied.  This also means that you never need to put any
+ * FBasicString into a ref-counting container class.
  */
 template< typename chr=char, typename chralloc=std::allocator<chr>, typename chunkalloc=std::allocator<struct FStringChunk<chr> > >
 class FBasicString
@@ -49,6 +57,7 @@ public:
 		append( pData, nLength );
 	}
 
+	/*
 	FBasicString( MyType &rSrc ) :
 		nLength( 0 ),
 		pnRefs( NULL ),
@@ -56,9 +65,9 @@ public:
 		pLast( NULL )
 	{
 		joinShare( rSrc );
-	}
+	}*/
 
-	FBasicString( const FBasicString<chr, chralloc, chunkalloc> &rSrc ) :
+	FBasicString( const MyType &rSrc ) :
 		nLength( 0 ),
 		pnRefs( NULL ),
 		pFirst( NULL ),
@@ -68,7 +77,8 @@ public:
 		// In the case that the source were flat, we could get a reference, it
 		// would make some things faster, but not matter in many other cases.
 
-		copyFrom( rSrc );
+		joinShare( rSrc );
+		//copyFrom( rSrc );
 	}
 
 	virtual ~FBasicString()
@@ -118,28 +128,7 @@ public:
 
 	void clear()
 	{
-		if( pFirst == NULL )
-			return;
-
-		if( isShared() )
-		{
-			decRefs();
-		}
-		else
-		{
-			Chunk *i = pFirst;
-			for(;;)
-			{
-				Chunk *n = i->pNext;
-				aChr.deallocate( i->pData, i->nLength+1 );
-				aChunk.deallocate( i, 1 );
-				if( n == NULL )
-					break;
-				i = n;
-			}
-			pFirst = pLast = NULL;
-			nLength = 0;
-		}
+		realClear();
 	}
 
 	chr *c_str()
@@ -150,6 +139,15 @@ public:
 		flatten();
 		return pFirst->pData;
 	}
+	
+	const chr *c_str() const // read-only overload, usable on const strings
+	{
+		if( pFirst == NULL ) // no chunks at all: an empty string is reported as NULL
+			return NULL;
+
+		flatten(); // const, but may replace the mutable chunk list with a single chunk
+		return pFirst->pData; // character data of the first chunk
+	}
 
 	MyType &operator +=( const chr *pData )
 	{
@@ -168,26 +166,20 @@ public:
 
 	MyType &operator =( const MyType &rSrc )
 	{
-		if( rSrc.isFlat() )
-		{
+		//if( rSrc.isFlat() )
+		//{
 			joinShare( rSrc );
-		}
-		else
-		{
-			copyFrom( rSrc );
-		}
+		//}
+		//else
+		//{
+		//	copyFrom( rSrc );
+		//}
+		//
 
 		return (*this);
 	}
 	
-	MyType &operator =( MyType &rSrc )
-	{
-		joinShare( rSrc );
-
-		return (*this);
-	}
-
-	bool operator ==( const chr *pData )
+	bool operator ==( const chr *pData ) const
 	{
 		if( pFirst == NULL ) {
 			if( pData == NULL )
@@ -206,8 +198,33 @@ public:
 
 		return true;
 	}
+	
+	bool operator ==( const MyType &pData ) const // content equality between two strings
+	{
+		if( pFirst == pData.pFirst ) // same chunk list, or both empty
+			return true;
+		if( pFirst == NULL || pData.pFirst == NULL ) // exactly one side is empty
+			return false;
+
+		flatten(); // both sides must be single chunks before comparing
+		pData.flatten();
+		const chr *a = pData.pFirst->pData;
+		const chr *b = pFirst->pData;
+		for( ; *a == *b; a++, b++ ) // advance while the characters agree...
+		{
+			if( *a == (chr)0 ) // ...equal only if both end at the same place
+				return true;
+		}
+
+		return false; // mismatch, or one string is a strict prefix of the other
+	}
 
-	bool operator !=(const chr *pData )
+	bool operator !=(const chr *pData ) const
+	{
+		return !(*this == pData);
+	}
+	
+	bool operator !=(const MyType &pData ) const
 	{
 		return !(*this == pData);
 	}
@@ -218,9 +235,16 @@ public:
 
 		return pFirst->pData[nIndex];
 	}
+	
+	const chr &operator[]( long nIndex ) const // read-only element access for const strings
+	{
+		flatten(); // collapse to one chunk so a plain index into pData works
+
+		return pFirst->pData[nIndex]; // no bounds or NULL check here: caller must pass a valid index on a non-empty string
+	}
 
 private:
-	void flatten()
+	void flatten() const
 	{
 		if( isFlat() )
 			return;
@@ -241,9 +265,36 @@ private:
 			if( i == NULL )
 				break;
 		}
-		clear();
+		realClear();
 
-		appendChunk( pNew );
+		pLast = pFirst = pNew;
+		nLength = pNew->nLength;
+	}
+	
+	void realClear() const // body of clear(); const so flatten() and destroyShare() can call it (members are mutable)
+	{
+		if( pFirst == NULL ) // nothing allocated
+			return;
+
+		if( isShared() )
+		{
+			decRefs(); // shared data: drop a reference instead of freeing; NOTE(review): pFirst/pLast/nLength are not reset in this branch
+		}
+		else
+		{
+			Chunk *i = pFirst;
+			for(;;) // walk the chunk list, freeing each chunk and its data
+			{
+				Chunk *n = i->pNext; // fetch the successor before i is released
+				aChr.deallocate( i->pData, i->nLength+1 ); // nLength+1 elements are released per chunk
+				aChunk.deallocate( i, 1 );
+				if( n == NULL )
+					break;
+				i = n;
+			}
+			pFirst = pLast = NULL; // leave the object empty
+			nLength = 0;
+		}
+	}
 	
 	void copyFrom( const FBasicString<chr, chralloc, chunkalloc> &rSrc )
@@ -279,14 +330,14 @@ private:
 		return (pnRefs != NULL);
 	}
 
-	Chunk *newChunk()
+	Chunk *newChunk() const
 	{
 		Chunk *pNew = aChunk.allocate( 1 );
 		pNew->pNext = NULL;
 		return pNew;
 	}
 	
-	Chunk *newChunk( long nLen )
+	Chunk *newChunk( long nLen ) const
 	{
 		Chunk *pNew = aChunk.allocate( 1 );
 		pNew->pNext = NULL;
@@ -365,7 +416,7 @@ private:
 	 * that was being shared so that this copy can be changed.  This should be
 	 * added before any call that will change this object;
 	 */
-	void unShare()
+	void unShare() const
 	{
 		if( isShared() == false )
 			return;
@@ -382,8 +433,8 @@ private:
 				break;
 		}
 		decRefs();
-		appendChunk( pNew );
-		decRefs();
+		pLast = pFirst = pNew;
+		nLength = pNew->nLength;
 	}
 
 	/**
@@ -391,7 +442,7 @@ private:
 	 * count hits zero because of this, it destroys the share.  This is not
 	 * safe to call on it's own, it's much better to call unShare.
 	 */
-	void decRefs()
+	void decRefs() const
 	{
 		if( isShared() )
 		{
@@ -414,14 +465,14 @@ private:
 	 * itself.  This should only be called when the refcount for the share has
 	 * or is about to reach zero.
 	 */
-	void destroyShare()
+	void destroyShare() const
 	{
 		delete pnRefs;
 		pnRefs = NULL;
-		clear();
+		realClear();
 	}
 
-	void cpy( chr *dest, const chr *src, long count )
+	void cpy( chr *dest, const chr *src, long count ) const
 	{
 		for( int j = 0; j < count; j++ )
 		{
@@ -441,15 +492,20 @@ private:
 	}
 
 private:
-	long nLength;
+	mutable long nLength;
 	mutable uint32_t *pnRefs;
-	Chunk *pFirst;
-	Chunk *pLast;
+	mutable Chunk *pFirst;
+	mutable Chunk *pLast;
 
-	chralloc aChr;
-	chunkalloc aChunk;
+	mutable chralloc aChr;
+	mutable chunkalloc aChunk;
 };
 
 typedef FBasicString<char> FString;
 
+#include "hash.h"
+template<> uint32_t __calcHashCode<FString>( const FString &k );
+template<> bool __cmpHashKeys<FString>( const FString &a, const FString &b );
+
+
 #endif
diff --git a/src/hash.cpp b/src/hash.cpp
index d428dd6..004d6dd 100644
--- a/src/hash.cpp
+++ b/src/hash.cpp
@@ -2,48 +2,57 @@
 
 subExceptionDef( HashException )
 
-template<> uint32_t __calcHashCode<const int>( const int k )
+template<> uint32_t __calcHashCode<int>( const int &k )
 {
 	return k;
 }
 
-template<> bool __cmpHashKeys<const int>( const int a, const int b )
+template<> bool __cmpHashKeys<int>( const int &a, const int &b )
 {
 	return a == b;
 }
 
-template<> uint32_t __calcHashCode<int>( int k )
+template<> uint32_t __calcHashCode<unsigned int>( const unsigned int &k )
 {
 	return k;
 }
 
-template<> bool __cmpHashKeys<int>( int a, int b )
+template<> bool __cmpHashKeys<unsigned int>( const unsigned int &a, const unsigned int &b )
 {
 	return a == b;
 }
 
-template<> uint32_t __calcHashCode<const unsigned int>( const unsigned int k )
+template<>
+uint32_t __calcHashCode<const char *>( const char * const &k ) // hash a NUL-terminated C string key
 {
-	return k;
-}
+	if (k == NULL) // a NULL key hashes to 0
+	{
+		return 0;
+	}
+	
+	unsigned long int nPos = 0;
+	for( const char *s = k; *s; s++ ) // stops at the first NUL
+	{
+		nPos = *s + (nPos << 6) + (nPos << 16) - nPos; // nPos*65599 + *s; *s is plain char, so bytes >= 0x80 depend on char signedness
+	}
 
-template<> bool __cmpHashKeys<const unsigned int>( const unsigned int a, const unsigned int b )
-{
-	return a == b;
+	return nPos; // converted to uint32_t on return
 }
 
-template<> uint32_t __calcHashCode<unsigned int>( unsigned int k )
+template<> bool __cmpHashKeys<const char *>( const char * const &a, const char * const &b ) // C-string key equality
 {
-	return k;
-}
+	if( a == b ) // same pointer; also covers two NULL keys
+		return true;
 
-template<> bool __cmpHashKeys<unsigned int>( unsigned int a, unsigned int b )
-{
-	return a == b;
+	for(int j=0; a && b && a[j] == b[j]; j++ ) // a lone NULL key never matches and is never dereferenced
+		if( a[j] == '\0' ) // test the current character, not the first: both strings ended together
+			return true;
+
+	return false; // characters differ at index j, or one key is NULL
 }
 
 template<>
-uint32_t __calcHashCode<const char *>( const char * k )
+uint32_t __calcHashCode<char *>( char * const &k )
 {
 	if (k == NULL)
 	{
@@ -59,30 +68,19 @@ uint32_t __calcHashCode<const char *>( const char * k )
 	return nPos;
 }
 
-template<> bool __cmpHashKeys<const char *>( const char *a, const char *b )
+template<> bool __cmpHashKeys<char *>( char * const &a, char * const &b ) // C-string key equality
 {
 	if( a == b )
 		return true;
 
-	for(; *a == *b; a++, b++ )
-		if( *a == '\0' )
+	for(int j=0; a && b && a[j] == b[j]; j++ ) // a lone NULL key never matches and is never dereferenced
+		if( a[j] == '\0' ) // a no longer advances, so the terminator test must index with j
 			return true;
 
 	return false;
 }
 
-template<>
-uint32_t __calcHashCode<char *>( char *k )
-{
-	return __calcHashCode<const char *>((const char *)k );
-}
-
-template<> bool __cmpHashKeys<char *>( char *a, char *b )
-{
-	return __cmpHashKeys<const char *>((const char *)a, (const char *)b );
-}
-
-template<> uint32_t __calcHashCode<const std::string>( const std::string k )
+template<> uint32_t __calcHashCode<std::string>( const std::string &k )
 {
 	std::string::size_type j, sz = k.size();
 	const char *s = k.c_str();
@@ -96,28 +94,20 @@ template<> uint32_t __calcHashCode<const std::string>( const std::string k )
 	return nPos;
 }
 
-template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b )
+template<> bool __cmpHashKeys<std::string>( const std::string &a, const std::string &b )
 {
 	return a == b;
 }
 
-template<> uint32_t __calcHashCode<std::string>( std::string k )
-{
-	return __calcHashCode<const std::string>( k );
-}
-
-template<> bool __cmpHashKeys<std::string>( std::string a, std::string b )
+template<> uint32_t __calcHashCode<Hashable>( const Hashable &k )
 {
-	return __cmpHashKeys<const std::string>( a, b );
+	return 0;
+	//return k.getHashCode();
 }
 
-template<> uint32_t __calcHashCode<Hashable &>( Hashable &k )
+template<> bool __cmpHashKeys<Hashable>( const Hashable &a, const Hashable &b )
 {
-	return k.getHashCode();
-}
-
-template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b )
-{
-	return a.compareForHash( b );
+	return false;
+	//return a.compareForHash( b );
 }
 
diff --git a/src/hash.h b/src/hash.h
index 7f1ac65..6671ae6 100644
--- a/src/hash.h
+++ b/src/hash.h
@@ -18,10 +18,10 @@ enum eHashException
 };
 
 template<typename T>
-uint32_t __calcHashCode( T k );
+uint32_t __calcHashCode( const T &k );
 
 template<typename T>
-bool __cmpHashKeys( T a, T b );
+bool __cmpHashKeys( const T &a, const T &b );
 
 struct __calcNextTSize_fast
 {
@@ -649,31 +649,22 @@ protected:
 	sizecalc szCalc;
 };
 
-template<> uint32_t __calcHashCode<const int>( const int k );
-template<> bool __cmpHashKeys<const int>( const int a, const int b );
+template<> uint32_t __calcHashCode<int>( const int &k );
+template<> bool __cmpHashKeys<int>( const int &a, const int &b );
 
-template<> uint32_t __calcHashCode<int>( int k );
-template<> bool __cmpHashKeys<int>( int a, int b );
+template<> uint32_t __calcHashCode<unsigned int>( const unsigned int &k );
+template<> bool __cmpHashKeys<unsigned int>( const unsigned int &a, const unsigned int &b );
 
-template<> uint32_t __calcHashCode<const unsigned int>( const unsigned int k );
-template<> bool __cmpHashKeys<const unsigned int>( const unsigned int a, const unsigned int b );
+template<> uint32_t __calcHashCode<const char *>( const char * const &k );
+template<> bool __cmpHashKeys<const char *>( const char * const &a, const char * const &b );
 
-template<> uint32_t __calcHashCode<unsigned int>( unsigned int k );
-template<> bool __cmpHashKeys<unsigned int>( unsigned int a, unsigned int b );
+template<> uint32_t __calcHashCode<char *>( char * const &k );
+template<> bool __cmpHashKeys<char *>( char * const &a, char * const &b );
 
-template<> uint32_t __calcHashCode<const char *>( const char *k );
-template<> bool __cmpHashKeys<const char *>( const char *a, const char *b );
+template<> uint32_t __calcHashCode<std::string>( const std::string &k );
+template<> bool __cmpHashKeys<std::string>( const std::string &a, const std::string &b );
 
-template<> uint32_t __calcHashCode<char *>( char *k );
-template<> bool __cmpHashKeys<char *>( char *a, char *b );
-
-template<> uint32_t __calcHashCode<const std::string>( const std::string k );
-template<> bool __cmpHashKeys<const std::string>( const std::string a, const std::string b );
-
-template<> uint32_t __calcHashCode<std::string>( std::string k );
-template<> bool __cmpHashKeys<std::string>( std::string a, std::string b );
-
-template<> uint32_t __calcHashCode<Hashable &>( Hashable &k );
-template<> bool __cmpHashKeys<Hashable &>( Hashable &a, Hashable &b );
+template<> uint32_t __calcHashCode<Hashable>( const Hashable &k );
+template<> bool __cmpHashKeys<Hashable>( const Hashable &a, const Hashable &b );
 
 #endif
diff --git a/src/tests/fstring.cpp b/src/tests/fstring.cpp
index cb85282..33e24b4 100644
--- a/src/tests/fstring.cpp
+++ b/src/tests/fstring.cpp
@@ -1,3 +1,4 @@
+#include "hash.h"
 #include "fstring.h"
 
 FString genThing()
@@ -11,6 +12,11 @@ FString genThing()
 	return bob;
 }
 
+void thing( FString str ) // takes the string by value, so each call copy-constructs an FString
+{
+	printf("Hey:  %s\n", str.c_str() ); // NOTE(review): c_str() may return NULL for an empty string - confirm callers never pass one
+}
+
 #define pem printf("---------\n%08X: %s\n%08X: %s\n", (unsigned int)str.c_str(), str.c_str(), (unsigned int)str2.c_str(), str2.c_str() );
 int main( int argc, char *argv )
 {
@@ -33,5 +39,9 @@ int main( int argc, char *argv )
 
 	str = str2;
 	pem;
+
+	thing( str2 );
+	
+	printf("%d == %d\n", __calcHashCode( str ), __calcHashCode( str.c_str() ) );
 }
 
-- 
cgit v1.2.3