From da89e6d30e57bd6dbb10b4d36b093ce9bbf5c666 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Tue, 3 Apr 2007 04:50:36 +0000 Subject: The first batch seem to have made it alright. Unfortunately the Archive class isn't done yet, I'm going to make it rely on streams, so those will be next, then we can make it work all sortsa' well. --- src/fstring.h | 653 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 653 insertions(+) create mode 100644 src/fstring.h (limited to 'src/fstring.h') diff --git a/src/fstring.h b/src/fstring.h new file mode 100644 index 0000000..717068f --- /dev/null +++ b/src/fstring.h @@ -0,0 +1,653 @@ +#ifndef F_STRING_H +#define F_STRING_H + +#include +#include +#include "archable.h" +#include "archive.h" +#include "hash.h" + +namespace Bu +{ + template< typename chr > + struct FStringChunk + { + long nLength; + chr *pData; + FStringChunk *pNext; + }; + + /** + * Flexible String class. This class was designed with string passing and + * generation in mind. Like the standard string class you can specify what + * datatype to use for each character. Unlike the standard string class, + * collection of appended and prepended terms is done lazily, making long + * operations that involve many appends very inexpensive. In addition internal + * ref-counting means that if you pass strings around between functions there's + * almost no overhead in time or memory since a reference is created and no + * data is actually copied. This also means that you never need to put any + * FBasicString into a ref-counting container class. + */ + template< typename chr, typename chralloc=std::allocator, typename chunkalloc=std::allocator > > + class FBasicString : public Archable + { +#ifndef VALTEST +#define cpy( dest, src, size ) memcpy( dest, src, size*sizeof(chr) ) +#endif + private: + typedef struct FStringChunk Chunk; + typedef struct FBasicString MyType; + + public: + FBasicString() : + nLength( 0 ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + } + + FBasicString( const chr *pData ) : + nLength( 0 ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + append( pData ); + } + + FBasicString( const chr *pData, long nLength ) : + nLength( 0 ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + append( pData, nLength ); + } + + FBasicString( const MyType &rSrc ) : + nLength( 0 ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + // Here we have no choice but to copy, since the other guy is a const. + // In the case that the source were flat, we could get a reference, it + // would make some things faster, but not matter in many other cases. + + joinShare( rSrc ); + //copyFrom( rSrc ); + } + + FBasicString( const MyType &rSrc, long nLength ) : + nLength( 0 ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + append( rSrc.pFirst->pData, nLength ); + } + + FBasicString( const MyType &rSrc, long nStart, long nLength ) : + nLength( 0 ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + append( rSrc.pFirst->pData+nStart, nLength ); + } + + FBasicString( long nSize ) : + nLength( nSize ), + pnRefs( NULL ), + pFirst( NULL ), + pLast( NULL ) + { + pFirst = pLast = newChunk( nSize ); + } + + virtual ~FBasicString() + { + clear(); + } + + void append( const chr *pData ) + { + long nLen; + for( nLen = 0; pData[nLen] != (chr)0; nLen++ ); + + Chunk *pNew = newChunk( nLen ); + cpy( pNew->pData, pData, nLen ); + + appendChunk( pNew ); + } + + void append( const chr *pData, long nLen ) + { + Chunk *pNew = newChunk( nLen ); + + cpy( pNew->pData, pData, nLen ); + + appendChunk( pNew ); + } + + void prepend( const chr *pData ) + { + long nLen; + for( nLen = 0; pData[nLen] != (chr)0; nLen++ ); + + Chunk *pNew = newChunk( nLen ); + cpy( pNew->pData, pData, nLen ); + + prependChunk( pNew ); + } + + void prepend( const chr *pData, long nLen ) + { + Chunk *pNew = newChunk( nLen ); + + cpy( pNew->pData, pData, nLen ); + + prependChunk( pNew ); + } + + void clear() + { + realClear(); + } + + void resize( long nNewSize ) + { + if( nLength == nNewSize ) + return; + + flatten(); + + Chunk *pNew = newChunk( nNewSize ); + long nNewLen = (nNewSizepData, pFirst->pData, nNewLen ); + pNew->pData[nNewLen] = (chr)0; + aChr.deallocate( pFirst->pData, pFirst->nLength+1 ); + aChunk.deallocate( pFirst, 1 ); + pFirst = pLast = pNew; + nLength = nNewSize; + } + + long getSize() const + { + return nLength; + } + + chr *getStr() + { + if( pFirst == NULL ) + return NULL; + + flatten(); + return pFirst->pData; + } + + const chr *getStr() const + { + if( pFirst == NULL ) + return NULL; + + flatten(); + return pFirst->pData; + } + + chr *c_str() + { + if( pFirst == NULL ) + return NULL; + + flatten(); + return pFirst->pData; + } + + const chr *c_str() const + { + if( pFirst == NULL ) + return NULL; + + flatten(); + return pFirst->pData; + } + + MyType &operator +=( const chr *pData ) + { + append( pData ); + + return (*this); + } + + MyType &operator +=( const MyType &rSrc ) + { + rSrc.flatten(); + append( rSrc.pFirst->pData, rSrc.nLength ); + + return (*this); + } + + MyType &operator +=( const chr pData ) + { + chr tmp[2] = { pData, (chr)0 }; + append( tmp ); + + return (*this); + } + + MyType &operator =( const chr *pData ) + { + clear(); + append( pData ); + + return (*this); + } + + MyType &operator =( const MyType &rSrc ) + { + //if( rSrc.isFlat() ) + //{ + joinShare( rSrc ); + //} + //else + //{ + // copyFrom( rSrc ); + //} + // + + return (*this); + } + + bool operator ==( const chr *pData ) const + { + if( pFirst == NULL ) { + if( pData == NULL ) + return true; + return false; + } + + flatten(); + const chr *a = pData; + chr *b = pFirst->pData; + for( ; *a!=(chr)0; a++, b++ ) + { + if( *a != *b ) + return false; + } + + return true; + } + + bool operator ==( const MyType &pData ) const + { + if( pFirst == pData.pFirst ) + return true; + if( pFirst == NULL ) + return false; + + flatten(); + pData.flatten(); + const chr *a = pData.pFirst->pData; + chr *b = pFirst->pData; + for( ; *a!=(chr)0; a++, b++ ) + { + if( *a != *b ) + return false; + } + + return true; + } + + bool operator !=(const chr *pData ) const + { + return !(*this == pData); + } + + bool operator !=(const MyType &pData ) const + { + return !(*this == pData); + } + + chr &operator[]( long nIndex ) + { + flatten(); + + return pFirst->pData[nIndex]; + } + + const chr &operator[]( long nIndex ) const + { + flatten(); + + return pFirst->pData[nIndex]; + } + + bool isWS( long nIndex ) const + { + flatten(); + + return pFirst->pData[nIndex]==' ' || pFirst->pData[nIndex]=='\t' + || pFirst->pData[nIndex]=='\r' || pFirst->pData[nIndex]=='\n'; + } + + bool isAlpha( long nIndex ) const + { + flatten(); + + return (pFirst->pData[nIndex] >= 'a' && pFirst->pData[nIndex] <= 'z') + || (pFirst->pData[nIndex] >= 'A' && pFirst->pData[nIndex] <= 'Z'); + } + + void toLower() + { + flatten(); + unShare(); + + for( long j = 0; j < nLength; j++ ) + { + if( pFirst->pData[j] >= 'A' && pFirst->pData[j] <= 'Z' ) + pFirst->pData[j] -= 'A'-'a'; + } + } + + void toUpper() + { + flatten(); + unShare(); + + for( long j = 0; j < nLength; j++ ) + { + if( pFirst->pData[j] >= 'a' && pFirst->pData[j] <= 'z' ) + pFirst->pData[j] += 'A'-'a'; + } + } + + void archive( class Archive &ar ) + { + if( ar.isLoading() ) + { + clear(); + long nLen; + ar >> nLen; + + Chunk *pNew = newChunk( nLen ); + ar.read( pNew->pData, nLen*sizeof(chr) ); + appendChunk( pNew ); + } + else + { + flatten(); + + ar << nLength; + ar.write( pFirst->pData, nLength*sizeof(chr) ); + } + } + + private: + void flatten() const + { + if( isFlat() ) + return; + + if( pFirst == NULL ) + return; + + unShare(); + + Chunk *pNew = newChunk( nLength ); + chr *pos = pNew->pData; + Chunk *i = pFirst; + for(;;) + { + cpy( pos, i->pData, i->nLength ); + pos += i->nLength; + i = i->pNext; + if( i == NULL ) + break; + } + realClear(); + + pLast = pFirst = pNew; + nLength = pNew->nLength; + } + + void realClear() const + { + if( pFirst == NULL ) + return; + + if( isShared() ) + { + decRefs(); + } + else + { + Chunk *i = pFirst; + for(;;) + { + Chunk *n = i->pNext; + aChr.deallocate( i->pData, i->nLength+1 ); + aChunk.deallocate( i, 1 ); + if( n == NULL ) + break; + i = n; + } + pFirst = pLast = NULL; + nLength = 0; + } + } + + void copyFrom( const FBasicString &rSrc ) + { + if( rSrc.pFirst == NULL ) + return; + + decRefs(); + + Chunk *pNew = newChunk( rSrc.nLength ); + chr *pos = pNew->pData; + Chunk *i = rSrc.pFirst; + for(;;) + { + cpy( pos, i->pData, i->nLength ); + pos += i->nLength; + i = i->pNext; + if( i == NULL ) + break; + } + clear(); + + appendChunk( pNew ); + } + + bool isFlat() const + { + return (pFirst == pLast); + } + + bool isShared() const + { + return (pnRefs != NULL); + } + + Chunk *newChunk() const + { + Chunk *pNew = aChunk.allocate( 1 ); + pNew->pNext = NULL; + return pNew; + } + + Chunk *newChunk( long nLen ) const + { + Chunk *pNew = aChunk.allocate( 1 ); + pNew->pNext = NULL; + pNew->nLength = nLen; + pNew->pData = aChr.allocate( nLen+1 ); + pNew->pData[nLen] = (chr)0; + return pNew; + } + + void appendChunk( Chunk *pNewChunk ) + { + unShare(); + + if( pFirst == NULL ) + pLast = pFirst = pNewChunk; + else + { + pLast->pNext = pNewChunk; + pLast = pNewChunk; + } + + nLength += pNewChunk->nLength; + } + + void prependChunk( Chunk *pNewChunk ) + { + unShare(); + + if( pFirst == NULL ) + pLast = pFirst = pNewChunk; + else + { + pNewChunk->pNext = pFirst; + pFirst = pNewChunk; + } + + nLength += pNewChunk->nLength; + } + + void joinShare( MyType &rSrc ) + { + clear(); + + if( !rSrc.isFlat() ) + rSrc.flatten(); + + rSrc.initCount(); + pnRefs = rSrc.pnRefs; + (*pnRefs)++; + nLength = rSrc.nLength; + pFirst = rSrc.pFirst; + pLast = rSrc.pLast; + } + + void joinShare( const MyType &rSrc ) + { + clear(); + + rSrc.flatten(); + + if( !rSrc.isShared() ) + { + rSrc.pnRefs = new uint32_t; + (*rSrc.pnRefs) = 1; + } + pnRefs = rSrc.pnRefs; + (*pnRefs)++; + nLength = rSrc.nLength; + pFirst = rSrc.pFirst; + pLast = rSrc.pLast; + } + + /** + * This takes an object that was shared and makes a copy of the base data + * that was being shared so that this copy can be changed. This should be + * added before any call that will change this object; + */ + void unShare() const + { + if( isShared() == false ) + return; + + Chunk *pNew = newChunk( nLength ); + chr *pos = pNew->pData; + Chunk *i = pFirst; + for(;;) + { + cpy( pos, i->pData, i->nLength ); + pos += i->nLength; + i = i->pNext; + if( i == NULL ) + break; + } + decRefs(); + pLast = pFirst = pNew; + nLength = pNew->nLength; + } + + /** + * This decrements our ref count and pulls us out of the share. If the ref + * count hits zero because of this, it destroys the share. This is not + * safe to call on it's own, it's much better to call unShare. + */ + void decRefs() const + { + if( isShared() ) + { + (*pnRefs)--; + if( (*pnRefs) == 0 ) + destroyShare(); + else + { + pnRefs = NULL; + pFirst = NULL; + pLast = NULL; + nLength = 0; + } + } + } + + /** + * While the unShare function removes an instance from a share, this + * function destroys the data that was in the share, removing the share + * itself. This should only be called when the refcount for the share has + * or is about to reach zero. + */ + void destroyShare() const + { + delete pnRefs; + pnRefs = NULL; + realClear(); + } + +#ifdef VALTEST + void cpy( chr *dest, const chr *src, long count ) const + { + for( int j = 0; j < count; j++ ) + { + *dest = *src; + dest++; + src++; + } + } +#endif + + void initCount() const + { + if( !isShared() ) + { + pnRefs = new uint32_t; + (*pnRefs) = 1; + } + } + + private: + mutable long nLength; + mutable uint32_t *pnRefs; + mutable Chunk *pFirst; + mutable Chunk *pLast; + + mutable chralloc aChr; + mutable chunkalloc aChunk; + }; + + typedef FBasicString FString; + + template<> uint32_t __calcHashCode( const FString &k ); + template<> bool __cmpHashKeys( const FString &a, const FString &b ); +} + +#endif -- cgit v1.2.3