From 96759377ae8a4394d325747f597fe5b60afabf6e Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Mon, 11 Nov 2019 05:01:29 -0800 Subject: Working on TextBuilder & Text. Seriously thinking about changing Text to just use full codepoints. It would be much less work, more reliable and predictable, easier to use, but would use twice the memory. --- src/unstable/blobbuilder.cpp | 2 +- src/unstable/textbuilder.cpp | 281 +++++++++++++++++++++++++++++++++++++++++ src/unstable/textbuilder.h | 78 ++++++++++++ src/unstable/textcodec.cpp | 17 +++ src/unstable/textcodec.h | 41 ++++++ src/unstable/textcodecutf8.cpp | 25 ++++ src/unstable/textcodecutf8.h | 26 ++++ 7 files changed, 469 insertions(+), 1 deletion(-) diff --git a/src/unstable/blobbuilder.cpp b/src/unstable/blobbuilder.cpp index 43c0779..fd62cb0 100644 --- a/src/unstable/blobbuilder.cpp +++ b/src/unstable/blobbuilder.cpp @@ -93,7 +93,7 @@ Bu::BlobBuilderCore::BlobBuilderCore( const Bu::BlobBuilderCore &rSrc ) : pLast( 0 ), iLength( rSrc.iLength ) { - + throw Bu::ExceptionBase("Not yet implemented."); } Bu::BlobBuilderCore::~BlobBuilderCore() diff --git a/src/unstable/textbuilder.cpp b/src/unstable/textbuilder.cpp index 73271f8..af4dbac 100644 --- a/src/unstable/textbuilder.cpp +++ b/src/unstable/textbuilder.cpp @@ -5,4 +5,285 @@ * terms of the license contained in the file LICENSE. 
*/ +#include "bu/textbuilder.h" + +#include "bu/exceptionbase.h" +#include "bu/text.h" + +#define PAGE_SIZE 16 + +Bu::TextBuilderCore::Chunk::Chunk( const CodePoint *pSrc, int32_t iLength ) : + iLength( iLength ), + pData( 0 ), + pNext( 0 ) +{ + if( iLength < PAGE_SIZE ) + { + pData = new CodePoint[PAGE_SIZE]; + } + else + { + pData = new CodePoint[iLength]; + } + memcpy( pData, pSrc, iLength*sizeof(CodePoint) ); +} + +Bu::TextBuilderCore::Chunk::~Chunk() +{ + delete[] pData; + pData = 0; + pNext = 0; +} + +void Bu::TextBuilderCore::Chunk::append( const Bu::CodePoint *&pSrc, + int32_t &iLength ) +{ + if( this->iLength >= PAGE_SIZE ) + { + // This chunk is full, just return. + return; + } + int32_t iCopy = PAGE_SIZE-this->iLength; + if( iCopy > iLength ) + { + iCopy = iLength; + } + memcpy( pData+this->iLength, pSrc, iCopy*sizeof(Bu::CodePoint) ); + this->iLength += iCopy; + pSrc += iCopy; + iLength -= iCopy; +} + +Bu::TextBuilderCore::Chunk *Bu::TextBuilderCore::Chunk::split( int32_t iIndex ) +{ + if( iIndex == 0 ) + return NULL; + + if( iIndex >= iLength ) + return NULL; + + Chunk *pNew = new Chunk( pData+iIndex, iLength-iIndex ); + iLength = iIndex; + pNew->pNext = pNext; + pNext = pNew; + + return pNew; +} + + +////// +// TextBuilderCore +// +Bu::TextBuilderCore::TextBuilderCore() : + pFirst( 0 ), + pLast( 0 ), + iLength( 0 ) +{ +} + +Bu::TextBuilderCore::TextBuilderCore( const TextBuilderCore &rSrc ) : + pFirst( 0 ), + pLast( 0 ), + iLength( rSrc.iLength ) +{ + throw Bu::ExceptionBase("Not yet implemented."); +} + +Bu::TextBuilderCore::~TextBuilderCore() +{ + clear(); +} + +void Bu::TextBuilderCore::clear() +{ + Chunk *pCur = pFirst; + while( pCur ) + { + Chunk *pNext = pCur->pNext; + delete pCur; + pCur = pNext; + } + pFirst = pLast = 0; + iLength = 0; +} + +void Bu::TextBuilderCore::append( const CodePoint *pSrc, int32_t iLength ) +{ + this->iLength += iLength; + if( pFirst == 0 ) + { + // Nothing in the list, just add a chunk.
+ pFirst = pLast = new Chunk( pSrc, iLength ); + return; + } + else if( pLast->iLength < PAGE_SIZE ) + { + // Append to the last chunk first, this will modify pSrc & iLength. + pLast->append( pSrc, iLength ); + } + + // If there's unused data at the end, append it now. + if( iLength > 0 ) + { + pLast->pNext = new Chunk( pSrc, iLength ); + pLast = pLast->pNext; + } +} + +void Bu::TextBuilderCore::prepend( const CodePoint *pSrc, int32_t iLength ) +{ + if( pFirst == 0 ) + { + pFirst = pLast = new Chunk( pSrc, iLength ); + } + else + { + Chunk *pNew = new Chunk( pSrc, iLength ); + pNew->pNext = pFirst; + pFirst = pNew; + } + this->iLength += iLength; +} + +void Bu::TextBuilderCore::insert( int32_t iBefore, const CodePoint *pSrc, int32_t iLength ) +{ + if( iBefore <= 0 ) + { + prepend( pSrc, iLength ); + return; + } + if( iBefore >= this->iLength ) + { + append( pSrc, iLength ); + return; + } + + for( Chunk *pCur = pFirst; pCur; pCur = pCur->pNext ) + { + if( iBefore < pCur->iLength ) + { + // This is the chunk we need to split.
+ Chunk *pNew = pCur->split( iBefore ); + if( pLast == pCur ) + pLast = pNew; + } + if( iBefore == pCur->iLength ) + { + // Insert between chunks, no splitting required. + Chunk *pNew = new Chunk( pSrc, iLength ); + pNew->pNext = pCur->pNext; + pCur->pNext = pNew; + if( pLast == pCur ) + pLast = pNew; + break; + } + // Not in this chunk, make the index relative to the next one. + iBefore -= pCur->iLength; + } + this->iLength += iLength; +} + +void Bu::TextBuilderCore::set( const CodePoint *pSrc, int32_t iLength ) +{ + clear(); + append( pSrc, iLength ); +} + +void Bu::TextBuilderCore::copyTo( void *pDestRaw, int32_t iLength ) +{ + +} + +Bu::CodePoint Bu::TextBuilderCore::getAt( int32_t iIndex ) const +{ + if( iIndex < 0 || iIndex >= iLength ) + throw Bu::ExceptionBase("Requested index is out of range."); + + Chunk *pCur = pFirst; + while( iIndex >= pCur->iLength ) + { + iIndex -= pCur->iLength; + pCur = pCur->pNext; + } + return pCur->pData[iIndex]; +} + +///// +// TextBuilder +// + +Bu::TextBuilder::TextBuilder() +{ +} + +Bu::TextBuilder::TextBuilder( const Text &rSrc ) +{ +} + +Bu::TextBuilder::TextBuilder( const TextBuilder &rSrc ) : + Bu::SharedCore( rSrc ) +{ +} + +Bu::TextBuilder::~TextBuilder() +{ +} + +void Bu::TextBuilder::set( const Text &rSrc ) +{ + _hardCopy(); + core->set( rSrc.getData(), rSrc.getSize() ); +} + +void Bu::TextBuilder::append( const Text &rSrc ) +{ + _hardCopy(); +} + +void Bu::TextBuilder::append( const CodePoint *pSrc, int32_t iLength ) +{ + _hardCopy(); +} + +void Bu::TextBuilder::prepend( const Text &rSrc ) +{ + _hardCopy(); +} + +void Bu::TextBuilder::insert( const Text &rSrc ) +{ + _hardCopy(); +} + +void Bu::TextBuilder::clear() +{ + _hardCopy(); +} + +int32_t Bu::TextBuilder::getSize() const +{ + return core->iLength; +} + +Bu::Text Bu::TextBuilder::getText() const +{ + return Text( *this ); +} + +Bu::CodePoint Bu::TextBuilder::operator[]( int32_t iIndex ) const +{ + return core->getAt( iIndex ); +} + +Bu::TextBuilder &Bu::TextBuilder::operator=( const Text &rSrc ) +{ + set( rSrc ); + return *this; +} + +Bu::TextBuilder &Bu::TextBuilder::operator==( const Text &rSrc ) +{ + set( rSrc ); + return *this; +} diff --git a/src/unstable/textbuilder.h b/src/unstable/textbuilder.h index 73271f8..22fa653 100644 ---
a/src/unstable/textbuilder.h +++ b/src/unstable/textbuilder.h @@ -5,4 +5,82 @@ * terms of the license contained in the file LICENSE. */ +#ifndef BU_TEXT_BUILDER_H +#define BU_TEXT_BUILDER_H + +#include +#include "bu/sharedcore.h" +#include "bu/text.h" + +namespace Bu +{ + class TextBuilder; + + /** @cond DEVEL */ + class TextBuilderCore + { + friend class TextBuilder; + friend class SharedCore; + private: + class Chunk + { + public: + Chunk( const CodePoint *pSrc, int32_t iLength ); + ~Chunk(); + + void append( const CodePoint *&pSrc, int32_t &iLength ); + + Chunk *split( int32_t iIndex ); + + int32_t iLength; + CodePoint *pData; + Chunk *pNext; + }; + + TextBuilderCore(); + TextBuilderCore( const TextBuilderCore &rSrc ); + virtual ~TextBuilderCore(); + + void clear(); + void append( const CodePoint *pSrc, int32_t iLength ); + void prepend( const CodePoint *pSrc, int32_t iLength ); + void insert( int32_t iBefore, const CodePoint *pSrc, int32_t iLength ); + void set( const CodePoint *pSrc, int32_t iLength ); + void copyTo( void *pDestRaw, int32_t iLength ); + CodePoint getAt( int32_t iIndex ) const; + + Chunk *pFirst; + Chunk *pLast; + int32_t iLength; + }; + + class TextBuilder : public Bu::SharedCore + { + protected: + using SharedCore::core; + using SharedCore::_hardCopy; + + public: + TextBuilder(); + TextBuilder( const Text &rSrc ); + TextBuilder( const TextBuilder &rSrc ); + virtual ~TextBuilder(); + + void set( const Text &rSrc ); + void append( const Text &rSrc ); + void append( const CodePoint *pSrc, int32_t iLength ); + void prepend( const Text &rSrc ); + void insert( const Text &rSrc ); + void clear(); + + int32_t getSize() const; + Text getText() const; + CodePoint operator[]( int32_t iIndex ) const; + + TextBuilder &operator=( const Text &rSrc ); + TextBuilder &operator==( const Text &rSrc ); + }; +} + +#endif diff --git a/src/unstable/textcodec.cpp b/src/unstable/textcodec.cpp index e69de29..0ac791e 100644 --- a/src/unstable/textcodec.cpp +++ 
b/src/unstable/textcodec.cpp @@ -0,0 +1,17 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ + +#include "bu/textcodec.h" + +Bu::TextCodec::TextCodec() +{ +} + +Bu::TextCodec::~TextCodec() +{ +} + diff --git a/src/unstable/textcodec.h b/src/unstable/textcodec.h index e69de29..6ae392e 100644 --- a/src/unstable/textcodec.h +++ b/src/unstable/textcodec.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ + +#ifndef TEXT_CODEC_H +#define TEXT_CODEC_H + +#include "bu/text.h" +#include "bu/blob.h" + +namespace Bu +{ + class Text; + class Blob; + class TextBuilder; + class BlobBuilder; + + /** + * Represents a textual format and the routines to convert from that + * format to Unicode code points and vice versa. + */ + class TextCodec + { + public: + TextCodec(); + virtual ~TextCodec(); + +// virtual Blob encode( const Text &rSource ); +// virtual Text decode( const Blob &rSource, int32_t &rBytesUsed ); + + virtual void encode( BlobBuilder &rTarget, const Text &rSource )=0; + virtual int32_t decode( TextBuilder &rTarget, const Blob &rSource )=0; + + private: + }; +}; + +#endif diff --git a/src/unstable/textcodecutf8.cpp b/src/unstable/textcodecutf8.cpp index e69de29..ce4e0a2 100644 --- a/src/unstable/textcodecutf8.cpp +++ b/src/unstable/textcodecutf8.cpp @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE.
+ */ + +#include "bu/textcodecutf8.h" + +Bu::TextCodecUtf8::TextCodecUtf8() +{ +} + +Bu::TextCodecUtf8::~TextCodecUtf8() +{ +} + +void Bu::TextCodecUtf8::encode( BlobBuilder &rTarget, const Text &rSource ) +{ +} + +int32_t Bu::TextCodecUtf8::decode( TextBuilder &rTarget, const Blob &rSource ) +{ + return 0; // Stub: nothing is decoded yet. +} diff --git a/src/unstable/textcodecutf8.h b/src/unstable/textcodecutf8.h index e69de29..f565f57 100644 --- a/src/unstable/textcodecutf8.h +++ b/src/unstable/textcodecutf8.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2007-2019 Xagasoft, All rights reserved. + * + * This file is part of the libbu++ library and is released under the + * terms of the license contained in the file LICENSE. + */ + +#ifndef TEXT_CODEC_UTF8_H +#define TEXT_CODEC_UTF8_H + +#include "bu/textcodec.h" + +namespace Bu +{ + class TextCodecUtf8 : public TextCodec + { + public: + TextCodecUtf8(); + virtual ~TextCodecUtf8(); + + virtual void encode( BlobBuilder &rTarget, const Text &rSource ); + virtual int32_t decode( TextBuilder &rTarget, const Blob &rSource ); + }; +} + +#endif -- cgit v1.2.3