From 7c335ede527eaf4a3053ef35b1299141d34aaf40 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Fri, 19 Nov 2010 05:54:14 +0000 Subject: I now think that this may not work out at all. It looks like, if we want proper Unicode handling, we'll need to implement a series of codecs and converters as well as tables of codepages and lookups. It'll be interesting, I guess, but it makes me care a lot less about proper encoding. Anyway, UtfString uses shorts instead of chars, so it's a step in the right direction, but it's still not enough to handle proper UTF-16 encoding; maybe UCS-2 encoding, but... ...that's lame. Bu::FBasicString has been generalized a bit with optimizations from libc for char-based strings. It also, unfortunately, still uses char-only functions in several places; those all rely on casting the strings to char * at the moment just to get the thing to compile. Basically, it's not a good UTF-16 solution yet, and it may never be one while remaining compatible with char-based strings. --- src/fbasicstring.h | 52 ++++++++++++++++++++++++++++++++++++++++++++-------- src/utfstring.cpp | 29 +++++++++++++++++++++++++++++ src/utfstring.h | 24 ++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 8 deletions(-) create mode 100644 src/utfstring.cpp create mode 100644 src/utfstring.h (limited to 'src') diff --git a/src/fbasicstring.h b/src/fbasicstring.h index 7167f4a..af11bb2 100644 --- a/src/fbasicstring.h +++ b/src/fbasicstring.h @@ -39,6 +39,42 @@ namespace Bu template< typename chr, int nMinSize, typename chralloc, typename chunkalloc> class FBasicString; + template + size_t strlen( const chr *pData ) + { + for( size_t tLen = 0;; ++tLen ) + { + if( pData[tLen] == (chr)0 ) + return tLen; + } + return -1; + } + + template + size_t strlen( const char *pData ) + { + return ::strlen( pData ); + } + + template + int strncmp( const chr *a, const chr *b, size_t iLen ) + { + for( size_t iPos = 0; iPos < iLen; iPos++ ) + { + if( a[iPos] != b[iPos] ) + { + return a[iPos]-b[iPos]; + 
} + } + return 0; + } + + template + int strncmp( const char *a, const char *b, size_t iLen ) + { + return ::strncmp( a, b, iLen ); + } + template struct FStringCore { @@ -1044,7 +1080,7 @@ namespace Bu */ void insert( long nPos, const chr *pData ) { - insert( nPos, pData, strlen( pData ) ); + insert( nPos, pData, Bu::strlen( pData ) ); } void remove( long nPos, long nLen ) @@ -1170,13 +1206,13 @@ namespace Bu if( iStart < 0 ) iStart = 0; if( iStart >= core->nLength ) - return ""; + return (const chr[]){(chr)0}; if( iSize < 0 ) iSize = core->nLength; if( iStart+iSize > core->nLength ) iSize = core->nLength-iStart; if( iSize == 0 ) - return ""; + return (const chr[]){(chr)0}; flatten(); MyType ret( core->pFirst->pData+iStart, iSize ); @@ -1438,11 +1474,11 @@ namespace Bu wordexp_t result; /* Expand the string for the program to run. */ - switch (wordexp (core->pFirst->pData, &result, 0)) + switch (wordexp ((char *)core->pFirst->pData, &result, 0)) { case 0: /* Successful. */ { - set( result.we_wordv[0] ); + set( (chr *)result.we_wordv[0] ); wordfree( &result ); return; } @@ -1954,7 +1990,7 @@ namespace Bu long iLen = vsnprintf( NULL, 0, sFrmt, ap ); Chunk *pNew = core->newChunk( iLen ); - vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); + vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap ); core->appendChunk( pNew ); va_end( ap ); @@ -1971,7 +2007,7 @@ namespace Bu long iLen = vsnprintf( NULL, 0, sFrmt, ap ); Chunk *pNew = core->newChunk( iLen ); - vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); + vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap ); core->appendChunk( pNew ); va_end( ap ); @@ -1988,7 +2024,7 @@ namespace Bu long iLen = vsnprintf( NULL, 0, sFrmt, ap ); Chunk *pNew = core->newChunk( iLen ); - vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); + vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap ); core->prependChunk( pNew ); va_end( ap ); diff --git a/src/utfstring.cpp b/src/utfstring.cpp new file mode 100644 index 0000000..ae5efaf --- /dev/null +++ 
b/src/utfstring.cpp @@ -0,0 +1,29 @@ +#include "bu/utfstring.h" + +template class Bu::FBasicString; + +template<> uint32_t Bu::__calcHashCode( const Bu::UtfString &k ) +{ + long j, sz = k.getSize()*2; + const char *s = (const char *)k.getStr(); + + long nPos = 0; + for( j = 0; j < sz; j++, s++ ) + { + nPos = *s + (nPos << 6) + (nPos << 16) - nPos; + } + + return nPos; +} + +template<> bool Bu::__cmpHashKeys( + const Bu::UtfString &a, const Bu::UtfString &b ) +{ + return a == b; +} + +template<> void Bu::__tracer_format( const Bu::UtfString &v ) +{ + printf("(%ld)\"%s\"", v.getSize(), (const char *)v.getStr() ); +} + diff --git a/src/utfstring.h b/src/utfstring.h new file mode 100644 index 0000000..bbacb74 --- /dev/null +++ b/src/utfstring.h @@ -0,0 +1,24 @@ +#ifndef BU_UTF_STRING_H +#define BU_UTF_STRING_H + +#include "bu/fbasicstring.h" + +namespace Bu +{ + typedef FBasicString UtfString; + + template + uint32_t __calcHashCode( const T &k ); + + template + bool __cmpHashKeys( const T &a, const T &b ); + + template<> uint32_t __calcHashCode( const UtfString &k ); + template<> bool __cmpHashKeys( + const UtfString &a, const UtfString &b ); + + template void __tracer_format( const t &v ); + template<> void __tracer_format( const UtfString &v ); +} + +#endif -- cgit v1.2.3