From d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Mon, 13 May 2019 19:47:19 -0700 Subject: UtfString & Json overhaul. UtfString supports a load of new stuff, and Json uses UtfString exclusively now. --- src/tools/jsontool.cpp | 4 +- src/unstable/json.cpp | 95 +++++++++-------- src/unstable/json.h | 67 ++++++------ src/unstable/utfstring.cpp | 255 ++++++++++++++++++++++++++++++++++++++++----- src/unstable/utfstring.h | 23 ++++ 5 files changed, 343 insertions(+), 101 deletions(-) (limited to 'src') diff --git a/src/tools/jsontool.cpp b/src/tools/jsontool.cpp index 4b6f232..e086fb8 100644 --- a/src/tools/jsontool.cpp +++ b/src/tools/jsontool.cpp @@ -16,8 +16,8 @@ void printThing( Bu::Json &j, int iDepth=0 ) case Bu::Json::Object: Bu::println(""); { - Bu::StringList lKeys = j.getKeys(); - for( Bu::StringList::iterator i = lKeys.begin(); i; i++ ) + Bu::UtfStringList lKeys = j.getKeys(); + for( Bu::UtfStringList::iterator i = lKeys.begin(); i; i++ ) { for(int k = 0; k < iDepth+1; k++ ) Bu::print(" "); diff --git a/src/unstable/json.cpp b/src/unstable/json.cpp index d7e84d9..b1414a9 100644 --- a/src/unstable/json.cpp +++ b/src/unstable/json.cpp @@ -15,7 +15,7 @@ Bu::Json::Json() : Bu::Json::Json( const Bu::UtfString &sValue ) : eType( String ), - uDat( sValue.get() ) + uDat( sValue ) { } @@ -57,7 +57,7 @@ Bu::Json::Json( Type eType ) : break; case String: - uDat.pString = new Bu::String(); + uDat.pString = new Bu::UtfString(); break; case Number: @@ -75,7 +75,7 @@ Bu::Json::Json( Bu::Stream &sInput ) : parse( sInput ); } -Bu::Json::Json( char &c, Bu::Stream &sInput ) : +Bu::Json::Json( Bu::UtfChar &c, Bu::Stream &sInput ) : eType( Invalid ) { parse( c, sInput ); @@ -97,7 +97,7 @@ Bu::Json::Type Bu::Json::getType() const return eType; } -Bu::String Bu::Json::getString() const +Bu::UtfString Bu::Json::getString() const { if( eType != String ) throw Bu::ExceptionBase( @@ -132,7 +132,7 @@ bool Bu::Json::isNull() const return eType == Null; } -Bu::Json &Bu::Json::operator[]( const Bu::String &sKey ) const +Bu::Json &Bu::Json::operator[]( const Bu::UtfString &sKey ) const { if( eType != Object ) throw Bu::ExceptionBase( @@ -158,15 +158,13 @@ int Bu::Json::getSize() const return uDat.pObject->getSize(); else if( eType == Array ) return uDat.pArray->getSize(); - else if( eType == String ) - return uDat.pString->getSize(); else throw Bu::ExceptionBase( "Size requseted from json type that doesn't support it." ); } -Bu::StringList Bu::Json::getKeys() const +Bu::UtfStringList Bu::Json::getKeys() const { return uDat.pObject->getKeys(); } @@ -196,33 +194,40 @@ bool Bu::Json::has( const Bu::String &sKey ) const return uDat.pObject->has( sKey ); } -void Bu::Json::insert( const Bu::String &sKey, Bu::Json *pObj ) +Bu::Json &Bu::Json::insert( const Bu::String &sKey, Bu::Json *pObj ) { uDat.pObject->insert( sKey, pObj ); + return *this; } -void Bu::Json::insert( const Bu::String &sKey, const Bu::Json &rObj ) + +Bu::Json &Bu::Json::insert( const Bu::String &sKey, const Bu::Json &rObj ) { uDat.pObject->insert( sKey, new Bu::Json( rObj ) ); + return *this; } -void Bu::Json::insert( const Bu::String &sKey, const Bu::String &sValue ) +Bu::Json &Bu::Json::insert( const Bu::String &sKey, const Bu::String &sValue ) { uDat.pObject->insert( sKey, new Json( sValue ) ); + return *this; } -void Bu::Json::insert( const Bu::String &sKey, const char *sValue ) +Bu::Json &Bu::Json::insert( const Bu::String &sKey, const char *sValue ) { uDat.pObject->insert( sKey, new Json( sValue ) ); + return *this; } -void Bu::Json::insert( const Bu::String &sKey, double dValue ) +Bu::Json &Bu::Json::insert( const Bu::String &sKey, double dValue ) { uDat.pObject->insert( sKey, new Json( dValue ) ); + return *this; } -void Bu::Json::insert( const Bu::String &sKey, bool bValue ) +Bu::Json &Bu::Json::insert( const Bu::String &sKey, bool bValue ) { uDat.pObject->insert( sKey, new Json( bValue ) ); + return *this; } Bu::Json &Bu::Json::insertObject( const Bu::String &sKey ) @@ -239,29 +244,34 @@ Bu::Json &Bu::Json::insertArray( const Bu::String &sKey ) return *pAr; } -void Bu::Json::append( Bu::Json *pObj ) +Bu::Json &Bu::Json::append( Bu::Json *pObj ) { uDat.pArray->append( pObj ); + return *this; } -void Bu::Json::append( const Bu::String &sValue ) +Bu::Json &Bu::Json::append( const Bu::String &sValue ) { uDat.pArray->append( new Json( sValue ) ); + return *this; } -void Bu::Json::append( const char *sValue ) +Bu::Json &Bu::Json::append( const char *sValue ) { uDat.pArray->append( new Json( sValue ) ); + return *this; } -void Bu::Json::append( double dValue ) +Bu::Json &Bu::Json::append( double dValue ) { uDat.pArray->append( new Json( dValue ) ); + return *this; } -void Bu::Json::append( bool bValue ) +Bu::Json &Bu::Json::append( bool bValue ) { uDat.pArray->append( new Json( bValue ) ); + return *this; } Bu::Json &Bu::Json::appendObject() @@ -282,7 +292,7 @@ void Bu::Json::parse( Bu::Stream &sInput ) { reset(); - char c; + Bu::UtfChar c; next("json"); parse( c, sInput ); @@ -294,7 +304,7 @@ void Bu::Json::parse( const Bu::String &sInput ) parse( mb ); } -void Bu::Json::parse( char &c, Bu::Stream &sInput ) +void Bu::Json::parse( Bu::UtfChar &c, Bu::Stream &sInput ) { while( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) { @@ -434,9 +444,9 @@ void Bu::Json::writeStable( Bu::Stream &sOutput ) const { sOutput.write("{", 1 ); bool bFirst = true; - Bu::List lKey = uDat.pObject->getKeys(); + Bu::List lKey = uDat.pObject->getKeys(); lKey.sort(); - for( Bu::List::iterator i = lKey.begin(); i; i++ ) + for( Bu::List::iterator i = lKey.begin(); i; i++ ) { if( bFirst == true ) bFirst = false; @@ -480,7 +490,7 @@ Bu::Json &Bu::Json::operator=( const Bu::Json &rSrc ) break; case String: - uDat.pString = new Bu::String( *rSrc.uDat.pString ); + uDat.pString = new Bu::UtfString( *rSrc.uDat.pString ); break; case Number: @@ -513,7 +523,8 @@ Bu::Json &Bu::Json::operator=( const Bu::Json &rSrc ) return *this; } -void Bu::Json::parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ) +void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, + Bu::UtfString &sOut ) { skipWs( c, sInput ); bool bEscape = false; @@ -577,14 +588,14 @@ void Bu::Json::parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ) } } -void Bu::Json::parseString( char &c, Bu::Stream &sInput ) +void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput ) { eType = String; - uDat.pString = new Bu::String(); + uDat.pString = new Bu::UtfString(); parseString( c, sInput, *uDat.pString ); } -void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) +void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput ) { skipWs( c, sInput ); eType = Object; @@ -602,7 +613,7 @@ void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) for(;;) { - Bu::String sKey; + Bu::UtfString sKey; parseString( c, sInput, sKey ); skipWs( c, sInput ); if( c != ':' ) @@ -628,7 +639,7 @@ void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) } } -void Bu::Json::parseArray( char &c, Bu::Stream &sInput ) +void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) { skipWs( c, sInput ); @@ -667,7 +678,7 @@ void Bu::Json::parseArray( char &c, Bu::Stream &sInput ) } } -void Bu::Json::parseNumber( char &c, Bu::Stream &sInput ) +void Bu::Json::parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ) { skipWs( c, sInput ); @@ -702,7 +713,7 @@ void Bu::Json::parseNumber( char &c, Bu::Stream &sInput ) uDat.dNumber = atof( sBuf.getStr() ); } -void Bu::Json::parseLiteral( char &c, Bu::Stream &sInput ) +void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ) { skipWs( c, sInput ); @@ -736,27 +747,27 @@ void Bu::Json::parseLiteral( char &c, Bu::Stream &sInput ) } } -bool Bu::Json::readChar( char &c, Bu::Stream &sInput ) +bool Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput ) { - if( sInput.read( &c, 1 ) == 0 ) + if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) return false; return true; } -void Bu::Json::readChar( char &c, Bu::Stream &sInput, const char *sSection ) +void Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput, const char *sSection ) { - if( sInput.read( &c, 1 ) == 0 ) + if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) { throw Bu::ExceptionBase( sSection ); } } -bool Bu::Json::isWs( char c ) +bool Bu::Json::isWs( Bu::UtfChar c ) { return c == ' ' || c == '\t' || c == '\r' || c == '\n'; } -void Bu::Json::skipWs( char &c, Bu::Stream &sInput ) +void Bu::Json::skipWs( Bu::UtfChar &c, Bu::Stream &sInput ) { while( isWs( c ) ) { @@ -764,10 +775,10 @@ void Bu::Json::skipWs( char &c, Bu::Stream &sInput ) } } -void Bu::Json::writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const +void Bu::Json::writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const { sOutput.write("\"", 1 ); - for( Bu::String::const_iterator i = sStr.begin(); i; i++ ) + for( Bu::UtfString::const_iterator i = sStr.begin(); i; i++ ) { switch( *i ) { @@ -807,12 +818,12 @@ void Bu::Json::writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const if( *i < 32 ) sOutput.write( Bu::String("\\u%1"). - arg( (int32_t)*i, Bu::Fmt::hex(4).fill('0') ). + arg( (uint32_t)*i, Bu::Fmt::hex(4).fill('0') ). end().getStr(), 6 ); else - sOutput.write( &(*i), 1 ); + Bu::UtfString::writePoint( sOutput, *i ); break; } } diff --git a/src/unstable/json.h b/src/unstable/json.h index 4c85dd9..2ea62a2 100644 --- a/src/unstable/json.h +++ b/src/unstable/json.h @@ -10,12 +10,13 @@ namespace Bu { class Stream; + typedef Bu::List UtfStringList; class Json { private: - Json( char &c, Bu::Stream &sInput ); - typedef Bu::Hash JsonHash; + Json( Bu::UtfChar &c, Bu::Stream &sInput ); + typedef Bu::Hash JsonHash; typedef Bu::Array JsonList; public: @@ -45,33 +46,33 @@ namespace Bu virtual ~Json(); Type getType() const; - Bu::String getString() const; + Bu::UtfString getString() const; double getNumber() const; bool getBoolean() const; bool isNull() const; - Json &operator[]( const Bu::String &sKey ) const; + Json &operator[]( const Bu::UtfString &sKey ) const; Json &operator[]( int iIndex ) const; int getSize() const; - Bu::StringList getKeys() const; + Bu::UtfStringList getKeys() const; iterator begin(); const_iterator begin() const; iterator end(); const_iterator end() const; bool has( const Bu::String &sKey ) const; - void insert( const Bu::String &sKey, Bu::Json *pObj ); - void insert( const Bu::String &sKey, const Bu::Json &rObj ); - void insert( const Bu::String &sKey, const Bu::String &sValue ); - void insert( const Bu::String &sKey, const char *sValue ); - void insert( const Bu::String &sKey, double dValue ); - void insert( const Bu::String &sKey, bool bValue ); + Json &insert( const Bu::String &sKey, Bu::Json *pObj ); + Json &insert( const Bu::String &sKey, const Bu::Json &rObj ); + Json &insert( const Bu::String &sKey, const Bu::String &sValue ); + Json &insert( const Bu::String &sKey, const char *sValue ); + Json &insert( const Bu::String &sKey, double dValue ); + Json &insert( const Bu::String &sKey, bool bValue ); Json &insertObject( const Bu::String &sKey ); Json &insertArray( const Bu::String &sKey ); - void append( Bu::Json *pObj ); - void append( const Bu::String &sValue ); - void append( const char *sValue ); - void append( double dValue ); - void append( bool bValue ); + Json &append( Bu::Json *pObj ); + Json &append( const Bu::String &sValue ); + Json &append( const char *sValue ); + Json &append( double dValue ); + Json &append( bool bValue ); Json &appendObject(); Json &appendArray(); @@ -87,18 +88,20 @@ namespace Bu Bu::Json &operator=( const Bu::Json &rSrc ); private: - void parse( char &c, Bu::Stream &sInput ); - void parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ); - void parseString( char &c, Bu::Stream &sInput ); - void parseObject( char &c, Bu::Stream &sInput ); - void parseArray( char &c, Bu::Stream &sInput ); - void parseNumber( char &c, Bu::Stream &sInput ); - void parseLiteral( char &c, Bu::Stream &sInput ); - bool readChar( char &c, Bu::Stream &sInput ); - void readChar( char &c, Bu::Stream &sInput, const char *sSection ); - bool isWs( char c ); - void skipWs( char &c, Bu::Stream &sInput ); - void writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const; + void parse( Bu::UtfChar &c, Bu::Stream &sInput ); + void parseString( Bu::UtfChar &c, Bu::Stream &sInput, + Bu::UtfString &sOut ); + void parseString( Bu::UtfChar &c, Bu::Stream &sInput ); + void parseObject( Bu::UtfChar &c, Bu::Stream &sInput ); + void parseArray( Bu::UtfChar &c, Bu::Stream &sInput ); + void parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ); + void parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ); + bool readChar( Bu::UtfChar &c, Bu::Stream &sInput ); + void readChar( Bu::UtfChar &c, Bu::Stream &sInput, + const char *sSection ); + bool isWs( Bu::UtfChar c ); + void skipWs( Bu::UtfChar &c, Bu::Stream &sInput ); + void writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const; private: Type eType; @@ -106,14 +109,16 @@ namespace Bu { DatUnion() : pObject( NULL ) { } DatUnion( const Bu::String &sValue ) : - pString( new Bu::String( sValue ) ) { } + pString( new Bu::UtfString( sValue ) ) { } + DatUnion( const Bu::UtfString &sValue ) : + pString( new Bu::UtfString( sValue ) ) { } DatUnion( const char *sValue ) : - pString( new Bu::String( sValue ) ) { } + pString( new Bu::UtfString( sValue ) ) { } DatUnion( double dValue ) : dNumber( dValue ) { } DatUnion( bool bValue ) : bBoolean( bValue ) { } JsonHash *pObject; JsonList *pArray; - Bu::String *pString; + Bu::UtfString *pString; double dNumber; bool bBoolean; } uDat; diff --git a/src/unstable/utfstring.cpp b/src/unstable/utfstring.cpp index f945725..46c78e6 100644 --- a/src/unstable/utfstring.cpp +++ b/src/unstable/utfstring.cpp @@ -12,8 +12,21 @@ #include "bu/config.h" #include "bu/sio.h" #include "bu/membuf.h" +#include "bu/formatter.h" + using Bu::sio; +uint8_t Bu::UtfString::utf8_lmask[8] = { + 0x00, + 0x01, + 0x03, + 0x07, + 0x0f, + 0x1f, + 0x3f, + 0x7f +}; + Bu::UtfString::UtfString() { } @@ -111,27 +124,17 @@ void Bu::UtfString::append( const UtfString &rSrc ) void Bu::UtfString::setUtf8( const Bu::String &sInput ) { - static uint8_t lmask[8] = { - 0x00, - 0x01, - 0x03, - 0x07, - 0x0f, - 0x1f, - 0x3f, - 0x7f - }; for( Bu::String::const_iterator i = sInput.begin(); i; i++ ) { if( ((int)(uint8_t)*i)&0x80 ) { int iBytes = 1; for(; (((uint8_t)(*i))<= 1; iBytes-- ) { i++; - uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); + uPt |= ((*i)&utf8_lmask[6])<<(6*(iBytes-1)); } append( uPt ); } @@ -321,6 +324,133 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) const } } +int Bu::UtfString::readPoint( Bu::Stream &sIn, Bu::UtfChar &c, + Bu::UtfString::Encoding sEnc ) +{ + switch( sEnc ) + { + case Utf8: + { + uint8_t i; + int iRead = 1; + if( sIn.read( &i, 1 ) < 1 ) + return 0; + if( ((int)i)&0x80 ) + { + int iBytes = 1; + for(; (((uint8_t)i)<= 1; iBytes-- ) + { + if( sIn.read( &i, 1 ) < 1 ) + return 0; + c |= (i&utf8_lmask[6])<<(6*(iBytes-1)); + } + return iRead; + } + else + { + c = (Bu::UtfChar)i; + return 1; + } + } + break; + + case Utf16: + case Utf16be: + case Utf16le: + case Utf32: + case Utf32be: + case Utf32le: + case Ucs2: + case Ucs4: + case GuessEncoding: + throw Bu::ExceptionBase("Not implemented."); + break; + } + return -1; +} + +int Bu::UtfString::writePoint( Bu::Stream &sOut, const Bu::UtfChar &c, + Bu::UtfString::Encoding sEnc ) +{ + switch( sEnc ) + { + case Utf8: + { + uint8_t uByte; + if( c >= 0x010000 ) + { + // Four bytes + // 111 111111 111111 111111 + uByte = (c>>18)|0xF0; + sOut.write( &uByte, 1 ); + uByte = ((c>>12)&0x3F)|0x80; + sOut.write( &uByte, 1 ); + uByte = ((c>>6)&0x3F)|0x80; + sOut.write( &uByte, 1 ); + uByte = (c&0x3F)|0x80; + sOut.write( &uByte, 1 ); + return 4; + } + else if( c >= 0x800 ) + { + // Three bytes + // 1111 111111 111111 + uByte = (c>>12)|0xE0; + sOut.write( &uByte, 1 ); + uByte = ((c>>6)&0x3F)|0x80; + sOut.write( &uByte, 1 ); + uByte = (c&0x3F)|0x80; + sOut.write( &uByte, 1 ); + return 3; + } + else if( c >= 0x80 ) + { + // Two bytes + // 11111 111111 + uByte = (c>>6)|0xC0; + sOut.write( &uByte, 1 ); + uByte = (c&0x3F)|0x80; + sOut.write( &uByte, 1 ); + return 2; + } + else + { + // One byte + uByte = c; + sOut.write( &uByte, 1 ); + return 1; + } + } + break; + + case Utf16: + case Utf16be: + case Utf16le: + case Utf32: + case Utf32be: + case Utf32le: + case Ucs2: + case Ucs4: + case GuessEncoding: + throw Bu::ExceptionBase("Not implemented."); + break; + } + return -1; +} + +int32_t Bu::UtfString::toInt32( int iRadix ) const +{ + return strtol( get().getStr(), NULL, iRadix ); +} + +int64_t Bu::UtfString::toInt64( int iRadix ) const +{ + return strtoll( get().getStr(), NULL, iRadix ); +} + void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) const { int iPos = 0; @@ -496,6 +626,33 @@ bool Bu::UtfString::operator==( const Bu::UtfString &rhs ) const return aData == rhs.aData; } +bool Bu::UtfString::operator==( const Bu::String &rhs ) const +{ + // Nieve comparison + if( aData.getSize() != rhs.getSize() ) + return false; + + for( int j = 0; j < aData.getSize(); j++ ) + { + if( aData[j] != rhs[j] ) + return false; + } + + return true; +} + +bool Bu::UtfString::operator==( const char *rhs ) const +{ + // Nieve comparison + for( int j = 0; j < aData.getSize(); j++ ) + { + if( rhs[j] == '\0' || aData[j] != rhs[j] ) + return false; + } + + return true; +} + Bu::UtfString &Bu::UtfString::operator+=( const Bu::UtfString &rhs ) { append( rhs ); @@ -508,6 +665,56 @@ Bu::UtfString &Bu::UtfString::operator+=( const UtfChar &rhs ) return *this; } +bool Bu::UtfString::operator<( const Bu::UtfString &rhs ) const +{ + for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) + { + if( aData[j] != rhs.aData[j] ) + return aData[j] < rhs.aData[j]; + } + + return false; +} + +bool Bu::UtfString::operator<=( const Bu::UtfString &rhs ) const +{ + for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) + { + if( aData[j] != rhs.aData[j] ) + return aData[j] < rhs.aData[j]; + } + + if( aData.getSize() == rhs.aData.getSize() ) + return true; + + return false; +} + +bool Bu::UtfString::operator>( const Bu::UtfString &rhs ) const +{ + for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) + { + if( aData[j] != rhs.aData[j] ) + return aData[j] > rhs.aData[j]; + } + + return false; +} + +bool Bu::UtfString::operator>=( const Bu::UtfString &rhs ) const +{ + for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) + { + if( aData[j] != rhs.aData[j] ) + return aData[j] > rhs.aData[j]; + } + + if( aData.getSize() == rhs.aData.getSize() ) + return true; + + return false; +} + Bu::String Bu::UtfString::get( Encoding eEnc ) const { Bu::MemBuf mb; @@ -537,16 +744,6 @@ void Bu::UtfString::debug() const /* void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) { - static uint8_t lmask[8] = { - 0x00, - 0x01, - 0x03, - 0x07, - 0x0f, - 0x1f, - 0x3f, - 0x7f - }; for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) { if( i != sUtf8.begin() ) @@ -558,9 +755,9 @@ void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) int iBytes = 1; for(; (((uint8_t)(*i))<= 1; iBytes-- ) { // sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) @@ -568,9 +765,9 @@ void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) // sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') // << (int)(uint8_t)*i << sio.nl // << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') -// << (int)lmask[6] << sio.nl; +// << (int)utf8_lmask[6] << sio.nl; i++; - uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); + uPt |= ((*i)&utf8_lmask[6])<<(6*(iBytes-1)); } sio << uPt; // sio << " (" << Bu::Fmt( 8, 2 ).fill('0') @@ -602,3 +799,9 @@ template<> bool Bu::__cmpHashKeys( { return a == b; } + +Bu::Formatter Bu::operator<<( Bu::Formatter &f, const Bu::UtfString &s ) +{ + return f << s.get(); +} + diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h index 5085ec0..285b680 100644 --- a/src/unstable/utfstring.h +++ b/src/unstable/utfstring.h @@ -190,6 +190,18 @@ namespace Bu */ void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const; + /** + * Reads as many bytes from the given stream, starting at the current + * position, as required to read a single UtfChar (code point). + */ + static int readPoint( Bu::Stream &sIn, UtfChar &c, + Encoding sEnc=Utf8 ); + static int writePoint( Bu::Stream &sOut, const UtfChar &c, + Encoding sEnc=Utf8 ); + + int32_t toInt32( int iRadix=10 ) const; + int64_t toInt64( int iRadix=10 ) const; + /** * This encodes the UtfString in the given encoding and returns it as * a binary Bu::String. Like write, this also includes the proper BOM @@ -216,9 +228,16 @@ namespace Bu UtfChar nextChar( int &iIndex ) const; bool operator==( const Bu::UtfString &rhs ) const; + bool operator==( const Bu::String &rhs ) const; + bool operator==( const char *rhs ) const; UtfString &operator+=( const Bu::UtfString &rhs ); UtfString &operator+=( const UtfChar &rhs ); + bool operator<( const Bu::UtfString &rhs ) const; + bool operator<=( const Bu::UtfString &rhs ) const; + bool operator>( const Bu::UtfString &rhs ) const; + bool operator>=( const Bu::UtfString &rhs ) const; + private: void append16( uint16_t i ) { aData.append( i ); } @@ -237,6 +256,7 @@ namespace Bu void writeUtf32le( Bu::Stream &sOut ) const; private: + static uint8_t utf8_lmask[8]; Bu::Array aData; int iRawLen; int iCharLen; @@ -254,6 +274,9 @@ namespace Bu template<> uint32_t __calcHashCode( const UtfString &k ); template<> bool __cmpHashKeys( const UtfString &a, const UtfString &b ); + + class Formatter; + Bu::Formatter operator<<( Bu::Formatter &f, const Bu::UtfString &s ); }; #endif -- cgit v1.2.3