aboutsummaryrefslogtreecommitdiff
path: root/src/unstable
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2019-05-13 19:47:19 -0700
committerMike Buland <eichlan@xagasoft.com>2019-05-13 19:47:19 -0700
commitd605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37 (patch)
tree0cd21d420fc67ae757ec2475610c4624fd714363 /src/unstable
parent62753c815b5ec34ebfae37a3c89187a01cc17160 (diff)
downloadlibbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.tar.gz
libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.tar.bz2
libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.tar.xz
libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.zip
UtfString & Json overhaul.
UtfString supports a load of new stuff, and Json uses UtfString exclusively now.
Diffstat (limited to 'src/unstable')
-rw-r--r--src/unstable/json.cpp95
-rw-r--r--src/unstable/json.h67
-rw-r--r--src/unstable/utfstring.cpp255
-rw-r--r--src/unstable/utfstring.h23
4 files changed, 341 insertions, 99 deletions
diff --git a/src/unstable/json.cpp b/src/unstable/json.cpp
index d7e84d9..b1414a9 100644
--- a/src/unstable/json.cpp
+++ b/src/unstable/json.cpp
@@ -15,7 +15,7 @@ Bu::Json::Json() :
15 15
16Bu::Json::Json( const Bu::UtfString &sValue ) : 16Bu::Json::Json( const Bu::UtfString &sValue ) :
17 eType( String ), 17 eType( String ),
18 uDat( sValue.get() ) 18 uDat( sValue )
19{ 19{
20} 20}
21 21
@@ -57,7 +57,7 @@ Bu::Json::Json( Type eType ) :
57 break; 57 break;
58 58
59 case String: 59 case String:
60 uDat.pString = new Bu::String(); 60 uDat.pString = new Bu::UtfString();
61 break; 61 break;
62 62
63 case Number: 63 case Number:
@@ -75,7 +75,7 @@ Bu::Json::Json( Bu::Stream &sInput ) :
75 parse( sInput ); 75 parse( sInput );
76} 76}
77 77
78Bu::Json::Json( char &c, Bu::Stream &sInput ) : 78Bu::Json::Json( Bu::UtfChar &c, Bu::Stream &sInput ) :
79 eType( Invalid ) 79 eType( Invalid )
80{ 80{
81 parse( c, sInput ); 81 parse( c, sInput );
@@ -97,7 +97,7 @@ Bu::Json::Type Bu::Json::getType() const
97 return eType; 97 return eType;
98} 98}
99 99
100Bu::String Bu::Json::getString() const 100Bu::UtfString Bu::Json::getString() const
101{ 101{
102 if( eType != String ) 102 if( eType != String )
103 throw Bu::ExceptionBase( 103 throw Bu::ExceptionBase(
@@ -132,7 +132,7 @@ bool Bu::Json::isNull() const
132 return eType == Null; 132 return eType == Null;
133} 133}
134 134
135Bu::Json &Bu::Json::operator[]( const Bu::String &sKey ) const 135Bu::Json &Bu::Json::operator[]( const Bu::UtfString &sKey ) const
136{ 136{
137 if( eType != Object ) 137 if( eType != Object )
138 throw Bu::ExceptionBase( 138 throw Bu::ExceptionBase(
@@ -158,15 +158,13 @@ int Bu::Json::getSize() const
158 return uDat.pObject->getSize(); 158 return uDat.pObject->getSize();
159 else if( eType == Array ) 159 else if( eType == Array )
160 return uDat.pArray->getSize(); 160 return uDat.pArray->getSize();
161 else if( eType == String )
162 return uDat.pString->getSize();
163 else 161 else
164 throw Bu::ExceptionBase( 162 throw Bu::ExceptionBase(
165 "Size requseted from json type that doesn't support it." 163 "Size requseted from json type that doesn't support it."
166 ); 164 );
167} 165}
168 166
169Bu::StringList Bu::Json::getKeys() const 167Bu::UtfStringList Bu::Json::getKeys() const
170{ 168{
171 return uDat.pObject->getKeys(); 169 return uDat.pObject->getKeys();
172} 170}
@@ -196,33 +194,40 @@ bool Bu::Json::has( const Bu::String &sKey ) const
196 return uDat.pObject->has( sKey ); 194 return uDat.pObject->has( sKey );
197} 195}
198 196
199void Bu::Json::insert( const Bu::String &sKey, Bu::Json *pObj ) 197Bu::Json &Bu::Json::insert( const Bu::String &sKey, Bu::Json *pObj )
200{ 198{
201 uDat.pObject->insert( sKey, pObj ); 199 uDat.pObject->insert( sKey, pObj );
200 return *this;
202} 201}
203void Bu::Json::insert( const Bu::String &sKey, const Bu::Json &rObj ) 202
203Bu::Json &Bu::Json::insert( const Bu::String &sKey, const Bu::Json &rObj )
204{ 204{
205 uDat.pObject->insert( sKey, new Bu::Json( rObj ) ); 205 uDat.pObject->insert( sKey, new Bu::Json( rObj ) );
206 return *this;
206} 207}
207 208
208void Bu::Json::insert( const Bu::String &sKey, const Bu::String &sValue ) 209Bu::Json &Bu::Json::insert( const Bu::String &sKey, const Bu::String &sValue )
209{ 210{
210 uDat.pObject->insert( sKey, new Json( sValue ) ); 211 uDat.pObject->insert( sKey, new Json( sValue ) );
212 return *this;
211} 213}
212 214
213void Bu::Json::insert( const Bu::String &sKey, const char *sValue ) 215Bu::Json &Bu::Json::insert( const Bu::String &sKey, const char *sValue )
214{ 216{
215 uDat.pObject->insert( sKey, new Json( sValue ) ); 217 uDat.pObject->insert( sKey, new Json( sValue ) );
218 return *this;
216} 219}
217 220
218void Bu::Json::insert( const Bu::String &sKey, double dValue ) 221Bu::Json &Bu::Json::insert( const Bu::String &sKey, double dValue )
219{ 222{
220 uDat.pObject->insert( sKey, new Json( dValue ) ); 223 uDat.pObject->insert( sKey, new Json( dValue ) );
224 return *this;
221} 225}
222 226
223void Bu::Json::insert( const Bu::String &sKey, bool bValue ) 227Bu::Json &Bu::Json::insert( const Bu::String &sKey, bool bValue )
224{ 228{
225 uDat.pObject->insert( sKey, new Json( bValue ) ); 229 uDat.pObject->insert( sKey, new Json( bValue ) );
230 return *this;
226} 231}
227 232
228Bu::Json &Bu::Json::insertObject( const Bu::String &sKey ) 233Bu::Json &Bu::Json::insertObject( const Bu::String &sKey )
@@ -239,29 +244,34 @@ Bu::Json &Bu::Json::insertArray( const Bu::String &sKey )
239 return *pAr; 244 return *pAr;
240} 245}
241 246
242void Bu::Json::append( Bu::Json *pObj ) 247Bu::Json &Bu::Json::append( Bu::Json *pObj )
243{ 248{
244 uDat.pArray->append( pObj ); 249 uDat.pArray->append( pObj );
250 return *this;
245} 251}
246 252
247void Bu::Json::append( const Bu::String &sValue ) 253Bu::Json &Bu::Json::append( const Bu::String &sValue )
248{ 254{
249 uDat.pArray->append( new Json( sValue ) ); 255 uDat.pArray->append( new Json( sValue ) );
256 return *this;
250} 257}
251 258
252void Bu::Json::append( const char *sValue ) 259Bu::Json &Bu::Json::append( const char *sValue )
253{ 260{
254 uDat.pArray->append( new Json( sValue ) ); 261 uDat.pArray->append( new Json( sValue ) );
262 return *this;
255} 263}
256 264
257void Bu::Json::append( double dValue ) 265Bu::Json &Bu::Json::append( double dValue )
258{ 266{
259 uDat.pArray->append( new Json( dValue ) ); 267 uDat.pArray->append( new Json( dValue ) );
268 return *this;
260} 269}
261 270
262void Bu::Json::append( bool bValue ) 271Bu::Json &Bu::Json::append( bool bValue )
263{ 272{
264 uDat.pArray->append( new Json( bValue ) ); 273 uDat.pArray->append( new Json( bValue ) );
274 return *this;
265} 275}
266 276
267Bu::Json &Bu::Json::appendObject() 277Bu::Json &Bu::Json::appendObject()
@@ -282,7 +292,7 @@ void Bu::Json::parse( Bu::Stream &sInput )
282{ 292{
283 reset(); 293 reset();
284 294
285 char c; 295 Bu::UtfChar c;
286 next("json"); 296 next("json");
287 297
288 parse( c, sInput ); 298 parse( c, sInput );
@@ -294,7 +304,7 @@ void Bu::Json::parse( const Bu::String &sInput )
294 parse( mb ); 304 parse( mb );
295} 305}
296 306
297void Bu::Json::parse( char &c, Bu::Stream &sInput ) 307void Bu::Json::parse( Bu::UtfChar &c, Bu::Stream &sInput )
298{ 308{
299 while( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) 309 while( c == ' ' || c == '\t' || c == '\r' || c == '\n' )
300 { 310 {
@@ -434,9 +444,9 @@ void Bu::Json::writeStable( Bu::Stream &sOutput ) const
434 { 444 {
435 sOutput.write("{", 1 ); 445 sOutput.write("{", 1 );
436 bool bFirst = true; 446 bool bFirst = true;
437 Bu::List<Bu::String> lKey = uDat.pObject->getKeys(); 447 Bu::List<Bu::UtfString> lKey = uDat.pObject->getKeys();
438 lKey.sort(); 448 lKey.sort();
439 for( Bu::List<Bu::String>::iterator i = lKey.begin(); i; i++ ) 449 for( Bu::List<Bu::UtfString>::iterator i = lKey.begin(); i; i++ )
440 { 450 {
441 if( bFirst == true ) 451 if( bFirst == true )
442 bFirst = false; 452 bFirst = false;
@@ -480,7 +490,7 @@ Bu::Json &Bu::Json::operator=( const Bu::Json &rSrc )
480 break; 490 break;
481 491
482 case String: 492 case String:
483 uDat.pString = new Bu::String( *rSrc.uDat.pString ); 493 uDat.pString = new Bu::UtfString( *rSrc.uDat.pString );
484 break; 494 break;
485 495
486 case Number: 496 case Number:
@@ -513,7 +523,8 @@ Bu::Json &Bu::Json::operator=( const Bu::Json &rSrc )
513 return *this; 523 return *this;
514} 524}
515 525
516void Bu::Json::parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ) 526void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput,
527 Bu::UtfString &sOut )
517{ 528{
518 skipWs( c, sInput ); 529 skipWs( c, sInput );
519 bool bEscape = false; 530 bool bEscape = false;
@@ -577,14 +588,14 @@ void Bu::Json::parseString( char &c, Bu::Stream &sInput, Bu::String &sOut )
577 } 588 }
578} 589}
579 590
580void Bu::Json::parseString( char &c, Bu::Stream &sInput ) 591void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput )
581{ 592{
582 eType = String; 593 eType = String;
583 uDat.pString = new Bu::String(); 594 uDat.pString = new Bu::UtfString();
584 parseString( c, sInput, *uDat.pString ); 595 parseString( c, sInput, *uDat.pString );
585} 596}
586 597
587void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) 598void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput )
588{ 599{
589 skipWs( c, sInput ); 600 skipWs( c, sInput );
590 eType = Object; 601 eType = Object;
@@ -602,7 +613,7 @@ void Bu::Json::parseObject( char &c, Bu::Stream &sInput )
602 613
603 for(;;) 614 for(;;)
604 { 615 {
605 Bu::String sKey; 616 Bu::UtfString sKey;
606 parseString( c, sInput, sKey ); 617 parseString( c, sInput, sKey );
607 skipWs( c, sInput ); 618 skipWs( c, sInput );
608 if( c != ':' ) 619 if( c != ':' )
@@ -628,7 +639,7 @@ void Bu::Json::parseObject( char &c, Bu::Stream &sInput )
628 } 639 }
629} 640}
630 641
631void Bu::Json::parseArray( char &c, Bu::Stream &sInput ) 642void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput )
632{ 643{
633 skipWs( c, sInput ); 644 skipWs( c, sInput );
634 645
@@ -667,7 +678,7 @@ void Bu::Json::parseArray( char &c, Bu::Stream &sInput )
667 } 678 }
668} 679}
669 680
670void Bu::Json::parseNumber( char &c, Bu::Stream &sInput ) 681void Bu::Json::parseNumber( Bu::UtfChar &c, Bu::Stream &sInput )
671{ 682{
672 skipWs( c, sInput ); 683 skipWs( c, sInput );
673 684
@@ -702,7 +713,7 @@ void Bu::Json::parseNumber( char &c, Bu::Stream &sInput )
702 uDat.dNumber = atof( sBuf.getStr() ); 713 uDat.dNumber = atof( sBuf.getStr() );
703} 714}
704 715
705void Bu::Json::parseLiteral( char &c, Bu::Stream &sInput ) 716void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput )
706{ 717{
707 skipWs( c, sInput ); 718 skipWs( c, sInput );
708 719
@@ -736,27 +747,27 @@ void Bu::Json::parseLiteral( char &c, Bu::Stream &sInput )
736 } 747 }
737} 748}
738 749
739bool Bu::Json::readChar( char &c, Bu::Stream &sInput ) 750bool Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput )
740{ 751{
741 if( sInput.read( &c, 1 ) == 0 ) 752 if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() )
742 return false; 753 return false;
743 return true; 754 return true;
744} 755}
745 756
746void Bu::Json::readChar( char &c, Bu::Stream &sInput, const char *sSection ) 757void Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput, const char *sSection )
747{ 758{
748 if( sInput.read( &c, 1 ) == 0 ) 759 if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() )
749 { 760 {
750 throw Bu::ExceptionBase( sSection ); 761 throw Bu::ExceptionBase( sSection );
751 } 762 }
752} 763}
753 764
754bool Bu::Json::isWs( char c ) 765bool Bu::Json::isWs( Bu::UtfChar c )
755{ 766{
756 return c == ' ' || c == '\t' || c == '\r' || c == '\n'; 767 return c == ' ' || c == '\t' || c == '\r' || c == '\n';
757} 768}
758 769
759void Bu::Json::skipWs( char &c, Bu::Stream &sInput ) 770void Bu::Json::skipWs( Bu::UtfChar &c, Bu::Stream &sInput )
760{ 771{
761 while( isWs( c ) ) 772 while( isWs( c ) )
762 { 773 {
@@ -764,10 +775,10 @@ void Bu::Json::skipWs( char &c, Bu::Stream &sInput )
764 } 775 }
765} 776}
766 777
767void Bu::Json::writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const 778void Bu::Json::writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const
768{ 779{
769 sOutput.write("\"", 1 ); 780 sOutput.write("\"", 1 );
770 for( Bu::String::const_iterator i = sStr.begin(); i; i++ ) 781 for( Bu::UtfString::const_iterator i = sStr.begin(); i; i++ )
771 { 782 {
772 switch( *i ) 783 switch( *i )
773 { 784 {
@@ -807,12 +818,12 @@ void Bu::Json::writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const
807 if( *i < 32 ) 818 if( *i < 32 )
808 sOutput.write( 819 sOutput.write(
809 Bu::String("\\u%1"). 820 Bu::String("\\u%1").
810 arg( (int32_t)*i, Bu::Fmt::hex(4).fill('0') ). 821 arg( (uint32_t)*i, Bu::Fmt::hex(4).fill('0') ).
811 end().getStr(), 822 end().getStr(),
812 6 823 6
813 ); 824 );
814 else 825 else
815 sOutput.write( &(*i), 1 ); 826 Bu::UtfString::writePoint( sOutput, *i );
816 break; 827 break;
817 } 828 }
818 } 829 }
diff --git a/src/unstable/json.h b/src/unstable/json.h
index 4c85dd9..2ea62a2 100644
--- a/src/unstable/json.h
+++ b/src/unstable/json.h
@@ -10,12 +10,13 @@
10namespace Bu 10namespace Bu
11{ 11{
12 class Stream; 12 class Stream;
13 typedef Bu::List<Bu::UtfString> UtfStringList;
13 14
14 class Json 15 class Json
15 { 16 {
16 private: 17 private:
17 Json( char &c, Bu::Stream &sInput ); 18 Json( Bu::UtfChar &c, Bu::Stream &sInput );
18 typedef Bu::Hash<Bu::String, Json *> JsonHash; 19 typedef Bu::Hash<Bu::UtfString, Json *> JsonHash;
19 typedef Bu::Array<Json *> JsonList; 20 typedef Bu::Array<Json *> JsonList;
20 21
21 public: 22 public:
@@ -45,33 +46,33 @@ namespace Bu
45 virtual ~Json(); 46 virtual ~Json();
46 47
47 Type getType() const; 48 Type getType() const;
48 Bu::String getString() const; 49 Bu::UtfString getString() const;
49 double getNumber() const; 50 double getNumber() const;
50 bool getBoolean() const; 51 bool getBoolean() const;
51 bool isNull() const; 52 bool isNull() const;
52 Json &operator[]( const Bu::String &sKey ) const; 53 Json &operator[]( const Bu::UtfString &sKey ) const;
53 Json &operator[]( int iIndex ) const; 54 Json &operator[]( int iIndex ) const;
54 int getSize() const; 55 int getSize() const;
55 Bu::StringList getKeys() const; 56 Bu::UtfStringList getKeys() const;
56 iterator begin(); 57 iterator begin();
57 const_iterator begin() const; 58 const_iterator begin() const;
58 iterator end(); 59 iterator end();
59 const_iterator end() const; 60 const_iterator end() const;
60 61
61 bool has( const Bu::String &sKey ) const; 62 bool has( const Bu::String &sKey ) const;
62 void insert( const Bu::String &sKey, Bu::Json *pObj ); 63 Json &insert( const Bu::String &sKey, Bu::Json *pObj );
63 void insert( const Bu::String &sKey, const Bu::Json &rObj ); 64 Json &insert( const Bu::String &sKey, const Bu::Json &rObj );
64 void insert( const Bu::String &sKey, const Bu::String &sValue ); 65 Json &insert( const Bu::String &sKey, const Bu::String &sValue );
65 void insert( const Bu::String &sKey, const char *sValue ); 66 Json &insert( const Bu::String &sKey, const char *sValue );
66 void insert( const Bu::String &sKey, double dValue ); 67 Json &insert( const Bu::String &sKey, double dValue );
67 void insert( const Bu::String &sKey, bool bValue ); 68 Json &insert( const Bu::String &sKey, bool bValue );
68 Json &insertObject( const Bu::String &sKey ); 69 Json &insertObject( const Bu::String &sKey );
69 Json &insertArray( const Bu::String &sKey ); 70 Json &insertArray( const Bu::String &sKey );
70 void append( Bu::Json *pObj ); 71 Json &append( Bu::Json *pObj );
71 void append( const Bu::String &sValue ); 72 Json &append( const Bu::String &sValue );
72 void append( const char *sValue ); 73 Json &append( const char *sValue );
73 void append( double dValue ); 74 Json &append( double dValue );
74 void append( bool bValue ); 75 Json &append( bool bValue );
75 Json &appendObject(); 76 Json &appendObject();
76 Json &appendArray(); 77 Json &appendArray();
77 78
@@ -87,18 +88,20 @@ namespace Bu
87 Bu::Json &operator=( const Bu::Json &rSrc ); 88 Bu::Json &operator=( const Bu::Json &rSrc );
88 89
89 private: 90 private:
90 void parse( char &c, Bu::Stream &sInput ); 91 void parse( Bu::UtfChar &c, Bu::Stream &sInput );
91 void parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ); 92 void parseString( Bu::UtfChar &c, Bu::Stream &sInput,
92 void parseString( char &c, Bu::Stream &sInput ); 93 Bu::UtfString &sOut );
93 void parseObject( char &c, Bu::Stream &sInput ); 94 void parseString( Bu::UtfChar &c, Bu::Stream &sInput );
94 void parseArray( char &c, Bu::Stream &sInput ); 95 void parseObject( Bu::UtfChar &c, Bu::Stream &sInput );
95 void parseNumber( char &c, Bu::Stream &sInput ); 96 void parseArray( Bu::UtfChar &c, Bu::Stream &sInput );
96 void parseLiteral( char &c, Bu::Stream &sInput ); 97 void parseNumber( Bu::UtfChar &c, Bu::Stream &sInput );
97 bool readChar( char &c, Bu::Stream &sInput ); 98 void parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput );
98 void readChar( char &c, Bu::Stream &sInput, const char *sSection ); 99 bool readChar( Bu::UtfChar &c, Bu::Stream &sInput );
99 bool isWs( char c ); 100 void readChar( Bu::UtfChar &c, Bu::Stream &sInput,
100 void skipWs( char &c, Bu::Stream &sInput ); 101 const char *sSection );
101 void writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const; 102 bool isWs( Bu::UtfChar c );
103 void skipWs( Bu::UtfChar &c, Bu::Stream &sInput );
104 void writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const;
102 105
103 private: 106 private:
104 Type eType; 107 Type eType;
@@ -106,14 +109,16 @@ namespace Bu
106 { 109 {
107 DatUnion() : pObject( NULL ) { } 110 DatUnion() : pObject( NULL ) { }
108 DatUnion( const Bu::String &sValue ) : 111 DatUnion( const Bu::String &sValue ) :
109 pString( new Bu::String( sValue ) ) { } 112 pString( new Bu::UtfString( sValue ) ) { }
113 DatUnion( const Bu::UtfString &sValue ) :
114 pString( new Bu::UtfString( sValue ) ) { }
110 DatUnion( const char *sValue ) : 115 DatUnion( const char *sValue ) :
111 pString( new Bu::String( sValue ) ) { } 116 pString( new Bu::UtfString( sValue ) ) { }
112 DatUnion( double dValue ) : dNumber( dValue ) { } 117 DatUnion( double dValue ) : dNumber( dValue ) { }
113 DatUnion( bool bValue ) : bBoolean( bValue ) { } 118 DatUnion( bool bValue ) : bBoolean( bValue ) { }
114 JsonHash *pObject; 119 JsonHash *pObject;
115 JsonList *pArray; 120 JsonList *pArray;
116 Bu::String *pString; 121 Bu::UtfString *pString;
117 double dNumber; 122 double dNumber;
118 bool bBoolean; 123 bool bBoolean;
119 } uDat; 124 } uDat;
diff --git a/src/unstable/utfstring.cpp b/src/unstable/utfstring.cpp
index f945725..46c78e6 100644
--- a/src/unstable/utfstring.cpp
+++ b/src/unstable/utfstring.cpp
@@ -12,8 +12,21 @@
12#include "bu/config.h" 12#include "bu/config.h"
13#include "bu/sio.h" 13#include "bu/sio.h"
14#include "bu/membuf.h" 14#include "bu/membuf.h"
15#include "bu/formatter.h"
16
15using Bu::sio; 17using Bu::sio;
16 18
19uint8_t Bu::UtfString::utf8_lmask[8] = {
20 0x00,
21 0x01,
22 0x03,
23 0x07,
24 0x0f,
25 0x1f,
26 0x3f,
27 0x7f
28};
29
17Bu::UtfString::UtfString() 30Bu::UtfString::UtfString()
18{ 31{
19} 32}
@@ -111,27 +124,17 @@ void Bu::UtfString::append( const UtfString &rSrc )
111 124
112void Bu::UtfString::setUtf8( const Bu::String &sInput ) 125void Bu::UtfString::setUtf8( const Bu::String &sInput )
113{ 126{
114 static uint8_t lmask[8] = {
115 0x00,
116 0x01,
117 0x03,
118 0x07,
119 0x0f,
120 0x1f,
121 0x3f,
122 0x7f
123 };
124 for( Bu::String::const_iterator i = sInput.begin(); i; i++ ) 127 for( Bu::String::const_iterator i = sInput.begin(); i; i++ )
125 { 128 {
126 if( ((int)(uint8_t)*i)&0x80 ) 129 if( ((int)(uint8_t)*i)&0x80 )
127 { 130 {
128 int iBytes = 1; 131 int iBytes = 1;
129 for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } 132 for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { }
130 Bu::UtfChar uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); 133 Bu::UtfChar uPt = ((*i) & utf8_lmask[7-iBytes])<<(6*(iBytes-1));
131 for( iBytes--; iBytes >= 1; iBytes-- ) 134 for( iBytes--; iBytes >= 1; iBytes-- )
132 { 135 {
133 i++; 136 i++;
134 uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); 137 uPt |= ((*i)&utf8_lmask[6])<<(6*(iBytes-1));
135 } 138 }
136 append( uPt ); 139 append( uPt );
137 } 140 }
@@ -321,6 +324,133 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) const
321 } 324 }
322} 325}
323 326
327int Bu::UtfString::readPoint( Bu::Stream &sIn, Bu::UtfChar &c,
328 Bu::UtfString::Encoding sEnc )
329{
330 switch( sEnc )
331 {
332 case Utf8:
333 {
334 uint8_t i;
335 int iRead = 1;
336 if( sIn.read( &i, 1 ) < 1 )
337 return 0;
338 if( ((int)i)&0x80 )
339 {
340 int iBytes = 1;
341 for(; (((uint8_t)i)<<iBytes)&0x80; iBytes++ ) { }
342 iRead = iBytes;
343 c = (i & utf8_lmask[7-iBytes])<<(6*(iBytes-1));
344 for( iBytes--; iBytes >= 1; iBytes-- )
345 {
346 if( sIn.read( &i, 1 ) < 1 )
347 return 0;
348 c |= (i&utf8_lmask[6])<<(6*(iBytes-1));
349 }
350 return iRead;
351 }
352 else
353 {
354 c = (Bu::UtfChar)i;
355 return 1;
356 }
357 }
358 break;
359
360 case Utf16:
361 case Utf16be:
362 case Utf16le:
363 case Utf32:
364 case Utf32be:
365 case Utf32le:
366 case Ucs2:
367 case Ucs4:
368 case GuessEncoding:
369 throw Bu::ExceptionBase("Not implemented.");
370 break;
371 }
372 return -1;
373}
374
375int Bu::UtfString::writePoint( Bu::Stream &sOut, const Bu::UtfChar &c,
376 Bu::UtfString::Encoding sEnc )
377{
378 switch( sEnc )
379 {
380 case Utf8:
381 {
382 uint8_t uByte;
383 if( c >= 0x010000 )
384 {
385 // Four bytes
386 // 111 111111 111111 111111
387 uByte = (c>>18)|0xF0;
388 sOut.write( &uByte, 1 );
389 uByte = ((c>>12)&0x3F)|0x80;
390 sOut.write( &uByte, 1 );
391 uByte = ((c>>6)&0x3F)|0x80;
392 sOut.write( &uByte, 1 );
393 uByte = (c&0x3F)|0x80;
394 sOut.write( &uByte, 1 );
395 return 4;
396 }
397 else if( c >= 0x800 )
398 {
399 // Three bytes
400 // 1111 111111 111111
401 uByte = (c>>12)|0xE0;
402 sOut.write( &uByte, 1 );
403 uByte = ((c>>6)&0x3F)|0x80;
404 sOut.write( &uByte, 1 );
405 uByte = (c&0x3F)|0x80;
406 sOut.write( &uByte, 1 );
407 return 3;
408 }
409 else if( c >= 0x80 )
410 {
411 // Two bytes
412 // 11111 111111
413 uByte = (c>>6)|0xC0;
414 sOut.write( &uByte, 1 );
415 uByte = (c&0x3F)|0x80;
416 sOut.write( &uByte, 1 );
417 return 2;
418 }
419 else
420 {
421 // One byte
422 uByte = c;
423 sOut.write( &uByte, 1 );
424 return 1;
425 }
426 }
427 break;
428
429 case Utf16:
430 case Utf16be:
431 case Utf16le:
432 case Utf32:
433 case Utf32be:
434 case Utf32le:
435 case Ucs2:
436 case Ucs4:
437 case GuessEncoding:
438 throw Bu::ExceptionBase("Not implemented.");
439 break;
440 }
441 return -1;
442}
443
444int32_t Bu::UtfString::toInt32( int iRadix ) const
445{
446 return strtol( get().getStr(), NULL, iRadix );
447}
448
449int64_t Bu::UtfString::toInt64( int iRadix ) const
450{
451 return strtoll( get().getStr(), NULL, iRadix );
452}
453
324void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) const 454void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) const
325{ 455{
326 int iPos = 0; 456 int iPos = 0;
@@ -496,6 +626,33 @@ bool Bu::UtfString::operator==( const Bu::UtfString &rhs ) const
496 return aData == rhs.aData; 626 return aData == rhs.aData;
497} 627}
498 628
629bool Bu::UtfString::operator==( const Bu::String &rhs ) const
630{
631 // Nieve comparison
632 if( aData.getSize() != rhs.getSize() )
633 return false;
634
635 for( int j = 0; j < aData.getSize(); j++ )
636 {
637 if( aData[j] != rhs[j] )
638 return false;
639 }
640
641 return true;
642}
643
644bool Bu::UtfString::operator==( const char *rhs ) const
645{
646 // Nieve comparison
647 for( int j = 0; j < aData.getSize(); j++ )
648 {
649 if( rhs[j] == '\0' || aData[j] != rhs[j] )
650 return false;
651 }
652
653 return true;
654}
655
499Bu::UtfString &Bu::UtfString::operator+=( const Bu::UtfString &rhs ) 656Bu::UtfString &Bu::UtfString::operator+=( const Bu::UtfString &rhs )
500{ 657{
501 append( rhs ); 658 append( rhs );
@@ -508,6 +665,56 @@ Bu::UtfString &Bu::UtfString::operator+=( const UtfChar &rhs )
508 return *this; 665 return *this;
509} 666}
510 667
668bool Bu::UtfString::operator<( const Bu::UtfString &rhs ) const
669{
670 for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ )
671 {
672 if( aData[j] != rhs.aData[j] )
673 return aData[j] < rhs.aData[j];
674 }
675
676 return false;
677}
678
679bool Bu::UtfString::operator<=( const Bu::UtfString &rhs ) const
680{
681 for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ )
682 {
683 if( aData[j] != rhs.aData[j] )
684 return aData[j] < rhs.aData[j];
685 }
686
687 if( aData.getSize() == rhs.aData.getSize() )
688 return true;
689
690 return false;
691}
692
693bool Bu::UtfString::operator>( const Bu::UtfString &rhs ) const
694{
695 for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ )
696 {
697 if( aData[j] != rhs.aData[j] )
698 return aData[j] > rhs.aData[j];
699 }
700
701 return false;
702}
703
704bool Bu::UtfString::operator>=( const Bu::UtfString &rhs ) const
705{
706 for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ )
707 {
708 if( aData[j] != rhs.aData[j] )
709 return aData[j] > rhs.aData[j];
710 }
711
712 if( aData.getSize() == rhs.aData.getSize() )
713 return true;
714
715 return false;
716}
717
511Bu::String Bu::UtfString::get( Encoding eEnc ) const 718Bu::String Bu::UtfString::get( Encoding eEnc ) const
512{ 719{
513 Bu::MemBuf mb; 720 Bu::MemBuf mb;
@@ -537,16 +744,6 @@ void Bu::UtfString::debug() const
537/* 744/*
538void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) 745void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 )
539{ 746{
540 static uint8_t lmask[8] = {
541 0x00,
542 0x01,
543 0x03,
544 0x07,
545 0x0f,
546 0x1f,
547 0x3f,
548 0x7f
549 };
550 for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) 747 for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ )
551 { 748 {
552 if( i != sUtf8.begin() ) 749 if( i != sUtf8.begin() )
@@ -558,9 +755,9 @@ void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 )
558 int iBytes = 1; 755 int iBytes = 1;
559 for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } 756 for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { }
560// sio << "iBytes = " << iBytes << sio.nl; 757// sio << "iBytes = " << iBytes << sio.nl;
561 Bu::UtfChar uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); 758 Bu::UtfChar uPt = ((*i) & utf8_lmask[7-iBytes])<<(6*(iBytes-1));
562// sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') 759// sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0')
563// << (int)lmask[7-iBytes] << sio.nl; 760// << (int)utf8_lmask[7-iBytes] << sio.nl;
564 for( iBytes--; iBytes >= 1; iBytes-- ) 761 for( iBytes--; iBytes >= 1; iBytes-- )
565 { 762 {
566// sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) 763// sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1))
@@ -568,9 +765,9 @@ void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 )
568// sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') 765// sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0')
569// << (int)(uint8_t)*i << sio.nl 766// << (int)(uint8_t)*i << sio.nl
570// << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') 767// << "mask: " << Bu::Fmt().radix(2).width(8).fill('0')
571// << (int)lmask[6] << sio.nl; 768// << (int)utf8_lmask[6] << sio.nl;
572 i++; 769 i++;
573 uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); 770 uPt |= ((*i)&utf8_lmask[6])<<(6*(iBytes-1));
574 } 771 }
575 sio << uPt; 772 sio << uPt;
576// sio << " (" << Bu::Fmt( 8, 2 ).fill('0') 773// sio << " (" << Bu::Fmt( 8, 2 ).fill('0')
@@ -602,3 +799,9 @@ template<> bool Bu::__cmpHashKeys<Bu::UtfString>(
602{ 799{
603 return a == b; 800 return a == b;
604} 801}
802
803Bu::Formatter Bu::operator<<( Bu::Formatter &f, const Bu::UtfString &s )
804{
805 return f << s.get();
806}
807
diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h
index 5085ec0..285b680 100644
--- a/src/unstable/utfstring.h
+++ b/src/unstable/utfstring.h
@@ -191,6 +191,18 @@ namespace Bu
191 void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const; 191 void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
192 192
193 /** 193 /**
194 * Reads as many bytes from the given stream, starting at the current
195 * position, as required to read a single UtfChar (code point).
196 */
197 static int readPoint( Bu::Stream &sIn, UtfChar &c,
198 Encoding sEnc=Utf8 );
199 static int writePoint( Bu::Stream &sOut, const UtfChar &c,
200 Encoding sEnc=Utf8 );
201
202 int32_t toInt32( int iRadix=10 ) const;
203 int64_t toInt64( int iRadix=10 ) const;
204
205 /**
194 * This encodes the UtfString in the given encoding and returns it as 206 * This encodes the UtfString in the given encoding and returns it as
195 * a binary Bu::String. Like write, this also includes the proper BOM 207 * a binary Bu::String. Like write, this also includes the proper BOM
196 * at the beginning. 208 * at the beginning.
@@ -216,9 +228,16 @@ namespace Bu
216 UtfChar nextChar( int &iIndex ) const; 228 UtfChar nextChar( int &iIndex ) const;
217 229
218 bool operator==( const Bu::UtfString &rhs ) const; 230 bool operator==( const Bu::UtfString &rhs ) const;
231 bool operator==( const Bu::String &rhs ) const;
232 bool operator==( const char *rhs ) const;
219 UtfString &operator+=( const Bu::UtfString &rhs ); 233 UtfString &operator+=( const Bu::UtfString &rhs );
220 UtfString &operator+=( const UtfChar &rhs ); 234 UtfString &operator+=( const UtfChar &rhs );
221 235
236 bool operator<( const Bu::UtfString &rhs ) const;
237 bool operator<=( const Bu::UtfString &rhs ) const;
238 bool operator>( const Bu::UtfString &rhs ) const;
239 bool operator>=( const Bu::UtfString &rhs ) const;
240
222 private: 241 private:
223 void append16( uint16_t i ) { aData.append( i ); } 242 void append16( uint16_t i ) { aData.append( i ); }
224 243
@@ -237,6 +256,7 @@ namespace Bu
237 void writeUtf32le( Bu::Stream &sOut ) const; 256 void writeUtf32le( Bu::Stream &sOut ) const;
238 257
239 private: 258 private:
259 static uint8_t utf8_lmask[8];
240 Bu::Array<uint16_t> aData; 260 Bu::Array<uint16_t> aData;
241 int iRawLen; 261 int iRawLen;
242 int iCharLen; 262 int iCharLen;
@@ -254,6 +274,9 @@ namespace Bu
254 template<> uint32_t __calcHashCode<UtfString>( const UtfString &k ); 274 template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
255 template<> bool __cmpHashKeys<UtfString>( 275 template<> bool __cmpHashKeys<UtfString>(
256 const UtfString &a, const UtfString &b ); 276 const UtfString &a, const UtfString &b );
277
278 class Formatter;
279 Bu::Formatter operator<<( Bu::Formatter &f, const Bu::UtfString &s );
257}; 280};
258 281
259#endif 282#endif