diff options
author | Mike Buland <eichlan@xagasoft.com> | 2019-05-13 19:47:19 -0700 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2019-05-13 19:47:19 -0700 |
commit | d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37 (patch) | |
tree | 0cd21d420fc67ae757ec2475610c4624fd714363 /src/unstable | |
parent | 62753c815b5ec34ebfae37a3c89187a01cc17160 (diff) | |
download | libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.tar.gz libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.tar.bz2 libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.tar.xz libbu++-d605d6c3c04c1e26121f9b1c5c1d2dbcc5f7bc37.zip |
UtfString & Json overhaul.
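UtfString gains per-code-point stream I/O (readPoint/writePoint), integer
conversion (toInt32/toInt64), ordering and Bu::String/C-string comparison
operators, and a Formatter output operator. Json now stores its string values
and object keys as UtfString and parses its input one UtfChar at a time.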
Diffstat (limited to 'src/unstable')
-rw-r--r-- | src/unstable/json.cpp | 95 | ||||
-rw-r--r-- | src/unstable/json.h | 67 | ||||
-rw-r--r-- | src/unstable/utfstring.cpp | 255 | ||||
-rw-r--r-- | src/unstable/utfstring.h | 23 |
4 files changed, 341 insertions, 99 deletions
diff --git a/src/unstable/json.cpp b/src/unstable/json.cpp index d7e84d9..b1414a9 100644 --- a/src/unstable/json.cpp +++ b/src/unstable/json.cpp | |||
@@ -15,7 +15,7 @@ Bu::Json::Json() : | |||
15 | 15 | ||
16 | Bu::Json::Json( const Bu::UtfString &sValue ) : | 16 | Bu::Json::Json( const Bu::UtfString &sValue ) : |
17 | eType( String ), | 17 | eType( String ), |
18 | uDat( sValue.get() ) | 18 | uDat( sValue ) |
19 | { | 19 | { |
20 | } | 20 | } |
21 | 21 | ||
@@ -57,7 +57,7 @@ Bu::Json::Json( Type eType ) : | |||
57 | break; | 57 | break; |
58 | 58 | ||
59 | case String: | 59 | case String: |
60 | uDat.pString = new Bu::String(); | 60 | uDat.pString = new Bu::UtfString(); |
61 | break; | 61 | break; |
62 | 62 | ||
63 | case Number: | 63 | case Number: |
@@ -75,7 +75,7 @@ Bu::Json::Json( Bu::Stream &sInput ) : | |||
75 | parse( sInput ); | 75 | parse( sInput ); |
76 | } | 76 | } |
77 | 77 | ||
78 | Bu::Json::Json( char &c, Bu::Stream &sInput ) : | 78 | Bu::Json::Json( Bu::UtfChar &c, Bu::Stream &sInput ) : |
79 | eType( Invalid ) | 79 | eType( Invalid ) |
80 | { | 80 | { |
81 | parse( c, sInput ); | 81 | parse( c, sInput ); |
@@ -97,7 +97,7 @@ Bu::Json::Type Bu::Json::getType() const | |||
97 | return eType; | 97 | return eType; |
98 | } | 98 | } |
99 | 99 | ||
100 | Bu::String Bu::Json::getString() const | 100 | Bu::UtfString Bu::Json::getString() const |
101 | { | 101 | { |
102 | if( eType != String ) | 102 | if( eType != String ) |
103 | throw Bu::ExceptionBase( | 103 | throw Bu::ExceptionBase( |
@@ -132,7 +132,7 @@ bool Bu::Json::isNull() const | |||
132 | return eType == Null; | 132 | return eType == Null; |
133 | } | 133 | } |
134 | 134 | ||
135 | Bu::Json &Bu::Json::operator[]( const Bu::String &sKey ) const | 135 | Bu::Json &Bu::Json::operator[]( const Bu::UtfString &sKey ) const |
136 | { | 136 | { |
137 | if( eType != Object ) | 137 | if( eType != Object ) |
138 | throw Bu::ExceptionBase( | 138 | throw Bu::ExceptionBase( |
@@ -158,15 +158,13 @@ int Bu::Json::getSize() const | |||
158 | return uDat.pObject->getSize(); | 158 | return uDat.pObject->getSize(); |
159 | else if( eType == Array ) | 159 | else if( eType == Array ) |
160 | return uDat.pArray->getSize(); | 160 | return uDat.pArray->getSize(); |
161 | else if( eType == String ) | ||
162 | return uDat.pString->getSize(); | ||
163 | else | 161 | else |
164 | throw Bu::ExceptionBase( | 162 | throw Bu::ExceptionBase( |
165 | "Size requseted from json type that doesn't support it." | 163 | "Size requseted from json type that doesn't support it." |
166 | ); | 164 | ); |
167 | } | 165 | } |
168 | 166 | ||
169 | Bu::StringList Bu::Json::getKeys() const | 167 | Bu::UtfStringList Bu::Json::getKeys() const |
170 | { | 168 | { |
171 | return uDat.pObject->getKeys(); | 169 | return uDat.pObject->getKeys(); |
172 | } | 170 | } |
@@ -196,33 +194,40 @@ bool Bu::Json::has( const Bu::String &sKey ) const | |||
196 | return uDat.pObject->has( sKey ); | 194 | return uDat.pObject->has( sKey ); |
197 | } | 195 | } |
198 | 196 | ||
199 | void Bu::Json::insert( const Bu::String &sKey, Bu::Json *pObj ) | 197 | Bu::Json &Bu::Json::insert( const Bu::String &sKey, Bu::Json *pObj ) |
200 | { | 198 | { |
201 | uDat.pObject->insert( sKey, pObj ); | 199 | uDat.pObject->insert( sKey, pObj ); |
200 | return *this; | ||
202 | } | 201 | } |
203 | void Bu::Json::insert( const Bu::String &sKey, const Bu::Json &rObj ) | 202 | |
203 | Bu::Json &Bu::Json::insert( const Bu::String &sKey, const Bu::Json &rObj ) | ||
204 | { | 204 | { |
205 | uDat.pObject->insert( sKey, new Bu::Json( rObj ) ); | 205 | uDat.pObject->insert( sKey, new Bu::Json( rObj ) ); |
206 | return *this; | ||
206 | } | 207 | } |
207 | 208 | ||
208 | void Bu::Json::insert( const Bu::String &sKey, const Bu::String &sValue ) | 209 | Bu::Json &Bu::Json::insert( const Bu::String &sKey, const Bu::String &sValue ) |
209 | { | 210 | { |
210 | uDat.pObject->insert( sKey, new Json( sValue ) ); | 211 | uDat.pObject->insert( sKey, new Json( sValue ) ); |
212 | return *this; | ||
211 | } | 213 | } |
212 | 214 | ||
213 | void Bu::Json::insert( const Bu::String &sKey, const char *sValue ) | 215 | Bu::Json &Bu::Json::insert( const Bu::String &sKey, const char *sValue ) |
214 | { | 216 | { |
215 | uDat.pObject->insert( sKey, new Json( sValue ) ); | 217 | uDat.pObject->insert( sKey, new Json( sValue ) ); |
218 | return *this; | ||
216 | } | 219 | } |
217 | 220 | ||
218 | void Bu::Json::insert( const Bu::String &sKey, double dValue ) | 221 | Bu::Json &Bu::Json::insert( const Bu::String &sKey, double dValue ) |
219 | { | 222 | { |
220 | uDat.pObject->insert( sKey, new Json( dValue ) ); | 223 | uDat.pObject->insert( sKey, new Json( dValue ) ); |
224 | return *this; | ||
221 | } | 225 | } |
222 | 226 | ||
223 | void Bu::Json::insert( const Bu::String &sKey, bool bValue ) | 227 | Bu::Json &Bu::Json::insert( const Bu::String &sKey, bool bValue ) |
224 | { | 228 | { |
225 | uDat.pObject->insert( sKey, new Json( bValue ) ); | 229 | uDat.pObject->insert( sKey, new Json( bValue ) ); |
230 | return *this; | ||
226 | } | 231 | } |
227 | 232 | ||
228 | Bu::Json &Bu::Json::insertObject( const Bu::String &sKey ) | 233 | Bu::Json &Bu::Json::insertObject( const Bu::String &sKey ) |
@@ -239,29 +244,34 @@ Bu::Json &Bu::Json::insertArray( const Bu::String &sKey ) | |||
239 | return *pAr; | 244 | return *pAr; |
240 | } | 245 | } |
241 | 246 | ||
242 | void Bu::Json::append( Bu::Json *pObj ) | 247 | Bu::Json &Bu::Json::append( Bu::Json *pObj ) |
243 | { | 248 | { |
244 | uDat.pArray->append( pObj ); | 249 | uDat.pArray->append( pObj ); |
250 | return *this; | ||
245 | } | 251 | } |
246 | 252 | ||
247 | void Bu::Json::append( const Bu::String &sValue ) | 253 | Bu::Json &Bu::Json::append( const Bu::String &sValue ) |
248 | { | 254 | { |
249 | uDat.pArray->append( new Json( sValue ) ); | 255 | uDat.pArray->append( new Json( sValue ) ); |
256 | return *this; | ||
250 | } | 257 | } |
251 | 258 | ||
252 | void Bu::Json::append( const char *sValue ) | 259 | Bu::Json &Bu::Json::append( const char *sValue ) |
253 | { | 260 | { |
254 | uDat.pArray->append( new Json( sValue ) ); | 261 | uDat.pArray->append( new Json( sValue ) ); |
262 | return *this; | ||
255 | } | 263 | } |
256 | 264 | ||
257 | void Bu::Json::append( double dValue ) | 265 | Bu::Json &Bu::Json::append( double dValue ) |
258 | { | 266 | { |
259 | uDat.pArray->append( new Json( dValue ) ); | 267 | uDat.pArray->append( new Json( dValue ) ); |
268 | return *this; | ||
260 | } | 269 | } |
261 | 270 | ||
262 | void Bu::Json::append( bool bValue ) | 271 | Bu::Json &Bu::Json::append( bool bValue ) |
263 | { | 272 | { |
264 | uDat.pArray->append( new Json( bValue ) ); | 273 | uDat.pArray->append( new Json( bValue ) ); |
274 | return *this; | ||
265 | } | 275 | } |
266 | 276 | ||
267 | Bu::Json &Bu::Json::appendObject() | 277 | Bu::Json &Bu::Json::appendObject() |
@@ -282,7 +292,7 @@ void Bu::Json::parse( Bu::Stream &sInput ) | |||
282 | { | 292 | { |
283 | reset(); | 293 | reset(); |
284 | 294 | ||
285 | char c; | 295 | Bu::UtfChar c; |
286 | next("json"); | 296 | next("json"); |
287 | 297 | ||
288 | parse( c, sInput ); | 298 | parse( c, sInput ); |
@@ -294,7 +304,7 @@ void Bu::Json::parse( const Bu::String &sInput ) | |||
294 | parse( mb ); | 304 | parse( mb ); |
295 | } | 305 | } |
296 | 306 | ||
297 | void Bu::Json::parse( char &c, Bu::Stream &sInput ) | 307 | void Bu::Json::parse( Bu::UtfChar &c, Bu::Stream &sInput ) |
298 | { | 308 | { |
299 | while( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) | 309 | while( c == ' ' || c == '\t' || c == '\r' || c == '\n' ) |
300 | { | 310 | { |
@@ -434,9 +444,9 @@ void Bu::Json::writeStable( Bu::Stream &sOutput ) const | |||
434 | { | 444 | { |
435 | sOutput.write("{", 1 ); | 445 | sOutput.write("{", 1 ); |
436 | bool bFirst = true; | 446 | bool bFirst = true; |
437 | Bu::List<Bu::String> lKey = uDat.pObject->getKeys(); | 447 | Bu::List<Bu::UtfString> lKey = uDat.pObject->getKeys(); |
438 | lKey.sort(); | 448 | lKey.sort(); |
439 | for( Bu::List<Bu::String>::iterator i = lKey.begin(); i; i++ ) | 449 | for( Bu::List<Bu::UtfString>::iterator i = lKey.begin(); i; i++ ) |
440 | { | 450 | { |
441 | if( bFirst == true ) | 451 | if( bFirst == true ) |
442 | bFirst = false; | 452 | bFirst = false; |
@@ -480,7 +490,7 @@ Bu::Json &Bu::Json::operator=( const Bu::Json &rSrc ) | |||
480 | break; | 490 | break; |
481 | 491 | ||
482 | case String: | 492 | case String: |
483 | uDat.pString = new Bu::String( *rSrc.uDat.pString ); | 493 | uDat.pString = new Bu::UtfString( *rSrc.uDat.pString ); |
484 | break; | 494 | break; |
485 | 495 | ||
486 | case Number: | 496 | case Number: |
@@ -513,7 +523,8 @@ Bu::Json &Bu::Json::operator=( const Bu::Json &rSrc ) | |||
513 | return *this; | 523 | return *this; |
514 | } | 524 | } |
515 | 525 | ||
516 | void Bu::Json::parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ) | 526 | void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput, |
527 | Bu::UtfString &sOut ) | ||
517 | { | 528 | { |
518 | skipWs( c, sInput ); | 529 | skipWs( c, sInput ); |
519 | bool bEscape = false; | 530 | bool bEscape = false; |
@@ -577,14 +588,14 @@ void Bu::Json::parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ) | |||
577 | } | 588 | } |
578 | } | 589 | } |
579 | 590 | ||
580 | void Bu::Json::parseString( char &c, Bu::Stream &sInput ) | 591 | void Bu::Json::parseString( Bu::UtfChar &c, Bu::Stream &sInput ) |
581 | { | 592 | { |
582 | eType = String; | 593 | eType = String; |
583 | uDat.pString = new Bu::String(); | 594 | uDat.pString = new Bu::UtfString(); |
584 | parseString( c, sInput, *uDat.pString ); | 595 | parseString( c, sInput, *uDat.pString ); |
585 | } | 596 | } |
586 | 597 | ||
587 | void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) | 598 | void Bu::Json::parseObject( Bu::UtfChar &c, Bu::Stream &sInput ) |
588 | { | 599 | { |
589 | skipWs( c, sInput ); | 600 | skipWs( c, sInput ); |
590 | eType = Object; | 601 | eType = Object; |
@@ -602,7 +613,7 @@ void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) | |||
602 | 613 | ||
603 | for(;;) | 614 | for(;;) |
604 | { | 615 | { |
605 | Bu::String sKey; | 616 | Bu::UtfString sKey; |
606 | parseString( c, sInput, sKey ); | 617 | parseString( c, sInput, sKey ); |
607 | skipWs( c, sInput ); | 618 | skipWs( c, sInput ); |
608 | if( c != ':' ) | 619 | if( c != ':' ) |
@@ -628,7 +639,7 @@ void Bu::Json::parseObject( char &c, Bu::Stream &sInput ) | |||
628 | } | 639 | } |
629 | } | 640 | } |
630 | 641 | ||
631 | void Bu::Json::parseArray( char &c, Bu::Stream &sInput ) | 642 | void Bu::Json::parseArray( Bu::UtfChar &c, Bu::Stream &sInput ) |
632 | { | 643 | { |
633 | skipWs( c, sInput ); | 644 | skipWs( c, sInput ); |
634 | 645 | ||
@@ -667,7 +678,7 @@ void Bu::Json::parseArray( char &c, Bu::Stream &sInput ) | |||
667 | } | 678 | } |
668 | } | 679 | } |
669 | 680 | ||
670 | void Bu::Json::parseNumber( char &c, Bu::Stream &sInput ) | 681 | void Bu::Json::parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ) |
671 | { | 682 | { |
672 | skipWs( c, sInput ); | 683 | skipWs( c, sInput ); |
673 | 684 | ||
@@ -702,7 +713,7 @@ void Bu::Json::parseNumber( char &c, Bu::Stream &sInput ) | |||
702 | uDat.dNumber = atof( sBuf.getStr() ); | 713 | uDat.dNumber = atof( sBuf.getStr() ); |
703 | } | 714 | } |
704 | 715 | ||
705 | void Bu::Json::parseLiteral( char &c, Bu::Stream &sInput ) | 716 | void Bu::Json::parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ) |
706 | { | 717 | { |
707 | skipWs( c, sInput ); | 718 | skipWs( c, sInput ); |
708 | 719 | ||
@@ -736,27 +747,27 @@ void Bu::Json::parseLiteral( char &c, Bu::Stream &sInput ) | |||
736 | } | 747 | } |
737 | } | 748 | } |
738 | 749 | ||
739 | bool Bu::Json::readChar( char &c, Bu::Stream &sInput ) | 750 | bool Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput ) |
740 | { | 751 | { |
741 | if( sInput.read( &c, 1 ) == 0 ) | 752 | if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) |
742 | return false; | 753 | return false; |
743 | return true; | 754 | return true; |
744 | } | 755 | } |
745 | 756 | ||
746 | void Bu::Json::readChar( char &c, Bu::Stream &sInput, const char *sSection ) | 757 | void Bu::Json::readChar( Bu::UtfChar &c, Bu::Stream &sInput, const char *sSection ) |
747 | { | 758 | { |
748 | if( sInput.read( &c, 1 ) == 0 ) | 759 | if( Bu::UtfString::readPoint( sInput, c ) == 0 && sInput.isEos() ) |
749 | { | 760 | { |
750 | throw Bu::ExceptionBase( sSection ); | 761 | throw Bu::ExceptionBase( sSection ); |
751 | } | 762 | } |
752 | } | 763 | } |
753 | 764 | ||
754 | bool Bu::Json::isWs( char c ) | 765 | bool Bu::Json::isWs( Bu::UtfChar c ) |
755 | { | 766 | { |
756 | return c == ' ' || c == '\t' || c == '\r' || c == '\n'; | 767 | return c == ' ' || c == '\t' || c == '\r' || c == '\n'; |
757 | } | 768 | } |
758 | 769 | ||
759 | void Bu::Json::skipWs( char &c, Bu::Stream &sInput ) | 770 | void Bu::Json::skipWs( Bu::UtfChar &c, Bu::Stream &sInput ) |
760 | { | 771 | { |
761 | while( isWs( c ) ) | 772 | while( isWs( c ) ) |
762 | { | 773 | { |
@@ -764,10 +775,10 @@ void Bu::Json::skipWs( char &c, Bu::Stream &sInput ) | |||
764 | } | 775 | } |
765 | } | 776 | } |
766 | 777 | ||
767 | void Bu::Json::writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const | 778 | void Bu::Json::writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const |
768 | { | 779 | { |
769 | sOutput.write("\"", 1 ); | 780 | sOutput.write("\"", 1 ); |
770 | for( Bu::String::const_iterator i = sStr.begin(); i; i++ ) | 781 | for( Bu::UtfString::const_iterator i = sStr.begin(); i; i++ ) |
771 | { | 782 | { |
772 | switch( *i ) | 783 | switch( *i ) |
773 | { | 784 | { |
@@ -807,12 +818,12 @@ void Bu::Json::writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const | |||
807 | if( *i < 32 ) | 818 | if( *i < 32 ) |
808 | sOutput.write( | 819 | sOutput.write( |
809 | Bu::String("\\u%1"). | 820 | Bu::String("\\u%1"). |
810 | arg( (int32_t)*i, Bu::Fmt::hex(4).fill('0') ). | 821 | arg( (uint32_t)*i, Bu::Fmt::hex(4).fill('0') ). |
811 | end().getStr(), | 822 | end().getStr(), |
812 | 6 | 823 | 6 |
813 | ); | 824 | ); |
814 | else | 825 | else |
815 | sOutput.write( &(*i), 1 ); | 826 | Bu::UtfString::writePoint( sOutput, *i ); |
816 | break; | 827 | break; |
817 | } | 828 | } |
818 | } | 829 | } |
diff --git a/src/unstable/json.h b/src/unstable/json.h index 4c85dd9..2ea62a2 100644 --- a/src/unstable/json.h +++ b/src/unstable/json.h | |||
@@ -10,12 +10,13 @@ | |||
10 | namespace Bu | 10 | namespace Bu |
11 | { | 11 | { |
12 | class Stream; | 12 | class Stream; |
13 | typedef Bu::List<Bu::UtfString> UtfStringList; | ||
13 | 14 | ||
14 | class Json | 15 | class Json |
15 | { | 16 | { |
16 | private: | 17 | private: |
17 | Json( char &c, Bu::Stream &sInput ); | 18 | Json( Bu::UtfChar &c, Bu::Stream &sInput ); |
18 | typedef Bu::Hash<Bu::String, Json *> JsonHash; | 19 | typedef Bu::Hash<Bu::UtfString, Json *> JsonHash; |
19 | typedef Bu::Array<Json *> JsonList; | 20 | typedef Bu::Array<Json *> JsonList; |
20 | 21 | ||
21 | public: | 22 | public: |
@@ -45,33 +46,33 @@ namespace Bu | |||
45 | virtual ~Json(); | 46 | virtual ~Json(); |
46 | 47 | ||
47 | Type getType() const; | 48 | Type getType() const; |
48 | Bu::String getString() const; | 49 | Bu::UtfString getString() const; |
49 | double getNumber() const; | 50 | double getNumber() const; |
50 | bool getBoolean() const; | 51 | bool getBoolean() const; |
51 | bool isNull() const; | 52 | bool isNull() const; |
52 | Json &operator[]( const Bu::String &sKey ) const; | 53 | Json &operator[]( const Bu::UtfString &sKey ) const; |
53 | Json &operator[]( int iIndex ) const; | 54 | Json &operator[]( int iIndex ) const; |
54 | int getSize() const; | 55 | int getSize() const; |
55 | Bu::StringList getKeys() const; | 56 | Bu::UtfStringList getKeys() const; |
56 | iterator begin(); | 57 | iterator begin(); |
57 | const_iterator begin() const; | 58 | const_iterator begin() const; |
58 | iterator end(); | 59 | iterator end(); |
59 | const_iterator end() const; | 60 | const_iterator end() const; |
60 | 61 | ||
61 | bool has( const Bu::String &sKey ) const; | 62 | bool has( const Bu::String &sKey ) const; |
62 | void insert( const Bu::String &sKey, Bu::Json *pObj ); | 63 | Json &insert( const Bu::String &sKey, Bu::Json *pObj ); |
63 | void insert( const Bu::String &sKey, const Bu::Json &rObj ); | 64 | Json &insert( const Bu::String &sKey, const Bu::Json &rObj ); |
64 | void insert( const Bu::String &sKey, const Bu::String &sValue ); | 65 | Json &insert( const Bu::String &sKey, const Bu::String &sValue ); |
65 | void insert( const Bu::String &sKey, const char *sValue ); | 66 | Json &insert( const Bu::String &sKey, const char *sValue ); |
66 | void insert( const Bu::String &sKey, double dValue ); | 67 | Json &insert( const Bu::String &sKey, double dValue ); |
67 | void insert( const Bu::String &sKey, bool bValue ); | 68 | Json &insert( const Bu::String &sKey, bool bValue ); |
68 | Json &insertObject( const Bu::String &sKey ); | 69 | Json &insertObject( const Bu::String &sKey ); |
69 | Json &insertArray( const Bu::String &sKey ); | 70 | Json &insertArray( const Bu::String &sKey ); |
70 | void append( Bu::Json *pObj ); | 71 | Json &append( Bu::Json *pObj ); |
71 | void append( const Bu::String &sValue ); | 72 | Json &append( const Bu::String &sValue ); |
72 | void append( const char *sValue ); | 73 | Json &append( const char *sValue ); |
73 | void append( double dValue ); | 74 | Json &append( double dValue ); |
74 | void append( bool bValue ); | 75 | Json &append( bool bValue ); |
75 | Json &appendObject(); | 76 | Json &appendObject(); |
76 | Json &appendArray(); | 77 | Json &appendArray(); |
77 | 78 | ||
@@ -87,18 +88,20 @@ namespace Bu | |||
87 | Bu::Json &operator=( const Bu::Json &rSrc ); | 88 | Bu::Json &operator=( const Bu::Json &rSrc ); |
88 | 89 | ||
89 | private: | 90 | private: |
90 | void parse( char &c, Bu::Stream &sInput ); | 91 | void parse( Bu::UtfChar &c, Bu::Stream &sInput ); |
91 | void parseString( char &c, Bu::Stream &sInput, Bu::String &sOut ); | 92 | void parseString( Bu::UtfChar &c, Bu::Stream &sInput, |
92 | void parseString( char &c, Bu::Stream &sInput ); | 93 | Bu::UtfString &sOut ); |
93 | void parseObject( char &c, Bu::Stream &sInput ); | 94 | void parseString( Bu::UtfChar &c, Bu::Stream &sInput ); |
94 | void parseArray( char &c, Bu::Stream &sInput ); | 95 | void parseObject( Bu::UtfChar &c, Bu::Stream &sInput ); |
95 | void parseNumber( char &c, Bu::Stream &sInput ); | 96 | void parseArray( Bu::UtfChar &c, Bu::Stream &sInput ); |
96 | void parseLiteral( char &c, Bu::Stream &sInput ); | 97 | void parseNumber( Bu::UtfChar &c, Bu::Stream &sInput ); |
97 | bool readChar( char &c, Bu::Stream &sInput ); | 98 | void parseLiteral( Bu::UtfChar &c, Bu::Stream &sInput ); |
98 | void readChar( char &c, Bu::Stream &sInput, const char *sSection ); | 99 | bool readChar( Bu::UtfChar &c, Bu::Stream &sInput ); |
99 | bool isWs( char c ); | 100 | void readChar( Bu::UtfChar &c, Bu::Stream &sInput, |
100 | void skipWs( char &c, Bu::Stream &sInput ); | 101 | const char *sSection ); |
101 | void writeStr( const Bu::String &sStr, Bu::Stream &sOutput ) const; | 102 | bool isWs( Bu::UtfChar c ); |
103 | void skipWs( Bu::UtfChar &c, Bu::Stream &sInput ); | ||
104 | void writeStr( const Bu::UtfString &sStr, Bu::Stream &sOutput ) const; | ||
102 | 105 | ||
103 | private: | 106 | private: |
104 | Type eType; | 107 | Type eType; |
@@ -106,14 +109,16 @@ namespace Bu | |||
106 | { | 109 | { |
107 | DatUnion() : pObject( NULL ) { } | 110 | DatUnion() : pObject( NULL ) { } |
108 | DatUnion( const Bu::String &sValue ) : | 111 | DatUnion( const Bu::String &sValue ) : |
109 | pString( new Bu::String( sValue ) ) { } | 112 | pString( new Bu::UtfString( sValue ) ) { } |
113 | DatUnion( const Bu::UtfString &sValue ) : | ||
114 | pString( new Bu::UtfString( sValue ) ) { } | ||
110 | DatUnion( const char *sValue ) : | 115 | DatUnion( const char *sValue ) : |
111 | pString( new Bu::String( sValue ) ) { } | 116 | pString( new Bu::UtfString( sValue ) ) { } |
112 | DatUnion( double dValue ) : dNumber( dValue ) { } | 117 | DatUnion( double dValue ) : dNumber( dValue ) { } |
113 | DatUnion( bool bValue ) : bBoolean( bValue ) { } | 118 | DatUnion( bool bValue ) : bBoolean( bValue ) { } |
114 | JsonHash *pObject; | 119 | JsonHash *pObject; |
115 | JsonList *pArray; | 120 | JsonList *pArray; |
116 | Bu::String *pString; | 121 | Bu::UtfString *pString; |
117 | double dNumber; | 122 | double dNumber; |
118 | bool bBoolean; | 123 | bool bBoolean; |
119 | } uDat; | 124 | } uDat; |
diff --git a/src/unstable/utfstring.cpp b/src/unstable/utfstring.cpp index f945725..46c78e6 100644 --- a/src/unstable/utfstring.cpp +++ b/src/unstable/utfstring.cpp | |||
@@ -12,8 +12,21 @@ | |||
12 | #include "bu/config.h" | 12 | #include "bu/config.h" |
13 | #include "bu/sio.h" | 13 | #include "bu/sio.h" |
14 | #include "bu/membuf.h" | 14 | #include "bu/membuf.h" |
15 | #include "bu/formatter.h" | ||
16 | |||
15 | using Bu::sio; | 17 | using Bu::sio; |
16 | 18 | ||
19 | uint8_t Bu::UtfString::utf8_lmask[8] = { | ||
20 | 0x00, | ||
21 | 0x01, | ||
22 | 0x03, | ||
23 | 0x07, | ||
24 | 0x0f, | ||
25 | 0x1f, | ||
26 | 0x3f, | ||
27 | 0x7f | ||
28 | }; | ||
29 | |||
17 | Bu::UtfString::UtfString() | 30 | Bu::UtfString::UtfString() |
18 | { | 31 | { |
19 | } | 32 | } |
@@ -111,27 +124,17 @@ void Bu::UtfString::append( const UtfString &rSrc ) | |||
111 | 124 | ||
112 | void Bu::UtfString::setUtf8( const Bu::String &sInput ) | 125 | void Bu::UtfString::setUtf8( const Bu::String &sInput ) |
113 | { | 126 | { |
114 | static uint8_t lmask[8] = { | ||
115 | 0x00, | ||
116 | 0x01, | ||
117 | 0x03, | ||
118 | 0x07, | ||
119 | 0x0f, | ||
120 | 0x1f, | ||
121 | 0x3f, | ||
122 | 0x7f | ||
123 | }; | ||
124 | for( Bu::String::const_iterator i = sInput.begin(); i; i++ ) | 127 | for( Bu::String::const_iterator i = sInput.begin(); i; i++ ) |
125 | { | 128 | { |
126 | if( ((int)(uint8_t)*i)&0x80 ) | 129 | if( ((int)(uint8_t)*i)&0x80 ) |
127 | { | 130 | { |
128 | int iBytes = 1; | 131 | int iBytes = 1; |
129 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } | 132 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } |
130 | Bu::UtfChar uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); | 133 | Bu::UtfChar uPt = ((*i) & utf8_lmask[7-iBytes])<<(6*(iBytes-1)); |
131 | for( iBytes--; iBytes >= 1; iBytes-- ) | 134 | for( iBytes--; iBytes >= 1; iBytes-- ) |
132 | { | 135 | { |
133 | i++; | 136 | i++; |
134 | uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); | 137 | uPt |= ((*i)&utf8_lmask[6])<<(6*(iBytes-1)); |
135 | } | 138 | } |
136 | append( uPt ); | 139 | append( uPt ); |
137 | } | 140 | } |
@@ -321,6 +324,133 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) const | |||
321 | } | 324 | } |
322 | } | 325 | } |
323 | 326 | ||
327 | int Bu::UtfString::readPoint( Bu::Stream &sIn, Bu::UtfChar &c, | ||
328 | Bu::UtfString::Encoding sEnc ) | ||
329 | { | ||
330 | switch( sEnc ) | ||
331 | { | ||
332 | case Utf8: | ||
333 | { | ||
334 | uint8_t i; | ||
335 | int iRead = 1; | ||
336 | if( sIn.read( &i, 1 ) < 1 ) | ||
337 | return 0; | ||
338 | if( ((int)i)&0x80 ) | ||
339 | { | ||
340 | int iBytes = 1; | ||
341 | for(; (((uint8_t)i)<<iBytes)&0x80; iBytes++ ) { } | ||
342 | iRead = iBytes; | ||
343 | c = (i & utf8_lmask[7-iBytes])<<(6*(iBytes-1)); | ||
344 | for( iBytes--; iBytes >= 1; iBytes-- ) | ||
345 | { | ||
346 | if( sIn.read( &i, 1 ) < 1 ) | ||
347 | return 0; | ||
348 | c |= (i&utf8_lmask[6])<<(6*(iBytes-1)); | ||
349 | } | ||
350 | return iRead; | ||
351 | } | ||
352 | else | ||
353 | { | ||
354 | c = (Bu::UtfChar)i; | ||
355 | return 1; | ||
356 | } | ||
357 | } | ||
358 | break; | ||
359 | |||
360 | case Utf16: | ||
361 | case Utf16be: | ||
362 | case Utf16le: | ||
363 | case Utf32: | ||
364 | case Utf32be: | ||
365 | case Utf32le: | ||
366 | case Ucs2: | ||
367 | case Ucs4: | ||
368 | case GuessEncoding: | ||
369 | throw Bu::ExceptionBase("Not implemented."); | ||
370 | break; | ||
371 | } | ||
372 | return -1; | ||
373 | } | ||
374 | |||
375 | int Bu::UtfString::writePoint( Bu::Stream &sOut, const Bu::UtfChar &c, | ||
376 | Bu::UtfString::Encoding sEnc ) | ||
377 | { | ||
378 | switch( sEnc ) | ||
379 | { | ||
380 | case Utf8: | ||
381 | { | ||
382 | uint8_t uByte; | ||
383 | if( c >= 0x010000 ) | ||
384 | { | ||
385 | // Four bytes | ||
386 | // 111 111111 111111 111111 | ||
387 | uByte = (c>>18)|0xF0; | ||
388 | sOut.write( &uByte, 1 ); | ||
389 | uByte = ((c>>12)&0x3F)|0x80; | ||
390 | sOut.write( &uByte, 1 ); | ||
391 | uByte = ((c>>6)&0x3F)|0x80; | ||
392 | sOut.write( &uByte, 1 ); | ||
393 | uByte = (c&0x3F)|0x80; | ||
394 | sOut.write( &uByte, 1 ); | ||
395 | return 4; | ||
396 | } | ||
397 | else if( c >= 0x800 ) | ||
398 | { | ||
399 | // Three bytes | ||
400 | // 1111 111111 111111 | ||
401 | uByte = (c>>12)|0xE0; | ||
402 | sOut.write( &uByte, 1 ); | ||
403 | uByte = ((c>>6)&0x3F)|0x80; | ||
404 | sOut.write( &uByte, 1 ); | ||
405 | uByte = (c&0x3F)|0x80; | ||
406 | sOut.write( &uByte, 1 ); | ||
407 | return 3; | ||
408 | } | ||
409 | else if( c >= 0x80 ) | ||
410 | { | ||
411 | // Two bytes | ||
412 | // 11111 111111 | ||
413 | uByte = (c>>6)|0xC0; | ||
414 | sOut.write( &uByte, 1 ); | ||
415 | uByte = (c&0x3F)|0x80; | ||
416 | sOut.write( &uByte, 1 ); | ||
417 | return 2; | ||
418 | } | ||
419 | else | ||
420 | { | ||
421 | // One byte | ||
422 | uByte = c; | ||
423 | sOut.write( &uByte, 1 ); | ||
424 | return 1; | ||
425 | } | ||
426 | } | ||
427 | break; | ||
428 | |||
429 | case Utf16: | ||
430 | case Utf16be: | ||
431 | case Utf16le: | ||
432 | case Utf32: | ||
433 | case Utf32be: | ||
434 | case Utf32le: | ||
435 | case Ucs2: | ||
436 | case Ucs4: | ||
437 | case GuessEncoding: | ||
438 | throw Bu::ExceptionBase("Not implemented."); | ||
439 | break; | ||
440 | } | ||
441 | return -1; | ||
442 | } | ||
443 | |||
444 | int32_t Bu::UtfString::toInt32( int iRadix ) const | ||
445 | { | ||
446 | return strtol( get().getStr(), NULL, iRadix ); | ||
447 | } | ||
448 | |||
449 | int64_t Bu::UtfString::toInt64( int iRadix ) const | ||
450 | { | ||
451 | return strtoll( get().getStr(), NULL, iRadix ); | ||
452 | } | ||
453 | |||
324 | void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) const | 454 | void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) const |
325 | { | 455 | { |
326 | int iPos = 0; | 456 | int iPos = 0; |
@@ -496,6 +626,33 @@ bool Bu::UtfString::operator==( const Bu::UtfString &rhs ) const | |||
496 | return aData == rhs.aData; | 626 | return aData == rhs.aData; |
497 | } | 627 | } |
498 | 628 | ||
629 | bool Bu::UtfString::operator==( const Bu::String &rhs ) const | ||
630 | { | ||
631 | // Naive comparison | |
632 | if( aData.getSize() != rhs.getSize() ) | ||
633 | return false; | ||
634 | |||
635 | for( int j = 0; j < aData.getSize(); j++ ) | ||
636 | { | ||
637 | if( aData[j] != rhs[j] ) | ||
638 | return false; | ||
639 | } | ||
640 | |||
641 | return true; | ||
642 | } | ||
643 | |||
644 | bool Bu::UtfString::operator==( const char *rhs ) const | ||
645 | { | ||
646 | // Naive comparison | |
647 | for( int j = 0; j < aData.getSize(); j++ ) | ||
648 | { | ||
649 | if( rhs[j] == '\0' || aData[j] != rhs[j] ) | ||
650 | return false; | ||
651 | } | ||
652 | |||
653 | return true; | ||
654 | } | ||
655 | |||
499 | Bu::UtfString &Bu::UtfString::operator+=( const Bu::UtfString &rhs ) | 656 | Bu::UtfString &Bu::UtfString::operator+=( const Bu::UtfString &rhs ) |
500 | { | 657 | { |
501 | append( rhs ); | 658 | append( rhs ); |
@@ -508,6 +665,56 @@ Bu::UtfString &Bu::UtfString::operator+=( const UtfChar &rhs ) | |||
508 | return *this; | 665 | return *this; |
509 | } | 666 | } |
510 | 667 | ||
668 | bool Bu::UtfString::operator<( const Bu::UtfString &rhs ) const | ||
669 | { | ||
670 | for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) | ||
671 | { | ||
672 | if( aData[j] != rhs.aData[j] ) | ||
673 | return aData[j] < rhs.aData[j]; | ||
674 | } | ||
675 | |||
676 | return false; | ||
677 | } | ||
678 | |||
679 | bool Bu::UtfString::operator<=( const Bu::UtfString &rhs ) const | ||
680 | { | ||
681 | for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) | ||
682 | { | ||
683 | if( aData[j] != rhs.aData[j] ) | ||
684 | return aData[j] < rhs.aData[j]; | ||
685 | } | ||
686 | |||
687 | if( aData.getSize() == rhs.aData.getSize() ) | ||
688 | return true; | ||
689 | |||
690 | return false; | ||
691 | } | ||
692 | |||
693 | bool Bu::UtfString::operator>( const Bu::UtfString &rhs ) const | ||
694 | { | ||
695 | for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) | ||
696 | { | ||
697 | if( aData[j] != rhs.aData[j] ) | ||
698 | return aData[j] > rhs.aData[j]; | ||
699 | } | ||
700 | |||
701 | return false; | ||
702 | } | ||
703 | |||
704 | bool Bu::UtfString::operator>=( const Bu::UtfString &rhs ) const | ||
705 | { | ||
706 | for( int j = 0; j < aData.getSize() && j < rhs.aData.getSize(); j++ ) | ||
707 | { | ||
708 | if( aData[j] != rhs.aData[j] ) | ||
709 | return aData[j] > rhs.aData[j]; | ||
710 | } | ||
711 | |||
712 | if( aData.getSize() == rhs.aData.getSize() ) | ||
713 | return true; | ||
714 | |||
715 | return false; | ||
716 | } | ||
717 | |||
511 | Bu::String Bu::UtfString::get( Encoding eEnc ) const | 718 | Bu::String Bu::UtfString::get( Encoding eEnc ) const |
512 | { | 719 | { |
513 | Bu::MemBuf mb; | 720 | Bu::MemBuf mb; |
@@ -537,16 +744,6 @@ void Bu::UtfString::debug() const | |||
537 | /* | 744 | /* |
538 | void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) | 745 | void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) |
539 | { | 746 | { |
540 | static uint8_t lmask[8] = { | ||
541 | 0x00, | ||
542 | 0x01, | ||
543 | 0x03, | ||
544 | 0x07, | ||
545 | 0x0f, | ||
546 | 0x1f, | ||
547 | 0x3f, | ||
548 | 0x7f | ||
549 | }; | ||
550 | for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) | 747 | for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) |
551 | { | 748 | { |
552 | if( i != sUtf8.begin() ) | 749 | if( i != sUtf8.begin() ) |
@@ -558,9 +755,9 @@ void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) | |||
558 | int iBytes = 1; | 755 | int iBytes = 1; |
559 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } | 756 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } |
560 | // sio << "iBytes = " << iBytes << sio.nl; | 757 | // sio << "iBytes = " << iBytes << sio.nl; |
561 | Bu::UtfChar uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); | 758 | Bu::UtfChar uPt = ((*i) & utf8_lmask[7-iBytes])<<(6*(iBytes-1)); |
562 | // sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | 759 | // sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') |
563 | // << (int)lmask[7-iBytes] << sio.nl; | 760 | // << (int)utf8_lmask[7-iBytes] << sio.nl; |
564 | for( iBytes--; iBytes >= 1; iBytes-- ) | 761 | for( iBytes--; iBytes >= 1; iBytes-- ) |
565 | { | 762 | { |
566 | // sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) | 763 | // sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) |
@@ -568,9 +765,9 @@ void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) | |||
568 | // sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') | 765 | // sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') |
569 | // << (int)(uint8_t)*i << sio.nl | 766 | // << (int)(uint8_t)*i << sio.nl |
570 | // << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | 767 | // << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') |
571 | // << (int)lmask[6] << sio.nl; | 768 | // << (int)utf8_lmask[6] << sio.nl; |
572 | i++; | 769 | i++; |
573 | uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); | 770 | uPt |= ((*i)&utf8_lmask[6])<<(6*(iBytes-1)); |
574 | } | 771 | } |
575 | sio << uPt; | 772 | sio << uPt; |
576 | // sio << " (" << Bu::Fmt( 8, 2 ).fill('0') | 773 | // sio << " (" << Bu::Fmt( 8, 2 ).fill('0') |
@@ -602,3 +799,9 @@ template<> bool Bu::__cmpHashKeys<Bu::UtfString>( | |||
602 | { | 799 | { |
603 | return a == b; | 800 | return a == b; |
604 | } | 801 | } |
802 | |||
803 | Bu::Formatter Bu::operator<<( Bu::Formatter &f, const Bu::UtfString &s ) | ||
804 | { | ||
805 | return f << s.get(); | ||
806 | } | ||
807 | |||
diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h index 5085ec0..285b680 100644 --- a/src/unstable/utfstring.h +++ b/src/unstable/utfstring.h | |||
@@ -191,6 +191,18 @@ namespace Bu | |||
191 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const; | 191 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const; |
192 | 192 | ||
193 | /** | 193 | /** |
194 | * Reads as many bytes from the given stream, starting at the current | ||
195 | * position, as required to read a single UtfChar (code point). | ||
196 | */ | ||
197 | static int readPoint( Bu::Stream &sIn, UtfChar &c, | ||
198 | Encoding sEnc=Utf8 ); | ||
199 | static int writePoint( Bu::Stream &sOut, const UtfChar &c, | ||
200 | Encoding sEnc=Utf8 ); | ||
201 | |||
202 | int32_t toInt32( int iRadix=10 ) const; | ||
203 | int64_t toInt64( int iRadix=10 ) const; | ||
204 | |||
205 | /** | ||
194 | * This encodes the UtfString in the given encoding and returns it as | 206 | * This encodes the UtfString in the given encoding and returns it as |
195 | * a binary Bu::String. Like write, this also includes the proper BOM | 207 | * a binary Bu::String. Like write, this also includes the proper BOM |
196 | * at the beginning. | 208 | * at the beginning.
@@ -216,9 +228,16 @@ namespace Bu | |||
216 | UtfChar nextChar( int &iIndex ) const; | 228 | UtfChar nextChar( int &iIndex ) const; |
217 | 229 | ||
218 | bool operator==( const Bu::UtfString &rhs ) const; | 230 | bool operator==( const Bu::UtfString &rhs ) const; |
231 | bool operator==( const Bu::String &rhs ) const; | ||
232 | bool operator==( const char *rhs ) const; | ||
219 | UtfString &operator+=( const Bu::UtfString &rhs ); | 233 | UtfString &operator+=( const Bu::UtfString &rhs ); |
220 | UtfString &operator+=( const UtfChar &rhs ); | 234 | UtfString &operator+=( const UtfChar &rhs ); |
221 | 235 | ||
236 | bool operator<( const Bu::UtfString &rhs ) const; | ||
237 | bool operator<=( const Bu::UtfString &rhs ) const; | ||
238 | bool operator>( const Bu::UtfString &rhs ) const; | ||
239 | bool operator>=( const Bu::UtfString &rhs ) const; | ||
240 | |||
222 | private: | 241 | private: |
223 | void append16( uint16_t i ) { aData.append( i ); } | 242 | void append16( uint16_t i ) { aData.append( i ); } |
224 | 243 | ||
@@ -237,6 +256,7 @@ namespace Bu | |||
237 | void writeUtf32le( Bu::Stream &sOut ) const; | 256 | void writeUtf32le( Bu::Stream &sOut ) const; |
238 | 257 | ||
239 | private: | 258 | private: |
259 | static uint8_t utf8_lmask[8]; | ||
240 | Bu::Array<uint16_t> aData; | 260 | Bu::Array<uint16_t> aData; |
241 | int iRawLen; | 261 | int iRawLen; |
242 | int iCharLen; | 262 | int iCharLen; |
@@ -254,6 +274,9 @@ namespace Bu | |||
254 | template<> uint32_t __calcHashCode<UtfString>( const UtfString &k ); | 274 | template<> uint32_t __calcHashCode<UtfString>( const UtfString &k ); |
255 | template<> bool __cmpHashKeys<UtfString>( | 275 | template<> bool __cmpHashKeys<UtfString>( |
256 | const UtfString &a, const UtfString &b ); | 276 | const UtfString &a, const UtfString &b ); |
277 | |||
278 | class Formatter; | ||
279 | Bu::Formatter operator<<( Bu::Formatter &f, const Bu::UtfString &s ); | ||
257 | }; | 280 | }; |
258 | 281 | ||
259 | #endif | 282 | #endif |