diff options
| -rw-r--r-- | src/tests/utf.cpp | 42 | ||||
| -rw-r--r-- | src/utfstring.cpp | 143 | ||||
| -rw-r--r-- | src/utfstring.h | 2 |
3 files changed, 174 insertions, 13 deletions
diff --git a/src/tests/utf.cpp b/src/tests/utf.cpp index 9e075e2..01bac7e 100644 --- a/src/tests/utf.cpp +++ b/src/tests/utf.cpp | |||
| @@ -4,6 +4,46 @@ | |||
| 4 | 4 | ||
| 5 | int main( int argc, char *argv[] ) | 5 | int main( int argc, char *argv[] ) |
| 6 | { | 6 | { |
| 7 | Bu::File fIn("utf8.in", Bu::File::Read ); | ||
| 8 | Bu::String sUtf8; | ||
| 9 | char buf[4096]; | ||
| 10 | while( !fIn.isEos() ) | ||
| 11 | { | ||
| 12 | int iAmnt = fIn.read( buf, 4096 ); | ||
| 13 | sUtf8.append( buf, iAmnt ); | ||
| 14 | } | ||
| 15 | Bu::UtfString us( sUtf8, Bu::UtfString::Utf8 ); | ||
| 16 | us.debug(); | ||
| 17 | { | ||
| 18 | Bu::File fOut("utf8.out", Bu::File::WriteNew ); | ||
| 19 | us.write( fOut, Bu::UtfString::Utf8 ); | ||
| 20 | } | ||
| 21 | { | ||
| 22 | Bu::File fOut("utf16.out", Bu::File::WriteNew ); | ||
| 23 | us.write( fOut, Bu::UtfString::Utf16 ); | ||
| 24 | } | ||
| 25 | { | ||
| 26 | Bu::File fOut("utf16le.out", Bu::File::WriteNew ); | ||
| 27 | us.write( fOut, Bu::UtfString::Utf16le ); | ||
| 28 | } | ||
| 29 | { | ||
| 30 | Bu::File fOut("utf16be.out", Bu::File::WriteNew ); | ||
| 31 | us.write( fOut, Bu::UtfString::Utf16be ); | ||
| 32 | } | ||
| 33 | { | ||
| 34 | Bu::File fOut("utf32.out", Bu::File::WriteNew ); | ||
| 35 | us.write( fOut, Bu::UtfString::Utf32 ); | ||
| 36 | } | ||
| 37 | { | ||
| 38 | Bu::File fOut("utf32le.out", Bu::File::WriteNew ); | ||
| 39 | us.write( fOut, Bu::UtfString::Utf32le ); | ||
| 40 | } | ||
| 41 | { | ||
| 42 | Bu::File fOut("utf32be.out", Bu::File::WriteNew ); | ||
| 43 | us.write( fOut, Bu::UtfString::Utf32be ); | ||
| 44 | } | ||
| 45 | |||
| 46 | /* | ||
| 7 | argc--, argv++; | 47 | argc--, argv++; |
| 8 | 48 | ||
| 9 | for( char **sFile = argv; *sFile; sFile++ ) | 49 | for( char **sFile = argv; *sFile; sFile++ ) |
| @@ -17,7 +57,9 @@ int main( int argc, char *argv[] ) | |||
| 17 | sUtf8.append( buf, iAmnt ); | 57 | sUtf8.append( buf, iAmnt ); |
| 18 | } | 58 | } |
| 19 | Bu::UtfString us( sUtf8, Bu::UtfString::Utf16 ); | 59 | Bu::UtfString us( sUtf8, Bu::UtfString::Utf16 ); |
| 60 | |||
| 20 | us.debug(); | 61 | us.debug(); |
| 21 | } | 62 | } |
| 63 | */ | ||
| 22 | } | 64 | } |
| 23 | 65 | ||
diff --git a/src/utfstring.cpp b/src/utfstring.cpp index c9da52f..3f57618 100644 --- a/src/utfstring.cpp +++ b/src/utfstring.cpp | |||
| @@ -259,8 +259,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) | |||
| 259 | break; | 259 | break; |
| 260 | 260 | ||
| 261 | case Utf16: | 261 | case Utf16: |
| 262 | writeUtf16( sOut ); | 262 | // writeUtf16( sOut ); |
| 263 | break; | 263 | // break; |
| 264 | 264 | ||
| 265 | case Utf16be: | 265 | case Utf16be: |
| 266 | writeUtf16be( sOut ); | 266 | writeUtf16be( sOut ); |
| @@ -271,8 +271,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) | |||
| 271 | break; | 271 | break; |
| 272 | 272 | ||
| 273 | case Utf32: | 273 | case Utf32: |
| 274 | writeUtf32( sOut ); | 274 | // writeUtf32( sOut ); |
| 275 | break; | 275 | // break; |
| 276 | 276 | ||
| 277 | case Utf32be: | 277 | case Utf32be: |
| 278 | writeUtf32be( sOut ); | 278 | writeUtf32be( sOut ); |
| @@ -300,30 +300,151 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) | |||
| 300 | 300 | ||
| 301 | void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) | 301 | void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) |
| 302 | { | 302 | { |
| 303 | int iPos = 0; | ||
| 304 | while( iPos < aData.getSize() ) | ||
| 305 | { | ||
| 306 | uint8_t uByte; | ||
| 307 | Bu::UtfChar chr = nextChar( iPos ); | ||
| 308 | if( chr >= 0x010000 ) | ||
| 309 | { | ||
| 310 | // Four bytes | ||
| 311 | // 111 111111 111111 111111 | ||
| 312 | uByte = (chr>>18)|0xF0; | ||
| 313 | sOut.write( &uByte, 1 ); | ||
| 314 | uByte = (chr>>12)&0x3F|0x80; | ||
| 315 | sOut.write( &uByte, 1 ); | ||
| 316 | uByte = (chr>>6)&0x3F|0x80; | ||
| 317 | sOut.write( &uByte, 1 ); | ||
| 318 | uByte = (chr&0x3F)|0x80; | ||
| 319 | sOut.write( &uByte, 1 ); | ||
| 320 | } | ||
| 321 | else if( chr >= 0x800 ) | ||
| 322 | { | ||
| 323 | // Three bytes | ||
| 324 | // 1111 111111 111111 | ||
| 325 | uByte = (chr>>12)|0xE0; | ||
| 326 | sOut.write( &uByte, 1 ); | ||
| 327 | uByte = (chr>>6)&0x3F|0x80; | ||
| 328 | sOut.write( &uByte, 1 ); | ||
| 329 | uByte = (chr&0x3F)|0x80; | ||
| 330 | sOut.write( &uByte, 1 ); | ||
| 331 | } | ||
| 332 | else if( chr >= 0x80 ) | ||
| 333 | { | ||
| 334 | // Two bytes | ||
| 335 | // 11111 111111 | ||
| 336 | uByte = (chr>>6)|0xC0; | ||
| 337 | sOut.write( &uByte, 1 ); | ||
| 338 | uByte = (chr&0x3F)|0x80; | ||
| 339 | sOut.write( &uByte, 1 ); | ||
| 340 | } | ||
| 341 | else | ||
| 342 | { | ||
| 343 | // One byte | ||
| 344 | uByte = chr; | ||
| 345 | sOut.write( &uByte, 1 ); | ||
| 346 | } | ||
| 347 | } | ||
| 303 | } | 348 | } |
| 304 | 349 | /* | |
| 305 | void Bu::UtfString::writeUtf16( Bu::Stream &sOut ) | 350 | void Bu::UtfString::writeUtf16( Bu::Stream &sOut ) |
| 306 | { | 351 | { |
| 307 | } | 352 | } |
| 308 | 353 | */ | |
| 309 | void Bu::UtfString::writeUtf16be( Bu::Stream &sOut ) | 354 | void Bu::UtfString::writeUtf16be( Bu::Stream &sOut ) |
| 310 | { | 355 | { |
| 356 | #if BYTE_ORDER == BIG_ENDIAN | ||
| 357 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 358 | sOut.write( &iTmp, 2 ); | ||
| 359 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) | ||
| 360 | { | ||
| 361 | iTmp = *i; | ||
| 362 | sOut.write( &iTmp, 2 ); | ||
| 363 | } | ||
| 364 | #else | ||
| 365 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 366 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
| 367 | sOut.write( &iTmp, 2 ); | ||
| 368 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) | ||
| 369 | { | ||
| 370 | iTmp = *i; | ||
| 371 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
| 372 | sOut.write( &iTmp, 2 ); | ||
| 373 | } | ||
| 374 | #endif | ||
| 311 | } | 375 | } |
| 312 | 376 | ||
| 313 | void Bu::UtfString::writeUtf16le( Bu::Stream &sOut ) | 377 | void Bu::UtfString::writeUtf16le( Bu::Stream &sOut ) |
| 314 | { | 378 | { |
| 315 | } | 379 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 316 | 380 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | |
| 317 | void Bu::UtfString::writeUtf32( Bu::Stream &sOut ) | 381 | sOut.write( &iTmp, 2 ); |
| 318 | { | 382 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) |
| 383 | { | ||
| 384 | iTmp = *i; | ||
| 385 | sOut.write( &iTmp, 2 ); | ||
| 386 | } | ||
| 387 | #else | ||
| 388 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 389 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
| 390 | sOut.write( &iTmp, 2 ); | ||
| 391 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) | ||
| 392 | { | ||
| 393 | iTmp = *i; | ||
| 394 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
| 395 | sOut.write( &iTmp, 2 ); | ||
| 396 | } | ||
| 397 | #endif | ||
| 319 | } | 398 | } |
| 320 | 399 | ||
| 321 | void Bu::UtfString::writeUtf32be( Bu::Stream &sOut ) | 400 | void Bu::UtfString::writeUtf32be( Bu::Stream &sOut ) |
| 322 | { | 401 | { |
| 402 | #if BYTE_ORDER == BIG_ENDIAN | ||
| 403 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 404 | sOut.write( &iTmp, 4 ); | ||
| 405 | int i = 0; | ||
| 406 | while( i < aData.getSize() ) | ||
| 407 | { | ||
| 408 | iTmp = nextChar( i ); | ||
| 409 | sOut.write( &iTmp, 4 ); | ||
| 410 | } | ||
| 411 | #else | ||
| 412 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 413 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
| 414 | sOut.write( &iTmp, 4 ); | ||
| 415 | int i = 0; | ||
| 416 | while( i < aData.getSize() ) | ||
| 417 | { | ||
| 418 | iTmp = nextChar( i ); | ||
| 419 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
| 420 | sOut.write( &iTmp, 4 ); | ||
| 421 | } | ||
| 422 | #endif | ||
| 323 | } | 423 | } |
| 324 | 424 | ||
| 325 | void Bu::UtfString::writeUtf32le( Bu::Stream &sOut ) | 425 | void Bu::UtfString::writeUtf32le( Bu::Stream &sOut ) |
| 326 | { | 426 | { |
| 427 | #if BYTE_ORDER == LITTLE_ENDIAN | ||
| 428 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 429 | sOut.write( &iTmp, 4 ); | ||
| 430 | int i = 0; | ||
| 431 | while( i < aData.getSize() ) | ||
| 432 | { | ||
| 433 | iTmp = nextChar( i ); | ||
| 434 | sOut.write( &iTmp, 4 ); | ||
| 435 | } | ||
| 436 | #else | ||
| 437 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
| 438 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
| 439 | sOut.write( &iTmp, 4 ); | ||
| 440 | int i = 0; | ||
| 441 | while( i < aData.getSize() ) | ||
| 442 | { | ||
| 443 | iTmp = nextChar( i ); | ||
| 444 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
| 445 | sOut.write( &iTmp, 4 ); | ||
| 446 | } | ||
| 447 | #endif | ||
| 327 | } | 448 | } |
| 328 | 449 | ||
| 329 | Bu::UtfChar Bu::UtfString::get( int iIndex ) | 450 | Bu::UtfChar Bu::UtfString::get( int iIndex ) |
| @@ -362,7 +483,7 @@ void Bu::UtfString::debug() | |||
| 362 | { | 483 | { |
| 363 | if( i > 0 ) | 484 | if( i > 0 ) |
| 364 | sio << ", "; | 485 | sio << ", "; |
| 365 | sio << "0x" << Fmt::hex() << get( i ); | 486 | sio << "0x" << Fmt::hex() << nextChar( i ); |
| 366 | } | 487 | } |
| 367 | sio << sio.nl; | 488 | sio << sio.nl; |
| 368 | } | 489 | } |
diff --git a/src/utfstring.h b/src/utfstring.h index 8448ea4..be3e6ad 100644 --- a/src/utfstring.h +++ b/src/utfstring.h | |||
| @@ -86,10 +86,8 @@ namespace Bu | |||
| 86 | 86 | ||
| 87 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); | 87 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); |
| 88 | void writeUtf8( Bu::Stream &sOut ); | 88 | void writeUtf8( Bu::Stream &sOut ); |
| 89 | void writeUtf16( Bu::Stream &sOut ); | ||
| 90 | void writeUtf16be( Bu::Stream &sOut ); | 89 | void writeUtf16be( Bu::Stream &sOut ); |
| 91 | void writeUtf16le( Bu::Stream &sOut ); | 90 | void writeUtf16le( Bu::Stream &sOut ); |
| 92 | void writeUtf32( Bu::Stream &sOut ); | ||
| 93 | void writeUtf32be( Bu::Stream &sOut ); | 91 | void writeUtf32be( Bu::Stream &sOut ); |
| 94 | void writeUtf32le( Bu::Stream &sOut ); | 92 | void writeUtf32le( Bu::Stream &sOut ); |
| 95 | 93 | ||
