diff options
author | Mike Buland <eichlan@xagasoft.com> | 2011-04-07 05:44:42 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2011-04-07 05:44:42 +0000 |
commit | 27aecbc60be6c80ce221f29c01f743de714faa63 (patch) | |
tree | 84838a5e55088ed95c3f4ca35f6ff4c27f56812b | |
parent | 5de54062d8bf9bdfde17a02e4aef91341146162d (diff) | |
download | libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.gz libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.bz2 libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.xz libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.zip |
Pretty sure all utf encoders and decoders are complete and tested.
-rw-r--r-- | src/tests/utf.cpp | 42 | ||||
-rw-r--r-- | src/utfstring.cpp | 143 | ||||
-rw-r--r-- | src/utfstring.h | 2 |
3 files changed, 174 insertions, 13 deletions
diff --git a/src/tests/utf.cpp b/src/tests/utf.cpp index 9e075e2..01bac7e 100644 --- a/src/tests/utf.cpp +++ b/src/tests/utf.cpp | |||
@@ -4,6 +4,46 @@ | |||
4 | 4 | ||
5 | int main( int argc, char *argv[] ) | 5 | int main( int argc, char *argv[] ) |
6 | { | 6 | { |
7 | Bu::File fIn("utf8.in", Bu::File::Read ); | ||
8 | Bu::String sUtf8; | ||
9 | char buf[4096]; | ||
10 | while( !fIn.isEos() ) | ||
11 | { | ||
12 | int iAmnt = fIn.read( buf, 4096 ); | ||
13 | sUtf8.append( buf, iAmnt ); | ||
14 | } | ||
15 | Bu::UtfString us( sUtf8, Bu::UtfString::Utf8 ); | ||
16 | us.debug(); | ||
17 | { | ||
18 | Bu::File fOut("utf8.out", Bu::File::WriteNew ); | ||
19 | us.write( fOut, Bu::UtfString::Utf8 ); | ||
20 | } | ||
21 | { | ||
22 | Bu::File fOut("utf16.out", Bu::File::WriteNew ); | ||
23 | us.write( fOut, Bu::UtfString::Utf16 ); | ||
24 | } | ||
25 | { | ||
26 | Bu::File fOut("utf16le.out", Bu::File::WriteNew ); | ||
27 | us.write( fOut, Bu::UtfString::Utf16le ); | ||
28 | } | ||
29 | { | ||
30 | Bu::File fOut("utf16be.out", Bu::File::WriteNew ); | ||
31 | us.write( fOut, Bu::UtfString::Utf16be ); | ||
32 | } | ||
33 | { | ||
34 | Bu::File fOut("utf32.out", Bu::File::WriteNew ); | ||
35 | us.write( fOut, Bu::UtfString::Utf32 ); | ||
36 | } | ||
37 | { | ||
38 | Bu::File fOut("utf32le.out", Bu::File::WriteNew ); | ||
39 | us.write( fOut, Bu::UtfString::Utf32le ); | ||
40 | } | ||
41 | { | ||
42 | Bu::File fOut("utf32be.out", Bu::File::WriteNew ); | ||
43 | us.write( fOut, Bu::UtfString::Utf32be ); | ||
44 | } | ||
45 | |||
46 | /* | ||
7 | argc--, argv++; | 47 | argc--, argv++; |
8 | 48 | ||
9 | for( char **sFile = argv; *sFile; sFile++ ) | 49 | for( char **sFile = argv; *sFile; sFile++ ) |
@@ -17,7 +57,9 @@ int main( int argc, char *argv[] ) | |||
17 | sUtf8.append( buf, iAmnt ); | 57 | sUtf8.append( buf, iAmnt ); |
18 | } | 58 | } |
19 | Bu::UtfString us( sUtf8, Bu::UtfString::Utf16 ); | 59 | Bu::UtfString us( sUtf8, Bu::UtfString::Utf16 ); |
60 | |||
20 | us.debug(); | 61 | us.debug(); |
21 | } | 62 | } |
63 | */ | ||
22 | } | 64 | } |
23 | 65 | ||
diff --git a/src/utfstring.cpp b/src/utfstring.cpp index c9da52f..3f57618 100644 --- a/src/utfstring.cpp +++ b/src/utfstring.cpp | |||
@@ -259,8 +259,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) | |||
259 | break; | 259 | break; |
260 | 260 | ||
261 | case Utf16: | 261 | case Utf16: |
262 | writeUtf16( sOut ); | 262 | // writeUtf16( sOut ); |
263 | break; | 263 | // break; |
264 | 264 | ||
265 | case Utf16be: | 265 | case Utf16be: |
266 | writeUtf16be( sOut ); | 266 | writeUtf16be( sOut ); |
@@ -271,8 +271,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) | |||
271 | break; | 271 | break; |
272 | 272 | ||
273 | case Utf32: | 273 | case Utf32: |
274 | writeUtf32( sOut ); | 274 | // writeUtf32( sOut ); |
275 | break; | 275 | // break; |
276 | 276 | ||
277 | case Utf32be: | 277 | case Utf32be: |
278 | writeUtf32be( sOut ); | 278 | writeUtf32be( sOut ); |
@@ -300,30 +300,151 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc ) | |||
300 | 300 | ||
301 | void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) | 301 | void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) |
302 | { | 302 | { |
303 | int iPos = 0; | ||
304 | while( iPos < aData.getSize() ) | ||
305 | { | ||
306 | uint8_t uByte; | ||
307 | Bu::UtfChar chr = nextChar( iPos ); | ||
308 | if( chr >= 0x010000 ) | ||
309 | { | ||
310 | // Four bytes | ||
311 | // 111 111111 111111 111111 | ||
312 | uByte = (chr>>18)|0xF0; | ||
313 | sOut.write( &uByte, 1 ); | ||
314 | uByte = (chr>>12)&0x3F|0x80; | ||
315 | sOut.write( &uByte, 1 ); | ||
316 | uByte = (chr>>6)&0x3F|0x80; | ||
317 | sOut.write( &uByte, 1 ); | ||
318 | uByte = (chr&0x3F)|0x80; | ||
319 | sOut.write( &uByte, 1 ); | ||
320 | } | ||
321 | else if( chr >= 0x800 ) | ||
322 | { | ||
323 | // Three bytes | ||
324 | // 1111 111111 111111 | ||
325 | uByte = (chr>>12)|0xE0; | ||
326 | sOut.write( &uByte, 1 ); | ||
327 | uByte = (chr>>6)&0x3F|0x80; | ||
328 | sOut.write( &uByte, 1 ); | ||
329 | uByte = (chr&0x3F)|0x80; | ||
330 | sOut.write( &uByte, 1 ); | ||
331 | } | ||
332 | else if( chr >= 0x80 ) | ||
333 | { | ||
334 | // Two bytes | ||
335 | // 11111 111111 | ||
336 | uByte = (chr>>6)|0xC0; | ||
337 | sOut.write( &uByte, 1 ); | ||
338 | uByte = (chr&0x3F)|0x80; | ||
339 | sOut.write( &uByte, 1 ); | ||
340 | } | ||
341 | else | ||
342 | { | ||
343 | // One byte | ||
344 | uByte = chr; | ||
345 | sOut.write( &uByte, 1 ); | ||
346 | } | ||
347 | } | ||
303 | } | 348 | } |
304 | 349 | /* | |
305 | void Bu::UtfString::writeUtf16( Bu::Stream &sOut ) | 350 | void Bu::UtfString::writeUtf16( Bu::Stream &sOut ) |
306 | { | 351 | { |
307 | } | 352 | } |
308 | 353 | */ | |
309 | void Bu::UtfString::writeUtf16be( Bu::Stream &sOut ) | 354 | void Bu::UtfString::writeUtf16be( Bu::Stream &sOut ) |
310 | { | 355 | { |
356 | #if BYTE_ORDER == BIG_ENDIAN | ||
357 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | ||
358 | sOut.write( &iTmp, 2 ); | ||
359 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) | ||
360 | { | ||
361 | iTmp = *i; | ||
362 | sOut.write( &iTmp, 2 ); | ||
363 | } | ||
364 | #else | ||
365 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | ||
366 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
367 | sOut.write( &iTmp, 2 ); | ||
368 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) | ||
369 | { | ||
370 | iTmp = *i; | ||
371 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
372 | sOut.write( &iTmp, 2 ); | ||
373 | } | ||
374 | #endif | ||
311 | } | 375 | } |
312 | 376 | ||
313 | void Bu::UtfString::writeUtf16le( Bu::Stream &sOut ) | 377 | void Bu::UtfString::writeUtf16le( Bu::Stream &sOut ) |
314 | { | 378 | { |
315 | } | 379 | #if BYTE_ORDER == LITTLE_ENDIAN |
316 | 380 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | |
317 | void Bu::UtfString::writeUtf32( Bu::Stream &sOut ) | 381 | sOut.write( &iTmp, 2 ); |
318 | { | 382 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) |
383 | { | ||
384 | iTmp = *i; | ||
385 | sOut.write( &iTmp, 2 ); | ||
386 | } | ||
387 | #else | ||
388 | uint16_t iTmp = 0xFEFF; // Byte Order Marker | ||
389 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
390 | sOut.write( &iTmp, 2 ); | ||
391 | for( Array<uint16_t>::iterator i = aData.begin(); i; i++ ) | ||
392 | { | ||
393 | iTmp = *i; | ||
394 | iTmp = (iTmp>>8) | (iTmp<<8); | ||
395 | sOut.write( &iTmp, 2 ); | ||
396 | } | ||
397 | #endif | ||
319 | } | 398 | } |
320 | 399 | ||
321 | void Bu::UtfString::writeUtf32be( Bu::Stream &sOut ) | 400 | void Bu::UtfString::writeUtf32be( Bu::Stream &sOut ) |
322 | { | 401 | { |
402 | #if BYTE_ORDER == BIG_ENDIAN | ||
403 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
404 | sOut.write( &iTmp, 4 ); | ||
405 | int i = 0; | ||
406 | while( i < aData.getSize() ) | ||
407 | { | ||
408 | iTmp = nextChar( i ); | ||
409 | sOut.write( &iTmp, 4 ); | ||
410 | } | ||
411 | #else | ||
412 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
413 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
414 | sOut.write( &iTmp, 4 ); | ||
415 | int i = 0; | ||
416 | while( i < aData.getSize() ) | ||
417 | { | ||
418 | iTmp = nextChar( i ); | ||
419 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
420 | sOut.write( &iTmp, 4 ); | ||
421 | } | ||
422 | #endif | ||
323 | } | 423 | } |
324 | 424 | ||
325 | void Bu::UtfString::writeUtf32le( Bu::Stream &sOut ) | 425 | void Bu::UtfString::writeUtf32le( Bu::Stream &sOut ) |
326 | { | 426 | { |
427 | #if BYTE_ORDER == LITTLE_ENDIAN | ||
428 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
429 | sOut.write( &iTmp, 4 ); | ||
430 | int i = 0; | ||
431 | while( i < aData.getSize() ) | ||
432 | { | ||
433 | iTmp = nextChar( i ); | ||
434 | sOut.write( &iTmp, 4 ); | ||
435 | } | ||
436 | #else | ||
437 | uint32_t iTmp = 0xFEFF; // Byte Order Marker | ||
438 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
439 | sOut.write( &iTmp, 4 ); | ||
440 | int i = 0; | ||
441 | while( i < aData.getSize() ) | ||
442 | { | ||
443 | iTmp = nextChar( i ); | ||
444 | iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8); | ||
445 | sOut.write( &iTmp, 4 ); | ||
446 | } | ||
447 | #endif | ||
327 | } | 448 | } |
328 | 449 | ||
329 | Bu::UtfChar Bu::UtfString::get( int iIndex ) | 450 | Bu::UtfChar Bu::UtfString::get( int iIndex ) |
@@ -362,7 +483,7 @@ void Bu::UtfString::debug() | |||
362 | { | 483 | { |
363 | if( i > 0 ) | 484 | if( i > 0 ) |
364 | sio << ", "; | 485 | sio << ", "; |
365 | sio << "0x" << Fmt::hex() << get( i ); | 486 | sio << "0x" << Fmt::hex() << nextChar( i ); |
366 | } | 487 | } |
367 | sio << sio.nl; | 488 | sio << sio.nl; |
368 | } | 489 | } |
diff --git a/src/utfstring.h b/src/utfstring.h index 8448ea4..be3e6ad 100644 --- a/src/utfstring.h +++ b/src/utfstring.h | |||
@@ -86,10 +86,8 @@ namespace Bu | |||
86 | 86 | ||
87 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); | 87 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); |
88 | void writeUtf8( Bu::Stream &sOut ); | 88 | void writeUtf8( Bu::Stream &sOut ); |
89 | void writeUtf16( Bu::Stream &sOut ); | ||
90 | void writeUtf16be( Bu::Stream &sOut ); | 89 | void writeUtf16be( Bu::Stream &sOut ); |
91 | void writeUtf16le( Bu::Stream &sOut ); | 90 | void writeUtf16le( Bu::Stream &sOut ); |
92 | void writeUtf32( Bu::Stream &sOut ); | ||
93 | void writeUtf32be( Bu::Stream &sOut ); | 91 | void writeUtf32be( Bu::Stream &sOut ); |
94 | void writeUtf32le( Bu::Stream &sOut ); | 92 | void writeUtf32le( Bu::Stream &sOut ); |
95 | 93 | ||