about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Mike Buland <eichlan@xagasoft.com> 2011-04-07 05:44:42 +0000
committer Mike Buland <eichlan@xagasoft.com> 2011-04-07 05:44:42 +0000
commit 27aecbc60be6c80ce221f29c01f743de714faa63 (patch)
tree 84838a5e55088ed95c3f4ca35f6ff4c27f56812b
parent 5de54062d8bf9bdfde17a02e4aef91341146162d (diff)
download libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.gz
libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.bz2
libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.xz
libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.zip
Pretty sure all utf encoders and decoders are complete and tested.
-rw-r--r-- src/tests/utf.cpp 42
-rw-r--r-- src/utfstring.cpp 143
-rw-r--r-- src/utfstring.h 2
3 files changed, 174 insertions, 13 deletions
diff --git a/src/tests/utf.cpp b/src/tests/utf.cpp
index 9e075e2..01bac7e 100644
--- a/src/tests/utf.cpp
+++ b/src/tests/utf.cpp
@@ -4,6 +4,46 @@
4 4
5int main( int argc, char *argv[] ) 5int main( int argc, char *argv[] )
6{ 6{
7 Bu::File fIn("utf8.in", Bu::File::Read );
8 Bu::String sUtf8;
9 char buf[4096];
10 while( !fIn.isEos() )
11 {
12 int iAmnt = fIn.read( buf, 4096 );
13 sUtf8.append( buf, iAmnt );
14 }
15 Bu::UtfString us( sUtf8, Bu::UtfString::Utf8 );
16 us.debug();
17 {
18 Bu::File fOut("utf8.out", Bu::File::WriteNew );
19 us.write( fOut, Bu::UtfString::Utf8 );
20 }
21 {
22 Bu::File fOut("utf16.out", Bu::File::WriteNew );
23 us.write( fOut, Bu::UtfString::Utf16 );
24 }
25 {
26 Bu::File fOut("utf16le.out", Bu::File::WriteNew );
27 us.write( fOut, Bu::UtfString::Utf16le );
28 }
29 {
30 Bu::File fOut("utf16be.out", Bu::File::WriteNew );
31 us.write( fOut, Bu::UtfString::Utf16be );
32 }
33 {
34 Bu::File fOut("utf32.out", Bu::File::WriteNew );
35 us.write( fOut, Bu::UtfString::Utf32 );
36 }
37 {
38 Bu::File fOut("utf32le.out", Bu::File::WriteNew );
39 us.write( fOut, Bu::UtfString::Utf32le );
40 }
41 {
42 Bu::File fOut("utf32be.out", Bu::File::WriteNew );
43 us.write( fOut, Bu::UtfString::Utf32be );
44 }
45
46 /*
7 argc--, argv++; 47 argc--, argv++;
8 48
9 for( char **sFile = argv; *sFile; sFile++ ) 49 for( char **sFile = argv; *sFile; sFile++ )
@@ -17,7 +57,9 @@ int main( int argc, char *argv[] )
17 sUtf8.append( buf, iAmnt ); 57 sUtf8.append( buf, iAmnt );
18 } 58 }
19 Bu::UtfString us( sUtf8, Bu::UtfString::Utf16 ); 59 Bu::UtfString us( sUtf8, Bu::UtfString::Utf16 );
60
20 us.debug(); 61 us.debug();
21 } 62 }
63 */
22} 64}
23 65
diff --git a/src/utfstring.cpp b/src/utfstring.cpp
index c9da52f..3f57618 100644
--- a/src/utfstring.cpp
+++ b/src/utfstring.cpp
@@ -259,8 +259,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc )
259 break; 259 break;
260 260
261 case Utf16: 261 case Utf16:
262 writeUtf16( sOut ); 262// writeUtf16( sOut );
263 break; 263// break;
264 264
265 case Utf16be: 265 case Utf16be:
266 writeUtf16be( sOut ); 266 writeUtf16be( sOut );
@@ -271,8 +271,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc )
271 break; 271 break;
272 272
273 case Utf32: 273 case Utf32:
274 writeUtf32( sOut ); 274// writeUtf32( sOut );
275 break; 275// break;
276 276
277 case Utf32be: 277 case Utf32be:
278 writeUtf32be( sOut ); 278 writeUtf32be( sOut );
@@ -300,30 +300,151 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc )
300 300
301void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) 301void Bu::UtfString::writeUtf8( Bu::Stream &sOut )
302{ 302{
303 int iPos = 0;
304 while( iPos < aData.getSize() )
305 {
306 uint8_t uByte;
307 Bu::UtfChar chr = nextChar( iPos );
308 if( chr >= 0x010000 )
309 {
310 // Four bytes
311 // 111 111111 111111 111111
312 uByte = (chr>>18)|0xF0;
313 sOut.write( &uByte, 1 );
314 uByte = (chr>>12)&0x3F|0x80;
315 sOut.write( &uByte, 1 );
316 uByte = (chr>>6)&0x3F|0x80;
317 sOut.write( &uByte, 1 );
318 uByte = (chr&0x3F)|0x80;
319 sOut.write( &uByte, 1 );
320 }
321 else if( chr >= 0x800 )
322 {
323 // Three bytes
324 // 1111 111111 111111
325 uByte = (chr>>12)|0xE0;
326 sOut.write( &uByte, 1 );
327 uByte = (chr>>6)&0x3F|0x80;
328 sOut.write( &uByte, 1 );
329 uByte = (chr&0x3F)|0x80;
330 sOut.write( &uByte, 1 );
331 }
332 else if( chr >= 0x80 )
333 {
334 // Two bytes
335 // 11111 111111
336 uByte = (chr>>6)|0xC0;
337 sOut.write( &uByte, 1 );
338 uByte = (chr&0x3F)|0x80;
339 sOut.write( &uByte, 1 );
340 }
341 else
342 {
343 // One byte
344 uByte = chr;
345 sOut.write( &uByte, 1 );
346 }
347 }
303} 348}
304 349/*
305void Bu::UtfString::writeUtf16( Bu::Stream &sOut ) 350void Bu::UtfString::writeUtf16( Bu::Stream &sOut )
306{ 351{
307} 352}
308 353*/
309void Bu::UtfString::writeUtf16be( Bu::Stream &sOut ) 354void Bu::UtfString::writeUtf16be( Bu::Stream &sOut )
310{ 355{
356#if BYTE_ORDER == BIG_ENDIAN
357 uint16_t iTmp = 0xFEFF; // Byte Order Marker
358 sOut.write( &iTmp, 2 );
359 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
360 {
361 iTmp = *i;
362 sOut.write( &iTmp, 2 );
363 }
364#else
365 uint16_t iTmp = 0xFEFF; // Byte Order Marker
366 iTmp = (iTmp>>8) | (iTmp<<8);
367 sOut.write( &iTmp, 2 );
368 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
369 {
370 iTmp = *i;
371 iTmp = (iTmp>>8) | (iTmp<<8);
372 sOut.write( &iTmp, 2 );
373 }
374#endif
311} 375}
312 376
313void Bu::UtfString::writeUtf16le( Bu::Stream &sOut ) 377void Bu::UtfString::writeUtf16le( Bu::Stream &sOut )
314{ 378{
315} 379#if BYTE_ORDER == LITTLE_ENDIAN
316 380 uint16_t iTmp = 0xFEFF; // Byte Order Marker
317void Bu::UtfString::writeUtf32( Bu::Stream &sOut ) 381 sOut.write( &iTmp, 2 );
318{ 382 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
383 {
384 iTmp = *i;
385 sOut.write( &iTmp, 2 );
386 }
387#else
388 uint16_t iTmp = 0xFEFF; // Byte Order Marker
389 iTmp = (iTmp>>8) | (iTmp<<8);
390 sOut.write( &iTmp, 2 );
391 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
392 {
393 iTmp = *i;
394 iTmp = (iTmp>>8) | (iTmp<<8);
395 sOut.write( &iTmp, 2 );
396 }
397#endif
319} 398}
320 399
321void Bu::UtfString::writeUtf32be( Bu::Stream &sOut ) 400void Bu::UtfString::writeUtf32be( Bu::Stream &sOut )
322{ 401{
402#if BYTE_ORDER == BIG_ENDIAN
403 uint32_t iTmp = 0xFEFF; // Byte Order Marker
404 sOut.write( &iTmp, 4 );
405 int i = 0;
406 while( i < aData.getSize() )
407 {
408 iTmp = nextChar( i );
409 sOut.write( &iTmp, 4 );
410 }
411#else
412 uint32_t iTmp = 0xFEFF; // Byte Order Marker
413 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
414 sOut.write( &iTmp, 4 );
415 int i = 0;
416 while( i < aData.getSize() )
417 {
418 iTmp = nextChar( i );
419 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
420 sOut.write( &iTmp, 4 );
421 }
422#endif
323} 423}
324 424
325void Bu::UtfString::writeUtf32le( Bu::Stream &sOut ) 425void Bu::UtfString::writeUtf32le( Bu::Stream &sOut )
326{ 426{
427#if BYTE_ORDER == LITTLE_ENDIAN
428 uint32_t iTmp = 0xFEFF; // Byte Order Marker
429 sOut.write( &iTmp, 4 );
430 int i = 0;
431 while( i < aData.getSize() )
432 {
433 iTmp = nextChar( i );
434 sOut.write( &iTmp, 4 );
435 }
436#else
437 uint32_t iTmp = 0xFEFF; // Byte Order Marker
438 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
439 sOut.write( &iTmp, 4 );
440 int i = 0;
441 while( i < aData.getSize() )
442 {
443 iTmp = nextChar( i );
444 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
445 sOut.write( &iTmp, 4 );
446 }
447#endif
327} 448}
328 449
329Bu::UtfChar Bu::UtfString::get( int iIndex ) 450Bu::UtfChar Bu::UtfString::get( int iIndex )
@@ -362,7 +483,7 @@ void Bu::UtfString::debug()
362 { 483 {
363 if( i > 0 ) 484 if( i > 0 )
364 sio << ", "; 485 sio << ", ";
365 sio << "0x" << Fmt::hex() << get( i ); 486 sio << "0x" << Fmt::hex() << nextChar( i );
366 } 487 }
367 sio << sio.nl; 488 sio << sio.nl;
368} 489}
diff --git a/src/utfstring.h b/src/utfstring.h
index 8448ea4..be3e6ad 100644
--- a/src/utfstring.h
+++ b/src/utfstring.h
@@ -86,10 +86,8 @@ namespace Bu
86 86
87 void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); 87 void write( Bu::Stream &sOut, Encoding eEnc=Utf8 );
88 void writeUtf8( Bu::Stream &sOut ); 88 void writeUtf8( Bu::Stream &sOut );
89 void writeUtf16( Bu::Stream &sOut );
90 void writeUtf16be( Bu::Stream &sOut ); 89 void writeUtf16be( Bu::Stream &sOut );
91 void writeUtf16le( Bu::Stream &sOut ); 90 void writeUtf16le( Bu::Stream &sOut );
92 void writeUtf32( Bu::Stream &sOut );
93 void writeUtf32be( Bu::Stream &sOut ); 91 void writeUtf32be( Bu::Stream &sOut );
94 void writeUtf32le( Bu::Stream &sOut ); 92 void writeUtf32le( Bu::Stream &sOut );
95 93