aboutsummaryrefslogtreecommitdiff
path: root/src/utfstring.cpp
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2011-04-07 05:44:42 +0000
committerMike Buland <eichlan@xagasoft.com>2011-04-07 05:44:42 +0000
commit27aecbc60be6c80ce221f29c01f743de714faa63 (patch)
tree84838a5e55088ed95c3f4ca35f6ff4c27f56812b /src/utfstring.cpp
parent5de54062d8bf9bdfde17a02e4aef91341146162d (diff)
downloadlibbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.gz
libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.bz2
libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.tar.xz
libbu++-27aecbc60be6c80ce221f29c01f743de714faa63.zip
Pretty sure all utf encoders and decoders are complete and tested.
Diffstat (limited to 'src/utfstring.cpp')
-rw-r--r--src/utfstring.cpp143
1 files changed, 132 insertions, 11 deletions
diff --git a/src/utfstring.cpp b/src/utfstring.cpp
index c9da52f..3f57618 100644
--- a/src/utfstring.cpp
+++ b/src/utfstring.cpp
@@ -259,8 +259,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc )
259 break; 259 break;
260 260
261 case Utf16: 261 case Utf16:
262 writeUtf16( sOut ); 262// writeUtf16( sOut );
263 break; 263// break;
264 264
265 case Utf16be: 265 case Utf16be:
266 writeUtf16be( sOut ); 266 writeUtf16be( sOut );
@@ -271,8 +271,8 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc )
271 break; 271 break;
272 272
273 case Utf32: 273 case Utf32:
274 writeUtf32( sOut ); 274// writeUtf32( sOut );
275 break; 275// break;
276 276
277 case Utf32be: 277 case Utf32be:
278 writeUtf32be( sOut ); 278 writeUtf32be( sOut );
@@ -300,30 +300,151 @@ void Bu::UtfString::write( Bu::Stream &sOut, Encoding eEnc )
300 300
301void Bu::UtfString::writeUtf8( Bu::Stream &sOut ) 301void Bu::UtfString::writeUtf8( Bu::Stream &sOut )
302{ 302{
303 int iPos = 0;
304 while( iPos < aData.getSize() )
305 {
306 uint8_t uByte;
307 Bu::UtfChar chr = nextChar( iPos );
308 if( chr >= 0x010000 )
309 {
310 // Four bytes
311 // 111 111111 111111 111111
312 uByte = (chr>>18)|0xF0;
313 sOut.write( &uByte, 1 );
314 uByte = (chr>>12)&0x3F|0x80;
315 sOut.write( &uByte, 1 );
316 uByte = (chr>>6)&0x3F|0x80;
317 sOut.write( &uByte, 1 );
318 uByte = (chr&0x3F)|0x80;
319 sOut.write( &uByte, 1 );
320 }
321 else if( chr >= 0x800 )
322 {
323 // Three bytes
324 // 1111 111111 111111
325 uByte = (chr>>12)|0xE0;
326 sOut.write( &uByte, 1 );
327 uByte = (chr>>6)&0x3F|0x80;
328 sOut.write( &uByte, 1 );
329 uByte = (chr&0x3F)|0x80;
330 sOut.write( &uByte, 1 );
331 }
332 else if( chr >= 0x80 )
333 {
334 // Two bytes
335 // 11111 111111
336 uByte = (chr>>6)|0xC0;
337 sOut.write( &uByte, 1 );
338 uByte = (chr&0x3F)|0x80;
339 sOut.write( &uByte, 1 );
340 }
341 else
342 {
343 // One byte
344 uByte = chr;
345 sOut.write( &uByte, 1 );
346 }
347 }
303} 348}
304 349/*
305void Bu::UtfString::writeUtf16( Bu::Stream &sOut ) 350void Bu::UtfString::writeUtf16( Bu::Stream &sOut )
306{ 351{
307} 352}
308 353*/
309void Bu::UtfString::writeUtf16be( Bu::Stream &sOut ) 354void Bu::UtfString::writeUtf16be( Bu::Stream &sOut )
310{ 355{
356#if BYTE_ORDER == BIG_ENDIAN
357 uint16_t iTmp = 0xFEFF; // Byte Order Marker
358 sOut.write( &iTmp, 2 );
359 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
360 {
361 iTmp = *i;
362 sOut.write( &iTmp, 2 );
363 }
364#else
365 uint16_t iTmp = 0xFEFF; // Byte Order Marker
366 iTmp = (iTmp>>8) | (iTmp<<8);
367 sOut.write( &iTmp, 2 );
368 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
369 {
370 iTmp = *i;
371 iTmp = (iTmp>>8) | (iTmp<<8);
372 sOut.write( &iTmp, 2 );
373 }
374#endif
311} 375}
312 376
313void Bu::UtfString::writeUtf16le( Bu::Stream &sOut ) 377void Bu::UtfString::writeUtf16le( Bu::Stream &sOut )
314{ 378{
315} 379#if BYTE_ORDER == LITTLE_ENDIAN
316 380 uint16_t iTmp = 0xFEFF; // Byte Order Marker
317void Bu::UtfString::writeUtf32( Bu::Stream &sOut ) 381 sOut.write( &iTmp, 2 );
318{ 382 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
383 {
384 iTmp = *i;
385 sOut.write( &iTmp, 2 );
386 }
387#else
388 uint16_t iTmp = 0xFEFF; // Byte Order Marker
389 iTmp = (iTmp>>8) | (iTmp<<8);
390 sOut.write( &iTmp, 2 );
391 for( Array<uint16_t>::iterator i = aData.begin(); i; i++ )
392 {
393 iTmp = *i;
394 iTmp = (iTmp>>8) | (iTmp<<8);
395 sOut.write( &iTmp, 2 );
396 }
397#endif
319} 398}
320 399
321void Bu::UtfString::writeUtf32be( Bu::Stream &sOut ) 400void Bu::UtfString::writeUtf32be( Bu::Stream &sOut )
322{ 401{
402#if BYTE_ORDER == BIG_ENDIAN
403 uint32_t iTmp = 0xFEFF; // Byte Order Marker
404 sOut.write( &iTmp, 4 );
405 int i = 0;
406 while( i < aData.getSize() )
407 {
408 iTmp = nextChar( i );
409 sOut.write( &iTmp, 4 );
410 }
411#else
412 uint32_t iTmp = 0xFEFF; // Byte Order Marker
413 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
414 sOut.write( &iTmp, 4 );
415 int i = 0;
416 while( i < aData.getSize() )
417 {
418 iTmp = nextChar( i );
419 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
420 sOut.write( &iTmp, 4 );
421 }
422#endif
323} 423}
324 424
325void Bu::UtfString::writeUtf32le( Bu::Stream &sOut ) 425void Bu::UtfString::writeUtf32le( Bu::Stream &sOut )
326{ 426{
427#if BYTE_ORDER == LITTLE_ENDIAN
428 uint32_t iTmp = 0xFEFF; // Byte Order Marker
429 sOut.write( &iTmp, 4 );
430 int i = 0;
431 while( i < aData.getSize() )
432 {
433 iTmp = nextChar( i );
434 sOut.write( &iTmp, 4 );
435 }
436#else
437 uint32_t iTmp = 0xFEFF; // Byte Order Marker
438 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
439 sOut.write( &iTmp, 4 );
440 int i = 0;
441 while( i < aData.getSize() )
442 {
443 iTmp = nextChar( i );
444 iTmp = (iTmp>>24)|(iTmp<<24)|((iTmp&0xff0000)>>8)|((iTmp&0xff00)<<8);
445 sOut.write( &iTmp, 4 );
446 }
447#endif
327} 448}
328 449
329Bu::UtfChar Bu::UtfString::get( int iIndex ) 450Bu::UtfChar Bu::UtfString::get( int iIndex )
@@ -362,7 +483,7 @@ void Bu::UtfString::debug()
362 { 483 {
363 if( i > 0 ) 484 if( i > 0 )
364 sio << ", "; 485 sio << ", ";
365 sio << "0x" << Fmt::hex() << get( i ); 486 sio << "0x" << Fmt::hex() << nextChar( i );
366 } 487 }
367 sio << sio.nl; 488 sio << sio.nl;
368} 489}