From 88004d87d513dcba767b1dae1e5199a89b22ce36 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Tue, 22 Mar 2011 19:25:42 +0000 Subject: We now have a UTF-8 test parser, I'm going to move it into a functor, I think. --- src/utfstring.cpp | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'src/utfstring.cpp') diff --git a/src/utfstring.cpp b/src/utfstring.cpp index eb23713..0e2060b 100644 --- a/src/utfstring.cpp +++ b/src/utfstring.cpp @@ -7,6 +7,8 @@ #include "bu/utfstring.h" +#include "bu/string.h" + Bu::UtfString::UtfString() { } @@ -15,3 +17,55 @@ Bu::UtfString::~UtfString() { } +#include "bu/sio.h" +using Bu::sio; + +void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) +{ + static uint8_t lmask[8] = { + 0x00, + 0x01, + 0x03, + 0x07, + 0x0f, + 0x1f, + 0x3f, + 0x7f + }; + for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) + { + if( i != sUtf8.begin() ) + sio << ", "; + if( ((int)(uint8_t)*i)&0x80 ) + { +// sio << "Flag byte: " << Bu::Fmt().radix(2).width(8).fill('0') +// << (int)(uint8_t)*i << sio.nl; + int iBytes = 1; + for(; (((uint8_t)(*i))<= 1; iBytes-- ) + { +// sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) +// << sio.nl; +// sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') +// << (int)(uint8_t)*i << sio.nl +// << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') +// << (int)lmask[6] << sio.nl; + i++; + uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); + } + sio << uPt; +// sio << " (" << Bu::Fmt( 8, 2 ).fill('0') +// << uPt << ")"; + } + else + { + sio << (int)((uint8_t)*i); + } + } + sio << sio.nl; +} + -- cgit v1.2.3