diff options
| author | Mike Buland <eichlan@xagasoft.com> | 2011-03-22 19:25:42 +0000 |
|---|---|---|
| committer | Mike Buland <eichlan@xagasoft.com> | 2011-03-22 19:25:42 +0000 |
| commit | 88004d87d513dcba767b1dae1e5199a89b22ce36 (patch) | |
| tree | 06051330e18e44407edc25d28fe978e0637ed90e /src/utfstring.cpp | |
| parent | 9d7ee5a5b9b6ca2093043b7c584df02913739b02 (diff) | |
| download | libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.gz libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.bz2 libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.xz libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.zip | |
We now have a UTF-8 test parser; I think I'm going to move it into a functor.
Diffstat (limited to 'src/utfstring.cpp')
| -rw-r--r-- | src/utfstring.cpp | 54 |
1 file changed, 54 insertions, 0 deletions
diff --git a/src/utfstring.cpp b/src/utfstring.cpp index eb23713..0e2060b 100644 --- a/src/utfstring.cpp +++ b/src/utfstring.cpp | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | #include "bu/utfstring.h" | 8 | #include "bu/utfstring.h" |
| 9 | 9 | ||
| 10 | #include "bu/string.h" | ||
| 11 | |||
| 10 | Bu::UtfString::UtfString() | 12 | Bu::UtfString::UtfString() |
| 11 | { | 13 | { |
| 12 | } | 14 | } |
| @@ -15,3 +17,55 @@ Bu::UtfString::~UtfString() | |||
| 15 | { | 17 | { |
| 16 | } | 18 | } |
| 17 | 19 | ||
| 20 | #include "bu/sio.h" | ||
| 21 | using Bu::sio; | ||
| 22 | |||
| 23 | void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) | ||
| 24 | { | ||
| 25 | static uint8_t lmask[8] = { | ||
| 26 | 0x00, | ||
| 27 | 0x01, | ||
| 28 | 0x03, | ||
| 29 | 0x07, | ||
| 30 | 0x0f, | ||
| 31 | 0x1f, | ||
| 32 | 0x3f, | ||
| 33 | 0x7f | ||
| 34 | }; | ||
| 35 | for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) | ||
| 36 | { | ||
| 37 | if( i != sUtf8.begin() ) | ||
| 38 | sio << ", "; | ||
| 39 | if( ((int)(uint8_t)*i)&0x80 ) | ||
| 40 | { | ||
| 41 | // sio << "Flag byte: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 42 | // << (int)(uint8_t)*i << sio.nl; | ||
| 43 | int iBytes = 1; | ||
| 44 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } | ||
| 45 | // sio << "iBytes = " << iBytes << sio.nl; | ||
| 46 | point uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); | ||
| 47 | // sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 48 | // << (int)lmask[7-iBytes] << sio.nl; | ||
| 49 | for( iBytes--; iBytes >= 1; iBytes-- ) | ||
| 50 | { | ||
| 51 | // sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) | ||
| 52 | // << sio.nl; | ||
| 53 | // sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 54 | // << (int)(uint8_t)*i << sio.nl | ||
| 55 | // << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 56 | // << (int)lmask[6] << sio.nl; | ||
| 57 | i++; | ||
| 58 | uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); | ||
| 59 | } | ||
| 60 | sio << uPt; | ||
| 61 | // sio << " (" << Bu::Fmt( 8, 2 ).fill('0') | ||
| 62 | // << uPt << ")"; | ||
| 63 | } | ||
| 64 | else | ||
| 65 | { | ||
| 66 | sio << (int)((uint8_t)*i); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | sio << sio.nl; | ||
| 70 | } | ||
| 71 | |||
