diff options
author | Mike Buland <eichlan@xagasoft.com> | 2011-03-22 19:25:42 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2011-03-22 19:25:42 +0000 |
commit | 88004d87d513dcba767b1dae1e5199a89b22ce36 (patch) | |
tree | 06051330e18e44407edc25d28fe978e0637ed90e /src/utfstring.cpp | |
parent | 9d7ee5a5b9b6ca2093043b7c584df02913739b02 (diff) | |
download | libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.gz libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.bz2 libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.xz libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.zip |
We now have a UTF-8 test parser; I think I'm going to move it into a functor.
Diffstat (limited to 'src/utfstring.cpp')
-rw-r--r-- | src/utfstring.cpp | 54 |
1 files changed, 54 insertions, 0 deletions
diff --git a/src/utfstring.cpp b/src/utfstring.cpp index eb23713..0e2060b 100644 --- a/src/utfstring.cpp +++ b/src/utfstring.cpp | |||
@@ -7,6 +7,8 @@ | |||
7 | 7 | ||
8 | #include "bu/utfstring.h" | 8 | #include "bu/utfstring.h" |
9 | 9 | ||
10 | #include "bu/string.h" | ||
11 | |||
// Default constructor for Bu::UtfString; the body is empty.
10 | Bu::UtfString::UtfString() | 12 | Bu::UtfString::UtfString() |
11 | { | 13 | { |
12 | } | 14 | } |
@@ -15,3 +17,55 @@ Bu::UtfString::~UtfString() | |||
15 | { | 17 | { |
16 | } | 18 | } |
17 | 19 | ||
20 | #include "bu/sio.h" | ||
21 | using Bu::sio; | ||
22 | |||
// debugUtf8: debugging aid that walks sUtf8 one byte at a time and writes a
// comma-separated list of decoded values to sio, followed by sio.nl.
//   - A byte with the high bit (0x80) clear is printed as its integer value.
//   - A byte with the high bit set is treated as the lead byte of a multi-byte
//     sequence: its run of leading 1 bits gives the sequence length (iBytes),
//     the remaining low bits of the lead byte and the low 6 bits of each
//     following byte are assembled into uPt, and uPt is printed.
// sUtf8: the byte string to dump; the code interprets it as UTF-8.
// NOTE(review): `point` is not declared in this view - presumably a code point
// typedef belonging to UtfString; confirm in bu/utfstring.h.
23 | void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) | ||
24 | { | ||
// lmask[n] has the n lowest bits set; it is used to strip the marker bits
// from the top of a byte and keep only the payload bits.
25 | static uint8_t lmask[8] = { | ||
26 | 0x00, | ||
27 | 0x01, | ||
28 | 0x03, | ||
29 | 0x07, | ||
30 | 0x0f, | ||
31 | 0x1f, | ||
32 | 0x3f, | ||
33 | 0x7f | ||
34 | }; | ||
// The bare `i` loop condition relies on the iterator's boolean conversion -
// presumably true while i refers to a valid position; confirm in bu/string.h.
35 | for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) | ||
36 | { | ||
// Emit a separator before every value except the first.
37 | if( i != sUtf8.begin() ) | ||
38 | sio << ", "; | ||
// High bit set: this byte is not a plain 7-bit value.
39 | if( ((int)(uint8_t)*i)&0x80 ) | ||
40 | { | ||
41 | // sio << "Flag byte: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
42 | // << (int)(uint8_t)*i << sio.nl; | ||
// Count the leading 1 bits of the lead byte: keep shifting left until the
// bit that lands in position 7 is clear.
// NOTE(review): for a byte of 0xFF this loop stops at iBytes == 8, so the
// lmask[7-iBytes] lookup below indexes lmask[-1] (out of bounds).
// NOTE(review): a stray continuation byte (10xxxxxx) yields iBytes == 1 and
// is printed as its low 6 bits rather than being reported as invalid.
43 | int iBytes = 1; | ||
44 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } | ||
45 | // sio << "iBytes = " << iBytes << sio.nl; | ||
// Keep the low (7 - iBytes) bits of the lead byte and shift them above the
// 6 bits that each of the remaining (iBytes - 1) bytes will contribute.
46 | point uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); | ||
47 | // sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
48 | // << (int)lmask[7-iBytes] << sio.nl; | ||
// One pass per remaining byte of the sequence; iBytes counts down so that
// the last byte is shifted by 0.
49 | for( iBytes--; iBytes >= 1; iBytes-- ) | ||
50 | { | ||
51 | // sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) | ||
52 | // << sio.nl; | ||
53 | // sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
54 | // << (int)(uint8_t)*i << sio.nl | ||
55 | // << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
56 | // << (int)lmask[6] << sio.nl; | ||
// NOTE(review): i is advanced and dereferenced with no visible check that
// the string has not ended, and the byte is not checked for the 10xxxxxx
// form; behaviour on a truncated sequence depends on
// Bu::String::const_iterator - confirm.
57 | i++; | ||
// Merge the low 6 bits of this byte into its slot of the code point.
58 | uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); | ||
59 | } | ||
60 | sio << uPt; | ||
61 | // sio << " (" << Bu::Fmt( 8, 2 ).fill('0') | ||
62 | // << uPt << ")"; | ||
63 | } | ||
64 | else | ||
65 | { | ||
// High bit clear: print the byte's unsigned integer value directly.
66 | sio << (int)((uint8_t)*i); | ||
67 | } | ||
68 | } | ||
// Terminate the dump with a newline.
69 | sio << sio.nl; | ||
70 | } | ||
71 | |||