diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/tests/utf.cpp | 22 | ||||
| -rw-r--r-- | src/utfstring.cpp | 54 | ||||
| -rw-r--r-- | src/utfstring.h | 4 |
3 files changed, 80 insertions, 0 deletions
diff --git a/src/tests/utf.cpp b/src/tests/utf.cpp new file mode 100644 index 0000000..59d49c6 --- /dev/null +++ b/src/tests/utf.cpp | |||
| @@ -0,0 +1,22 @@ | |||
| 1 | #include <bu/file.h> | ||
| 2 | #include <bu/string.h> | ||
| 3 | #include <bu/utfstring.h> | ||
| 4 | |||
| 5 | int main( int argc, char *argv[] ) | ||
| 6 | { | ||
| 7 | argc--, argv++; | ||
| 8 | |||
| 9 | for( char **sFile = argv; *sFile; sFile++ ) | ||
| 10 | { | ||
| 11 | Bu::File fIn( *sFile, Bu::File::Read ); | ||
| 12 | Bu::String sUtf8; | ||
| 13 | char buf[4096]; | ||
| 14 | while( !fIn.isEos() ) | ||
| 15 | { | ||
| 16 | int iAmnt = fIn.read( buf, 4096 ); | ||
| 17 | sUtf8.append( buf, iAmnt ); | ||
| 18 | } | ||
| 19 | Bu::UtfString::debugUtf8( sUtf8 ); | ||
| 20 | } | ||
| 21 | } | ||
| 22 | |||
diff --git a/src/utfstring.cpp b/src/utfstring.cpp index eb23713..0e2060b 100644 --- a/src/utfstring.cpp +++ b/src/utfstring.cpp | |||
| @@ -7,6 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | #include "bu/utfstring.h" | 8 | #include "bu/utfstring.h" |
| 9 | 9 | ||
| 10 | #include "bu/string.h" | ||
| 11 | |||
| 10 | Bu::UtfString::UtfString() | 12 | Bu::UtfString::UtfString() |
| 11 | { | 13 | { |
| 12 | } | 14 | } |
| @@ -15,3 +17,55 @@ Bu::UtfString::~UtfString() | |||
| 15 | { | 17 | { |
| 16 | } | 18 | } |
| 17 | 19 | ||
| 20 | #include "bu/sio.h" | ||
| 21 | using Bu::sio; | ||
| 22 | |||
| 23 | void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 ) | ||
| 24 | { | ||
| 25 | static uint8_t lmask[8] = { | ||
| 26 | 0x00, | ||
| 27 | 0x01, | ||
| 28 | 0x03, | ||
| 29 | 0x07, | ||
| 30 | 0x0f, | ||
| 31 | 0x1f, | ||
| 32 | 0x3f, | ||
| 33 | 0x7f | ||
| 34 | }; | ||
| 35 | for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ ) | ||
| 36 | { | ||
| 37 | if( i != sUtf8.begin() ) | ||
| 38 | sio << ", "; | ||
| 39 | if( ((int)(uint8_t)*i)&0x80 ) | ||
| 40 | { | ||
| 41 | // sio << "Flag byte: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 42 | // << (int)(uint8_t)*i << sio.nl; | ||
| 43 | int iBytes = 1; | ||
| 44 | for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { } | ||
| 45 | // sio << "iBytes = " << iBytes << sio.nl; | ||
| 46 | point uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1)); | ||
| 47 | // sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 48 | // << (int)lmask[7-iBytes] << sio.nl; | ||
| 49 | for( iBytes--; iBytes >= 1; iBytes-- ) | ||
| 50 | { | ||
| 51 | // sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1)) | ||
| 52 | // << sio.nl; | ||
| 53 | // sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 54 | // << (int)(uint8_t)*i << sio.nl | ||
| 55 | // << "mask: " << Bu::Fmt().radix(2).width(8).fill('0') | ||
| 56 | // << (int)lmask[6] << sio.nl; | ||
| 57 | i++; | ||
| 58 | uPt |= ((*i)&lmask[6])<<(6*(iBytes-1)); | ||
| 59 | } | ||
| 60 | sio << uPt; | ||
| 61 | // sio << " (" << Bu::Fmt( 8, 2 ).fill('0') | ||
| 62 | // << uPt << ")"; | ||
| 63 | } | ||
| 64 | else | ||
| 65 | { | ||
| 66 | sio << (int)((uint8_t)*i); | ||
| 67 | } | ||
| 68 | } | ||
| 69 | sio << sio.nl; | ||
| 70 | } | ||
| 71 | |||
diff --git a/src/utfstring.h b/src/utfstring.h index 56e544e..3bdf51c 100644 --- a/src/utfstring.h +++ b/src/utfstring.h | |||
| @@ -12,6 +12,8 @@ | |||
| 12 | 12 | ||
| 13 | namespace Bu | 13 | namespace Bu |
| 14 | { | 14 | { |
| 15 | class String; | ||
| 16 | |||
| 15 | class UtfString | 17 | class UtfString |
| 16 | { | 18 | { |
| 17 | public: | 19 | public: |
| @@ -20,6 +22,8 @@ namespace Bu | |||
| 20 | 22 | ||
| 21 | typedef uint32_t point; | 23 | typedef uint32_t point; |
| 22 | 24 | ||
| 25 | static void debugUtf8( const Bu::String &sUtf8 ); | ||
| 26 | |||
| 23 | private: | 27 | private: |
| 24 | // typedef BasicString<uint16_t> RawString; | 28 | // typedef BasicString<uint16_t> RawString; |
| 25 | // RawString rsStore; | 29 | // RawString rsStore; |
