aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Buland <eichlan@xagasoft.com> 2011-03-22 19:25:42 +0000
committer Mike Buland <eichlan@xagasoft.com> 2011-03-22 19:25:42 +0000
commit 88004d87d513dcba767b1dae1e5199a89b22ce36 (patch)
tree 06051330e18e44407edc25d28fe978e0637ed90e
parent 9d7ee5a5b9b6ca2093043b7c584df02913739b02 (diff)
download libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.gz
libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.bz2
libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.tar.xz
libbu++-88004d87d513dcba767b1dae1e5199a89b22ce36.zip
We now have a UTF-8 test parser, I'm going to move it into a functor, I think.
-rw-r--r-- src/tests/utf.cpp 22
-rw-r--r-- src/utfstring.cpp 54
-rw-r--r-- src/utfstring.h 4
3 files changed, 80 insertions, 0 deletions
diff --git a/src/tests/utf.cpp b/src/tests/utf.cpp
new file mode 100644
index 0000000..59d49c6
--- /dev/null
+++ b/src/tests/utf.cpp
@@ -0,0 +1,22 @@
1#include <bu/file.h>
2#include <bu/string.h>
3#include <bu/utfstring.h>
4
5int main( int argc, char *argv[] )
6{
7 argc--, argv++;
8
9 for( char **sFile = argv; *sFile; sFile++ )
10 {
11 Bu::File fIn( *sFile, Bu::File::Read );
12 Bu::String sUtf8;
13 char buf[4096];
14 while( !fIn.isEos() )
15 {
16 int iAmnt = fIn.read( buf, 4096 );
17 sUtf8.append( buf, iAmnt );
18 }
19 Bu::UtfString::debugUtf8( sUtf8 );
20 }
21}
22
diff --git a/src/utfstring.cpp b/src/utfstring.cpp
index eb23713..0e2060b 100644
--- a/src/utfstring.cpp
+++ b/src/utfstring.cpp
@@ -7,6 +7,8 @@
7 7
8#include "bu/utfstring.h" 8#include "bu/utfstring.h"
9 9
10#include "bu/string.h"
11
10Bu::UtfString::UtfString() 12Bu::UtfString::UtfString()
11{ 13{
12} 14}
@@ -15,3 +17,55 @@ Bu::UtfString::~UtfString()
15{ 17{
16} 18}
17 19
20#include "bu/sio.h"
21using Bu::sio;
22
23void Bu::UtfString::debugUtf8( const Bu::String &sUtf8 )
24{
25 static uint8_t lmask[8] = {
26 0x00,
27 0x01,
28 0x03,
29 0x07,
30 0x0f,
31 0x1f,
32 0x3f,
33 0x7f
34 };
35 for( Bu::String::const_iterator i = sUtf8.begin(); i; i++ )
36 {
37 if( i != sUtf8.begin() )
38 sio << ", ";
39 if( ((int)(uint8_t)*i)&0x80 )
40 {
41// sio << "Flag byte: " << Bu::Fmt().radix(2).width(8).fill('0')
42// << (int)(uint8_t)*i << sio.nl;
43 int iBytes = 1;
44 for(; (((uint8_t)(*i))<<iBytes)&0x80; iBytes++ ) { }
45// sio << "iBytes = " << iBytes << sio.nl;
46 point uPt = ((*i) & lmask[7-iBytes])<<(6*(iBytes-1));
47// sio << "mask: " << Bu::Fmt().radix(2).width(8).fill('0')
48// << (int)lmask[7-iBytes] << sio.nl;
49 for( iBytes--; iBytes >= 1; iBytes-- )
50 {
51// sio << "iBytes = " << iBytes << ", shift = " << (6*(iBytes-1))
52// << sio.nl;
53// sio << "next: " << Bu::Fmt().radix(2).width(8).fill('0')
54// << (int)(uint8_t)*i << sio.nl
55// << "mask: " << Bu::Fmt().radix(2).width(8).fill('0')
56// << (int)lmask[6] << sio.nl;
57 i++;
58 uPt |= ((*i)&lmask[6])<<(6*(iBytes-1));
59 }
60 sio << uPt;
61// sio << " (" << Bu::Fmt( 8, 2 ).fill('0')
62// << uPt << ")";
63 }
64 else
65 {
66 sio << (int)((uint8_t)*i);
67 }
68 }
69 sio << sio.nl;
70}
71
diff --git a/src/utfstring.h b/src/utfstring.h
index 56e544e..3bdf51c 100644
--- a/src/utfstring.h
+++ b/src/utfstring.h
@@ -12,6 +12,8 @@
12 12
13namespace Bu 13namespace Bu
14{ 14{
15 class String;
16
15 class UtfString 17 class UtfString
16 { 18 {
17 public: 19 public:
@@ -20,6 +22,8 @@ namespace Bu
20 22
21 typedef uint32_t point; 23 typedef uint32_t point;
22 24
25 static void debugUtf8( const Bu::String &sUtf8 );
26
23 private: 27 private:
24// typedef BasicString<uint16_t> RawString; 28// typedef BasicString<uint16_t> RawString;
25// RawString rsStore; 29// RawString rsStore;