summaryrefslogtreecommitdiff
path: root/src/utfstring.h
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2011-04-04 07:22:10 +0000
committerMike Buland <eichlan@xagasoft.com>2011-04-04 07:22:10 +0000
commitabbf45c1da7f3e3a542e6c6339a1bab31283f22e (patch)
tree1d40f79bbe315294507bb9bfedfbe2b01e815c1a /src/utfstring.h
parentbc5fc82538f220f62f231d5bdda5910752156a32 (diff)
downloadlibbu++-abbf45c1da7f3e3a542e6c6339a1bab31283f22e.tar.gz
libbu++-abbf45c1da7f3e3a542e6c6339a1bab31283f22e.tar.bz2
libbu++-abbf45c1da7f3e3a542e6c6339a1bab31283f22e.tar.xz
libbu++-abbf45c1da7f3e3a542e6c6339a1bab31283f22e.zip
I made some awesome progress on the UtfString system: it stores its data in native UTF-16
encoding to make things easier (little endian in our case). It can currently read UTF-8 and UTF-16BE input, but it does not handle a BOM yet. It gives you full Unicode code points instead of the raw UTF-16 values, which is pretty slick.
Diffstat (limited to 'src/utfstring.h')
-rw-r--r--src/utfstring.h20
1 files changed, 18 insertions, 2 deletions
diff --git a/src/utfstring.h b/src/utfstring.h
index 6f85e93..79ef62e 100644
--- a/src/utfstring.h
+++ b/src/utfstring.h
@@ -9,9 +9,12 @@
9#define BU_UTF_STRING_H 9#define BU_UTF_STRING_H
10 10
11#include <stdint.h> 11#include <stdint.h>
12#include "bu/array.h"
12 13
13namespace Bu 14namespace Bu
14{ 15{
16 class String;
17
15 /** 18 /**
16 * UtfChar isn't actually a character, unicode specifies "code points" not 19 * UtfChar isn't actually a character, unicode specifies "code points" not
17 * characters. The main reason for this is that not all code points define 20 * characters. The main reason for this is that not all code points define
@@ -40,10 +43,23 @@ namespace Bu
40 UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 ); 43 UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
41 virtual ~UtfString(); 44 virtual ~UtfString();
42 45
43 static void debugUtf8( const Bu::String &sUtf8 ); 46 void append( UtfChar ch );
47
48 void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
49 void setUtf8( const Bu::String &sInput );
50 void setUtf16( const Bu::String &sInput );
51// void setUtf16be( const Bu::String &sInput );
52// void setUtf16le( const Bu::String &sInput );
53
54 void debug();
55
56 UtfChar get( int iIndex );
57
58 private:
59 void append16( uint16_t i ) { aData.append( i ); }
44 60
45 private: 61 private:
46 uint16_t *pData; 62 Bu::Array<uint16_t> aData;
47 int iRawLen; 63 int iRawLen;
48 int iCharLen; 64 int iCharLen;
49 }; 65 };