aboutsummaryrefslogtreecommitdiff
path: root/utf16.cpp
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2011-04-04 14:59:13 +0000
committerMike Buland <eichlan@xagasoft.com>2011-04-04 14:59:13 +0000
commit6ff23ac5097f7d92ac8840c2ba17d1dbf1eb80a5 (patch)
treefc70404d66854bba713bff2350f5f69f43bd85bc /utf16.cpp
parentabbf45c1da7f3e3a542e6c6339a1bab31283f22e (diff)
downloadlibbu++-6ff23ac5097f7d92ac8840c2ba17d1dbf1eb80a5.tar.gz
libbu++-6ff23ac5097f7d92ac8840c2ba17d1dbf1eb80a5.tar.bz2
libbu++-6ff23ac5097f7d92ac8840c2ba17d1dbf1eb80a5.tar.xz
libbu++-6ff23ac5097f7d92ac8840c2ba17d1dbf1eb80a5.zip
UtfString is going really well. It can now parse Utf8, Utf16 (le,be), and
Utf32 (le,be). The internal storage seems to be working fine. We do have a problem with random access, but at least we can tell which half of a surrogate pair we're on, so we can always rapidly determine the entire code point from any utf16 index that we're on. The only optimization that I'm not doing yet is reading in entire 16-bit or 32-bit words at a time and converting them from their byte order to native. There are a few potential issues with that, so we'll see. I added a couple of testing data files and a test program; I'll delete them all just as soon as it's verified to write correctly.
Diffstat (limited to 'utf16.cpp')
-rw-r--r--utf16.cpp42
1 files changed, 42 insertions, 0 deletions
diff --git a/utf16.cpp b/utf16.cpp
new file mode 100644
index 0000000..eedb521
--- /dev/null
+++ b/utf16.cpp
@@ -0,0 +1,42 @@
1#include <stdio.h>
2#include <stdint.h>
3
/*
 * Print the 16 bits of u to stdout as a string of '0' and '1' characters,
 * most significant bit first, followed by a newline.
 */
void bitprint( uint16_t u )
{
	// Walk a single-bit mask from bit 15 down to bit 0.
	for( uint16_t mask = 0x8000u; mask != 0; mask >>= 1 )
	{
		putchar( (u & mask) ? '1' : '0' );
	}
	putchar('\n');
}
10
/*
 * Print the 32 bits of u to stdout as a string of '0' and '1' characters,
 * most significant bit first, followed by a newline.
 */
void bitprint( uint32_t u )
{
	for( int i = 31; i >= 0; i-- )
	{
		// Shift the unsigned value down and test bit 0 rather than building
		// the mask with 1<<i: for i == 31 that would shift a signed int
		// into its sign bit.
		printf("%c", ((u >> i) & 1u) ? '1' : '0');
	}
	printf("\n");
}
17
/*
 * Split the code point `in` into a UTF-16 surrogate pair and print the
 * mapping to stdout.
 *
 * 0x10000 is subtracted from `in`; the upper 10 bits of the remaining value
 * are OR-ed with 0xD800 and stored in outHi, the lower 10 bits are OR-ed with
 * 0xDC00 and stored in outLo. No range check is performed: for values below
 * 0x10000 the unsigned subtraction wraps around, and bits above the low 20
 * are discarded by the masks.
 */
void utoutf16( uint32_t in, uint16_t &outHi, uint16_t &outLo )
{
	const uint32_t offset = in - 0x10000;

	outHi = 0xD800u | ((offset >> 10) & 0x3FF);
	outLo = 0xDC00u | (offset & 0x3FF);

	printf("0x%X == 0x%X, 0x%X\n", in, outHi, outLo );
}
24
/*
 * Combine the two halves of a UTF-16 surrogate pair into one code point.
 *
 * The low 10 bits of `hi` become bits 10..19 of the result, the low 10 bits
 * of `lo` become bits 0..9, and 0x10000 is added to the combined value.
 * The upper 6 bits of each input are ignored, so the inputs are not checked
 * for actually being surrogates.
 */
int32_t utf16tou( uint16_t hi, uint16_t lo )
{
	const uint32_t upperBits = ((uint32_t)hi & 0x3FF) << 10;
	const uint32_t lowerBits = (uint32_t)lo & 0x3FF;

	return (upperBits | lowerBits) + 0x10000;
}
29
30int main()
31{
32 bitprint( 0xD800u );
33 bitprint( 0xDC00u );
34 uint16_t hi, lo;
35 utoutf16( 0x1D11E, hi, lo ); // Cat face with wry smile
36 utoutf16( 0x10FFFD, hi, lo ); // Cat face with wry smile
37 utoutf16( 0x1F63C, hi, lo ); // Cat face with wry smile
38 bitprint( hi );
39 bitprint( lo );
40 printf("0x%X\n", utf16tou( hi, lo ) );
41 return 0;
42}