diff options
author | Mike Buland <eichlan@xagasoft.com> | 2010-11-19 05:54:14 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2010-11-19 05:54:14 +0000 |
commit | 7c335ede527eaf4a3053ef35b1299141d34aaf40 (patch) | |
tree | 5ececc9181090cce540a5e4fbe28eda97e4e5c2b | |
parent | 2fe32ba19571ff775a55f61eca355a46f269393e (diff) | |
download | libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.tar.gz libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.tar.bz2 libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.tar.xz libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.zip |
I now think that this may not work out at all. It looks like, if we want proper
Unicode handling, we'll need to implement a series of codecs and converters as
well as tables of code pages and lookups. It'll be interesting, I guess, but
it makes me care a lot less about proper encoding. Anyway, UtfString uses
shorts instead of chars, so it's a step in the right direction, but it's still
not enough to handle proper UTF-16 encoding; maybe UCS-2 encoding, but...
...that's lame. Bu::FBasicString has been generalized a bit, with optimizations
from libc for char-based strings. Unfortunately, it also still uses char-only
functions in several places; those all rely on casting the string data to
char * at the moment just to get the thing to compile. Basically, it's not a
good UTF-16 solution yet, and it may never be one while remaining compatible
with char-based strings.
-rw-r--r-- | src/fbasicstring.h | 52 | ||||
-rw-r--r-- | src/utfstring.cpp | 29 | ||||
-rw-r--r-- | src/utfstring.h | 24 |
3 files changed, 97 insertions, 8 deletions
diff --git a/src/fbasicstring.h b/src/fbasicstring.h index 7167f4a..af11bb2 100644 --- a/src/fbasicstring.h +++ b/src/fbasicstring.h | |||
@@ -39,6 +39,42 @@ namespace Bu | |||
39 | template< typename chr, int nMinSize, typename chralloc, | 39 | template< typename chr, int nMinSize, typename chralloc, |
40 | typename chunkalloc> class FBasicString; | 40 | typename chunkalloc> class FBasicString; |
41 | 41 | ||
42 | template<typename chr> | ||
43 | size_t strlen( const chr *pData ) | ||
44 | { | ||
45 | for( size_t tLen = 0;; ++tLen ) | ||
46 | { | ||
47 | if( pData[tLen] == (chr)0 ) | ||
48 | return tLen; | ||
49 | } | ||
50 | return -1; | ||
51 | } | ||
52 | |||
53 | template<char> | ||
54 | size_t strlen( const char *pData ) | ||
55 | { | ||
56 | return ::strlen( pData ); | ||
57 | } | ||
58 | |||
59 | template<typename chr> | ||
60 | int strncmp( const chr *a, const chr *b, size_t iLen ) | ||
61 | { | ||
62 | for( size_t iPos = 0; iPos < iLen; iPos++ ) | ||
63 | { | ||
64 | if( a[iPos] != b[iPos] ) | ||
65 | { | ||
66 | return a[iPos]-b[iPos]; | ||
67 | } | ||
68 | } | ||
69 | return 0; | ||
70 | } | ||
71 | |||
72 | template<char> | ||
73 | int strncmp( const char *a, const char *b, size_t iLen ) | ||
74 | { | ||
75 | return ::strncmp( a, b, iLen ); | ||
76 | } | ||
77 | |||
42 | template<typename chr, int nMinSize, typename chralloc, typename chunkalloc> | 78 | template<typename chr, int nMinSize, typename chralloc, typename chunkalloc> |
43 | struct FStringCore | 79 | struct FStringCore |
44 | { | 80 | { |
@@ -1044,7 +1080,7 @@ namespace Bu | |||
1044 | */ | 1080 | */ |
1045 | void insert( long nPos, const chr *pData ) | 1081 | void insert( long nPos, const chr *pData ) |
1046 | { | 1082 | { |
1047 | insert( nPos, pData, strlen( pData ) ); | 1083 | insert( nPos, pData, Bu::strlen( pData ) ); |
1048 | } | 1084 | } |
1049 | 1085 | ||
1050 | void remove( long nPos, long nLen ) | 1086 | void remove( long nPos, long nLen ) |
@@ -1170,13 +1206,13 @@ namespace Bu | |||
1170 | if( iStart < 0 ) | 1206 | if( iStart < 0 ) |
1171 | iStart = 0; | 1207 | iStart = 0; |
1172 | if( iStart >= core->nLength ) | 1208 | if( iStart >= core->nLength ) |
1173 | return ""; | 1209 | return (const chr[]){(chr)0}; |
1174 | if( iSize < 0 ) | 1210 | if( iSize < 0 ) |
1175 | iSize = core->nLength; | 1211 | iSize = core->nLength; |
1176 | if( iStart+iSize > core->nLength ) | 1212 | if( iStart+iSize > core->nLength ) |
1177 | iSize = core->nLength-iStart; | 1213 | iSize = core->nLength-iStart; |
1178 | if( iSize == 0 ) | 1214 | if( iSize == 0 ) |
1179 | return ""; | 1215 | return (const chr[]){(chr)0}; |
1180 | 1216 | ||
1181 | flatten(); | 1217 | flatten(); |
1182 | MyType ret( core->pFirst->pData+iStart, iSize ); | 1218 | MyType ret( core->pFirst->pData+iStart, iSize ); |
@@ -1438,11 +1474,11 @@ namespace Bu | |||
1438 | wordexp_t result; | 1474 | wordexp_t result; |
1439 | 1475 | ||
1440 | /* Expand the string for the program to run. */ | 1476 | /* Expand the string for the program to run. */ |
1441 | switch (wordexp (core->pFirst->pData, &result, 0)) | 1477 | switch (wordexp ((char *)core->pFirst->pData, &result, 0)) |
1442 | { | 1478 | { |
1443 | case 0: /* Successful. */ | 1479 | case 0: /* Successful. */ |
1444 | { | 1480 | { |
1445 | set( result.we_wordv[0] ); | 1481 | set( (chr *)result.we_wordv[0] ); |
1446 | wordfree( &result ); | 1482 | wordfree( &result ); |
1447 | return; | 1483 | return; |
1448 | } | 1484 | } |
@@ -1954,7 +1990,7 @@ namespace Bu | |||
1954 | long iLen = vsnprintf( NULL, 0, sFrmt, ap ); | 1990 | long iLen = vsnprintf( NULL, 0, sFrmt, ap ); |
1955 | 1991 | ||
1956 | Chunk *pNew = core->newChunk( iLen ); | 1992 | Chunk *pNew = core->newChunk( iLen ); |
1957 | vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); | 1993 | vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap ); |
1958 | core->appendChunk( pNew ); | 1994 | core->appendChunk( pNew ); |
1959 | 1995 | ||
1960 | va_end( ap ); | 1996 | va_end( ap ); |
@@ -1971,7 +2007,7 @@ namespace Bu | |||
1971 | long iLen = vsnprintf( NULL, 0, sFrmt, ap ); | 2007 | long iLen = vsnprintf( NULL, 0, sFrmt, ap ); |
1972 | 2008 | ||
1973 | Chunk *pNew = core->newChunk( iLen ); | 2009 | Chunk *pNew = core->newChunk( iLen ); |
1974 | vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); | 2010 | vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap ); |
1975 | core->appendChunk( pNew ); | 2011 | core->appendChunk( pNew ); |
1976 | 2012 | ||
1977 | va_end( ap ); | 2013 | va_end( ap ); |
@@ -1988,7 +2024,7 @@ namespace Bu | |||
1988 | long iLen = vsnprintf( NULL, 0, sFrmt, ap ); | 2024 | long iLen = vsnprintf( NULL, 0, sFrmt, ap ); |
1989 | 2025 | ||
1990 | Chunk *pNew = core->newChunk( iLen ); | 2026 | Chunk *pNew = core->newChunk( iLen ); |
1991 | vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); | 2027 | vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap ); |
1992 | core->prependChunk( pNew ); | 2028 | core->prependChunk( pNew ); |
1993 | 2029 | ||
1994 | va_end( ap ); | 2030 | va_end( ap ); |
diff --git a/src/utfstring.cpp b/src/utfstring.cpp new file mode 100644 index 0000000..ae5efaf --- /dev/null +++ b/src/utfstring.cpp | |||
@@ -0,0 +1,29 @@ | |||
1 | #include "bu/utfstring.h" | ||
2 | |||
3 | template class Bu::FBasicString<short>; | ||
4 | |||
5 | template<> uint32_t Bu::__calcHashCode<Bu::UtfString>( const Bu::UtfString &k ) | ||
6 | { | ||
7 | long j, sz = k.getSize()*2; | ||
8 | const char *s = (const char *)k.getStr(); | ||
9 | |||
10 | long nPos = 0; | ||
11 | for( j = 0; j < sz; j++, s++ ) | ||
12 | { | ||
13 | nPos = *s + (nPos << 6) + (nPos << 16) - nPos; | ||
14 | } | ||
15 | |||
16 | return nPos; | ||
17 | } | ||
18 | |||
19 | template<> bool Bu::__cmpHashKeys<Bu::UtfString>( | ||
20 | const Bu::UtfString &a, const Bu::UtfString &b ) | ||
21 | { | ||
22 | return a == b; | ||
23 | } | ||
24 | |||
25 | template<> void Bu::__tracer_format<Bu::UtfString>( const Bu::UtfString &v ) | ||
26 | { | ||
27 | printf("(%ld)\"%s\"", v.getSize(), (const char *)v.getStr() ); | ||
28 | } | ||
29 | |||
diff --git a/src/utfstring.h b/src/utfstring.h new file mode 100644 index 0000000..bbacb74 --- /dev/null +++ b/src/utfstring.h | |||
@@ -0,0 +1,24 @@ | |||
1 | #ifndef BU_UTF_STRING_H | ||
2 | #define BU_UTF_STRING_H | ||
3 | |||
4 | #include "bu/fbasicstring.h" | ||
5 | |||
6 | namespace Bu | ||
7 | { | ||
8 | typedef FBasicString<short> UtfString; | ||
9 | |||
10 | template<typename T> | ||
11 | uint32_t __calcHashCode( const T &k ); | ||
12 | |||
13 | template<typename T> | ||
14 | bool __cmpHashKeys( const T &a, const T &b ); | ||
15 | |||
16 | template<> uint32_t __calcHashCode<UtfString>( const UtfString &k ); | ||
17 | template<> bool __cmpHashKeys<UtfString>( | ||
18 | const UtfString &a, const UtfString &b ); | ||
19 | |||
20 | template<typename t> void __tracer_format( const t &v ); | ||
21 | template<> void __tracer_format<UtfString>( const UtfString &v ); | ||
22 | } | ||
23 | |||
24 | #endif | ||