aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2010-11-19 05:54:14 +0000
committerMike Buland <eichlan@xagasoft.com>2010-11-19 05:54:14 +0000
commit7c335ede527eaf4a3053ef35b1299141d34aaf40 (patch)
tree5ececc9181090cce540a5e4fbe28eda97e4e5c2b
parent2fe32ba19571ff775a55f61eca355a46f269393e (diff)
downloadlibbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.tar.gz
libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.tar.bz2
libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.tar.xz
libbu++-7c335ede527eaf4a3053ef35b1299141d34aaf40.zip
I now think that this may not work out at all. It looks like, if we want proper
Unicode handling, we'll need to implement a series of codecs and converters, as well as tables of codepages and lookups. It'll be interesting, I guess, but it makes me care a lot less about proper encoding. Anyway, UtfString uses shorts instead of chars, so it's a step in the right direction, but that is still not enough to handle proper UTF-16 encoding; it could maybe handle UCS-2, but that's lame. Bu::FBasicString has been generalized a bit, with optimizations from libc for char-based strings. Unfortunately, it also still uses char-only functions in several places; for now those rely on casting the strings to char just to get the thing to compile. Basically, it's not a good UTF-16 solution yet, and it may never be one while remaining compatible with char-based strings.
-rw-r--r--src/fbasicstring.h52
-rw-r--r--src/utfstring.cpp29
-rw-r--r--src/utfstring.h24
3 files changed, 97 insertions, 8 deletions
diff --git a/src/fbasicstring.h b/src/fbasicstring.h
index 7167f4a..af11bb2 100644
--- a/src/fbasicstring.h
+++ b/src/fbasicstring.h
@@ -39,6 +39,42 @@ namespace Bu
39 template< typename chr, int nMinSize, typename chralloc, 39 template< typename chr, int nMinSize, typename chralloc,
40 typename chunkalloc> class FBasicString; 40 typename chunkalloc> class FBasicString;
41 41
/**
 * Count the elements of a zero-terminated string of any character type.
 *@param pData Pointer to the first element of a string that is terminated
 *		by an element equal to (chr)0.
 *@returns The number of elements that precede the terminator.
 */
template<typename chr>
size_t strlen( const chr *pData )
{
	size_t tLen = 0;
	while( pData[tLen] != static_cast<chr>( 0 ) )
		++tLen;
	return tLen;
}

/**
 * Explicit specialization for plain char strings that defers to the C
 * library's strlen.  This has to be a real specialization (template<>): a
 * template<char> header declares an unrelated template with a non-type
 * parameter that can never be deduced, so it would never be selected.  It is
 * marked inline because a full specialization defined in a header is an
 * ordinary function as far as the one-definition rule is concerned.
 *@param pData Pointer to a null-terminated char string.
 *@returns The number of chars that precede the terminator.
 */
template<>
inline size_t strlen<char>( const char *pData )
{
	return ::strlen( pData );
}
58
/**
 * Compare at most iLen elements of two zero-terminated strings of any
 * character type, following the contract of the C library's strncmp.
 *@param a First string.
 *@param b Second string.
 *@param iLen Maximum number of elements to compare.
 *@returns A negative value if a sorts before b, a positive value if a sorts
 *		after b, and zero if the compared ranges are equal.
 */
template<typename chr>
int strncmp( const chr *a, const chr *b, size_t iLen )
{
	for( size_t iPos = 0; iPos < iLen; ++iPos )
	{
		if( a[iPos] != b[iPos] )
		{
			// Report only the sign: subtracting the elements can overflow or
			// be truncated to int when chr is a wide type.
			return ( a[iPos] < b[iPos] ) ? -1 : 1;
		}
		// Both elements are equal here, so one check covers both strings.
		// Stop at the shared terminator, like ::strncmp does, instead of
		// reading whatever follows the end of the strings.
		if( a[iPos] == static_cast<chr>( 0 ) )
			break;
	}
	return 0;
}

/**
 * Explicit specialization for plain char strings that defers to the C
 * library's strncmp.  This has to be a real specialization (template<>): a
 * template<char> header declares an unrelated template with a non-type
 * parameter that can never be deduced, so it would never be selected.  It is
 * marked inline because a full specialization defined in a header is an
 * ordinary function as far as the one-definition rule is concerned.
 *@param a First string.
 *@param b Second string.
 *@param iLen Maximum number of chars to compare.
 *@returns The result of ::strncmp( a, b, iLen ).
 */
template<>
inline int strncmp<char>( const char *a, const char *b, size_t iLen )
{
	return ::strncmp( a, b, iLen );
}
77
42 template<typename chr, int nMinSize, typename chralloc, typename chunkalloc> 78 template<typename chr, int nMinSize, typename chralloc, typename chunkalloc>
43 struct FStringCore 79 struct FStringCore
44 { 80 {
@@ -1044,7 +1080,7 @@ namespace Bu
1044 */ 1080 */
1045 void insert( long nPos, const chr *pData ) 1081 void insert( long nPos, const chr *pData )
1046 { 1082 {
1047 insert( nPos, pData, strlen( pData ) ); 1083 insert( nPos, pData, Bu::strlen( pData ) );
1048 } 1084 }
1049 1085
1050 void remove( long nPos, long nLen ) 1086 void remove( long nPos, long nLen )
@@ -1170,13 +1206,13 @@ namespace Bu
1170 if( iStart < 0 ) 1206 if( iStart < 0 )
1171 iStart = 0; 1207 iStart = 0;
1172 if( iStart >= core->nLength ) 1208 if( iStart >= core->nLength )
1173 return ""; 1209 return (const chr[]){(chr)0};
1174 if( iSize < 0 ) 1210 if( iSize < 0 )
1175 iSize = core->nLength; 1211 iSize = core->nLength;
1176 if( iStart+iSize > core->nLength ) 1212 if( iStart+iSize > core->nLength )
1177 iSize = core->nLength-iStart; 1213 iSize = core->nLength-iStart;
1178 if( iSize == 0 ) 1214 if( iSize == 0 )
1179 return ""; 1215 return (const chr[]){(chr)0};
1180 1216
1181 flatten(); 1217 flatten();
1182 MyType ret( core->pFirst->pData+iStart, iSize ); 1218 MyType ret( core->pFirst->pData+iStart, iSize );
@@ -1438,11 +1474,11 @@ namespace Bu
1438 wordexp_t result; 1474 wordexp_t result;
1439 1475
1440 /* Expand the string for the program to run. */ 1476 /* Expand the string for the program to run. */
1441 switch (wordexp (core->pFirst->pData, &result, 0)) 1477 switch (wordexp ((char *)core->pFirst->pData, &result, 0))
1442 { 1478 {
1443 case 0: /* Successful. */ 1479 case 0: /* Successful. */
1444 { 1480 {
1445 set( result.we_wordv[0] ); 1481 set( (chr *)result.we_wordv[0] );
1446 wordfree( &result ); 1482 wordfree( &result );
1447 return; 1483 return;
1448 } 1484 }
@@ -1954,7 +1990,7 @@ namespace Bu
1954 long iLen = vsnprintf( NULL, 0, sFrmt, ap ); 1990 long iLen = vsnprintf( NULL, 0, sFrmt, ap );
1955 1991
1956 Chunk *pNew = core->newChunk( iLen ); 1992 Chunk *pNew = core->newChunk( iLen );
1957 vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); 1993 vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap );
1958 core->appendChunk( pNew ); 1994 core->appendChunk( pNew );
1959 1995
1960 va_end( ap ); 1996 va_end( ap );
@@ -1971,7 +2007,7 @@ namespace Bu
1971 long iLen = vsnprintf( NULL, 0, sFrmt, ap ); 2007 long iLen = vsnprintf( NULL, 0, sFrmt, ap );
1972 2008
1973 Chunk *pNew = core->newChunk( iLen ); 2009 Chunk *pNew = core->newChunk( iLen );
1974 vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); 2010 vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap );
1975 core->appendChunk( pNew ); 2011 core->appendChunk( pNew );
1976 2012
1977 va_end( ap ); 2013 va_end( ap );
@@ -1988,7 +2024,7 @@ namespace Bu
1988 long iLen = vsnprintf( NULL, 0, sFrmt, ap ); 2024 long iLen = vsnprintf( NULL, 0, sFrmt, ap );
1989 2025
1990 Chunk *pNew = core->newChunk( iLen ); 2026 Chunk *pNew = core->newChunk( iLen );
1991 vsnprintf( pNew->pData, iLen+1, sFrmt, ap ); 2027 vsnprintf( (char *)pNew->pData, iLen+1, sFrmt, ap );
1992 core->prependChunk( pNew ); 2028 core->prependChunk( pNew );
1993 2029
1994 va_end( ap ); 2030 va_end( ap );
diff --git a/src/utfstring.cpp b/src/utfstring.cpp
new file mode 100644
index 0000000..ae5efaf
--- /dev/null
+++ b/src/utfstring.cpp
@@ -0,0 +1,29 @@
1#include "bu/utfstring.h"
2
3template class Bu::FBasicString<short>;
4
5template<> uint32_t Bu::__calcHashCode<Bu::UtfString>( const Bu::UtfString &k )
6{
7 long j, sz = k.getSize()*2;
8 const char *s = (const char *)k.getStr();
9
10 long nPos = 0;
11 for( j = 0; j < sz; j++, s++ )
12 {
13 nPos = *s + (nPos << 6) + (nPos << 16) - nPos;
14 }
15
16 return nPos;
17}
18
19template<> bool Bu::__cmpHashKeys<Bu::UtfString>(
20 const Bu::UtfString &a, const Bu::UtfString &b )
21{
22 return a == b;
23}
24
25template<> void Bu::__tracer_format<Bu::UtfString>( const Bu::UtfString &v )
26{
27 printf("(%ld)\"%s\"", v.getSize(), (const char *)v.getStr() );
28}
29
diff --git a/src/utfstring.h b/src/utfstring.h
new file mode 100644
index 0000000..bbacb74
--- /dev/null
+++ b/src/utfstring.h
@@ -0,0 +1,24 @@
1#ifndef BU_UTF_STRING_H
2#define BU_UTF_STRING_H
3
4#include "bu/fbasicstring.h"
5
6namespace Bu
7{
 /**
  * String type whose elements are shorts instead of chars, built on
  * FBasicString.
  */
8 typedef FBasicString<short> UtfString;
9
 // Primary hash-function templates, declared here so that the UtfString
 // specializations below can be declared.
10 template<typename T>
11 uint32_t __calcHashCode( const T &k );
12
13 template<typename T>
14 bool __cmpHashKeys( const T &a, const T &b );
15
 // UtfString specializations of the hash functions; they are defined in
 // utfstring.cpp.
16 template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
17 template<> bool __cmpHashKeys<UtfString>(
18 const UtfString &a, const UtfString &b );
19
 // Tracer output hook and its UtfString specialization, which is also
 // defined in utfstring.cpp.
20 template<typename t> void __tracer_format( const t &v );
21 template<> void __tracer_format<UtfString>( const UtfString &v );
22}
23
24#endif