From 7c335ede527eaf4a3053ef35b1299141d34aaf40 Mon Sep 17 00:00:00 2001
From: Mike Buland <eichlan@xagasoft.com>
Date: Fri, 19 Nov 2010 05:54:14 +0000
Subject: I now think that this may not work out at all.  It looks like if we
 want proper Unicode handling we'll need to implement a series of codecs and
 converters as well as tables of codepages and lookups.  It'll be interesting,
 I guess, but it makes me care a lot less about proper encoding.  Anyway,
 UtfString uses shorts instead of chars, so it's a step in the right
 direction, but still not enough to be able to handle proper UTF-16 encoding,
 maybe UCS-2 encoding, but... ...that's lame.  Bu::FBasicString has been
 generalized a bit with optimizations from libc for char based strings.  It
 also, unfortunately, still uses char-only functions in several places; those
 all rely on casting strings to char at the moment just to get the thing to
 compile.  Basically, it's not a good UTF-16 solution yet, and it may never be
 one while remaining compatible with char-based strings.

---
 src/utfstring.cpp | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100644 src/utfstring.cpp

(limited to 'src/utfstring.cpp')
diff --git a/src/utfstring.cpp b/src/utfstring.cpp
new file mode 100644
index 0000000..ae5efaf
--- /dev/null
+++ b/src/utfstring.cpp
@@ -0,0 +1,29 @@
+#include "bu/utfstring.h"
+
+template class Bu::FBasicString<short>;	// explicit instantiation for short elements
+
+// Hashes the string's raw bytes as h = c + h*65599 (the two shifts minus h).
+template<> uint32_t Bu::__calcHashCode<Bu::UtfString>( const Bu::UtfString &k )
+{
+	const long sz = k.getSize()*2;	// byte count, assuming 2-byte shorts
+	const char *s = (const char *)k.getStr();
+	uint32_t nPos = 0;	// unsigned: wraps mod 2^32 instead of signed-overflow UB
+	for( long j = 0; j < sz; j++, s++ )
+	{
+		// Widen through int so a negative plain char contributes as before.
+		nPos = (uint32_t)(int)*s + (nPos << 6) + (nPos << 16) - nPos;
+	}
+	return nPos;
+}
+
+// __cmpHashKeys specialised for UtfString: true exactly when a == b.
+template<> bool Bu::__cmpHashKeys<Bu::UtfString>( const Bu::UtfString &a, const Bu::UtfString &b )
+{
+	return ( a == b );
+}
+
+// Tracer hook: prints (size)"bytes". NOTE(review): %s stops at a zero byte.
+template<> void Bu::__tracer_format<Bu::UtfString>( const Bu::UtfString &v )
+{
+	printf("(%ld)\"%s\"", v.getSize(), (const char *)v.getStr() );
+}
-- 
cgit v1.2.3