From 469bbcf0701e1eb8a6670c23145b0da87357e178 Mon Sep 17 00:00:00 2001
From: Mike Buland <eichlan@xagasoft.com>
Date: Sun, 25 Mar 2012 20:00:08 +0000
Subject: Code is all reorganized.  We're about ready to release.  I should
 write up a little explenation of the arrangement.

---
 src/utfstring.h | 174 --------------------------------------------------------
 1 file changed, 174 deletions(-)
 delete mode 100644 src/utfstring.h

(limited to 'src/utfstring.h')

diff --git a/src/utfstring.h b/src/utfstring.h
deleted file mode 100644
index 477e272..0000000
--- a/src/utfstring.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (C) 2007-2011 Xagasoft, All rights reserved.
- *
- * This file is part of the libbu++ library and is released under the
- * terms of the license contained in the file LICENSE.
- */
-
-#ifndef BU_UTF_STRING_H
-#define BU_UTF_STRING_H
-
-#include <stdint.h>
-#include "bu/array.h"
-
-namespace Bu
-{
-	class String;
-	class Stream;
-
-	/**
-	 * UtfChar isn't actually a character, unicode specifies "code points" not
-	 * characters.  The main reason for this is that not all code points define
-	 * usable characters.  Some control text directionality, some apply
-	 * properties to other code points which are characters.  However, most of
-	 * these distinctions are only important when implementing displays that
-	 * comply with the Unicode standard fully.
-	 */
-	typedef uint32_t UtfChar;
-
-	/**
-	 * A unicode string.  This class represents a string of unicode code points.
-	 * Every character in unicode can be represented with 21 bits, but we don't
-	 * have a datatype that's 24 bits long, so we return all code points as a
-	 * 32 bit unsigned value represented by Bu::UtfChar.  However, the UtfString
-	 * class, for efficiency purposes doesn't store 32 bit values internally.
-	 * It represents all code points in the native utf16 encodeng.  This means
-	 * that it may be very difficult to quickly determine the length of a
-	 * UtfString in code points.  Unlike many Unicode handling systems, this
-	 * one actually works with complete code points.  When using this class you
-	 * don't ever have to know about the inner workings of the different
-	 * encoding schemes.  All of the data is dealt with as whole code points.
-	 *
-	 * As an aside, this means that when encoding a UtfString to a Utf16
-	 * encoding that matches your archetecture this operation will be very
-	 * fast since it will effectively be a raw dump of the internal data
-	 * structures.  However, it is highly reccomended that you DO NOT use the
-	 * little endian encodings if you can possibly avoid it.  They are not
-	 * reccomended by the Unicode Consortium and are mainly supported as a
-	 * means of communicating with other systems that encode their data
-	 * incorrectly.  That said, whenever UtfString encodes the contained string
-	 * it always includes a BOM at the begining (the byte order marker) so that
-	 * proper byte order can be easily determined by the program reading the
-	 * data.
-	 *
-	 *@todo Investigate http://www.unicode.org/reports/tr6/ for compression.
-	 */
-	class UtfString
-	{
-	public:
-		enum Encoding
-		{
-			Utf8,
-			Utf16,
-			Utf16be,
-			Utf16le,
-			Utf32,
-			Utf32be,
-			Utf32le,
-			Ucs2,
-			Ucs4,
-			GuessEncoding
-		};
-
-		UtfString();
-		UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
-		virtual ~UtfString();
-
-		class iterator
-		{
-		private:
-			iterator( UtfString *pSrc, int iCodePos ) :
-				pSrc( pSrc ), iCodePos( iCodePos )
-			{
-			}
-
-		public:
-			iterator() :
-				pSrc( NULL ), iCodePos( 0 )
-			{
-			}
-
-			UtfChar operator*()
-			{
-				if( !pSrc )
-					throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
-				return pSrc->nextChar( iCodePos );
-			}
-
-		private:
-			UtfString *pSrc;
-			int iCodePos;
-		};
-
-		/**
-		 * Append a UtfChar (A unicode code point) to the string.  This can be
-		 * any valid code point, and is just the value of the code point, no
-		 * encoding necessary.
-		 */
-		void append( UtfChar ch );
-
-		/**
-		 * Set the value of the entire string based on the given input and
-		 * encoding.  The default encoding is Utf8, which is compatible with
-		 * 7-bit ascii, so it's a great choice for setting UtfStrings from
-		 * string literals in code.
-		 */
-		void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
-
-		/**
-		 * This encodes the UtfString in the given encoding and outputs it to
-		 * the provided stream.  all Utf16 and Utf32 encodings will have the
-		 * correct BOM (byte order marker) at the begining.
-		 */
-		void write( Bu::Stream &sOut, Encoding eEnc=Utf8 );
-
-		/**
-		 * This encodes the UtfString in the given encoding and returns it as
-		 * a binary Bu::String.  Like write, this also includes the proper BOM
-		 * at the begining.
-		 */
-		Bu::String get( Encoding eEnc=Utf8 );
-
-		void debug();
-
-		/**
-		 * This may or may not stick around, given an index, this returns a
-		 * codepoint, however there isn't necesarilly a 1:1 ratio between
-		 * indexes and code points.
-		 */
-		UtfChar get( int iIndex );
-
-		/**
-		 * This is what to use if you want to iterate through a section of the
-		 * UtfString and you want to use a numerical index.  In most cases it
-		 * will be much easier to use an iterator, though.  Given an index this
-		 * will return the codepoint at that position and increment iIndex an
-		 * appropriate amount for it to point to the next code point.
-		 */
-		UtfChar nextChar( int &iIndex );
-
-	private:
-		void append16( uint16_t i ) { aData.append( i ); }
-
-		void setUtf8( const Bu::String &sInput );
-		void setUtf16( const Bu::String &sInput );
-		void setUtf16be( const Bu::String &sInput );
-		void setUtf16le( const Bu::String &sInput );
-		void setUtf32( const Bu::String &sInput );
-		void setUtf32be( const Bu::String &sInput );
-		void setUtf32le( const Bu::String &sInput );
-		
-		void writeUtf8( Bu::Stream &sOut );
-		void writeUtf16be( Bu::Stream &sOut );
-		void writeUtf16le( Bu::Stream &sOut );
-		void writeUtf32be( Bu::Stream &sOut );
-		void writeUtf32le( Bu::Stream &sOut );
-
-	private:
-		Bu::Array<uint16_t> aData;
-		int iRawLen;
-		int iCharLen;
-	};
-};
-
-#endif
-- 
cgit v1.2.3