From ec05778d5718a7912e506764d443a78d6a6179e3 Mon Sep 17 00:00:00 2001
From: Mike Buland <eichlan@xagasoft.com>
Date: Mon, 5 Nov 2012 22:41:51 +0000
Subject: Converted tabs to spaces with tabconv.

---
 src/unstable/utfstring.h | 482 +++++++++++++++++++++++------------------------
 1 file changed, 241 insertions(+), 241 deletions(-)

(limited to 'src/unstable/utfstring.h')

diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h
index 1bd4cce..560faae 100644
--- a/src/unstable/utfstring.h
+++ b/src/unstable/utfstring.h
@@ -13,247 +13,247 @@
 
 namespace Bu
 {
-	class String;
-	class Stream;
-
-	/**
-	 * UtfChar isn't actually a character, unicode specifies "code points" not
-	 * characters.  The main reason for this is that not all code points define
-	 * usable characters.  Some control text directionality, some apply
-	 * properties to other code points which are characters.  However, most of
-	 * these distinctions are only important when implementing displays that
-	 * comply with the Unicode standard fully.
-	 */
-	typedef uint32_t UtfChar;
-
-	/**
-	 * A unicode string.  This class represents a string of unicode code points.
-	 * Every character in unicode can be represented with 21 bits, but we don't
-	 * have a datatype that's 24 bits long, so we return all code points as a
-	 * 32 bit unsigned value represented by Bu::UtfChar.  However, the UtfString
-	 * class, for efficiency purposes doesn't store 32 bit values internally.
-	 * It represents all code points in the native utf16 encodeng.  This means
-	 * that it may be very difficult to quickly determine the length of a
-	 * UtfString in code points.  Unlike many Unicode handling systems, this
-	 * one actually works with complete code points.  When using this class you
-	 * don't ever have to know about the inner workings of the different
-	 * encoding schemes.  All of the data is dealt with as whole code points.
-	 *
-	 * As an aside, this means that when encoding a UtfString to a Utf16
-	 * encoding that matches your archetecture this operation will be very
-	 * fast since it will effectively be a raw dump of the internal data
-	 * structures.  However, it is highly reccomended that you DO NOT use the
-	 * little endian encodings if you can possibly avoid it.  They are not
-	 * reccomended by the Unicode Consortium and are mainly supported as a
-	 * means of communicating with other systems that encode their data
-	 * incorrectly.  That said, whenever UtfString encodes the contained string
-	 * it always includes a BOM at the begining (the byte order marker) so that
-	 * proper byte order can be easily determined by the program reading the
-	 * data.
-	 *
-	 *@todo Investigate http://www.unicode.org/reports/tr6/ for compression.
-	 */
-	class UtfString
-	{
-	public:
-		enum Encoding
-		{
-			Utf8,
-			Utf16,
-			Utf16be,
-			Utf16le,
-			Utf32,
-			Utf32be,
-			Utf32le,
-			Ucs2,
-			Ucs4,
-			GuessEncoding
-		};
-
-		UtfString();
-		UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
-		UtfString( const char *sInput, Encoding eEnc=Utf8 );
-		virtual ~UtfString();
-
-		class iterator
-		{
-		friend class UtfString;
-		private:
-			iterator( UtfString *pSrc, int iCodePos ) :
-				pSrc( pSrc ), iCodePos( iCodePos )
-			{
-			}
-
-		public:
-			iterator() :
-				pSrc( NULL ), iCodePos( 0 )
-			{
-			}
-
-			UtfChar operator*()
-			{
-				if( !pSrc )
-					throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
-				return pSrc->get( iCodePos );
-			}
-
-			iterator operator++()
-			{
-				pSrc->nextChar( iCodePos );
-				return *this;
-			}
-
-			iterator operator++( int )
-			{
-				pSrc->nextChar( iCodePos );
-				return *this;
-			}
-
-			operator bool() const
-			{
-				return iCodePos < pSrc->aData.getSize();
-			}
-
-		private:
-			UtfString *pSrc;
-			int iCodePos;
-		};
-		
-		class const_iterator
-		{
-		friend class UtfString;
-		private:
-			const_iterator( const UtfString *pSrc, int iCodePos ) :
-				pSrc( pSrc ), iCodePos( iCodePos )
-			{
-			}
-
-		public:
-			const_iterator() :
-				pSrc( NULL ), iCodePos( 0 )
-			{
-			}
-
-			UtfChar operator*()
-			{
-				if( !pSrc )
-					throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
-				return pSrc->get( iCodePos );
-			}
-
-			const_iterator operator++()
-			{
-				pSrc->nextChar( iCodePos );
-				return *this;
-			}
-
-			const_iterator operator++( int )
-			{
-				pSrc->nextChar( iCodePos );
-				return *this;
-			}
-
-			operator bool() const
-			{
-				return iCodePos < pSrc->aData.getSize();
-			}
-
-		private:
-			const UtfString *pSrc;
-			int iCodePos;
-		};
-
-		iterator begin();
-		const_iterator begin() const;
-
-		/**
-		 * Append a UtfChar (A unicode code point) to the string.  This can be
-		 * any valid code point, and is just the value of the code point, no
-		 * encoding necessary.
-		 */
-		void append( UtfChar ch );
-
-		void append( const UtfString &rSrc );
-
-		/**
-		 * Set the value of the entire string based on the given input and
-		 * encoding.  The default encoding is Utf8, which is compatible with
-		 * 7-bit ascii, so it's a great choice for setting UtfStrings from
-		 * string literals in code.
-		 */
-		void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
-
-		/**
-		 * This encodes the UtfString in the given encoding and outputs it to
-		 * the provided stream.  all Utf16 and Utf32 encodings will have the
-		 * correct BOM (byte order marker) at the begining.
-		 */
-		void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
-
-		/**
-		 * This encodes the UtfString in the given encoding and returns it as
-		 * a binary Bu::String.  Like write, this also includes the proper BOM
-		 * at the begining.
-		 */
-		Bu::String get( Encoding eEnc=Utf8 ) const;
-
-		void debug() const;
-
-		/**
-		 * This may or may not stick around, given an index, this returns a
-		 * codepoint, however there isn't necesarilly a 1:1 ratio between
-		 * indexes and code points.
-		 */
-		UtfChar get( int iIndex ) const;
-
-		/**
-		 * This is what to use if you want to iterate through a section of the
-		 * UtfString and you want to use a numerical index.  In most cases it
-		 * will be much easier to use an iterator, though.  Given an index this
-		 * will return the codepoint at that position and increment iIndex an
-		 * appropriate amount for it to point to the next code point.
-		 */
-		UtfChar nextChar( int &iIndex ) const;
-
-		bool operator==( const Bu::UtfString &rhs ) const;
-		UtfString &operator+=( const Bu::UtfString &rhs );
-		UtfString &operator+=( const UtfChar &rhs );
-
-	private:
-		void append16( uint16_t i ) { aData.append( i ); }
-
-		void setUtf8( const Bu::String &sInput );
-		void setUtf16( const Bu::String &sInput );
-		void setUtf16be( const Bu::String &sInput );
-		void setUtf16le( const Bu::String &sInput );
-		void setUtf32( const Bu::String &sInput );
-		void setUtf32be( const Bu::String &sInput );
-		void setUtf32le( const Bu::String &sInput );
-		
-		void writeUtf8( Bu::Stream &sOut ) const;
-		void writeUtf16be( Bu::Stream &sOut ) const;
-		void writeUtf16le( Bu::Stream &sOut ) const;
-		void writeUtf32be( Bu::Stream &sOut ) const;
-		void writeUtf32le( Bu::Stream &sOut ) const;
-
-	private:
-		Bu::Array<uint16_t> aData;
-		int iRawLen;
-		int iCharLen;
-	};
-
-	//
-	// Hash support
-	//
-	template<typename T>
-	uint32_t __calcHashCode( const T &k );
-
-	template<typename T>
-	bool __cmpHashKeys( const T &a, const T &b );
-
-	template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
-	template<> bool __cmpHashKeys<UtfString>(
-		const UtfString &a, const UtfString &b );
+    class String;
+    class Stream;
+
+    /**
+     * UtfChar isn't actually a character, unicode specifies "code points" not
+     * characters.  The main reason for this is that not all code points define
+     * usable characters.  Some control text directionality, some apply
+     * properties to other code points which are characters.  However, most of
+     * these distinctions are only important when implementing displays that
+     * comply with the Unicode standard fully.
+     */
+    typedef uint32_t UtfChar;
+
+    /**
+     * A unicode string.  This class represents a string of unicode code points.
+     * Every character in unicode can be represented with 21 bits, but we don't
+     * have a datatype that's 24 bits long, so we return all code points as a
+     * 32 bit unsigned value represented by Bu::UtfChar.  However, the UtfString
+     * class, for efficiency purposes, doesn't store 32 bit values internally.
+     * It represents all code points in the native utf16 encoding.  This means
+     * that it may be very difficult to quickly determine the length of a
+     * UtfString in code points.  Unlike many Unicode handling systems, this
+     * one actually works with complete code points.  When using this class you
+     * don't ever have to know about the inner workings of the different
+     * encoding schemes.  All of the data is dealt with as whole code points.
+     *
+     * As an aside, this means that when encoding a UtfString to a Utf16
+     * encoding that matches your architecture this operation will be very
+     * fast since it will effectively be a raw dump of the internal data
+     * structures.  However, it is highly recommended that you DO NOT use the
+     * little endian encodings if you can possibly avoid it.  They are not
+     * recommended by the Unicode Consortium and are mainly supported as a
+     * means of communicating with other systems that encode their data
+     * incorrectly.  That said, whenever UtfString encodes the contained string
+     * it always includes a BOM (byte order marker) at the beginning so that
+     * proper byte order can be easily determined by the program reading the
+     * data.
+     *
+     *@todo Investigate http://www.unicode.org/reports/tr6/ for compression.
+     */
+    class UtfString
+    {
+    public:
+        enum Encoding
+        {
+            Utf8,
+            Utf16,
+            Utf16be,
+            Utf16le,
+            Utf32,
+            Utf32be,
+            Utf32le,
+            Ucs2,
+            Ucs4,
+            GuessEncoding
+        };
+
+        UtfString();
+        UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
+        UtfString( const char *sInput, Encoding eEnc=Utf8 );
+        virtual ~UtfString();
+
+        class iterator
+        {
+        friend class UtfString;
+        private:
+            iterator( UtfString *pSrc, int iCodePos ) :
+                pSrc( pSrc ), iCodePos( iCodePos )
+            {
+            }
+
+        public:
+            iterator() :
+                pSrc( NULL ), iCodePos( 0 )
+            {
+            }
+
+            UtfChar operator*()
+            {
+                if( !pSrc )
+                    throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
+                return pSrc->get( iCodePos );
+            }
+
+            iterator operator++()
+            {
+                pSrc->nextChar( iCodePos );
+                return *this;
+            }
+
+            iterator operator++( int )
+            {
+                pSrc->nextChar( iCodePos );
+                return *this;
+            }
+
+            operator bool() const
+            {
+                return iCodePos < pSrc->aData.getSize();
+            }
+
+        private:
+            UtfString *pSrc;
+            int iCodePos;
+        };
+        
+        class const_iterator
+        {
+        friend class UtfString;
+        private:
+            const_iterator( const UtfString *pSrc, int iCodePos ) :
+                pSrc( pSrc ), iCodePos( iCodePos )
+            {
+            }
+
+        public:
+            const_iterator() :
+                pSrc( NULL ), iCodePos( 0 )
+            {
+            }
+
+            UtfChar operator*()
+            {
+                if( !pSrc )
+                    throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
+                return pSrc->get( iCodePos );
+            }
+
+            const_iterator operator++()
+            {
+                pSrc->nextChar( iCodePos );
+                return *this;
+            }
+
+            const_iterator operator++( int )
+            {
+                pSrc->nextChar( iCodePos );
+                return *this;
+            }
+
+            operator bool() const
+            {
+                return iCodePos < pSrc->aData.getSize();
+            }
+
+        private:
+            const UtfString *pSrc;
+            int iCodePos;
+        };
+
+        iterator begin();
+        const_iterator begin() const;
+
+        /**
+         * Append a UtfChar (A unicode code point) to the string.  This can be
+         * any valid code point, and is just the value of the code point, no
+         * encoding necessary.
+         */
+        void append( UtfChar ch );
+
+        void append( const UtfString &rSrc );
+
+        /**
+         * Set the value of the entire string based on the given input and
+         * encoding.  The default encoding is Utf8, which is compatible with
+         * 7-bit ascii, so it's a great choice for setting UtfStrings from
+         * string literals in code.
+         */
+        void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
+
+        /**
+         * This encodes the UtfString in the given encoding and outputs it to
+         * the provided stream.  All Utf16 and Utf32 encodings will have the
+         * correct BOM (byte order marker) at the beginning.
+         */
+        void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
+
+        /**
+         * This encodes the UtfString in the given encoding and returns it as
+         * a binary Bu::String.  Like write, this also includes the proper BOM
+         * at the beginning.
+         */
+        Bu::String get( Encoding eEnc=Utf8 ) const;
+
+        void debug() const;
+
+        /**
+         * This may or may not stick around.  Given an index, this returns a
+         * codepoint; however, there isn't necessarily a 1:1 ratio between
+         * indexes and code points.
+         */
+        UtfChar get( int iIndex ) const;
+
+        /**
+         * This is what to use if you want to iterate through a section of the
+         * UtfString and you want to use a numerical index.  In most cases it
+         * will be much easier to use an iterator, though.  Given an index this
+         * will return the codepoint at that position and increment iIndex by
+         * the appropriate amount so that it points to the next code point.
+         */
+        UtfChar nextChar( int &iIndex ) const;
+
+        bool operator==( const Bu::UtfString &rhs ) const;
+        UtfString &operator+=( const Bu::UtfString &rhs );
+        UtfString &operator+=( const UtfChar &rhs );
+
+    private:
+        void append16( uint16_t i ) { aData.append( i ); }
+
+        void setUtf8( const Bu::String &sInput );
+        void setUtf16( const Bu::String &sInput );
+        void setUtf16be( const Bu::String &sInput );
+        void setUtf16le( const Bu::String &sInput );
+        void setUtf32( const Bu::String &sInput );
+        void setUtf32be( const Bu::String &sInput );
+        void setUtf32le( const Bu::String &sInput );
+        
+        void writeUtf8( Bu::Stream &sOut ) const;
+        void writeUtf16be( Bu::Stream &sOut ) const;
+        void writeUtf16le( Bu::Stream &sOut ) const;
+        void writeUtf32be( Bu::Stream &sOut ) const;
+        void writeUtf32le( Bu::Stream &sOut ) const;
+
+    private:
+        Bu::Array<uint16_t> aData;
+        int iRawLen;
+        int iCharLen;
+    };
+
+    //
+    // Hash support
+    //
+    template<typename T>
+    uint32_t __calcHashCode( const T &k );
+
+    template<typename T>
+    bool __cmpHashKeys( const T &a, const T &b );
+
+    template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
+    template<> bool __cmpHashKeys<UtfString>(
+        const UtfString &a, const UtfString &b );
 };
 
 #endif
-- 
cgit v1.2.3