Converted tabs to spaces with tabconv.

author: Mike Buland <eichlan@xagasoft.com> 2012-11-05 22:41:51 +0000
committer: Mike Buland <eichlan@xagasoft.com> 2012-11-05 22:41:51 +0000
commit: ec05778d5718a7912e506764d443a78d6a6179e3 (patch)
tree: 78a9a01532180030c095acefc45763f07c14edb8 /src/unstable/utfstring.h
parent: b20414ac1fe80a71a90601f4cd1767fa7014a9ba (diff)
download: libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.tar.gz
libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.tar.bz2
libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.tar.xz
libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.zip
1 files changed, 241 insertions, 241 deletions
diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h
index 1bd4cce..560faae 100644
--- a/src/unstable/utfstring.h
+++ b/src/unstable/utfstring.h
@@ -13,247 +13,247 @@
 namespace Bu
 {
-        class String;
+    class String;
-        class Stream;
+    class Stream;
-        /**
+    /**
-         * UtfChar isn't actually a character, unicode specifies "code points" not
+     * UtfChar isn't actually a character, unicode specifies "code points" not
-         * characters.  The main reason for this is that not all code points define
+     * characters.  The main reason for this is that not all code points define
-         * usable characters.  Some control text directionality, some apply
+     * usable characters.  Some control text directionality, some apply
-         * properties to other code points which are characters.  However, most of
+     * properties to other code points which are characters.  However, most of
-         * these distinctions are only important when implementing displays that
+     * these distinctions are only important when implementing displays that
-         * comply with the Unicode standard fully.
+     * comply with the Unicode standard fully.
-         */
+     */
-        typedef uint32_t UtfChar;
+    typedef uint32_t UtfChar;
-        /**
+    /**
-         * A unicode string.  This class represents a string of unicode code points.
+     * A unicode string.  This class represents a string of unicode code points.
-         * Every character in unicode can be represented with 21 bits, but we don't
+     * Every character in unicode can be represented with 21 bits, but we don't
-         * have a datatype that's 24 bits long, so we return all code points as a
+     * have a datatype that's 24 bits long, so we return all code points as a
-         * 32 bit unsigned value represented by Bu::UtfChar.  However, the UtfString
+     * 32 bit unsigned value represented by Bu::UtfChar.  However, the UtfString
-         * class, for efficiency purposes doesn't store 32 bit values internally.
+     * class, for efficiency purposes doesn't store 32 bit values internally.
-         * It represents all code points in the native utf16 encodeng.  This means
+     * It represents all code points in the native utf16 encoding.  This means
-         * that it may be very difficult to quickly determine the length of a
+     * that it may be very difficult to quickly determine the length of a
-         * UtfString in code points.  Unlike many Unicode handling systems, this
+     * UtfString in code points.  Unlike many Unicode handling systems, this
-         * one actually works with complete code points.  When using this class you
+     * one actually works with complete code points.  When using this class you
-         * don't ever have to know about the inner workings of the different
+     * don't ever have to know about the inner workings of the different
-         * encoding schemes.  All of the data is dealt with as whole code points.
+     * encoding schemes.  All of the data is dealt with as whole code points.
-         *
+     *
-         * As an aside, this means that when encoding a UtfString to a Utf16
+     * As an aside, this means that when encoding a UtfString to a Utf16
-         * encoding that matches your archetecture this operation will be very
+     * encoding that matches your architecture this operation will be very
-         * fast since it will effectively be a raw dump of the internal data
+     * fast since it will effectively be a raw dump of the internal data
-         * structures.  However, it is highly reccomended that you DO NOT use the
+     * structures.  However, it is highly recommended that you DO NOT use the
-         * little endian encodings if you can possibly avoid it.  They are not
+     * little endian encodings if you can possibly avoid it.  They are not
-         * reccomended by the Unicode Consortium and are mainly supported as a
+     * recommended by the Unicode Consortium and are mainly supported as a
-         * means of communicating with other systems that encode their data
+     * means of communicating with other systems that encode their data
-         * incorrectly.  That said, whenever UtfString encodes the contained string
+     * incorrectly.  That said, whenever UtfString encodes the contained string
-         * it always includes a BOM at the begining (the byte order marker) so that
+     * it always includes a BOM at the beginning (the byte order marker) so that
-         * proper byte order can be easily determined by the program reading the
+     * proper byte order can be easily determined by the program reading the
-         * data.
+     * data.
-         *
+     *
-         *@todo Investigate http://www.unicode.org/reports/tr6/ for compression.
+     *@todo Investigate http://www.unicode.org/reports/tr6/ for compression.
-         */
+     */
-        class UtfString
+    class UtfString
-        {
+    {
-        public:
+    public:
-                enum Encoding
+        enum Encoding
-                {
+        {
-                        Utf8,
+            Utf8,
-                        Utf16,
+            Utf16,
-                        Utf16be,
+            Utf16be,
-                        Utf16le,
+            Utf16le,
-                        Utf32,
+            Utf32,
-                        Utf32be,
+            Utf32be,
-                        Utf32le,
+            Utf32le,
-                        Ucs2,
+            Ucs2,
-                        Ucs4,
+            Ucs4,
-                        GuessEncoding
+            GuessEncoding
-                };
+        };
-                UtfString();
+        UtfString();
-                UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
+        UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
-                UtfString( const char *sInput, Encoding eEnc=Utf8 );
+        UtfString( const char *sInput, Encoding eEnc=Utf8 );
-                virtual ~UtfString();
+        virtual ~UtfString();
-                class iterator
+        class iterator
-                {
+        {
-                friend class UtfString;
+        friend class UtfString;
-                private:
+        private:
-                        iterator( UtfString *pSrc, int iCodePos ) :
+            iterator( UtfString *pSrc, int iCodePos ) :
-                                pSrc( pSrc ), iCodePos( iCodePos )
+                pSrc( pSrc ), iCodePos( iCodePos )
-                        {
+            {
-                        }
+            }
-                public:
+        public:
-                        iterator() :
+            iterator() :
-                                pSrc( NULL ), iCodePos( 0 )
+                pSrc( NULL ), iCodePos( 0 )
-                        {
+            {
-                        }
+            }
-                        UtfChar operator*()
+            UtfChar operator*()
-                        {
+            {
-                                if( !pSrc )
+                if( !pSrc )
-                                        throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
+                    throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
-                                return pSrc->get( iCodePos );
+                return pSrc->get( iCodePos );
-                        }
+            }
-                        iterator operator++()
+            iterator operator++()
-                        {
+            {
-                                pSrc->nextChar( iCodePos );
+                pSrc->nextChar( iCodePos );
-                                return *this;
+                return *this;
-                        }
+            }
-                        iterator operator++( int )
+            iterator operator++( int )
-                        {
+            {
-                                pSrc->nextChar( iCodePos );
+                pSrc->nextChar( iCodePos );
-                                return *this;
+                return *this;
-                        }
+            }
-                        operator bool() const
+            operator bool() const
-                        {
+            {
-                                return iCodePos < pSrc->aData.getSize();
+                return iCodePos < pSrc->aData.getSize();
-                        }
+            }
-                private:
+        private:
-                        UtfString *pSrc;
+            UtfString *pSrc;
-                        int iCodePos;
+            int iCodePos;
-                };
+        };
-                
+        
-                class const_iterator
+        class const_iterator
-                {
+        {
-                friend class UtfString;
+        friend class UtfString;
-                private:
+        private:
-                        const_iterator( const UtfString *pSrc, int iCodePos ) :
+            const_iterator( const UtfString *pSrc, int iCodePos ) :
-                                pSrc( pSrc ), iCodePos( iCodePos )
+                pSrc( pSrc ), iCodePos( iCodePos )
-                        {
+            {
-                        }
+            }
-                public:
+        public:
-                        const_iterator() :
+            const_iterator() :
-                                pSrc( NULL ), iCodePos( 0 )
+                pSrc( NULL ), iCodePos( 0 )
-                        {
+            {
-                        }
+            }
-                        UtfChar operator*()
+            UtfChar operator*()
-                        {
+            {
-                                if( !pSrc )
+                if( !pSrc )
-                                        throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
+                    throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
-                                return pSrc->get( iCodePos );
+                return pSrc->get( iCodePos );
-                        }
+            }
-                        const_iterator operator++()
+            const_iterator operator++()
-                        {
+            {
-                                pSrc->nextChar( iCodePos );
+                pSrc->nextChar( iCodePos );
-                                return *this;
+                return *this;
-                        }
+            }
-                        const_iterator operator++( int )
+            const_iterator operator++( int )
-                        {
+            {
-                                pSrc->nextChar( iCodePos );
+                pSrc->nextChar( iCodePos );
-                                return *this;
+                return *this;
-                        }
+            }
-                        operator bool() const
+            operator bool() const
-                        {
+            {
-                                return iCodePos < pSrc->aData.getSize();
+                return iCodePos < pSrc->aData.getSize();
-                        }
+            }
-                private:
+        private:
-                        const UtfString *pSrc;
+            const UtfString *pSrc;
-                        int iCodePos;
+            int iCodePos;
-                };
+        };
-                iterator begin();
+        iterator begin();
-                const_iterator begin() const;
+        const_iterator begin() const;
-                /**
+        /**
-                 * Append a UtfChar (A unicode code point) to the string.  This can be
+         * Append a UtfChar (A unicode code point) to the string.  This can be
-                 * any valid code point, and is just the value of the code point, no
+         * any valid code point, and is just the value of the code point, no
-                 * encoding necessary.
+         * encoding necessary.
-                 */
+         */
-                void append( UtfChar ch );
+        void append( UtfChar ch );
-                void append( const UtfString &rSrc );
+        void append( const UtfString &rSrc );
-                /**
+        /**
-                 * Set the value of the entire string based on the given input and
+         * Set the value of the entire string based on the given input and
-                 * encoding.  The default encoding is Utf8, which is compatible with
+         * encoding.  The default encoding is Utf8, which is compatible with
-                 * 7-bit ascii, so it's a great choice for setting UtfStrings from
+         * 7-bit ascii, so it's a great choice for setting UtfStrings from
-                 * string literals in code.
+         * string literals in code.
-                 */
+         */
-                void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
+        void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
-                /**
+        /**
-                 * This encodes the UtfString in the given encoding and outputs it to
+         * This encodes the UtfString in the given encoding and outputs it to
-                 * the provided stream.  all Utf16 and Utf32 encodings will have the
+         * the provided stream.  All Utf16 and Utf32 encodings will have the
-                 * correct BOM (byte order marker) at the begining.
+         * correct BOM (byte order marker) at the beginning.
-                 */
+         */
-                void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
+        void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
-                /**
+        /**
-                 * This encodes the UtfString in the given encoding and returns it as
+         * This encodes the UtfString in the given encoding and returns it as
-                 * a binary Bu::String.  Like write, this also includes the proper BOM
+         * a binary Bu::String.  Like write, this also includes the proper BOM
-                 * at the begining.
+         * at the beginning.
-                 */
+         */
-                Bu::String get( Encoding eEnc=Utf8 ) const;
+        Bu::String get( Encoding eEnc=Utf8 ) const;
-                void debug() const;
+        void debug() const;
-                /**
+        /**
-                 * This may or may not stick around, given an index, this returns a
+         * This may or may not stick around, given an index, this returns a
-                 * codepoint, however there isn't necesarilly a 1:1 ratio between
+         * codepoint, however there isn't necessarily a 1:1 ratio between
-                 * indexes and code points.
+         * indexes and code points.
-                 */
+         */
-                UtfChar get( int iIndex ) const;
+        UtfChar get( int iIndex ) const;
-                /**
+        /**
-                 * This is what to use if you want to iterate through a section of the
+         * This is what to use if you want to iterate through a section of the
-                 * UtfString and you want to use a numerical index.  In most cases it
+         * UtfString and you want to use a numerical index.  In most cases it
-                 * will be much easier to use an iterator, though.  Given an index this
+         * will be much easier to use an iterator, though.  Given an index this
-                 * will return the codepoint at that position and increment iIndex an
+         * will return the codepoint at that position and increment iIndex an
-                 * appropriate amount for it to point to the next code point.
+         * appropriate amount for it to point to the next code point.
-                 */
+         */
-                UtfChar nextChar( int &iIndex ) const;
+        UtfChar nextChar( int &iIndex ) const;
-                bool operator==( const Bu::UtfString &rhs ) const;
+        bool operator==( const Bu::UtfString &rhs ) const;
-                UtfString &operator+=( const Bu::UtfString &rhs );
+        UtfString &operator+=( const Bu::UtfString &rhs );
-                UtfString &operator+=( const UtfChar &rhs );
+        UtfString &operator+=( const UtfChar &rhs );
-        private:
+    private:
-                void append16( uint16_t i ) { aData.append( i ); }
+        void append16( uint16_t i ) { aData.append( i ); }
-                void setUtf8( const Bu::String &sInput );
+        void setUtf8( const Bu::String &sInput );
-                void setUtf16( const Bu::String &sInput );
+        void setUtf16( const Bu::String &sInput );
-                void setUtf16be( const Bu::String &sInput );
+        void setUtf16be( const Bu::String &sInput );
-                void setUtf16le( const Bu::String &sInput );
+        void setUtf16le( const Bu::String &sInput );
-                void setUtf32( const Bu::String &sInput );
+        void setUtf32( const Bu::String &sInput );
-                void setUtf32be( const Bu::String &sInput );
+        void setUtf32be( const Bu::String &sInput );
-                void setUtf32le( const Bu::String &sInput );
+        void setUtf32le( const Bu::String &sInput );
-                
+        
-                void writeUtf8( Bu::Stream &sOut ) const;
+        void writeUtf8( Bu::Stream &sOut ) const;
-                void writeUtf16be( Bu::Stream &sOut ) const;
+        void writeUtf16be( Bu::Stream &sOut ) const;
-                void writeUtf16le( Bu::Stream &sOut ) const;
+        void writeUtf16le( Bu::Stream &sOut ) const;
-                void writeUtf32be( Bu::Stream &sOut ) const;
+        void writeUtf32be( Bu::Stream &sOut ) const;
-                void writeUtf32le( Bu::Stream &sOut ) const;
+        void writeUtf32le( Bu::Stream &sOut ) const;
-        private:
+    private:
-                Bu::Array<uint16_t> aData;
+        Bu::Array<uint16_t> aData;
-                int iRawLen;
+        int iRawLen;
-                int iCharLen;
+        int iCharLen;
-        };
+    };
-        //
+    //
-        // Hash support
+    // Hash support
-        //
+    //
-        template<typename T>
+    template<typename T>
-        uint32_t __calcHashCode( const T &k );
+    uint32_t __calcHashCode( const T &k );
-        template<typename T>
+    template<typename T>
-        bool __cmpHashKeys( const T &a, const T &b );
+    bool __cmpHashKeys( const T &a, const T &b );
-        template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
+    template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
-        template<> bool __cmpHashKeys<UtfString>(
+    template<> bool __cmpHashKeys<UtfString>(
-                const UtfString &a, const UtfString &b );
+        const UtfString &a, const UtfString &b );
 };
 #endif
author	Mike Buland <eichlan@xagasoft.com>	2012-11-05 22:41:51 +0000
committer	Mike Buland <eichlan@xagasoft.com>	2012-11-05 22:41:51 +0000
commit	ec05778d5718a7912e506764d443a78d6a6179e3 (patch)
tree	78a9a01532180030c095acefc45763f07c14edb8 /src/unstable/utfstring.h
parent	b20414ac1fe80a71a90601f4cd1767fa7014a9ba (diff)
download	libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.tar.gz libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.tar.bz2 libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.tar.xz libbu++-ec05778d5718a7912e506764d443a78d6a6179e3.zip

diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h index 1bd4cce..560faae 100644 --- a/src/unstable/utfstring.h +++ b/src/unstable/utfstring.h
@@ -13,247 +13,247 @@
13		13
14	namespace Bu	14	namespace Bu
15	{	15	{
16	class String;	16	class String;
17	class Stream;	17	class Stream;
18		18
19	/**	19	/**
20	* UtfChar isn't actually a character, unicode specifies "code points" not	20	* UtfChar isn't actually a character, unicode specifies "code points" not
21	* characters. The main reason for this is that not all code points define	21	* characters. The main reason for this is that not all code points define
22	* usable characters. Some control text directionality, some apply	22	* usable characters. Some control text directionality, some apply
23	* properties to other code points which are characters. However, most of	23	* properties to other code points which are characters. However, most of
24	* these distinctions are only important when implementing displays that	24	* these distinctions are only important when implementing displays that
25	* comply with the Unicode standard fully.	25	* comply with the Unicode standard fully.
26	*/	26	*/
27	typedef uint32_t UtfChar;	27	typedef uint32_t UtfChar;
28		28
29	/**	29	/**
30	* A unicode string. This class represents a string of unicode code points.	30	* A unicode string. This class represents a string of unicode code points.
31	* Every character in unicode can be represented with 21 bits, but we don't	31	* Every character in unicode can be represented with 21 bits, but we don't
32	* have a datatype that's 24 bits long, so we return all code points as a	32	* have a datatype that's 24 bits long, so we return all code points as a
33	* 32 bit unsigned value represented by Bu::UtfChar. However, the UtfString	33	* 32 bit unsigned value represented by Bu::UtfChar. However, the UtfString
34	* class, for efficiency purposes doesn't store 32 bit values internally.	34	* class, for efficiency purposes doesn't store 32 bit values internally.
35	* It represents all code points in the native utf16 encodeng. This means	35	* It represents all code points in the native utf16 encodeng. This means
36	* that it may be very difficult to quickly determine the length of a	36	* that it may be very difficult to quickly determine the length of a
37	* UtfString in code points. Unlike many Unicode handling systems, this	37	* UtfString in code points. Unlike many Unicode handling systems, this
38	* one actually works with complete code points. When using this class you	38	* one actually works with complete code points. When using this class you
39	* don't ever have to know about the inner workings of the different	39	* don't ever have to know about the inner workings of the different
40	* encoding schemes. All of the data is dealt with as whole code points.	40	* encoding schemes. All of the data is dealt with as whole code points.
41	*	41	*
42	* As an aside, this means that when encoding a UtfString to a Utf16	42	* As an aside, this means that when encoding a UtfString to a Utf16
43	* encoding that matches your archetecture this operation will be very	43	* encoding that matches your archetecture this operation will be very
44	* fast since it will effectively be a raw dump of the internal data	44	* fast since it will effectively be a raw dump of the internal data
45	* structures. However, it is highly reccomended that you DO NOT use the	45	* structures. However, it is highly reccomended that you DO NOT use the
46	* little endian encodings if you can possibly avoid it. They are not	46	* little endian encodings if you can possibly avoid it. They are not
47	* reccomended by the Unicode Consortium and are mainly supported as a	47	* reccomended by the Unicode Consortium and are mainly supported as a
48	* means of communicating with other systems that encode their data	48	* means of communicating with other systems that encode their data
49	* incorrectly. That said, whenever UtfString encodes the contained string	49	* incorrectly. That said, whenever UtfString encodes the contained string
50	* it always includes a BOM at the begining (the byte order marker) so that	50	* it always includes a BOM at the begining (the byte order marker) so that
51	* proper byte order can be easily determined by the program reading the	51	* proper byte order can be easily determined by the program reading the
52	* data.	52	* data.
53	*	53	*
54	*@todo Investigate http://www.unicode.org/reports/tr6/ for compression.	54	*@todo Investigate http://www.unicode.org/reports/tr6/ for compression.
55	*/	55	*/
56	class UtfString	56	class UtfString
57	{	57	{
58	public:	58	public:
59	enum Encoding	59	enum Encoding
60	{	60	{
61	Utf8,	61	Utf8,
62	Utf16,	62	Utf16,
63	Utf16be,	63	Utf16be,
64	Utf16le,	64	Utf16le,
65	Utf32,	65	Utf32,
66	Utf32be,	66	Utf32be,
67	Utf32le,	67	Utf32le,
68	Ucs2,	68	Ucs2,
69	Ucs4,	69	Ucs4,
70	GuessEncoding	70	GuessEncoding
71	};	71	};
72		72
73	UtfString();	73	UtfString();
74	UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );	74	UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
75	UtfString( const char *sInput, Encoding eEnc=Utf8 );	75	UtfString( const char *sInput, Encoding eEnc=Utf8 );
76	virtual ~UtfString();	76	virtual ~UtfString();
77		77
78	class iterator	78	class iterator
79	{	79	{
80	friend class UtfString;	80	friend class UtfString;
81	private:	81	private:
82	iterator( UtfString *pSrc, int iCodePos ) :	82	iterator( UtfString *pSrc, int iCodePos ) :
83	pSrc( pSrc ), iCodePos( iCodePos )	83	pSrc( pSrc ), iCodePos( iCodePos )
84	{	84	{
85	}	85	}
86		86
87	public:	87	public:
88	iterator() :	88	iterator() :
89	pSrc( NULL ), iCodePos( 0 )	89	pSrc( NULL ), iCodePos( 0 )
90	{	90	{
91	}	91	}
92		92
93	UtfChar operator*()	93	UtfChar operator*()
94	{	94	{
95	if( !pSrc )	95	if( !pSrc )
96	throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");	96	throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
97	return pSrc->get( iCodePos );	97	return pSrc->get( iCodePos );
98	}	98	}
99		99
100	iterator operator++()	100	iterator operator++()
101	{	101	{
102	pSrc->nextChar( iCodePos );	102	pSrc->nextChar( iCodePos );
103	return *this;	103	return *this;
104	}	104	}
105		105
106	iterator operator++( int )	106	iterator operator++( int )
107	{	107	{
108	pSrc->nextChar( iCodePos );	108	pSrc->nextChar( iCodePos );
109	return *this;	109	return *this;
110	}	110	}
111		111
112	operator bool() const	112	operator bool() const
113	{	113	{
114	return iCodePos < pSrc->aData.getSize();	114	return iCodePos < pSrc->aData.getSize();
115	}	115	}
116		116
117	private:	117	private:
118	UtfString *pSrc;	118	UtfString *pSrc;
119	int iCodePos;	119	int iCodePos;
120	};	120	};
121		121
122	class const_iterator	122	class const_iterator
123	{	123	{
124	friend class UtfString;	124	friend class UtfString;
125	private:	125	private:
126	const_iterator( const UtfString *pSrc, int iCodePos ) :	126	const_iterator( const UtfString *pSrc, int iCodePos ) :
127	pSrc( pSrc ), iCodePos( iCodePos )	127	pSrc( pSrc ), iCodePos( iCodePos )
128	{	128	{
129	}	129	}
130		130
131	public:	131	public:
132	const_iterator() :	132	const_iterator() :
133	pSrc( NULL ), iCodePos( 0 )	133	pSrc( NULL ), iCodePos( 0 )
134	{	134	{
135	}	135	}
136		136
137	UtfChar operator*()	137	UtfChar operator*()
138	{	138	{
139	if( !pSrc )	139	if( !pSrc )
140	throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");	140	throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
141	return pSrc->get( iCodePos );	141	return pSrc->get( iCodePos );
142	}	142	}
143		143
144	const_iterator operator++()	144	const_iterator operator++()
145	{	145	{
146	pSrc->nextChar( iCodePos );	146	pSrc->nextChar( iCodePos );
147	return *this;	147	return *this;
148	}	148	}
149		149
150	const_iterator operator++( int )	150	const_iterator operator++( int )
151	{	151	{
152	pSrc->nextChar( iCodePos );	152	pSrc->nextChar( iCodePos );
153	return *this;	153	return *this;
154	}	154	}
155		155
156	operator bool() const	156	operator bool() const
157	{	157	{
158	return iCodePos < pSrc->aData.getSize();	158	return iCodePos < pSrc->aData.getSize();
159	}	159	}
160		160
161	private:	161	private:
162	const UtfString *pSrc;	162	const UtfString *pSrc;
163	int iCodePos;	163	int iCodePos;
164	};	164	};
165		165
166	iterator begin();	166	iterator begin();
167	const_iterator begin() const;	167	const_iterator begin() const;
168		168
169	/**	169	/**
170	* Append a UtfChar (A unicode code point) to the string. This can be	170	* Append a UtfChar (A unicode code point) to the string. This can be
171	* any valid code point, and is just the value of the code point, no	171	* any valid code point, and is just the value of the code point, no
172	* encoding necessary.	172	* encoding necessary.
173	*/	173	*/
174	void append( UtfChar ch );	174	void append( UtfChar ch );
175		175
176	void append( const UtfString &rSrc );	176	void append( const UtfString &rSrc );
177		177
178	/**	178	/**
179	* Set the value of the entire string based on the given input and	179	* Set the value of the entire string based on the given input and
180	* encoding. The default encoding is Utf8, which is compatible with	180	* encoding. The default encoding is Utf8, which is compatible with
181	* 7-bit ascii, so it's a great choice for setting UtfStrings from	181	* 7-bit ascii, so it's a great choice for setting UtfStrings from
182	* string literals in code.	182	* string literals in code.
183	*/	183	*/
184	void set( const Bu::String &sInput, Encoding eEnc=Utf8 );	184	void set( const Bu::String &sInput, Encoding eEnc=Utf8 );
185		185
186	/**	186	/**
187	* This encodes the UtfString in the given encoding and outputs it to	187	* This encodes the UtfString in the given encoding and outputs it to
188	* the provided stream. all Utf16 and Utf32 encodings will have the	188	* the provided stream. all Utf16 and Utf32 encodings will have the
189	* correct BOM (byte order marker) at the begining.	189	* correct BOM (byte order marker) at the begining.
190	*/	190	*/
191	void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;	191	void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
192		192
193	/**	193	/**
194	* This encodes the UtfString in the given encoding and returns it as	194	* This encodes the UtfString in the given encoding and returns it as
195	* a binary Bu::String. Like write, this also includes the proper BOM	195	* a binary Bu::String. Like write, this also includes the proper BOM
196	* at the begining.	196	* at the begining.
197	*/	197	*/
198	Bu::String get( Encoding eEnc=Utf8 ) const;	198	Bu::String get( Encoding eEnc=Utf8 ) const;
199		199
200	void debug() const;	200	void debug() const;
201		201
202	/**	202	/**
203	* This may or may not stick around, given an index, this returns a	203	* This may or may not stick around, given an index, this returns a
204	* codepoint, however there isn't necesarilly a 1:1 ratio between	204	* codepoint, however there isn't necesarilly a 1:1 ratio between
205	* indexes and code points.	205	* indexes and code points.
206	*/	206	*/
207	UtfChar get( int iIndex ) const;	207	UtfChar get( int iIndex ) const;
208		208
209	/**	209	/**
210	* This is what to use if you want to iterate through a section of the	210	* This is what to use if you want to iterate through a section of the
211	* UtfString and you want to use a numerical index. In most cases it	211	* UtfString and you want to use a numerical index. In most cases it
212	* will be much easier to use an iterator, though. Given an index this	212	* will be much easier to use an iterator, though. Given an index this
213	* will return the codepoint at that position and increment iIndex an	213	* will return the codepoint at that position and increment iIndex an
214	* appropriate amount for it to point to the next code point.	214	* appropriate amount for it to point to the next code point.
215	*/	215	*/
216	UtfChar nextChar( int &iIndex ) const;	216	UtfChar nextChar( int &iIndex ) const;
217		217
218	bool operator==( const Bu::UtfString &rhs ) const;	218	bool operator==( const Bu::UtfString &rhs ) const;
219	UtfString &operator+=( const Bu::UtfString &rhs );	219	UtfString &operator+=( const Bu::UtfString &rhs );
220	UtfString &operator+=( const UtfChar &rhs );	220	UtfString &operator+=( const UtfChar &rhs );
221		221
222	private:	222	private:
223	void append16( uint16_t i ) { aData.append( i ); }	223	void append16( uint16_t i ) { aData.append( i ); }
224		224
225	void setUtf8( const Bu::String &sInput );	225	void setUtf8( const Bu::String &sInput );
226	void setUtf16( const Bu::String &sInput );	226	void setUtf16( const Bu::String &sInput );
227	void setUtf16be( const Bu::String &sInput );	227	void setUtf16be( const Bu::String &sInput );
228	void setUtf16le( const Bu::String &sInput );	228	void setUtf16le( const Bu::String &sInput );
229	void setUtf32( const Bu::String &sInput );	229	void setUtf32( const Bu::String &sInput );
230	void setUtf32be( const Bu::String &sInput );	230	void setUtf32be( const Bu::String &sInput );
231	void setUtf32le( const Bu::String &sInput );	231	void setUtf32le( const Bu::String &sInput );
232		232
233	void writeUtf8( Bu::Stream &sOut ) const;	233	void writeUtf8( Bu::Stream &sOut ) const;
234	void writeUtf16be( Bu::Stream &sOut ) const;	234	void writeUtf16be( Bu::Stream &sOut ) const;
235	void writeUtf16le( Bu::Stream &sOut ) const;	235	void writeUtf16le( Bu::Stream &sOut ) const;
236	void writeUtf32be( Bu::Stream &sOut ) const;	236	void writeUtf32be( Bu::Stream &sOut ) const;
237	void writeUtf32le( Bu::Stream &sOut ) const;	237	void writeUtf32le( Bu::Stream &sOut ) const;
238		238
239	private:	239	private:
240	Bu::Array<uint16_t> aData;	240	Bu::Array<uint16_t> aData;
241	int iRawLen;	241	int iRawLen;
242	int iCharLen;	242	int iCharLen;
243	};	243	};
244		244
245	//	245	//
246	// Hash support	246	// Hash support
247	//	247	//
248	template<typename T>	248	template<typename T>
249	uint32_t __calcHashCode( const T &k );	249	uint32_t __calcHashCode( const T &k );
250		250
251	template<typename T>	251	template<typename T>
252	bool __cmpHashKeys( const T &a, const T &b );	252	bool __cmpHashKeys( const T &a, const T &b );
253		253
254	template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );	254	template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
255	template<> bool __cmpHashKeys<UtfString>(	255	template<> bool __cmpHashKeys<UtfString>(
256	const UtfString &a, const UtfString &b );	256	const UtfString &a, const UtfString &b );
257	};	257	};
258		258
259	#endif	259	#endif