aboutsummaryrefslogtreecommitdiff
path: root/src/unstable/utfstring.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/unstable/utfstring.h105
1 files changed, 94 insertions, 11 deletions
diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h
index af233e8..1bd4cce 100644
--- a/src/unstable/utfstring.h
+++ b/src/unstable/utfstring.h
@@ -72,10 +72,12 @@ namespace Bu
72 72
73 UtfString(); 73 UtfString();
74 UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 ); 74 UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 );
75 UtfString( const char *sInput, Encoding eEnc=Utf8 );
75 virtual ~UtfString(); 76 virtual ~UtfString();
76 77
77 class iterator 78 class iterator
78 { 79 {
80 friend class UtfString;
79 private: 81 private:
80 iterator( UtfString *pSrc, int iCodePos ) : 82 iterator( UtfString *pSrc, int iCodePos ) :
81 pSrc( pSrc ), iCodePos( iCodePos ) 83 pSrc( pSrc ), iCodePos( iCodePos )
@@ -92,13 +94,77 @@ namespace Bu
92 { 94 {
93 if( !pSrc ) 95 if( !pSrc )
94 throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced."); 96 throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
95 return pSrc->nextChar( iCodePos ); 97 return pSrc->get( iCodePos );
98 }
99
100 iterator operator++()
101 {
102 pSrc->nextChar( iCodePos );
103 return *this;
104 }
105
106 iterator operator++( int )
107 {
108 pSrc->nextChar( iCodePos );
109 return *this;
110 }
111
112 operator bool() const
113 {
114 return iCodePos < pSrc->aData.getSize();
96 } 115 }
97 116
98 private: 117 private:
99 UtfString *pSrc; 118 UtfString *pSrc;
100 int iCodePos; 119 int iCodePos;
101 }; 120 };
121
122 class const_iterator
123 {
124 friend class UtfString;
125 private:
126 const_iterator( const UtfString *pSrc, int iCodePos ) :
127 pSrc( pSrc ), iCodePos( iCodePos )
128 {
129 }
130
131 public:
132 const_iterator() :
133 pSrc( NULL ), iCodePos( 0 )
134 {
135 }
136
137 UtfChar operator*()
138 {
139 if( !pSrc )
140 throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced.");
141 return pSrc->get( iCodePos );
142 }
143
144 const_iterator operator++()
145 {
146 pSrc->nextChar( iCodePos );
147 return *this;
148 }
149
150 const_iterator operator++( int )
151 {
152 pSrc->nextChar( iCodePos );
153 return *this;
154 }
155
156 operator bool() const
157 {
158 return iCodePos < pSrc->aData.getSize();
159 }
160
161 private:
162 const UtfString *pSrc;
163 int iCodePos;
164 };
165
166 iterator begin();
167 const_iterator begin() const;
102 168
103 /** 169 /**
104 * Append a UtfChar (a Unicode code point) to the string. This can be 170 * Append a UtfChar (a Unicode code point) to the string. This can be
@@ -122,23 +188,23 @@ namespace Bu
122 * the provided stream. All Utf16 and Utf32 encodings will have the 188 * the provided stream. All Utf16 and Utf32 encodings will have the
123 * correct BOM (byte order mark) at the beginning. 189 * correct BOM (byte order mark) at the beginning.
124 */ 190 */
125 void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); 191 void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const;
126 192
127 /** 193 /**
128 * This encodes the UtfString in the given encoding and returns it as 194 * This encodes the UtfString in the given encoding and returns it as
129 * a binary Bu::String. Like write, this also includes the proper BOM 195 * a binary Bu::String. Like write, this also includes the proper BOM
130 * at the beginning. 196 * at the beginning.
131 */ 197 */
132 Bu::String get( Encoding eEnc=Utf8 ); 198 Bu::String get( Encoding eEnc=Utf8 ) const;
133 199
134 void debug(); 200 void debug() const;
135 201
136 /** 202 /**
137 * This may or may not stick around. Given an index, this returns a 203 * This may or may not stick around. Given an index, this returns a
138 * code point; however, there isn't necessarily a 1:1 ratio between 204 * code point; however, there isn't necessarily a 1:1 ratio between
139 * indexes and code points. 205 * indexes and code points.
140 */ 206 */
141 UtfChar get( int iIndex ); 207 UtfChar get( int iIndex ) const;
142 208
143 /** 209 /**
144 * This is what to use if you want to iterate through a section of the 210 * This is what to use if you want to iterate through a section of the
@@ -147,7 +213,11 @@ namespace Bu
147 * will return the code point at that position and increment iIndex by an 213 * will return the code point at that position and increment iIndex by an
148 * appropriate amount so that it points to the next code point. 214 * appropriate amount so that it points to the next code point.
149 */ 215 */
150 UtfChar nextChar( int &iIndex ); 216 UtfChar nextChar( int &iIndex ) const;
217
218 bool operator==( const Bu::UtfString &rhs ) const;
219 UtfString &operator+=( const Bu::UtfString &rhs );
220 UtfString &operator+=( const UtfChar &rhs );
151 221
152 private: 222 private:
153 void append16( uint16_t i ) { aData.append( i ); } 223 void append16( uint16_t i ) { aData.append( i ); }
@@ -160,17 +230,30 @@ namespace Bu
160 void setUtf32be( const Bu::String &sInput ); 230 void setUtf32be( const Bu::String &sInput );
161 void setUtf32le( const Bu::String &sInput ); 231 void setUtf32le( const Bu::String &sInput );
162 232
163 void writeUtf8( Bu::Stream &sOut ); 233 void writeUtf8( Bu::Stream &sOut ) const;
164 void writeUtf16be( Bu::Stream &sOut ); 234 void writeUtf16be( Bu::Stream &sOut ) const;
165 void writeUtf16le( Bu::Stream &sOut ); 235 void writeUtf16le( Bu::Stream &sOut ) const;
166 void writeUtf32be( Bu::Stream &sOut ); 236 void writeUtf32be( Bu::Stream &sOut ) const;
167 void writeUtf32le( Bu::Stream &sOut ); 237 void writeUtf32le( Bu::Stream &sOut ) const;
168 238
169 private: 239 private:
170 Bu::Array<uint16_t> aData; 240 Bu::Array<uint16_t> aData;
171 int iRawLen; 241 int iRawLen;
172 int iCharLen; 242 int iCharLen;
173 }; 243 };
244
245 //
246 // Hash support
247 //
248 template<typename T>
249 uint32_t __calcHashCode( const T &k );
250
251 template<typename T>
252 bool __cmpHashKeys( const T &a, const T &b );
253
254 template<> uint32_t __calcHashCode<UtfString>( const UtfString &k );
255 template<> bool __cmpHashKeys<UtfString>(
256 const UtfString &a, const UtfString &b );
174}; 257};
175 258
176#endif 259#endif