diff options
Diffstat (limited to 'src/unstable/utfstring.h')
-rw-r--r-- | src/unstable/utfstring.h | 105 |
1 files changed, 94 insertions, 11 deletions
diff --git a/src/unstable/utfstring.h b/src/unstable/utfstring.h index af233e8..1bd4cce 100644 --- a/src/unstable/utfstring.h +++ b/src/unstable/utfstring.h | |||
@@ -72,10 +72,12 @@ namespace Bu | |||
72 | 72 | ||
73 | UtfString(); | 73 | UtfString(); |
74 | UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 ); | 74 | UtfString( const Bu::String &sInput, Encoding eEnc=Utf8 ); |
75 | UtfString( const char *sInput, Encoding eEnc=Utf8 ); | ||
75 | virtual ~UtfString(); | 76 | virtual ~UtfString(); |
76 | 77 | ||
77 | class iterator | 78 | class iterator |
78 | { | 79 | { |
80 | friend class UtfString; | ||
79 | private: | 81 | private: |
80 | iterator( UtfString *pSrc, int iCodePos ) : | 82 | iterator( UtfString *pSrc, int iCodePos ) : |
81 | pSrc( pSrc ), iCodePos( iCodePos ) | 83 | pSrc( pSrc ), iCodePos( iCodePos ) |
@@ -92,13 +94,77 @@ namespace Bu | |||
92 | { | 94 | { |
93 | if( !pSrc ) | 95 | if( !pSrc ) |
94 | throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced."); | 96 | throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced."); |
95 | return pSrc->nextChar( iCodePos ); | 97 | return pSrc->get( iCodePos ); |
98 | } | ||
99 | |||
100 | iterator operator++() | ||
101 | { | ||
102 | pSrc->nextChar( iCodePos ); | ||
103 | return *this; | ||
104 | } | ||
105 | |||
106 | iterator operator++( int ) | ||
107 | { | ||
108 | pSrc->nextChar( iCodePos ); | ||
109 | return *this; | ||
110 | } | ||
111 | |||
112 | operator bool() const | ||
113 | { | ||
114 | return iCodePos < pSrc->aData.getSize(); | ||
96 | } | 115 | } |
97 | 116 | ||
98 | private: | 117 | private: |
99 | UtfString *pSrc; | 118 | UtfString *pSrc; |
100 | int iCodePos; | 119 | int iCodePos; |
101 | }; | 120 | }; |
121 | |||
122 | class const_iterator | ||
123 | { | ||
124 | friend class UtfString; | ||
125 | private: | ||
126 | const_iterator( const UtfString *pSrc, int iCodePos ) : | ||
127 | pSrc( pSrc ), iCodePos( iCodePos ) | ||
128 | { | ||
129 | } | ||
130 | |||
131 | public: | ||
132 | const_iterator() : | ||
133 | pSrc( NULL ), iCodePos( 0 ) | ||
134 | { | ||
135 | } | ||
136 | |||
137 | UtfChar operator*() | ||
138 | { | ||
139 | if( !pSrc ) | ||
140 | throw Bu::ExceptionBase("invalid UtfString::iterator dereferenced."); | ||
141 | return pSrc->get( iCodePos ); | ||
142 | } | ||
143 | |||
144 | const_iterator operator++() | ||
145 | { | ||
146 | pSrc->nextChar( iCodePos ); | ||
147 | return *this; | ||
148 | } | ||
149 | |||
150 | const_iterator operator++( int ) | ||
151 | { | ||
152 | pSrc->nextChar( iCodePos ); | ||
153 | return *this; | ||
154 | } | ||
155 | |||
156 | operator bool() const | ||
157 | { | ||
158 | return iCodePos < pSrc->aData.getSize(); | ||
159 | } | ||
160 | |||
161 | private: | ||
162 | const UtfString *pSrc; | ||
163 | int iCodePos; | ||
164 | }; | ||
165 | |||
166 | iterator begin(); | ||
167 | const_iterator begin() const; | ||
102 | 168 | ||
103 | /** | 169 | /** |
104 | * Append a UtfChar (A unicode code point) to the string. This can be | 170 | * Append a UtfChar (A unicode code point) to the string. This can be |
@@ -122,23 +188,23 @@ namespace Bu | |||
122 | * the provided stream. All Utf16 and Utf32 encodings will have the | 188 | * the provided stream. All Utf16 and Utf32 encodings will have the |
123 | * correct BOM (byte order marker) at the beginning. | 189 | * correct BOM (byte order marker) at the beginning. |
124 | */ | 190 | */ |
125 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ); | 191 | void write( Bu::Stream &sOut, Encoding eEnc=Utf8 ) const; |
126 | 192 | ||
127 | /** | 193 | /** |
128 | * This encodes the UtfString in the given encoding and returns it as | 194 | * This encodes the UtfString in the given encoding and returns it as |
129 | * a binary Bu::String. Like write, this also includes the proper BOM | 195 | * a binary Bu::String. Like write, this also includes the proper BOM |
130 | * at the beginning. | 196 | * at the beginning. |
131 | */ | 197 | */ |
132 | Bu::String get( Encoding eEnc=Utf8 ); | 198 | Bu::String get( Encoding eEnc=Utf8 ) const; |
133 | 199 | ||
134 | void debug(); | 200 | void debug() const; |
135 | 201 | ||
136 | /** | 202 | /** |
137 | * This may or may not stick around. Given an index, this returns a | 203 | * This may or may not stick around. Given an index, this returns a |
138 | * code point; however, there isn't necessarily a 1:1 ratio between | 204 | * code point; however, there isn't necessarily a 1:1 ratio between |
139 | * indexes and code points. | 205 | * indexes and code points. |
140 | */ | 206 | */ |
141 | UtfChar get( int iIndex ); | 207 | UtfChar get( int iIndex ) const; |
142 | 208 | ||
143 | /** | 209 | /** |
144 | * This is what to use if you want to iterate through a section of the | 210 | * This is what to use if you want to iterate through a section of the |
@@ -147,7 +213,11 @@ namespace Bu | |||
147 | * will return the codepoint at that position and increment iIndex an | 213 | * will return the codepoint at that position and increment iIndex an |
148 | * appropriate amount for it to point to the next code point. | 214 | * appropriate amount for it to point to the next code point. |
149 | */ | 215 | */ |
150 | UtfChar nextChar( int &iIndex ); | 216 | UtfChar nextChar( int &iIndex ) const; |
217 | |||
218 | bool operator==( const Bu::UtfString &rhs ) const; | ||
219 | UtfString &operator+=( const Bu::UtfString &rhs ); | ||
220 | UtfString &operator+=( const UtfChar &rhs ); | ||
151 | 221 | ||
152 | private: | 222 | private: |
153 | void append16( uint16_t i ) { aData.append( i ); } | 223 | void append16( uint16_t i ) { aData.append( i ); } |
@@ -160,17 +230,30 @@ namespace Bu | |||
160 | void setUtf32be( const Bu::String &sInput ); | 230 | void setUtf32be( const Bu::String &sInput ); |
161 | void setUtf32le( const Bu::String &sInput ); | 231 | void setUtf32le( const Bu::String &sInput ); |
162 | 232 | ||
163 | void writeUtf8( Bu::Stream &sOut ); | 233 | void writeUtf8( Bu::Stream &sOut ) const; |
164 | void writeUtf16be( Bu::Stream &sOut ); | 234 | void writeUtf16be( Bu::Stream &sOut ) const; |
165 | void writeUtf16le( Bu::Stream &sOut ); | 235 | void writeUtf16le( Bu::Stream &sOut ) const; |
166 | void writeUtf32be( Bu::Stream &sOut ); | 236 | void writeUtf32be( Bu::Stream &sOut ) const; |
167 | void writeUtf32le( Bu::Stream &sOut ); | 237 | void writeUtf32le( Bu::Stream &sOut ) const; |
168 | 238 | ||
169 | private: | 239 | private: |
170 | Bu::Array<uint16_t> aData; | 240 | Bu::Array<uint16_t> aData; |
171 | int iRawLen; | 241 | int iRawLen; |
172 | int iCharLen; | 242 | int iCharLen; |
173 | }; | 243 | }; |
244 | |||
245 | // | ||
246 | // Hash support | ||
247 | // | ||
248 | template<typename T> | ||
249 | uint32_t __calcHashCode( const T &k ); | ||
250 | |||
251 | template<typename T> | ||
252 | bool __cmpHashKeys( const T &a, const T &b ); | ||
253 | |||
254 | template<> uint32_t __calcHashCode<UtfString>( const UtfString &k ); | ||
255 | template<> bool __cmpHashKeys<UtfString>( | ||
256 | const UtfString &a, const UtfString &b ); | ||
174 | }; | 257 | }; |
175 | 258 | ||
176 | #endif | 259 | #endif |