diff options
Diffstat (limited to 'src/fstring.h')
-rw-r--r-- | src/fstring.h | 653 |
1 files changed, 653 insertions, 0 deletions
diff --git a/src/fstring.h b/src/fstring.h new file mode 100644 index 0000000..717068f --- /dev/null +++ b/src/fstring.h | |||
@@ -0,0 +1,653 @@ | |||
1 | #ifndef F_STRING_H | ||
2 | #define F_STRING_H | ||
3 | |||
4 | #include <stdint.h> | ||
5 | #include <memory> | ||
6 | #include "archable.h" | ||
7 | #include "archive.h" | ||
8 | #include "hash.h" | ||
9 | |||
10 | namespace Bu | ||
11 | { | ||
12 | template< typename chr > | ||
13 | struct FStringChunk | ||
14 | { | ||
15 | long nLength; | ||
16 | chr *pData; | ||
17 | FStringChunk *pNext; | ||
18 | }; | ||
19 | |||
20 | /** | ||
21 | * Flexible String class. This class was designed with string passing and | ||
22 | * generation in mind. Like the standard string class you can specify what | ||
23 | * datatype to use for each character. Unlike the standard string class, | ||
24 | * collection of appended and prepended terms is done lazily, making long | ||
25 | * operations that involve many appends very inexpensive. In addition internal | ||
26 | * ref-counting means that if you pass strings around between functions there's | ||
27 | * almost no overhead in time or memory since a reference is created and no | ||
28 | * data is actually copied. This also means that you never need to put any | ||
29 | * FBasicString into a ref-counting container class. | ||
30 | */ | ||
31 | template< typename chr, typename chralloc=std::allocator<chr>, typename chunkalloc=std::allocator<struct FStringChunk<chr> > > | ||
32 | class FBasicString : public Archable | ||
33 | { | ||
34 | #ifndef VALTEST | ||
35 | #define cpy( dest, src, size ) memcpy( dest, src, size*sizeof(chr) ) | ||
36 | #endif | ||
37 | private: | ||
38 | typedef struct FStringChunk<chr> Chunk; | ||
39 | typedef struct FBasicString<chr, chralloc, chunkalloc> MyType; | ||
40 | |||
41 | public: | ||
42 | FBasicString() : | ||
43 | nLength( 0 ), | ||
44 | pnRefs( NULL ), | ||
45 | pFirst( NULL ), | ||
46 | pLast( NULL ) | ||
47 | { | ||
48 | } | ||
49 | |||
50 | FBasicString( const chr *pData ) : | ||
51 | nLength( 0 ), | ||
52 | pnRefs( NULL ), | ||
53 | pFirst( NULL ), | ||
54 | pLast( NULL ) | ||
55 | { | ||
56 | append( pData ); | ||
57 | } | ||
58 | |||
59 | FBasicString( const chr *pData, long nLength ) : | ||
60 | nLength( 0 ), | ||
61 | pnRefs( NULL ), | ||
62 | pFirst( NULL ), | ||
63 | pLast( NULL ) | ||
64 | { | ||
65 | append( pData, nLength ); | ||
66 | } | ||
67 | |||
68 | FBasicString( const MyType &rSrc ) : | ||
69 | nLength( 0 ), | ||
70 | pnRefs( NULL ), | ||
71 | pFirst( NULL ), | ||
72 | pLast( NULL ) | ||
73 | { | ||
74 | // Here we have no choice but to copy, since the other guy is a const. | ||
75 | // In the case that the source were flat, we could get a reference, it | ||
76 | // would make some things faster, but not matter in many other cases. | ||
77 | |||
78 | joinShare( rSrc ); | ||
79 | //copyFrom( rSrc ); | ||
80 | } | ||
81 | |||
82 | FBasicString( const MyType &rSrc, long nLength ) : | ||
83 | nLength( 0 ), | ||
84 | pnRefs( NULL ), | ||
85 | pFirst( NULL ), | ||
86 | pLast( NULL ) | ||
87 | { | ||
88 | append( rSrc.pFirst->pData, nLength ); | ||
89 | } | ||
90 | |||
91 | FBasicString( const MyType &rSrc, long nStart, long nLength ) : | ||
92 | nLength( 0 ), | ||
93 | pnRefs( NULL ), | ||
94 | pFirst( NULL ), | ||
95 | pLast( NULL ) | ||
96 | { | ||
97 | append( rSrc.pFirst->pData+nStart, nLength ); | ||
98 | } | ||
99 | |||
100 | FBasicString( long nSize ) : | ||
101 | nLength( nSize ), | ||
102 | pnRefs( NULL ), | ||
103 | pFirst( NULL ), | ||
104 | pLast( NULL ) | ||
105 | { | ||
106 | pFirst = pLast = newChunk( nSize ); | ||
107 | } | ||
108 | |||
109 | virtual ~FBasicString() | ||
110 | { | ||
111 | clear(); | ||
112 | } | ||
113 | |||
114 | void append( const chr *pData ) | ||
115 | { | ||
116 | long nLen; | ||
117 | for( nLen = 0; pData[nLen] != (chr)0; nLen++ ); | ||
118 | |||
119 | Chunk *pNew = newChunk( nLen ); | ||
120 | cpy( pNew->pData, pData, nLen ); | ||
121 | |||
122 | appendChunk( pNew ); | ||
123 | } | ||
124 | |||
125 | void append( const chr *pData, long nLen ) | ||
126 | { | ||
127 | Chunk *pNew = newChunk( nLen ); | ||
128 | |||
129 | cpy( pNew->pData, pData, nLen ); | ||
130 | |||
131 | appendChunk( pNew ); | ||
132 | } | ||
133 | |||
134 | void prepend( const chr *pData ) | ||
135 | { | ||
136 | long nLen; | ||
137 | for( nLen = 0; pData[nLen] != (chr)0; nLen++ ); | ||
138 | |||
139 | Chunk *pNew = newChunk( nLen ); | ||
140 | cpy( pNew->pData, pData, nLen ); | ||
141 | |||
142 | prependChunk( pNew ); | ||
143 | } | ||
144 | |||
145 | void prepend( const chr *pData, long nLen ) | ||
146 | { | ||
147 | Chunk *pNew = newChunk( nLen ); | ||
148 | |||
149 | cpy( pNew->pData, pData, nLen ); | ||
150 | |||
151 | prependChunk( pNew ); | ||
152 | } | ||
153 | |||
154 | void clear() | ||
155 | { | ||
156 | realClear(); | ||
157 | } | ||
158 | |||
159 | void resize( long nNewSize ) | ||
160 | { | ||
161 | if( nLength == nNewSize ) | ||
162 | return; | ||
163 | |||
164 | flatten(); | ||
165 | |||
166 | Chunk *pNew = newChunk( nNewSize ); | ||
167 | long nNewLen = (nNewSize<nLength)?(nNewSize):(nLength); | ||
168 | cpy( pNew->pData, pFirst->pData, nNewLen ); | ||
169 | pNew->pData[nNewLen] = (chr)0; | ||
170 | aChr.deallocate( pFirst->pData, pFirst->nLength+1 ); | ||
171 | aChunk.deallocate( pFirst, 1 ); | ||
172 | pFirst = pLast = pNew; | ||
173 | nLength = nNewSize; | ||
174 | } | ||
175 | |||
176 | long getSize() const | ||
177 | { | ||
178 | return nLength; | ||
179 | } | ||
180 | |||
181 | chr *getStr() | ||
182 | { | ||
183 | if( pFirst == NULL ) | ||
184 | return NULL; | ||
185 | |||
186 | flatten(); | ||
187 | return pFirst->pData; | ||
188 | } | ||
189 | |||
190 | const chr *getStr() const | ||
191 | { | ||
192 | if( pFirst == NULL ) | ||
193 | return NULL; | ||
194 | |||
195 | flatten(); | ||
196 | return pFirst->pData; | ||
197 | } | ||
198 | |||
199 | chr *c_str() | ||
200 | { | ||
201 | if( pFirst == NULL ) | ||
202 | return NULL; | ||
203 | |||
204 | flatten(); | ||
205 | return pFirst->pData; | ||
206 | } | ||
207 | |||
208 | const chr *c_str() const | ||
209 | { | ||
210 | if( pFirst == NULL ) | ||
211 | return NULL; | ||
212 | |||
213 | flatten(); | ||
214 | return pFirst->pData; | ||
215 | } | ||
216 | |||
217 | MyType &operator +=( const chr *pData ) | ||
218 | { | ||
219 | append( pData ); | ||
220 | |||
221 | return (*this); | ||
222 | } | ||
223 | |||
224 | MyType &operator +=( const MyType &rSrc ) | ||
225 | { | ||
226 | rSrc.flatten(); | ||
227 | append( rSrc.pFirst->pData, rSrc.nLength ); | ||
228 | |||
229 | return (*this); | ||
230 | } | ||
231 | |||
232 | MyType &operator +=( const chr pData ) | ||
233 | { | ||
234 | chr tmp[2] = { pData, (chr)0 }; | ||
235 | append( tmp ); | ||
236 | |||
237 | return (*this); | ||
238 | } | ||
239 | |||
240 | MyType &operator =( const chr *pData ) | ||
241 | { | ||
242 | clear(); | ||
243 | append( pData ); | ||
244 | |||
245 | return (*this); | ||
246 | } | ||
247 | |||
248 | MyType &operator =( const MyType &rSrc ) | ||
249 | { | ||
250 | //if( rSrc.isFlat() ) | ||
251 | //{ | ||
252 | joinShare( rSrc ); | ||
253 | //} | ||
254 | //else | ||
255 | //{ | ||
256 | // copyFrom( rSrc ); | ||
257 | //} | ||
258 | // | ||
259 | |||
260 | return (*this); | ||
261 | } | ||
262 | |||
263 | bool operator ==( const chr *pData ) const | ||
264 | { | ||
265 | if( pFirst == NULL ) { | ||
266 | if( pData == NULL ) | ||
267 | return true; | ||
268 | return false; | ||
269 | } | ||
270 | |||
271 | flatten(); | ||
272 | const chr *a = pData; | ||
273 | chr *b = pFirst->pData; | ||
274 | for( ; *a!=(chr)0; a++, b++ ) | ||
275 | { | ||
276 | if( *a != *b ) | ||
277 | return false; | ||
278 | } | ||
279 | |||
280 | return true; | ||
281 | } | ||
282 | |||
283 | bool operator ==( const MyType &pData ) const | ||
284 | { | ||
285 | if( pFirst == pData.pFirst ) | ||
286 | return true; | ||
287 | if( pFirst == NULL ) | ||
288 | return false; | ||
289 | |||
290 | flatten(); | ||
291 | pData.flatten(); | ||
292 | const chr *a = pData.pFirst->pData; | ||
293 | chr *b = pFirst->pData; | ||
294 | for( ; *a!=(chr)0; a++, b++ ) | ||
295 | { | ||
296 | if( *a != *b ) | ||
297 | return false; | ||
298 | } | ||
299 | |||
300 | return true; | ||
301 | } | ||
302 | |||
303 | bool operator !=(const chr *pData ) const | ||
304 | { | ||
305 | return !(*this == pData); | ||
306 | } | ||
307 | |||
308 | bool operator !=(const MyType &pData ) const | ||
309 | { | ||
310 | return !(*this == pData); | ||
311 | } | ||
312 | |||
313 | chr &operator[]( long nIndex ) | ||
314 | { | ||
315 | flatten(); | ||
316 | |||
317 | return pFirst->pData[nIndex]; | ||
318 | } | ||
319 | |||
320 | const chr &operator[]( long nIndex ) const | ||
321 | { | ||
322 | flatten(); | ||
323 | |||
324 | return pFirst->pData[nIndex]; | ||
325 | } | ||
326 | |||
327 | bool isWS( long nIndex ) const | ||
328 | { | ||
329 | flatten(); | ||
330 | |||
331 | return pFirst->pData[nIndex]==' ' || pFirst->pData[nIndex]=='\t' | ||
332 | || pFirst->pData[nIndex]=='\r' || pFirst->pData[nIndex]=='\n'; | ||
333 | } | ||
334 | |||
335 | bool isAlpha( long nIndex ) const | ||
336 | { | ||
337 | flatten(); | ||
338 | |||
339 | return (pFirst->pData[nIndex] >= 'a' && pFirst->pData[nIndex] <= 'z') | ||
340 | || (pFirst->pData[nIndex] >= 'A' && pFirst->pData[nIndex] <= 'Z'); | ||
341 | } | ||
342 | |||
343 | void toLower() | ||
344 | { | ||
345 | flatten(); | ||
346 | unShare(); | ||
347 | |||
348 | for( long j = 0; j < nLength; j++ ) | ||
349 | { | ||
350 | if( pFirst->pData[j] >= 'A' && pFirst->pData[j] <= 'Z' ) | ||
351 | pFirst->pData[j] -= 'A'-'a'; | ||
352 | } | ||
353 | } | ||
354 | |||
355 | void toUpper() | ||
356 | { | ||
357 | flatten(); | ||
358 | unShare(); | ||
359 | |||
360 | for( long j = 0; j < nLength; j++ ) | ||
361 | { | ||
362 | if( pFirst->pData[j] >= 'a' && pFirst->pData[j] <= 'z' ) | ||
363 | pFirst->pData[j] += 'A'-'a'; | ||
364 | } | ||
365 | } | ||
366 | |||
367 | void archive( class Archive &ar ) | ||
368 | { | ||
369 | if( ar.isLoading() ) | ||
370 | { | ||
371 | clear(); | ||
372 | long nLen; | ||
373 | ar >> nLen; | ||
374 | |||
375 | Chunk *pNew = newChunk( nLen ); | ||
376 | ar.read( pNew->pData, nLen*sizeof(chr) ); | ||
377 | appendChunk( pNew ); | ||
378 | } | ||
379 | else | ||
380 | { | ||
381 | flatten(); | ||
382 | |||
383 | ar << nLength; | ||
384 | ar.write( pFirst->pData, nLength*sizeof(chr) ); | ||
385 | } | ||
386 | } | ||
387 | |||
388 | private: | ||
389 | void flatten() const | ||
390 | { | ||
391 | if( isFlat() ) | ||
392 | return; | ||
393 | |||
394 | if( pFirst == NULL ) | ||
395 | return; | ||
396 | |||
397 | unShare(); | ||
398 | |||
399 | Chunk *pNew = newChunk( nLength ); | ||
400 | chr *pos = pNew->pData; | ||
401 | Chunk *i = pFirst; | ||
402 | for(;;) | ||
403 | { | ||
404 | cpy( pos, i->pData, i->nLength ); | ||
405 | pos += i->nLength; | ||
406 | i = i->pNext; | ||
407 | if( i == NULL ) | ||
408 | break; | ||
409 | } | ||
410 | realClear(); | ||
411 | |||
412 | pLast = pFirst = pNew; | ||
413 | nLength = pNew->nLength; | ||
414 | } | ||
415 | |||
416 | void realClear() const | ||
417 | { | ||
418 | if( pFirst == NULL ) | ||
419 | return; | ||
420 | |||
421 | if( isShared() ) | ||
422 | { | ||
423 | decRefs(); | ||
424 | } | ||
425 | else | ||
426 | { | ||
427 | Chunk *i = pFirst; | ||
428 | for(;;) | ||
429 | { | ||
430 | Chunk *n = i->pNext; | ||
431 | aChr.deallocate( i->pData, i->nLength+1 ); | ||
432 | aChunk.deallocate( i, 1 ); | ||
433 | if( n == NULL ) | ||
434 | break; | ||
435 | i = n; | ||
436 | } | ||
437 | pFirst = pLast = NULL; | ||
438 | nLength = 0; | ||
439 | } | ||
440 | } | ||
441 | |||
442 | void copyFrom( const FBasicString<chr, chralloc, chunkalloc> &rSrc ) | ||
443 | { | ||
444 | if( rSrc.pFirst == NULL ) | ||
445 | return; | ||
446 | |||
447 | decRefs(); | ||
448 | |||
449 | Chunk *pNew = newChunk( rSrc.nLength ); | ||
450 | chr *pos = pNew->pData; | ||
451 | Chunk *i = rSrc.pFirst; | ||
452 | for(;;) | ||
453 | { | ||
454 | cpy( pos, i->pData, i->nLength ); | ||
455 | pos += i->nLength; | ||
456 | i = i->pNext; | ||
457 | if( i == NULL ) | ||
458 | break; | ||
459 | } | ||
460 | clear(); | ||
461 | |||
462 | appendChunk( pNew ); | ||
463 | } | ||
464 | |||
465 | bool isFlat() const | ||
466 | { | ||
467 | return (pFirst == pLast); | ||
468 | } | ||
469 | |||
470 | bool isShared() const | ||
471 | { | ||
472 | return (pnRefs != NULL); | ||
473 | } | ||
474 | |||
475 | Chunk *newChunk() const | ||
476 | { | ||
477 | Chunk *pNew = aChunk.allocate( 1 ); | ||
478 | pNew->pNext = NULL; | ||
479 | return pNew; | ||
480 | } | ||
481 | |||
482 | Chunk *newChunk( long nLen ) const | ||
483 | { | ||
484 | Chunk *pNew = aChunk.allocate( 1 ); | ||
485 | pNew->pNext = NULL; | ||
486 | pNew->nLength = nLen; | ||
487 | pNew->pData = aChr.allocate( nLen+1 ); | ||
488 | pNew->pData[nLen] = (chr)0; | ||
489 | return pNew; | ||
490 | } | ||
491 | |||
492 | void appendChunk( Chunk *pNewChunk ) | ||
493 | { | ||
494 | unShare(); | ||
495 | |||
496 | if( pFirst == NULL ) | ||
497 | pLast = pFirst = pNewChunk; | ||
498 | else | ||
499 | { | ||
500 | pLast->pNext = pNewChunk; | ||
501 | pLast = pNewChunk; | ||
502 | } | ||
503 | |||
504 | nLength += pNewChunk->nLength; | ||
505 | } | ||
506 | |||
507 | void prependChunk( Chunk *pNewChunk ) | ||
508 | { | ||
509 | unShare(); | ||
510 | |||
511 | if( pFirst == NULL ) | ||
512 | pLast = pFirst = pNewChunk; | ||
513 | else | ||
514 | { | ||
515 | pNewChunk->pNext = pFirst; | ||
516 | pFirst = pNewChunk; | ||
517 | } | ||
518 | |||
519 | nLength += pNewChunk->nLength; | ||
520 | } | ||
521 | |||
522 | void joinShare( MyType &rSrc ) | ||
523 | { | ||
524 | clear(); | ||
525 | |||
526 | if( !rSrc.isFlat() ) | ||
527 | rSrc.flatten(); | ||
528 | |||
529 | rSrc.initCount(); | ||
530 | pnRefs = rSrc.pnRefs; | ||
531 | (*pnRefs)++; | ||
532 | nLength = rSrc.nLength; | ||
533 | pFirst = rSrc.pFirst; | ||
534 | pLast = rSrc.pLast; | ||
535 | } | ||
536 | |||
537 | void joinShare( const MyType &rSrc ) | ||
538 | { | ||
539 | clear(); | ||
540 | |||
541 | rSrc.flatten(); | ||
542 | |||
543 | if( !rSrc.isShared() ) | ||
544 | { | ||
545 | rSrc.pnRefs = new uint32_t; | ||
546 | (*rSrc.pnRefs) = 1; | ||
547 | } | ||
548 | pnRefs = rSrc.pnRefs; | ||
549 | (*pnRefs)++; | ||
550 | nLength = rSrc.nLength; | ||
551 | pFirst = rSrc.pFirst; | ||
552 | pLast = rSrc.pLast; | ||
553 | } | ||
554 | |||
555 | /** | ||
556 | * This takes an object that was shared and makes a copy of the base data | ||
557 | * that was being shared so that this copy can be changed. This should be | ||
558 | * added before any call that will change this object; | ||
559 | */ | ||
560 | void unShare() const | ||
561 | { | ||
562 | if( isShared() == false ) | ||
563 | return; | ||
564 | |||
565 | Chunk *pNew = newChunk( nLength ); | ||
566 | chr *pos = pNew->pData; | ||
567 | Chunk *i = pFirst; | ||
568 | for(;;) | ||
569 | { | ||
570 | cpy( pos, i->pData, i->nLength ); | ||
571 | pos += i->nLength; | ||
572 | i = i->pNext; | ||
573 | if( i == NULL ) | ||
574 | break; | ||
575 | } | ||
576 | decRefs(); | ||
577 | pLast = pFirst = pNew; | ||
578 | nLength = pNew->nLength; | ||
579 | } | ||
580 | |||
581 | /** | ||
582 | * This decrements our ref count and pulls us out of the share. If the ref | ||
583 | * count hits zero because of this, it destroys the share. This is not | ||
584 | * safe to call on it's own, it's much better to call unShare. | ||
585 | */ | ||
586 | void decRefs() const | ||
587 | { | ||
588 | if( isShared() ) | ||
589 | { | ||
590 | (*pnRefs)--; | ||
591 | if( (*pnRefs) == 0 ) | ||
592 | destroyShare(); | ||
593 | else | ||
594 | { | ||
595 | pnRefs = NULL; | ||
596 | pFirst = NULL; | ||
597 | pLast = NULL; | ||
598 | nLength = 0; | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | |||
603 | /** | ||
604 | * While the unShare function removes an instance from a share, this | ||
605 | * function destroys the data that was in the share, removing the share | ||
606 | * itself. This should only be called when the refcount for the share has | ||
607 | * or is about to reach zero. | ||
608 | */ | ||
609 | void destroyShare() const | ||
610 | { | ||
611 | delete pnRefs; | ||
612 | pnRefs = NULL; | ||
613 | realClear(); | ||
614 | } | ||
615 | |||
616 | #ifdef VALTEST | ||
617 | void cpy( chr *dest, const chr *src, long count ) const | ||
618 | { | ||
619 | for( int j = 0; j < count; j++ ) | ||
620 | { | ||
621 | *dest = *src; | ||
622 | dest++; | ||
623 | src++; | ||
624 | } | ||
625 | } | ||
626 | #endif | ||
627 | |||
628 | void initCount() const | ||
629 | { | ||
630 | if( !isShared() ) | ||
631 | { | ||
632 | pnRefs = new uint32_t; | ||
633 | (*pnRefs) = 1; | ||
634 | } | ||
635 | } | ||
636 | |||
637 | private: | ||
638 | mutable long nLength; | ||
639 | mutable uint32_t *pnRefs; | ||
640 | mutable Chunk *pFirst; | ||
641 | mutable Chunk *pLast; | ||
642 | |||
643 | mutable chralloc aChr; | ||
644 | mutable chunkalloc aChunk; | ||
645 | }; | ||
646 | |||
647 | typedef FBasicString<char> FString; | ||
648 | |||
649 | template<> uint32_t __calcHashCode<FString>( const FString &k ); | ||
650 | template<> bool __cmpHashKeys<FString>( const FString &a, const FString &b ); | ||
651 | } | ||
652 | |||
653 | #endif | ||