diff options
author | Mike Buland <eichlan@xagasoft.com> | 2007-05-11 07:51:40 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2007-05-11 07:51:40 +0000 |
commit | 033c41ed57348abb3a418166b1fb39bfad3312de (patch) | |
tree | 72edbb0b7ff35ef35e4d533bca384b4f7c986942 /src/xmlreader.cpp | |
parent | ad92dc50b7cdf7cfe086f21d19442d03a90fd05d (diff) | |
download | libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.tar.gz libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.tar.bz2 libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.tar.xz libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.zip |
Added a list template class, seems to work pretty well for now, I may have
forgotten proper cleanup in the destructor, but besides that you can do
almost everything you need. I'll make a slist/stack next, probably with the
same basic code, just a different structure (not doubly-linked).
The xml system from old-libbu++ is almost completely converted, I was going to
re-write it, but this seemed easier at first, it may not have been, we'll see.
It almost parses everything again, and almost outputs again, and it does use
streams now.
The FString is partway to doing minimum chunk allocations, so that adding
single-characters will be really fast up to the minimum chunk size. I also
figured out how to add this optimization without any extra variables taking
up space, and it's optional in the template, which is cool. You can specify
the size of the blocks (default 256 bytes), if it's 0 then they'll be like the
old FString, 1 chunk per operation.
The next FString update should allow efficient removal from the beginning
of the string by faking it, and simply moving a secondary base pointer ahead,
and then optimizing appends after that fact to simply move the existing data
around if you shouldn't have to re-allocate (a la FlexBuf). The final fun
addition that I'm planning is a simple switch in the template (boolean) that
will switch an FString into a thread-safe mode without changing the interface
or anything that you can do with them at all. It may increase memory usage,
but they should still be better than std::strings, and totally thread-safe.
The best part of that is that if it's done with a boolean template parameter and
if statements that only test that parameter controlling flow, the code that you
don't want (threadsafe/non-threadsafe) won't be included at all
post-optimization.
Diffstat (limited to 'src/xmlreader.cpp')
-rw-r--r-- | src/xmlreader.cpp | 170 |
1 files changed, 86 insertions, 84 deletions
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 18df69c..38cad5f 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp | |||
@@ -1,32 +1,49 @@ | |||
1 | #include "xmlreader.h" | 1 | #include "bu/xmlreader.h" |
2 | #include "exceptions.h" | 2 | #include "bu/exceptions.h" |
3 | #include <string.h> | 3 | #include <string.h> |
4 | #include "hashfunctionstring.h" | ||
5 | 4 | ||
6 | XmlReader::XmlReader( bool bStrip ) : | 5 | XmlReader::XmlReader( Bu::Stream &sIn, bool bStrip ) : |
7 | bStrip( bStrip ), | 6 | sIn( sIn ), |
8 | htEntity( new HashFunctionString(), 11 ) | 7 | bStrip( bStrip ) |
9 | { | 8 | { |
9 | buildDoc(); | ||
10 | } | 10 | } |
11 | 11 | ||
12 | XmlReader::~XmlReader() | 12 | XmlReader::~XmlReader() |
13 | { | 13 | { |
14 | void *i = htEntity.getFirstItemPos(); | 14 | } |
15 | while( (i = htEntity.getNextItemPos( i ) ) ) | 15 | |
16 | char XmlReader::getChar( int nIndex ) | ||
17 | { | ||
18 | if( sBuf.getSize() <= nIndex ) | ||
16 | { | 19 | { |
17 | free( (char *)(htEntity.getItemID( i )) ); | 20 | int nInc = nIndex-sBuf.getSize()+1; |
18 | delete (StaticString *)htEntity.getItemData( i ); | 21 | char *buf = new char[nInc]; |
22 | sIn.read( buf, nInc ); | ||
23 | sBuf.append( buf, nInc ); | ||
24 | delete[] buf; | ||
19 | } | 25 | } |
26 | |||
27 | return sBuf[nIndex]; | ||
20 | } | 28 | } |
21 | 29 | ||
22 | void XmlReader::addEntity( const char *name, const char *value ) | 30 | void XmlReader::usedChar( int nAmnt ) |
23 | { | 31 | { |
24 | if( htEntity[name] ) return; | 32 | if( nAmnt >= sBuf.getSize() ) |
25 | 33 | { | |
26 | char *sName = strdup( name ); | 34 | sBuf.clear(); |
27 | StaticString *sValue = new StaticString( value ); | 35 | } |
36 | else | ||
37 | { | ||
38 | char *s = sBuf.getStr(); | ||
39 | memcpy( s, s+nAmnt, sBuf.getSize()-nAmnt ); | ||
40 | sBuf.resize( sBuf.getSize()-nAmnt ); | ||
41 | } | ||
42 | } | ||
28 | 43 | ||
29 | htEntity.insert( sName, sValue ); | 44 | void XmlReader::addEntity( const Bu::FString &name, const Bu::FString &value ) |
45 | { | ||
46 | htEntity[name] = value; | ||
30 | } | 47 | } |
31 | 48 | ||
32 | #define gcall( x ) if( x == false ) return false; | 49 | #define gcall( x ) if( x == false ) return false; |
@@ -99,7 +116,7 @@ void XmlReader::entity() | |||
99 | { | 116 | { |
100 | usedChar( 2 ); | 117 | usedChar( 2 ); |
101 | ws(); | 118 | ws(); |
102 | std::string buf; | 119 | Bu::FString buf; |
103 | for(;;) | 120 | for(;;) |
104 | { | 121 | { |
105 | char chr = getChar(); | 122 | char chr = getChar(); |
@@ -111,7 +128,7 @@ void XmlReader::entity() | |||
111 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | 128 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) |
112 | { | 129 | { |
113 | ws(); | 130 | ws(); |
114 | std::string name; | 131 | Bu::FString name; |
115 | for(;;) | 132 | for(;;) |
116 | { | 133 | { |
117 | char chr = getChar(); | 134 | char chr = getChar(); |
@@ -124,21 +141,19 @@ void XmlReader::entity() | |||
124 | usedChar(); | 141 | usedChar(); |
125 | if( quot != '\'' && quot != '\"' ) | 142 | if( quot != '\'' && quot != '\"' ) |
126 | { | 143 | { |
127 | throw XmlException( | 144 | throw Bu::XmlException( |
128 | "Only quoted entity values are supported." | 145 | "Only quoted entity values are supported." |
129 | ); | 146 | ); |
130 | } | 147 | } |
131 | std::string value; | 148 | Bu::FString value; |
132 | for(;;) | 149 | for(;;) |
133 | { | 150 | { |
134 | char chr = getChar(); | 151 | char chr = getChar(); |
135 | usedChar(); | 152 | usedChar(); |
136 | if( chr == '&' ) | 153 | if( chr == '&' ) |
137 | { | 154 | { |
138 | StaticString *tmp = getEscape(); | 155 | Bu::FString tmp = getEscape(); |
139 | if( tmp == NULL ) throw XmlException("Entity thing"); | 156 | value += tmp; |
140 | value += tmp->getString(); | ||
141 | delete tmp; | ||
142 | } | 157 | } |
143 | else if( chr == quot ) | 158 | else if( chr == quot ) |
144 | { | 159 | { |
@@ -158,7 +173,7 @@ void XmlReader::entity() | |||
158 | } | 173 | } |
159 | else | 174 | else |
160 | { | 175 | { |
161 | throw XmlException( | 176 | throw Bu::XmlException( |
162 | "Malformed ENTITY: unexpected '%c' found.", | 177 | "Malformed ENTITY: unexpected '%c' found.", |
163 | getChar() | 178 | getChar() |
164 | ); | 179 | ); |
@@ -166,7 +181,7 @@ void XmlReader::entity() | |||
166 | } | 181 | } |
167 | else | 182 | else |
168 | { | 183 | { |
169 | throw XmlException( | 184 | throw Bu::XmlException( |
170 | "Unsupported header symbol: %s", | 185 | "Unsupported header symbol: %s", |
171 | buf.c_str() | 186 | buf.c_str() |
172 | ); | 187 | ); |
@@ -203,12 +218,12 @@ bool XmlReader::node() | |||
203 | } | 218 | } |
204 | else | 219 | else |
205 | { | 220 | { |
206 | throw XmlException("Close node in singleNode malformed!"); | 221 | throw Bu::XmlException("Close node in singleNode malformed!"); |
207 | } | 222 | } |
208 | } | 223 | } |
209 | else | 224 | else |
210 | { | 225 | { |
211 | throw XmlException("Close node expected, but not found."); | 226 | throw Bu::XmlException("Close node expected, but not found."); |
212 | return false; | 227 | return false; |
213 | } | 228 | } |
214 | 229 | ||
@@ -224,7 +239,7 @@ bool XmlReader::startNode() | |||
224 | if( getChar() == '/' ) | 239 | if( getChar() == '/' ) |
225 | { | 240 | { |
226 | // Heh, it's actually a close node, go figure | 241 | // Heh, it's actually a close node, go figure |
227 | FlexBuf fbName; | 242 | Bu::FString sName; |
228 | usedChar(); | 243 | usedChar(); |
229 | gcall( ws() ); | 244 | gcall( ws() ); |
230 | 245 | ||
@@ -235,19 +250,19 @@ bool XmlReader::startNode() | |||
235 | { | 250 | { |
236 | // Here we actually compare the name we got to the name | 251 | // Here we actually compare the name we got to the name |
237 | // we already set, they have to match exactly. | 252 | // we already set, they have to match exactly. |
238 | if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) | 253 | if( getCurrent()->getName() == sName ) |
239 | { | 254 | { |
240 | closeNode(); | 255 | closeNode(); |
241 | break; | 256 | break; |
242 | } | 257 | } |
243 | else | 258 | else |
244 | { | 259 | { |
245 | throw XmlException("Got a mismatched node close tag."); | 260 | throw Bu::XmlException("Got a mismatched node close tag."); |
246 | } | 261 | } |
247 | } | 262 | } |
248 | else | 263 | else |
249 | { | 264 | { |
250 | fbName.appendData( chr ); | 265 | sName += chr; |
251 | usedChar(); | 266 | usedChar(); |
252 | } | 267 | } |
253 | } | 268 | } |
@@ -260,13 +275,13 @@ bool XmlReader::startNode() | |||
260 | } | 275 | } |
261 | else | 276 | else |
262 | { | 277 | { |
263 | throw XmlException("Got extra junk data instead of node close tag."); | 278 | throw Bu::XmlException("Got extra junk data instead of node close tag."); |
264 | } | 279 | } |
265 | } | 280 | } |
266 | else | 281 | else |
267 | { | 282 | { |
268 | // We're good, format is consistant | 283 | // We're good, format is consistant |
269 | addNode(); | 284 | //addNode(); |
270 | 285 | ||
271 | // Skip extra whitespace | 286 | // Skip extra whitespace |
272 | gcall( ws() ); | 287 | gcall( ws() ); |
@@ -278,7 +293,7 @@ bool XmlReader::startNode() | |||
278 | } | 293 | } |
279 | else | 294 | else |
280 | { | 295 | { |
281 | throw XmlException("Expected to find node opening char, '<'."); | 296 | throw Bu::XmlException("Expected to find node opening char, '<'."); |
282 | } | 297 | } |
283 | 298 | ||
284 | return true; | 299 | return true; |
@@ -286,19 +301,19 @@ bool XmlReader::startNode() | |||
286 | 301 | ||
287 | bool XmlReader::name() | 302 | bool XmlReader::name() |
288 | { | 303 | { |
289 | FlexBuf fbName; | 304 | Bu::FString sName; |
290 | 305 | ||
291 | while( true ) | 306 | while( true ) |
292 | { | 307 | { |
293 | char chr = getChar(); | 308 | char chr = getChar(); |
294 | if( isws( chr ) || chr == '>' || chr == '/' ) | 309 | if( isws( chr ) || chr == '>' || chr == '/' ) |
295 | { | 310 | { |
296 | setName( fbName.getData() ); | 311 | addNode( sName ); |
297 | return true; | 312 | return true; |
298 | } | 313 | } |
299 | else | 314 | else |
300 | { | 315 | { |
301 | fbName.appendData( chr ); | 316 | sName += chr; |
302 | usedChar(); | 317 | usedChar(); |
303 | } | 318 | } |
304 | } | 319 | } |
@@ -325,7 +340,7 @@ bool XmlReader::paramlist() | |||
325 | return true; | 340 | return true; |
326 | } | 341 | } |
327 | 342 | ||
328 | StaticString *XmlReader::getEscape() | 343 | Bu::FString XmlReader::getEscape() |
329 | { | 344 | { |
330 | if( getChar( 1 ) == '#' ) | 345 | if( getChar( 1 ) == '#' ) |
331 | { | 346 | { |
@@ -349,12 +364,12 @@ StaticString *XmlReader::getEscape() | |||
349 | buf[0] = (char)strtol( buf, (char **)NULL, base ); | 364 | buf[0] = (char)strtol( buf, (char **)NULL, base ); |
350 | buf[1] = '\0'; | 365 | buf[1] = '\0'; |
351 | 366 | ||
352 | return new StaticString( buf ); | 367 | return buf; |
353 | } | 368 | } |
354 | else | 369 | else |
355 | { | 370 | { |
356 | // ...otherwise replace with the appropriate string... | 371 | // ...otherwise replace with the appropriate string... |
357 | std::string buf; | 372 | Bu::FString buf; |
358 | usedChar(); | 373 | usedChar(); |
359 | for(;;) | 374 | for(;;) |
360 | { | 375 | { |
@@ -364,18 +379,14 @@ StaticString *XmlReader::getEscape() | |||
364 | buf += cbuf; | 379 | buf += cbuf; |
365 | } | 380 | } |
366 | 381 | ||
367 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | 382 | return htEntity[buf]; |
368 | if( tmp == NULL ) return NULL; | ||
369 | |||
370 | StaticString *ret = new StaticString( *tmp ); | ||
371 | return ret; | ||
372 | } | 383 | } |
373 | } | 384 | } |
374 | 385 | ||
375 | bool XmlReader::param() | 386 | bool XmlReader::param() |
376 | { | 387 | { |
377 | FlexBuf fbName; | 388 | Bu::FString sName; |
378 | FlexBuf fbValue; | 389 | Bu::FString sValue; |
379 | 390 | ||
380 | while( true ) | 391 | while( true ) |
381 | { | 392 | { |
@@ -386,7 +397,7 @@ bool XmlReader::param() | |||
386 | } | 397 | } |
387 | else | 398 | else |
388 | { | 399 | { |
389 | fbName.appendData( chr ); | 400 | sName.append( chr ); |
390 | usedChar(); | 401 | usedChar(); |
391 | } | 402 | } |
392 | } | 403 | } |
@@ -411,21 +422,18 @@ bool XmlReader::param() | |||
411 | if( chr == '"' ) | 422 | if( chr == '"' ) |
412 | { | 423 | { |
413 | usedChar(); | 424 | usedChar(); |
414 | addProperty( fbName.getData(), fbValue.getData() ); | 425 | addProperty( sName.getStr(), sValue.getStr() ); |
415 | return true; | 426 | return true; |
416 | } | 427 | } |
417 | else | 428 | else |
418 | { | 429 | { |
419 | if( chr == '&' ) | 430 | if( chr == '&' ) |
420 | { | 431 | { |
421 | StaticString *tmp = getEscape(); | 432 | sValue += getEscape(); |
422 | if( tmp == NULL ) return false; | ||
423 | fbValue.appendData( tmp->getString() ); | ||
424 | delete tmp; | ||
425 | } | 433 | } |
426 | else | 434 | else |
427 | { | 435 | { |
428 | fbValue.appendData( chr ); | 436 | sValue += chr; |
429 | usedChar(); | 437 | usedChar(); |
430 | } | 438 | } |
431 | } | 439 | } |
@@ -439,21 +447,18 @@ bool XmlReader::param() | |||
439 | chr = getChar(); | 447 | chr = getChar(); |
440 | if( isws( chr ) || chr == '/' || chr == '>' ) | 448 | if( isws( chr ) || chr == '/' || chr == '>' ) |
441 | { | 449 | { |
442 | addProperty( fbName.getData(), fbValue.getData() ); | 450 | addProperty( sName.getStr(), sValue.getStr() ); |
443 | return true; | 451 | return true; |
444 | } | 452 | } |
445 | else | 453 | else |
446 | { | 454 | { |
447 | if( chr == '&' ) | 455 | if( chr == '&' ) |
448 | { | 456 | { |
449 | StaticString *tmp = getEscape(); | 457 | sValue += getEscape(); |
450 | if( tmp == NULL ) return false; | ||
451 | fbValue.appendData( tmp->getString() ); | ||
452 | delete tmp; | ||
453 | } | 458 | } |
454 | else | 459 | else |
455 | { | 460 | { |
456 | fbValue.appendData( chr ); | 461 | sValue += chr; |
457 | usedChar(); | 462 | usedChar(); |
458 | } | 463 | } |
459 | } | 464 | } |
@@ -462,7 +467,7 @@ bool XmlReader::param() | |||
462 | } | 467 | } |
463 | else | 468 | else |
464 | { | 469 | { |
465 | throw XmlException("Expected an equals to seperate the params."); | 470 | throw Bu::XmlException("Expected an equals to seperate the params."); |
466 | return false; | 471 | return false; |
467 | } | 472 | } |
468 | 473 | ||
@@ -471,7 +476,7 @@ bool XmlReader::param() | |||
471 | 476 | ||
472 | bool XmlReader::content() | 477 | bool XmlReader::content() |
473 | { | 478 | { |
474 | FlexBuf fbContent; | 479 | Bu::FString sContent; |
475 | 480 | ||
476 | if( bStrip ) gcall( ws() ); | 481 | if( bStrip ) gcall( ws() ); |
477 | 482 | ||
@@ -482,37 +487,37 @@ bool XmlReader::content() | |||
482 | { | 487 | { |
483 | if( getChar(1) == '/' ) | 488 | if( getChar(1) == '/' ) |
484 | { | 489 | { |
485 | if( fbContent.getLength() > 0 ) | 490 | if( sContent.getSize() > 0 ) |
486 | { | 491 | { |
487 | if( bStrip ) | 492 | if( bStrip ) |
488 | { | 493 | { |
489 | int j; | 494 | int j; |
490 | for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); | 495 | for( j = sContent.getSize()-1; isws(sContent[j]); j-- ); |
491 | ((char *)fbContent.getData())[j+1] = '\0'; | 496 | sContent[j+1] = '\0'; |
492 | } | 497 | } |
493 | setContent( fbContent.getData() ); | 498 | setContent( sContent.getStr() ); |
494 | } | 499 | } |
495 | usedChar( 2 ); | 500 | usedChar( 2 ); |
496 | gcall( ws() ); | 501 | gcall( ws() ); |
497 | FlexBuf fbName; | 502 | Bu::FString sName; |
498 | while( true ) | 503 | while( true ) |
499 | { | 504 | { |
500 | chr = getChar(); | 505 | chr = getChar(); |
501 | if( isws( chr ) || chr == '>' ) | 506 | if( isws( chr ) || chr == '>' ) |
502 | { | 507 | { |
503 | if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) | 508 | if( !strcasecmp( getCurrent()->getName().getStr(), sName.getStr() ) ) |
504 | { | 509 | { |
505 | closeNode(); | 510 | closeNode(); |
506 | break; | 511 | break; |
507 | } | 512 | } |
508 | else | 513 | else |
509 | { | 514 | { |
510 | throw XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName(), fbName.getData() ); | 515 | throw Bu::XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName().getStr(), sName.getStr() ); |
511 | } | 516 | } |
512 | } | 517 | } |
513 | else | 518 | else |
514 | { | 519 | { |
515 | fbName.appendData( chr ); | 520 | sName += chr; |
516 | usedChar(); | 521 | usedChar(); |
517 | } | 522 | } |
518 | } | 523 | } |
@@ -524,7 +529,7 @@ bool XmlReader::content() | |||
524 | } | 529 | } |
525 | else | 530 | else |
526 | { | 531 | { |
527 | throw XmlException("Malformed close tag."); | 532 | throw Bu::XmlException("Malformed close tag."); |
528 | } | 533 | } |
529 | } | 534 | } |
530 | else if( getChar(1) == '!' ) | 535 | else if( getChar(1) == '!' ) |
@@ -534,7 +539,7 @@ bool XmlReader::content() | |||
534 | getChar(3) != '-' ) | 539 | getChar(3) != '-' ) |
535 | { | 540 | { |
536 | // Not a valid XML comment | 541 | // Not a valid XML comment |
537 | throw XmlException("Malformed comment start tag found."); | 542 | throw Bu::XmlException("Malformed comment start tag found."); |
538 | } | 543 | } |
539 | 544 | ||
540 | usedChar( 4 ); | 545 | usedChar( 4 ); |
@@ -549,7 +554,7 @@ bool XmlReader::content() | |||
549 | // The next one has to be a '>' now | 554 | // The next one has to be a '>' now |
550 | if( getChar( 2 ) != '>' ) | 555 | if( getChar( 2 ) != '>' ) |
551 | { | 556 | { |
552 | throw XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment."); | 557 | throw Bu::XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment."); |
553 | } | 558 | } |
554 | usedChar( 3 ); | 559 | usedChar( 3 ); |
555 | break; | 560 | break; |
@@ -569,16 +574,16 @@ bool XmlReader::content() | |||
569 | } | 574 | } |
570 | else | 575 | else |
571 | { | 576 | { |
572 | if( fbContent.getLength() > 0 ) | 577 | if( sContent.getSize() > 0 ) |
573 | { | 578 | { |
574 | if( bStrip ) | 579 | if( bStrip ) |
575 | { | 580 | { |
576 | int j; | 581 | int j; |
577 | for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); | 582 | for( j = sContent.getSize()-1; isws(sContent[j]); j-- ); |
578 | ((char *)fbContent.getData())[j+1] = '\0'; | 583 | sContent[j+1] = '\0'; |
579 | } | 584 | } |
580 | setContent( fbContent.getData() ); | 585 | setContent( sContent.getStr() ); |
581 | fbContent.clearData(); | 586 | sContent.clear(); |
582 | } | 587 | } |
583 | gcall( node() ); | 588 | gcall( node() ); |
584 | } | 589 | } |
@@ -587,14 +592,11 @@ bool XmlReader::content() | |||
587 | } | 592 | } |
588 | else if( chr == '&' ) | 593 | else if( chr == '&' ) |
589 | { | 594 | { |
590 | StaticString *tmp = getEscape(); | 595 | sContent += getEscape(); |
591 | if( tmp == NULL ) return false; | ||
592 | fbContent.appendData( tmp->getString() ); | ||
593 | delete tmp; | ||
594 | } | 596 | } |
595 | else | 597 | else |
596 | { | 598 | { |
597 | fbContent.appendData( chr ); | 599 | sContent += chr; |
598 | usedChar(); | 600 | usedChar(); |
599 | } | 601 | } |
600 | } | 602 | } |