summaryrefslogtreecommitdiff
path: root/src/xmlreader.cpp
diff options
context:
space:
mode:
author: Mike Buland <eichlan@xagasoft.com> 2007-05-11 07:51:40 +0000
committer: Mike Buland <eichlan@xagasoft.com> 2007-05-11 07:51:40 +0000
commit: 033c41ed57348abb3a418166b1fb39bfad3312de (patch)
tree: 72edbb0b7ff35ef35e4d533bca384b4f7c986942 /src/xmlreader.cpp
parent: ad92dc50b7cdf7cfe086f21d19442d03a90fd05d (diff)
download: libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.tar.gz
libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.tar.bz2
libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.tar.xz
libbu++-033c41ed57348abb3a418166b1fb39bfad3312de.zip
Added a list template class; it seems to work pretty well for now. I may have
forgotten proper cleanup in the destructor, but besides that you can do almost everything you need. I'll make an slist/stack next, probably with the same basic code, just a different structure (not doubly-linked). The xml system from old-libbu++ is almost completely converted. I was going to rewrite it, but this seemed easier at first; it may not have been, we'll see. It almost parses everything again, and almost outputs again, and it does use streams now. The FString is partway to doing minimum chunk allocations, so that adding single characters will be really fast up to the minimum chunk size. I also figured out how to add this optimization without any extra variables taking up space, and it's optional in the template, which is cool. You can specify the size of the blocks (default 256 bytes); if it's 0 then they'll be like the old FString, 1 chunk per operation. The next FString update should allow efficient removal from the beginning of the string by faking it, simply moving a secondary base pointer ahead, and then optimizing appends after that to simply move the existing data around when you shouldn't have to re-allocate (à la FlexBuf). The final fun addition that I'm planning is a simple switch in the template (boolean) that will switch an FString into a thread-safe mode without changing the interface or anything that you can do with them at all. It may increase memory usage, but they should still be better than std::strings, and totally thread-safe. The best part of that is that if it's done with a boolean template parameter and if statements that only test that parameter controlling flow, the code that you don't want (threadsafe/non-threadsafe) won't be included at all post-optimization.
Diffstat (limited to 'src/xmlreader.cpp')
-rw-r--r-- src/xmlreader.cpp 170
1 files changed, 86 insertions, 84 deletions
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp
index 18df69c..38cad5f 100644
--- a/src/xmlreader.cpp
+++ b/src/xmlreader.cpp
@@ -1,32 +1,49 @@
1#include "xmlreader.h" 1#include "bu/xmlreader.h"
2#include "exceptions.h" 2#include "bu/exceptions.h"
3#include <string.h> 3#include <string.h>
4#include "hashfunctionstring.h"
5 4
6XmlReader::XmlReader( bool bStrip ) : 5XmlReader::XmlReader( Bu::Stream &sIn, bool bStrip ) :
7 bStrip( bStrip ), 6 sIn( sIn ),
8 htEntity( new HashFunctionString(), 11 ) 7 bStrip( bStrip )
9{ 8{
9 buildDoc();
10} 10}
11 11
12XmlReader::~XmlReader() 12XmlReader::~XmlReader()
13{ 13{
14 void *i = htEntity.getFirstItemPos(); 14}
15 while( (i = htEntity.getNextItemPos( i ) ) ) 15
16char XmlReader::getChar( int nIndex )
17{
18 if( sBuf.getSize() <= nIndex )
16 { 19 {
17 free( (char *)(htEntity.getItemID( i )) ); 20 int nInc = nIndex-sBuf.getSize()+1;
18 delete (StaticString *)htEntity.getItemData( i ); 21 char *buf = new char[nInc];
22 sIn.read( buf, nInc );
23 sBuf.append( buf, nInc );
24 delete[] buf;
19 } 25 }
26
27 return sBuf[nIndex];
20} 28}
21 29
22void XmlReader::addEntity( const char *name, const char *value ) 30void XmlReader::usedChar( int nAmnt )
23{ 31{
24 if( htEntity[name] ) return; 32 if( nAmnt >= sBuf.getSize() )
25 33 {
26 char *sName = strdup( name ); 34 sBuf.clear();
27 StaticString *sValue = new StaticString( value ); 35 }
36 else
37 {
38 char *s = sBuf.getStr();
39 memcpy( s, s+nAmnt, sBuf.getSize()-nAmnt );
40 sBuf.resize( sBuf.getSize()-nAmnt );
41 }
42}
28 43
29 htEntity.insert( sName, sValue ); 44void XmlReader::addEntity( const Bu::FString &name, const Bu::FString &value )
45{
46 htEntity[name] = value;
30} 47}
31 48
32#define gcall( x ) if( x == false ) return false; 49#define gcall( x ) if( x == false ) return false;
@@ -99,7 +116,7 @@ void XmlReader::entity()
99 { 116 {
100 usedChar( 2 ); 117 usedChar( 2 );
101 ws(); 118 ws();
102 std::string buf; 119 Bu::FString buf;
103 for(;;) 120 for(;;)
104 { 121 {
105 char chr = getChar(); 122 char chr = getChar();
@@ -111,7 +128,7 @@ void XmlReader::entity()
111 if( strcmp( buf.c_str(), "ENTITY") == 0 ) 128 if( strcmp( buf.c_str(), "ENTITY") == 0 )
112 { 129 {
113 ws(); 130 ws();
114 std::string name; 131 Bu::FString name;
115 for(;;) 132 for(;;)
116 { 133 {
117 char chr = getChar(); 134 char chr = getChar();
@@ -124,21 +141,19 @@ void XmlReader::entity()
124 usedChar(); 141 usedChar();
125 if( quot != '\'' && quot != '\"' ) 142 if( quot != '\'' && quot != '\"' )
126 { 143 {
127 throw XmlException( 144 throw Bu::XmlException(
128 "Only quoted entity values are supported." 145 "Only quoted entity values are supported."
129 ); 146 );
130 } 147 }
131 std::string value; 148 Bu::FString value;
132 for(;;) 149 for(;;)
133 { 150 {
134 char chr = getChar(); 151 char chr = getChar();
135 usedChar(); 152 usedChar();
136 if( chr == '&' ) 153 if( chr == '&' )
137 { 154 {
138 StaticString *tmp = getEscape(); 155 Bu::FString tmp = getEscape();
139 if( tmp == NULL ) throw XmlException("Entity thing"); 156 value += tmp;
140 value += tmp->getString();
141 delete tmp;
142 } 157 }
143 else if( chr == quot ) 158 else if( chr == quot )
144 { 159 {
@@ -158,7 +173,7 @@ void XmlReader::entity()
158 } 173 }
159 else 174 else
160 { 175 {
161 throw XmlException( 176 throw Bu::XmlException(
162 "Malformed ENTITY: unexpected '%c' found.", 177 "Malformed ENTITY: unexpected '%c' found.",
163 getChar() 178 getChar()
164 ); 179 );
@@ -166,7 +181,7 @@ void XmlReader::entity()
166 } 181 }
167 else 182 else
168 { 183 {
169 throw XmlException( 184 throw Bu::XmlException(
170 "Unsupported header symbol: %s", 185 "Unsupported header symbol: %s",
171 buf.c_str() 186 buf.c_str()
172 ); 187 );
@@ -203,12 +218,12 @@ bool XmlReader::node()
203 } 218 }
204 else 219 else
205 { 220 {
206 throw XmlException("Close node in singleNode malformed!"); 221 throw Bu::XmlException("Close node in singleNode malformed!");
207 } 222 }
208 } 223 }
209 else 224 else
210 { 225 {
211 throw XmlException("Close node expected, but not found."); 226 throw Bu::XmlException("Close node expected, but not found.");
212 return false; 227 return false;
213 } 228 }
214 229
@@ -224,7 +239,7 @@ bool XmlReader::startNode()
224 if( getChar() == '/' ) 239 if( getChar() == '/' )
225 { 240 {
226 // Heh, it's actually a close node, go figure 241 // Heh, it's actually a close node, go figure
227 FlexBuf fbName; 242 Bu::FString sName;
228 usedChar(); 243 usedChar();
229 gcall( ws() ); 244 gcall( ws() );
230 245
@@ -235,19 +250,19 @@ bool XmlReader::startNode()
235 { 250 {
236 // Here we actually compare the name we got to the name 251 // Here we actually compare the name we got to the name
237 // we already set, they have to match exactly. 252 // we already set, they have to match exactly.
238 if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) 253 if( getCurrent()->getName() == sName )
239 { 254 {
240 closeNode(); 255 closeNode();
241 break; 256 break;
242 } 257 }
243 else 258 else
244 { 259 {
245 throw XmlException("Got a mismatched node close tag."); 260 throw Bu::XmlException("Got a mismatched node close tag.");
246 } 261 }
247 } 262 }
248 else 263 else
249 { 264 {
250 fbName.appendData( chr ); 265 sName += chr;
251 usedChar(); 266 usedChar();
252 } 267 }
253 } 268 }
@@ -260,13 +275,13 @@ bool XmlReader::startNode()
260 } 275 }
261 else 276 else
262 { 277 {
263 throw XmlException("Got extra junk data instead of node close tag."); 278 throw Bu::XmlException("Got extra junk data instead of node close tag.");
264 } 279 }
265 } 280 }
266 else 281 else
267 { 282 {
268 // We're good, format is consistant 283 // We're good, format is consistant
269 addNode(); 284 //addNode();
270 285
271 // Skip extra whitespace 286 // Skip extra whitespace
272 gcall( ws() ); 287 gcall( ws() );
@@ -278,7 +293,7 @@ bool XmlReader::startNode()
278 } 293 }
279 else 294 else
280 { 295 {
281 throw XmlException("Expected to find node opening char, '<'."); 296 throw Bu::XmlException("Expected to find node opening char, '<'.");
282 } 297 }
283 298
284 return true; 299 return true;
@@ -286,19 +301,19 @@ bool XmlReader::startNode()
286 301
287bool XmlReader::name() 302bool XmlReader::name()
288{ 303{
289 FlexBuf fbName; 304 Bu::FString sName;
290 305
291 while( true ) 306 while( true )
292 { 307 {
293 char chr = getChar(); 308 char chr = getChar();
294 if( isws( chr ) || chr == '>' || chr == '/' ) 309 if( isws( chr ) || chr == '>' || chr == '/' )
295 { 310 {
296 setName( fbName.getData() ); 311 addNode( sName );
297 return true; 312 return true;
298 } 313 }
299 else 314 else
300 { 315 {
301 fbName.appendData( chr ); 316 sName += chr;
302 usedChar(); 317 usedChar();
303 } 318 }
304 } 319 }
@@ -325,7 +340,7 @@ bool XmlReader::paramlist()
325 return true; 340 return true;
326} 341}
327 342
328StaticString *XmlReader::getEscape() 343Bu::FString XmlReader::getEscape()
329{ 344{
330 if( getChar( 1 ) == '#' ) 345 if( getChar( 1 ) == '#' )
331 { 346 {
@@ -349,12 +364,12 @@ StaticString *XmlReader::getEscape()
349 buf[0] = (char)strtol( buf, (char **)NULL, base ); 364 buf[0] = (char)strtol( buf, (char **)NULL, base );
350 buf[1] = '\0'; 365 buf[1] = '\0';
351 366
352 return new StaticString( buf ); 367 return buf;
353 } 368 }
354 else 369 else
355 { 370 {
356 // ...otherwise replace with the appropriate string... 371 // ...otherwise replace with the appropriate string...
357 std::string buf; 372 Bu::FString buf;
358 usedChar(); 373 usedChar();
359 for(;;) 374 for(;;)
360 { 375 {
@@ -364,18 +379,14 @@ StaticString *XmlReader::getEscape()
364 buf += cbuf; 379 buf += cbuf;
365 } 380 }
366 381
367 StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; 382 return htEntity[buf];
368 if( tmp == NULL ) return NULL;
369
370 StaticString *ret = new StaticString( *tmp );
371 return ret;
372 } 383 }
373} 384}
374 385
375bool XmlReader::param() 386bool XmlReader::param()
376{ 387{
377 FlexBuf fbName; 388 Bu::FString sName;
378 FlexBuf fbValue; 389 Bu::FString sValue;
379 390
380 while( true ) 391 while( true )
381 { 392 {
@@ -386,7 +397,7 @@ bool XmlReader::param()
386 } 397 }
387 else 398 else
388 { 399 {
389 fbName.appendData( chr ); 400 sName.append( chr );
390 usedChar(); 401 usedChar();
391 } 402 }
392 } 403 }
@@ -411,21 +422,18 @@ bool XmlReader::param()
411 if( chr == '"' ) 422 if( chr == '"' )
412 { 423 {
413 usedChar(); 424 usedChar();
414 addProperty( fbName.getData(), fbValue.getData() ); 425 addProperty( sName.getStr(), sValue.getStr() );
415 return true; 426 return true;
416 } 427 }
417 else 428 else
418 { 429 {
419 if( chr == '&' ) 430 if( chr == '&' )
420 { 431 {
421 StaticString *tmp = getEscape(); 432 sValue += getEscape();
422 if( tmp == NULL ) return false;
423 fbValue.appendData( tmp->getString() );
424 delete tmp;
425 } 433 }
426 else 434 else
427 { 435 {
428 fbValue.appendData( chr ); 436 sValue += chr;
429 usedChar(); 437 usedChar();
430 } 438 }
431 } 439 }
@@ -439,21 +447,18 @@ bool XmlReader::param()
439 chr = getChar(); 447 chr = getChar();
440 if( isws( chr ) || chr == '/' || chr == '>' ) 448 if( isws( chr ) || chr == '/' || chr == '>' )
441 { 449 {
442 addProperty( fbName.getData(), fbValue.getData() ); 450 addProperty( sName.getStr(), sValue.getStr() );
443 return true; 451 return true;
444 } 452 }
445 else 453 else
446 { 454 {
447 if( chr == '&' ) 455 if( chr == '&' )
448 { 456 {
449 StaticString *tmp = getEscape(); 457 sValue += getEscape();
450 if( tmp == NULL ) return false;
451 fbValue.appendData( tmp->getString() );
452 delete tmp;
453 } 458 }
454 else 459 else
455 { 460 {
456 fbValue.appendData( chr ); 461 sValue += chr;
457 usedChar(); 462 usedChar();
458 } 463 }
459 } 464 }
@@ -462,7 +467,7 @@ bool XmlReader::param()
462 } 467 }
463 else 468 else
464 { 469 {
465 throw XmlException("Expected an equals to seperate the params."); 470 throw Bu::XmlException("Expected an equals to seperate the params.");
466 return false; 471 return false;
467 } 472 }
468 473
@@ -471,7 +476,7 @@ bool XmlReader::param()
471 476
472bool XmlReader::content() 477bool XmlReader::content()
473{ 478{
474 FlexBuf fbContent; 479 Bu::FString sContent;
475 480
476 if( bStrip ) gcall( ws() ); 481 if( bStrip ) gcall( ws() );
477 482
@@ -482,37 +487,37 @@ bool XmlReader::content()
482 { 487 {
483 if( getChar(1) == '/' ) 488 if( getChar(1) == '/' )
484 { 489 {
485 if( fbContent.getLength() > 0 ) 490 if( sContent.getSize() > 0 )
486 { 491 {
487 if( bStrip ) 492 if( bStrip )
488 { 493 {
489 int j; 494 int j;
490 for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); 495 for( j = sContent.getSize()-1; isws(sContent[j]); j-- );
491 ((char *)fbContent.getData())[j+1] = '\0'; 496 sContent[j+1] = '\0';
492 } 497 }
493 setContent( fbContent.getData() ); 498 setContent( sContent.getStr() );
494 } 499 }
495 usedChar( 2 ); 500 usedChar( 2 );
496 gcall( ws() ); 501 gcall( ws() );
497 FlexBuf fbName; 502 Bu::FString sName;
498 while( true ) 503 while( true )
499 { 504 {
500 chr = getChar(); 505 chr = getChar();
501 if( isws( chr ) || chr == '>' ) 506 if( isws( chr ) || chr == '>' )
502 { 507 {
503 if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) 508 if( !strcasecmp( getCurrent()->getName().getStr(), sName.getStr() ) )
504 { 509 {
505 closeNode(); 510 closeNode();
506 break; 511 break;
507 } 512 }
508 else 513 else
509 { 514 {
510 throw XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName(), fbName.getData() ); 515 throw Bu::XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName().getStr(), sName.getStr() );
511 } 516 }
512 } 517 }
513 else 518 else
514 { 519 {
515 fbName.appendData( chr ); 520 sName += chr;
516 usedChar(); 521 usedChar();
517 } 522 }
518 } 523 }
@@ -524,7 +529,7 @@ bool XmlReader::content()
524 } 529 }
525 else 530 else
526 { 531 {
527 throw XmlException("Malformed close tag."); 532 throw Bu::XmlException("Malformed close tag.");
528 } 533 }
529 } 534 }
530 else if( getChar(1) == '!' ) 535 else if( getChar(1) == '!' )
@@ -534,7 +539,7 @@ bool XmlReader::content()
534 getChar(3) != '-' ) 539 getChar(3) != '-' )
535 { 540 {
536 // Not a valid XML comment 541 // Not a valid XML comment
537 throw XmlException("Malformed comment start tag found."); 542 throw Bu::XmlException("Malformed comment start tag found.");
538 } 543 }
539 544
540 usedChar( 4 ); 545 usedChar( 4 );
@@ -549,7 +554,7 @@ bool XmlReader::content()
549 // The next one has to be a '>' now 554 // The next one has to be a '>' now
550 if( getChar( 2 ) != '>' ) 555 if( getChar( 2 ) != '>' )
551 { 556 {
552 throw XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment."); 557 throw Bu::XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment.");
553 } 558 }
554 usedChar( 3 ); 559 usedChar( 3 );
555 break; 560 break;
@@ -569,16 +574,16 @@ bool XmlReader::content()
569 } 574 }
570 else 575 else
571 { 576 {
572 if( fbContent.getLength() > 0 ) 577 if( sContent.getSize() > 0 )
573 { 578 {
574 if( bStrip ) 579 if( bStrip )
575 { 580 {
576 int j; 581 int j;
577 for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); 582 for( j = sContent.getSize()-1; isws(sContent[j]); j-- );
578 ((char *)fbContent.getData())[j+1] = '\0'; 583 sContent[j+1] = '\0';
579 } 584 }
580 setContent( fbContent.getData() ); 585 setContent( sContent.getStr() );
581 fbContent.clearData(); 586 sContent.clear();
582 } 587 }
583 gcall( node() ); 588 gcall( node() );
584 } 589 }
@@ -587,14 +592,11 @@ bool XmlReader::content()
587 } 592 }
588 else if( chr == '&' ) 593 else if( chr == '&' )
589 { 594 {
590 StaticString *tmp = getEscape(); 595 sContent += getEscape();
591 if( tmp == NULL ) return false;
592 fbContent.appendData( tmp->getString() );
593 delete tmp;
594 } 596 }
595 else 597 else
596 { 598 {
597 fbContent.appendData( chr ); 599 sContent += chr;
598 usedChar(); 600 usedChar();
599 } 601 }
600 } 602 }