aboutsummaryrefslogtreecommitdiff
path: root/src/xmlreader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/xmlreader.cpp')
-rw-r--r--src/xmlreader.cpp665
1 files changed, 500 insertions, 165 deletions
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp
index bd241cf..18df69c 100644
--- a/src/xmlreader.cpp
+++ b/src/xmlreader.cpp
@@ -1,82 +1,176 @@
1#include "xmlreader.h" 1#include "xmlreader.h"
2#include "exceptions.h"
3#include <string.h>
4#include "hashfunctionstring.h"
2 5
3Bu::XmlReader::XmlReader( Bu::Stream &sIn ) : 6XmlReader::XmlReader( bool bStrip ) :
4 sIn( sIn ) 7 bStrip( bStrip ),
8 htEntity( new HashFunctionString(), 11 )
5{ 9{
6} 10}
7 11
8Bu::XmlReader::~XmlReader() 12XmlReader::~XmlReader()
9{ 13{
14 void *i = htEntity.getFirstItemPos();
15 while( (i = htEntity.getNextItemPos( i ) ) )
16 {
17 free( (char *)(htEntity.getItemID( i )) );
18 delete (StaticString *)htEntity.getItemData( i );
19 }
10} 20}
11 21
12const char *Bu::XmlReader::lookahead( int nAmnt ) 22void XmlReader::addEntity( const char *name, const char *value )
13{ 23{
14 if( sBuf.getSize() >= nAmnt ) 24 if( htEntity[name] ) return;
15 return sBuf.getStr();
16 25
17 int nNew = nAmnt - sBuf.getSize(); 26 char *sName = strdup( name );
18 char *buf = new char[nNew]; 27 StaticString *sValue = new StaticString( value );
19 sIn.read( buf, nNew );
20 sBuf.append( buf );
21 28
22 return sBuf.getStr(); 29 htEntity.insert( sName, sValue );
23} 30}
24 31
25void Bu::XmlReader::burn( int nAmnt ) 32#define gcall( x ) if( x == false ) return false;
26{
27 if( sBuf.getSize() < nAmnt )
28 {
29 lookahead( nAmnt );
30 }
31 33
32 //sBuf.remove( nAmnt ); 34bool XmlReader::isws( char chr )
35{
36 return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' );
33} 37}
34 38
35void Bu::XmlReader::checkString( const char *str, int nLen ) 39bool XmlReader::ws()
36{ 40{
37 if( !strncmp( str, lookahead( nLen ), nLen ) ) 41 while( true )
38 { 42 {
39 burn( nLen ); 43 char chr = getChar();
40 return; 44 if( isws( chr ) )
45 {
46 usedChar();
47 }
48 else
49 {
50 return true;
51 }
41 } 52 }
42 53 return true;
43 throw Bu::ExceptionBase("Expected string '%s'", str );
44} 54}
45 55
46Bu::XmlNode *Bu::XmlReader::read() 56bool XmlReader::buildDoc()
47{ 57{
48 prolog(); 58 // take care of initial whitespace
49} 59 gcall( ws() );
60 textDecl();
61 entity();
62 addEntity("gt", ">");
63 addEntity("lt", "<");
64 addEntity("amp", "&");
65 addEntity("apos", "\'");
66 addEntity("quot", "\"");
67 gcall( node() );
50 68
51void Bu::XmlReader::prolog() 69 return true;
52{
53 XMLDecl();
54 Misc();
55} 70}
56 71
57void Bu::XmlReader::XMLDecl() 72void XmlReader::textDecl()
58{ 73{
59 checkString("<?xml", 5 ); 74 if( getChar() == '<' && getChar( 1 ) == '?' )
60 S(); 75 {
61 VersionInfo(); 76 usedChar( 2 );
62 EncodingDecl(); 77 for(;;)
63 SDDecl(); 78 {
64 Sq(); 79 if( getChar() == '?' )
65 checkString("?>", 2 ); 80 {
81 if( getChar( 1 ) == '>' )
82 {
83 usedChar( 2 );
84 return;
85 }
86 }
87 usedChar();
88 }
89 }
66} 90}
67 91
68void Bu::XmlReader::Misc() 92void XmlReader::entity()
69{ 93{
70 for(;;) 94 for(;;)
71 { 95 {
72 S(); 96 ws();
73 if( !strncmp("<!--", lookahead( 4 ), 4 ) ) 97
74 { 98 if( getChar() == '<' && getChar( 1 ) == '!' )
75 Comment();
76 }
77 else if( !strncmp("<?", lookahead( 2 ), 2 ) )
78 { 99 {
79 PI(); 100 usedChar( 2 );
101 ws();
102 std::string buf;
103 for(;;)
104 {
105 char chr = getChar();
106 usedChar();
107 if( isws( chr ) ) break;
108 buf += chr;
109 }
110
111 if( strcmp( buf.c_str(), "ENTITY") == 0 )
112 {
113 ws();
114 std::string name;
115 for(;;)
116 {
117 char chr = getChar();
118 usedChar();
119 if( isws( chr ) ) break;
120 name += chr;
121 }
122 ws();
123 char quot = getChar();
124 usedChar();
125 if( quot != '\'' && quot != '\"' )
126 {
127 throw XmlException(
128 "Only quoted entity values are supported."
129 );
130 }
131 std::string value;
132 for(;;)
133 {
134 char chr = getChar();
135 usedChar();
136 if( chr == '&' )
137 {
138 StaticString *tmp = getEscape();
139 if( tmp == NULL ) throw XmlException("Entity thing");
140 value += tmp->getString();
141 delete tmp;
142 }
143 else if( chr == quot )
144 {
145 break;
146 }
147 else
148 {
149 value += chr;
150 }
151 }
152 ws();
153 if( getChar() == '>' )
154 {
155 usedChar();
156
157 addEntity( name.c_str(), value.c_str() );
158 }
159 else
160 {
161 throw XmlException(
162 "Malformed ENTITY: unexpected '%c' found.",
163 getChar()
164 );
165 }
166 }
167 else
168 {
169 throw XmlException(
170 "Unsupported header symbol: %s",
171 buf.c_str()
172 );
173 }
80 } 174 }
81 else 175 else
82 { 176 {
@@ -85,182 +179,423 @@ void Bu::XmlReader::Misc()
85 } 179 }
86} 180}
87 181
88void Bu::XmlReader::Comment() 182bool XmlReader::node()
89{ 183{
90 checkString("<!--", 4 ); 184 gcall( startNode() )
91 for(;;) 185
186 // At this point, we are closing the startNode
187 char chr = getChar();
188 if( chr == '>' )
189 {
190 usedChar();
191
192 // Now we process the guts of the node.
193 gcall( content() );
194 }
195 else if( chr == '/' )
92 { 196 {
93 unsigned char c = *lookahead(1); 197 // This is the tricky one, one more validation, then we close the node.
94 if( c == '-' ) 198 usedChar();
199 if( getChar() == '>' )
95 { 200 {
96 if( lookahead(2)[1] == '-' ) 201 closeNode();
97 { 202 usedChar();
98 checkString("-->", 3 ); 203 }
99 return; 204 else
100 } 205 {
206 throw XmlException("Close node in singleNode malformed!");
101 } 207 }
102 burn( 1 );
103 } 208 }
209 else
210 {
211 throw XmlException("Close node expected, but not found.");
212 return false;
213 }
214
215 return true;
104} 216}
105 217
106void Bu::XmlReader::PI() 218bool XmlReader::startNode()
107{ 219{
108 checkString("<?", 2 ); 220 if( getChar() == '<' )
109 FString sName = Name();
110 printf("PI: %s\n---\n", sName.getStr() );
111 S();
112 for(int j = 0;; j++ )
113 { 221 {
114 if( !strncmp( "?>", lookahead(j+2)+j, 2 ) ) 222 usedChar();
223
224 if( getChar() == '/' )
115 { 225 {
116 burn( j+2 ); 226 // Heh, it's actually a close node, go figure
117 return; 227 FlexBuf fbName;
228 usedChar();
229 gcall( ws() );
230
231 while( true )
232 {
233 char chr = getChar();
234 if( isws( chr ) || chr == '>' )
235 {
236 // Here we actually compare the name we got to the name
237 // we already set, they have to match exactly.
238 if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) )
239 {
240 closeNode();
241 break;
242 }
243 else
244 {
245 throw XmlException("Got a mismatched node close tag.");
246 }
247 }
248 else
249 {
250 fbName.appendData( chr );
251 usedChar();
252 }
253 }
254
255 gcall( ws() );
256 if( getChar() == '>' )
257 {
258 // Everything is cool.
259 usedChar();
260 }
261 else
262 {
263 throw XmlException("Got extra junk data instead of node close tag.");
264 }
118 } 265 }
266 else
267 {
268 // We're good, format is consistant
269 addNode();
270
271 // Skip extra whitespace
272 gcall( ws() );
273 gcall( name() );
274 gcall( ws() );
275 gcall( paramlist() );
276 gcall( ws() );
277 }
278 }
279 else
280 {
281 throw XmlException("Expected to find node opening char, '<'.");
119 } 282 }
283
284 return true;
120} 285}
121 286
122void Bu::XmlReader::S() 287bool XmlReader::name()
123{ 288{
124 for( int j = 0;; j++ ) 289 FlexBuf fbName;
290
291 while( true )
125 { 292 {
126 char c = *lookahead( 1 ); 293 char chr = getChar();
127 if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ) 294 if( isws( chr ) || chr == '>' || chr == '/' )
128 continue; 295 {
129 if( j == 0 ) 296 setName( fbName.getData() );
130 throw ExceptionBase("Expected whitespace."); 297 return true;
131 return; 298 }
299 else
300 {
301 fbName.appendData( chr );
302 usedChar();
303 }
132 } 304 }
305
306 return true;
133} 307}
134 308
135void Bu::XmlReader::Sq() 309bool XmlReader::paramlist()
136{ 310{
137 for(;;) 311 while( true )
138 { 312 {
139 char c = *lookahead( 1 ); 313 char chr = getChar();
140 if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ) 314 if( chr == '/' || chr == '>' )
141 continue; 315 {
142 return; 316 return true;
317 }
318 else
319 {
320 gcall( param() );
321 gcall( ws() );
322 }
143 } 323 }
324
325 return true;
144} 326}
145 327
146void Bu::XmlReader::VersionInfo() 328StaticString *XmlReader::getEscape()
147{ 329{
148 try 330 if( getChar( 1 ) == '#' )
149 { 331 {
150 S(); 332 // If the entity starts with a # it's a character escape code
151 checkString("version", 7 ); 333 int base = 10;
334 usedChar( 2 );
335 if( getChar() == 'x' )
336 {
337 base = 16;
338 usedChar();
339 }
340 char buf[4];
341 int j = 0;
342 for( j = 0; getChar() != ';'; j++ )
343 {
344 buf[j] = getChar();
345 usedChar();
346 }
347 usedChar();
348 buf[j] = '\0';
349 buf[0] = (char)strtol( buf, (char **)NULL, base );
350 buf[1] = '\0';
351
352 return new StaticString( buf );
152 } 353 }
153 catch( ExceptionBase &e ) 354 else
154 { 355 {
155 return; 356 // ...otherwise replace with the appropriate string...
357 std::string buf;
358 usedChar();
359 for(;;)
360 {
361 char cbuf = getChar();
362 usedChar();
363 if( cbuf == ';' ) break;
364 buf += cbuf;
365 }
366
367 StaticString *tmp = (StaticString *)htEntity[buf.c_str()];
368 if( tmp == NULL ) return NULL;
369
370 StaticString *ret = new StaticString( *tmp );
371 return ret;
156 } 372 }
157 Eq();
158 Bu::FString ver = AttValue();
159 if( ver != "1.1" )
160 throw ExceptionBase("Currently we only support xml version 1.1\n");
161} 373}
162 374
163void Bu::XmlReader::Eq() 375bool XmlReader::param()
164{ 376{
165 Sq(); 377 FlexBuf fbName;
166 checkString("=", 1 ); 378 FlexBuf fbValue;
167 Sq();
168}
169 379
170void Bu::XmlReader::EncodingDecl() 380 while( true )
171{
172 S();
173 try
174 {
175 checkString("encoding", 8 );
176 }
177 catch( ExceptionBase &e )
178 { 381 {
179 return; 382 char chr = getChar();
383 if( isws( chr ) || chr == '=' )
384 {
385 break;
386 }
387 else
388 {
389 fbName.appendData( chr );
390 usedChar();
391 }
180 } 392 }
181 393
182 Eq(); 394 gcall( ws() );
183 AttValue();
184}
185 395
186void Bu::XmlReader::SDDecl() 396 if( getChar() == '=' )
187{
188 S();
189 try
190 {
191 checkString("standalone", 10 );
192 }
193 catch( ExceptionBase &e )
194 { 397 {
195 return; 398 usedChar();
196 }
197 399
198 Eq(); 400 gcall( ws() );
199 AttValue();
200}
201 401
202Bu::FString Bu::XmlReader::AttValue() 402 char chr = getChar();
203{ 403 if( chr == '"' )
204 char q = *lookahead(1);
205 if( q == '\"' )
206 {
207 for( int j = 2;; j++ )
208 { 404 {
209 if( lookahead(j)[j-1] == '\"' ) 405 // Better quoted rhs
406 usedChar();
407
408 while( true )
210 { 409 {
211 Bu::FString ret( lookahead(j)+1, j-2 ); 410 chr = getChar();
212 burn( j ); 411 if( chr == '"' )
213 return ret; 412 {
413 usedChar();
414 addProperty( fbName.getData(), fbValue.getData() );
415 return true;
416 }
417 else
418 {
419 if( chr == '&' )
420 {
421 StaticString *tmp = getEscape();
422 if( tmp == NULL ) return false;
423 fbValue.appendData( tmp->getString() );
424 delete tmp;
425 }
426 else
427 {
428 fbValue.appendData( chr );
429 usedChar();
430 }
431 }
214 } 432 }
215 } 433 }
216 } 434 else
217 else if( q == '\'' )
218 {
219 for( int j = 2;; j++ )
220 { 435 {
221 if( lookahead(j)[j-1] == '\'' ) 436 // Simple one-word rhs
437 while( true )
222 { 438 {
223 Bu::FString ret( lookahead(j)+1, j-2 ); 439 chr = getChar();
224 burn( j ); 440 if( isws( chr ) || chr == '/' || chr == '>' )
225 return ret; 441 {
442 addProperty( fbName.getData(), fbValue.getData() );
443 return true;
444 }
445 else
446 {
447 if( chr == '&' )
448 {
449 StaticString *tmp = getEscape();
450 if( tmp == NULL ) return false;
451 fbValue.appendData( tmp->getString() );
452 delete tmp;
453 }
454 else
455 {
456 fbValue.appendData( chr );
457 usedChar();
458 }
459 }
226 } 460 }
227 } 461 }
228 } 462 }
463 else
464 {
465 throw XmlException("Expected an equals to seperate the params.");
466 return false;
467 }
229 468
230 throw ExceptionBase("Excpected either \' or \".\n"); 469 return true;
231} 470}
232 471
233Bu::FString Bu::XmlReader::Name() 472bool XmlReader::content()
234{ 473{
235 unsigned char c = *lookahead( 1 ); 474 FlexBuf fbContent;
236 if( c != ':' && c != '_' &&
237 (c < 'A' || c > 'Z') &&
238 (c < 'a' || c > 'z') &&
239 (c < 0xC0 || c > 0xD6 ) &&
240 (c < 0xD8 || c > 0xF6 ) &&
241 (c < 0xF8))
242 {
243 throw ExceptionBase("Invalid entity name starting character.");
244 }
245 475
246 for( int j = 1;; j++ ) 476 if( bStrip ) gcall( ws() );
477
478 while( true )
247 { 479 {
248 unsigned char c = lookahead(j+1)[j]; 480 char chr = getChar();
249 if( isS( c ) ) 481 if( chr == '<' )
482 {
483 if( getChar(1) == '/' )
484 {
485 if( fbContent.getLength() > 0 )
486 {
487 if( bStrip )
488 {
489 int j;
490 for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- );
491 ((char *)fbContent.getData())[j+1] = '\0';
492 }
493 setContent( fbContent.getData() );
494 }
495 usedChar( 2 );
496 gcall( ws() );
497 FlexBuf fbName;
498 while( true )
499 {
500 chr = getChar();
501 if( isws( chr ) || chr == '>' )
502 {
503 if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) )
504 {
505 closeNode();
506 break;
507 }
508 else
509 {
510 throw XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName(), fbName.getData() );
511 }
512 }
513 else
514 {
515 fbName.appendData( chr );
516 usedChar();
517 }
518 }
519 gcall( ws() );
520 if( getChar() == '>' )
521 {
522 usedChar();
523 return true;
524 }
525 else
526 {
527 throw XmlException("Malformed close tag.");
528 }
529 }
530 else if( getChar(1) == '!' )
531 {
532 // We know it's a comment, let's see if it's proper
533 if( getChar(2) != '-' ||
534 getChar(3) != '-' )
535 {
536 // Not a valid XML comment
537 throw XmlException("Malformed comment start tag found.");
538 }
539
540 usedChar( 4 );
541
542 // Now burn text until we find the close tag
543 for(;;)
544 {
545 if( getChar() == '-' )
546 {
547 if( getChar( 1 ) == '-' )
548 {
549 // The next one has to be a '>' now
550 if( getChar( 2 ) != '>' )
551 {
552 throw XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment.");
553 }
554 usedChar( 3 );
555 break;
556 }
557 else
558 {
559 // Found a dash followed by a non dash, that's ok...
560 usedChar( 2 );
561 }
562 }
563 else
564 {
565 // Burn comment chars
566 usedChar();
567 }
568 }
569 }
570 else
571 {
572 if( fbContent.getLength() > 0 )
573 {
574 if( bStrip )
575 {
576 int j;
577 for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- );
578 ((char *)fbContent.getData())[j+1] = '\0';
579 }
580 setContent( fbContent.getData() );
581 fbContent.clearData();
582 }
583 gcall( node() );
584 }
585
586 if( bStrip ) gcall( ws() );
587 }
588 else if( chr == '&' )
250 { 589 {
251 FString ret( lookahead(j+1), j+1 ); 590 StaticString *tmp = getEscape();
252 burn( j+1 ); 591 if( tmp == NULL ) return false;
253 return ret; 592 fbContent.appendData( tmp->getString() );
593 delete tmp;
254 } 594 }
255 if( c != ':' && c != '_' && c != '-' && c != '.' && c != 0xB7 && 595 else
256 (c < 'A' || c > 'Z') &&
257 (c < 'a' || c > 'z') &&
258 (c < '0' || c > '9') &&
259 (c < 0xC0 || c > 0xD6 ) &&
260 (c < 0xD8 || c > 0xF6 ) &&
261 (c < 0xF8))
262 { 596 {
263 throw ExceptionBase("Invalid character in name."); 597 fbContent.appendData( chr );
598 usedChar();
264 } 599 }
265 } 600 }
266} 601}