aboutsummaryrefslogtreecommitdiff
path: root/src/old/xmlreader.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/old/xmlreader.cpp')
-rw-r--r--src/old/xmlreader.cpp604
1 files changed, 604 insertions, 0 deletions
diff --git a/src/old/xmlreader.cpp b/src/old/xmlreader.cpp
new file mode 100644
index 0000000..38cad5f
--- /dev/null
+++ b/src/old/xmlreader.cpp
@@ -0,0 +1,604 @@
1#include "bu/xmlreader.h"
2#include "bu/exceptions.h"
3#include <string.h>
4
5XmlReader::XmlReader( Bu::Stream &sIn, bool bStrip ) :
6 sIn( sIn ),
7 bStrip( bStrip )
8{
9 buildDoc();
10}
11
12XmlReader::~XmlReader()
13{
14}
15
16char XmlReader::getChar( int nIndex )
17{
18 if( sBuf.getSize() <= nIndex )
19 {
20 int nInc = nIndex-sBuf.getSize()+1;
21 char *buf = new char[nInc];
22 sIn.read( buf, nInc );
23 sBuf.append( buf, nInc );
24 delete[] buf;
25 }
26
27 return sBuf[nIndex];
28}
29
30void XmlReader::usedChar( int nAmnt )
31{
32 if( nAmnt >= sBuf.getSize() )
33 {
34 sBuf.clear();
35 }
36 else
37 {
38 char *s = sBuf.getStr();
39 memcpy( s, s+nAmnt, sBuf.getSize()-nAmnt );
40 sBuf.resize( sBuf.getSize()-nAmnt );
41 }
42}
43
44void XmlReader::addEntity( const Bu::FString &name, const Bu::FString &value )
45{
46 htEntity[name] = value;
47}
48
// Propagates a failed parse step: returns false from the enclosing function
// when x evaluates to false. The argument is parenthesised so compound
// expressions (e.g. `a || b`) are tested as a whole. The trailing ';' is part
// of the expansion because at least one call site invokes the macro without
// its own semicolon.
#define gcall( x ) if( !(x) ) return false;
50
51bool XmlReader::isws( char chr )
52{
53 return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' );
54}
55
56bool XmlReader::ws()
57{
58 while( true )
59 {
60 char chr = getChar();
61 if( isws( chr ) )
62 {
63 usedChar();
64 }
65 else
66 {
67 return true;
68 }
69 }
70 return true;
71}
72
73bool XmlReader::buildDoc()
74{
75 // take care of initial whitespace
76 gcall( ws() );
77 textDecl();
78 entity();
79 addEntity("gt", ">");
80 addEntity("lt", "<");
81 addEntity("amp", "&");
82 addEntity("apos", "\'");
83 addEntity("quot", "\"");
84 gcall( node() );
85
86 return true;
87}
88
89void XmlReader::textDecl()
90{
91 if( getChar() == '<' && getChar( 1 ) == '?' )
92 {
93 usedChar( 2 );
94 for(;;)
95 {
96 if( getChar() == '?' )
97 {
98 if( getChar( 1 ) == '>' )
99 {
100 usedChar( 2 );
101 return;
102 }
103 }
104 usedChar();
105 }
106 }
107}
108
109void XmlReader::entity()
110{
111 for(;;)
112 {
113 ws();
114
115 if( getChar() == '<' && getChar( 1 ) == '!' )
116 {
117 usedChar( 2 );
118 ws();
119 Bu::FString buf;
120 for(;;)
121 {
122 char chr = getChar();
123 usedChar();
124 if( isws( chr ) ) break;
125 buf += chr;
126 }
127
128 if( strcmp( buf.c_str(), "ENTITY") == 0 )
129 {
130 ws();
131 Bu::FString name;
132 for(;;)
133 {
134 char chr = getChar();
135 usedChar();
136 if( isws( chr ) ) break;
137 name += chr;
138 }
139 ws();
140 char quot = getChar();
141 usedChar();
142 if( quot != '\'' && quot != '\"' )
143 {
144 throw Bu::XmlException(
145 "Only quoted entity values are supported."
146 );
147 }
148 Bu::FString value;
149 for(;;)
150 {
151 char chr = getChar();
152 usedChar();
153 if( chr == '&' )
154 {
155 Bu::FString tmp = getEscape();
156 value += tmp;
157 }
158 else if( chr == quot )
159 {
160 break;
161 }
162 else
163 {
164 value += chr;
165 }
166 }
167 ws();
168 if( getChar() == '>' )
169 {
170 usedChar();
171
172 addEntity( name.c_str(), value.c_str() );
173 }
174 else
175 {
176 throw Bu::XmlException(
177 "Malformed ENTITY: unexpected '%c' found.",
178 getChar()
179 );
180 }
181 }
182 else
183 {
184 throw Bu::XmlException(
185 "Unsupported header symbol: %s",
186 buf.c_str()
187 );
188 }
189 }
190 else
191 {
192 return;
193 }
194 }
195}
196
197bool XmlReader::node()
198{
199 gcall( startNode() )
200
201 // At this point, we are closing the startNode
202 char chr = getChar();
203 if( chr == '>' )
204 {
205 usedChar();
206
207 // Now we process the guts of the node.
208 gcall( content() );
209 }
210 else if( chr == '/' )
211 {
212 // This is the tricky one, one more validation, then we close the node.
213 usedChar();
214 if( getChar() == '>' )
215 {
216 closeNode();
217 usedChar();
218 }
219 else
220 {
221 throw Bu::XmlException("Close node in singleNode malformed!");
222 }
223 }
224 else
225 {
226 throw Bu::XmlException("Close node expected, but not found.");
227 return false;
228 }
229
230 return true;
231}
232
233bool XmlReader::startNode()
234{
235 if( getChar() == '<' )
236 {
237 usedChar();
238
239 if( getChar() == '/' )
240 {
241 // Heh, it's actually a close node, go figure
242 Bu::FString sName;
243 usedChar();
244 gcall( ws() );
245
246 while( true )
247 {
248 char chr = getChar();
249 if( isws( chr ) || chr == '>' )
250 {
251 // Here we actually compare the name we got to the name
252 // we already set, they have to match exactly.
253 if( getCurrent()->getName() == sName )
254 {
255 closeNode();
256 break;
257 }
258 else
259 {
260 throw Bu::XmlException("Got a mismatched node close tag.");
261 }
262 }
263 else
264 {
265 sName += chr;
266 usedChar();
267 }
268 }
269
270 gcall( ws() );
271 if( getChar() == '>' )
272 {
273 // Everything is cool.
274 usedChar();
275 }
276 else
277 {
278 throw Bu::XmlException("Got extra junk data instead of node close tag.");
279 }
280 }
281 else
282 {
283 // We're good, format is consistant
284 //addNode();
285
286 // Skip extra whitespace
287 gcall( ws() );
288 gcall( name() );
289 gcall( ws() );
290 gcall( paramlist() );
291 gcall( ws() );
292 }
293 }
294 else
295 {
296 throw Bu::XmlException("Expected to find node opening char, '<'.");
297 }
298
299 return true;
300}
301
302bool XmlReader::name()
303{
304 Bu::FString sName;
305
306 while( true )
307 {
308 char chr = getChar();
309 if( isws( chr ) || chr == '>' || chr == '/' )
310 {
311 addNode( sName );
312 return true;
313 }
314 else
315 {
316 sName += chr;
317 usedChar();
318 }
319 }
320
321 return true;
322}
323
324bool XmlReader::paramlist()
325{
326 while( true )
327 {
328 char chr = getChar();
329 if( chr == '/' || chr == '>' )
330 {
331 return true;
332 }
333 else
334 {
335 gcall( param() );
336 gcall( ws() );
337 }
338 }
339
340 return true;
341}
342
343Bu::FString XmlReader::getEscape()
344{
345 if( getChar( 1 ) == '#' )
346 {
347 // If the entity starts with a # it's a character escape code
348 int base = 10;
349 usedChar( 2 );
350 if( getChar() == 'x' )
351 {
352 base = 16;
353 usedChar();
354 }
355 char buf[4];
356 int j = 0;
357 for( j = 0; getChar() != ';'; j++ )
358 {
359 buf[j] = getChar();
360 usedChar();
361 }
362 usedChar();
363 buf[j] = '\0';
364 buf[0] = (char)strtol( buf, (char **)NULL, base );
365 buf[1] = '\0';
366
367 return buf;
368 }
369 else
370 {
371 // ...otherwise replace with the appropriate string...
372 Bu::FString buf;
373 usedChar();
374 for(;;)
375 {
376 char cbuf = getChar();
377 usedChar();
378 if( cbuf == ';' ) break;
379 buf += cbuf;
380 }
381
382 return htEntity[buf];
383 }
384}
385
386bool XmlReader::param()
387{
388 Bu::FString sName;
389 Bu::FString sValue;
390
391 while( true )
392 {
393 char chr = getChar();
394 if( isws( chr ) || chr == '=' )
395 {
396 break;
397 }
398 else
399 {
400 sName.append( chr );
401 usedChar();
402 }
403 }
404
405 gcall( ws() );
406
407 if( getChar() == '=' )
408 {
409 usedChar();
410
411 gcall( ws() );
412
413 char chr = getChar();
414 if( chr == '"' )
415 {
416 // Better quoted rhs
417 usedChar();
418
419 while( true )
420 {
421 chr = getChar();
422 if( chr == '"' )
423 {
424 usedChar();
425 addProperty( sName.getStr(), sValue.getStr() );
426 return true;
427 }
428 else
429 {
430 if( chr == '&' )
431 {
432 sValue += getEscape();
433 }
434 else
435 {
436 sValue += chr;
437 usedChar();
438 }
439 }
440 }
441 }
442 else
443 {
444 // Simple one-word rhs
445 while( true )
446 {
447 chr = getChar();
448 if( isws( chr ) || chr == '/' || chr == '>' )
449 {
450 addProperty( sName.getStr(), sValue.getStr() );
451 return true;
452 }
453 else
454 {
455 if( chr == '&' )
456 {
457 sValue += getEscape();
458 }
459 else
460 {
461 sValue += chr;
462 usedChar();
463 }
464 }
465 }
466 }
467 }
468 else
469 {
470 throw Bu::XmlException("Expected an equals to seperate the params.");
471 return false;
472 }
473
474 return true;
475}
476
477bool XmlReader::content()
478{
479 Bu::FString sContent;
480
481 if( bStrip ) gcall( ws() );
482
483 while( true )
484 {
485 char chr = getChar();
486 if( chr == '<' )
487 {
488 if( getChar(1) == '/' )
489 {
490 if( sContent.getSize() > 0 )
491 {
492 if( bStrip )
493 {
494 int j;
495 for( j = sContent.getSize()-1; isws(sContent[j]); j-- );
496 sContent[j+1] = '\0';
497 }
498 setContent( sContent.getStr() );
499 }
500 usedChar( 2 );
501 gcall( ws() );
502 Bu::FString sName;
503 while( true )
504 {
505 chr = getChar();
506 if( isws( chr ) || chr == '>' )
507 {
508 if( !strcasecmp( getCurrent()->getName().getStr(), sName.getStr() ) )
509 {
510 closeNode();
511 break;
512 }
513 else
514 {
515 throw Bu::XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName().getStr(), sName.getStr() );
516 }
517 }
518 else
519 {
520 sName += chr;
521 usedChar();
522 }
523 }
524 gcall( ws() );
525 if( getChar() == '>' )
526 {
527 usedChar();
528 return true;
529 }
530 else
531 {
532 throw Bu::XmlException("Malformed close tag.");
533 }
534 }
535 else if( getChar(1) == '!' )
536 {
537 // We know it's a comment, let's see if it's proper
538 if( getChar(2) != '-' ||
539 getChar(3) != '-' )
540 {
541 // Not a valid XML comment
542 throw Bu::XmlException("Malformed comment start tag found.");
543 }
544
545 usedChar( 4 );
546
547 // Now burn text until we find the close tag
548 for(;;)
549 {
550 if( getChar() == '-' )
551 {
552 if( getChar( 1 ) == '-' )
553 {
554 // The next one has to be a '>' now
555 if( getChar( 2 ) != '>' )
556 {
557 throw Bu::XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment.");
558 }
559 usedChar( 3 );
560 break;
561 }
562 else
563 {
564 // Found a dash followed by a non dash, that's ok...
565 usedChar( 2 );
566 }
567 }
568 else
569 {
570 // Burn comment chars
571 usedChar();
572 }
573 }
574 }
575 else
576 {
577 if( sContent.getSize() > 0 )
578 {
579 if( bStrip )
580 {
581 int j;
582 for( j = sContent.getSize()-1; isws(sContent[j]); j-- );
583 sContent[j+1] = '\0';
584 }
585 setContent( sContent.getStr() );
586 sContent.clear();
587 }
588 gcall( node() );
589 }
590
591 if( bStrip ) gcall( ws() );
592 }
593 else if( chr == '&' )
594 {
595 sContent += getEscape();
596 }
597 else
598 {
599 sContent += chr;
600 usedChar();
601 }
602 }
603}
604