summaryrefslogtreecommitdiff
path: root/src/xmlreader.cpp
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2007-04-03 03:49:53 +0000
committerMike Buland <eichlan@xagasoft.com>2007-04-03 03:49:53 +0000
commitf4c20290509d7ed3a8fd5304577e7a4cc0b9d974 (patch)
tree13cdf64f7cf134f397a7165b7a3fe0807e37026b /src/xmlreader.cpp
parent74d4c8cd27334fc7204d5a8773deb3d424565778 (diff)
downloadlibbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.gz
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.bz2
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.tar.xz
libbu++-f4c20290509d7ed3a8fd5304577e7a4cc0b9d974.zip
OK, no code is left in src; it's all in src/old. We'll gradually move code back
into src as it's fixed and re-org'd. This includes the tests; I may write a unit test system into libbu++ just to make my life easier.
Diffstat (limited to 'src/xmlreader.cpp')
-rw-r--r--src/xmlreader.cpp602
1 files changed, 0 insertions, 602 deletions
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp
deleted file mode 100644
index 18df69c..0000000
--- a/src/xmlreader.cpp
+++ /dev/null
@@ -1,602 +0,0 @@
1#include "xmlreader.h"
2#include "exceptions.h"
3#include <string.h>
4#include "hashfunctionstring.h"
5
6XmlReader::XmlReader( bool bStrip ) :
7 bStrip( bStrip ),
8 htEntity( new HashFunctionString(), 11 )
9{
10}
11
12XmlReader::~XmlReader()
13{
14 void *i = htEntity.getFirstItemPos();
15 while( (i = htEntity.getNextItemPos( i ) ) )
16 {
17 free( (char *)(htEntity.getItemID( i )) );
18 delete (StaticString *)htEntity.getItemData( i );
19 }
20}
21
22void XmlReader::addEntity( const char *name, const char *value )
23{
24 if( htEntity[name] ) return;
25
26 char *sName = strdup( name );
27 StaticString *sValue = new StaticString( value );
28
29 htEntity.insert( sName, sValue );
30}
31
// gcall( expr ): evaluate expr and return false from the enclosing function
// when it yields false.
//
// The argument is parenthesized so that compound expressions such as
// gcall( a && b ) test the whole expression rather than binding "== false"
// to the last operand only.  The trailing semicolon is part of the expansion
// because some call sites invoke the macro without one.
#define gcall( x ) if( (x) == false ) return false;
33
34bool XmlReader::isws( char chr )
35{
36 return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' );
37}
38
39bool XmlReader::ws()
40{
41 while( true )
42 {
43 char chr = getChar();
44 if( isws( chr ) )
45 {
46 usedChar();
47 }
48 else
49 {
50 return true;
51 }
52 }
53 return true;
54}
55
56bool XmlReader::buildDoc()
57{
58 // take care of initial whitespace
59 gcall( ws() );
60 textDecl();
61 entity();
62 addEntity("gt", ">");
63 addEntity("lt", "<");
64 addEntity("amp", "&");
65 addEntity("apos", "\'");
66 addEntity("quot", "\"");
67 gcall( node() );
68
69 return true;
70}
71
72void XmlReader::textDecl()
73{
74 if( getChar() == '<' && getChar( 1 ) == '?' )
75 {
76 usedChar( 2 );
77 for(;;)
78 {
79 if( getChar() == '?' )
80 {
81 if( getChar( 1 ) == '>' )
82 {
83 usedChar( 2 );
84 return;
85 }
86 }
87 usedChar();
88 }
89 }
90}
91
92void XmlReader::entity()
93{
94 for(;;)
95 {
96 ws();
97
98 if( getChar() == '<' && getChar( 1 ) == '!' )
99 {
100 usedChar( 2 );
101 ws();
102 std::string buf;
103 for(;;)
104 {
105 char chr = getChar();
106 usedChar();
107 if( isws( chr ) ) break;
108 buf += chr;
109 }
110
111 if( strcmp( buf.c_str(), "ENTITY") == 0 )
112 {
113 ws();
114 std::string name;
115 for(;;)
116 {
117 char chr = getChar();
118 usedChar();
119 if( isws( chr ) ) break;
120 name += chr;
121 }
122 ws();
123 char quot = getChar();
124 usedChar();
125 if( quot != '\'' && quot != '\"' )
126 {
127 throw XmlException(
128 "Only quoted entity values are supported."
129 );
130 }
131 std::string value;
132 for(;;)
133 {
134 char chr = getChar();
135 usedChar();
136 if( chr == '&' )
137 {
138 StaticString *tmp = getEscape();
139 if( tmp == NULL ) throw XmlException("Entity thing");
140 value += tmp->getString();
141 delete tmp;
142 }
143 else if( chr == quot )
144 {
145 break;
146 }
147 else
148 {
149 value += chr;
150 }
151 }
152 ws();
153 if( getChar() == '>' )
154 {
155 usedChar();
156
157 addEntity( name.c_str(), value.c_str() );
158 }
159 else
160 {
161 throw XmlException(
162 "Malformed ENTITY: unexpected '%c' found.",
163 getChar()
164 );
165 }
166 }
167 else
168 {
169 throw XmlException(
170 "Unsupported header symbol: %s",
171 buf.c_str()
172 );
173 }
174 }
175 else
176 {
177 return;
178 }
179 }
180}
181
182bool XmlReader::node()
183{
184 gcall( startNode() )
185
186 // At this point, we are closing the startNode
187 char chr = getChar();
188 if( chr == '>' )
189 {
190 usedChar();
191
192 // Now we process the guts of the node.
193 gcall( content() );
194 }
195 else if( chr == '/' )
196 {
197 // This is the tricky one, one more validation, then we close the node.
198 usedChar();
199 if( getChar() == '>' )
200 {
201 closeNode();
202 usedChar();
203 }
204 else
205 {
206 throw XmlException("Close node in singleNode malformed!");
207 }
208 }
209 else
210 {
211 throw XmlException("Close node expected, but not found.");
212 return false;
213 }
214
215 return true;
216}
217
218bool XmlReader::startNode()
219{
220 if( getChar() == '<' )
221 {
222 usedChar();
223
224 if( getChar() == '/' )
225 {
226 // Heh, it's actually a close node, go figure
227 FlexBuf fbName;
228 usedChar();
229 gcall( ws() );
230
231 while( true )
232 {
233 char chr = getChar();
234 if( isws( chr ) || chr == '>' )
235 {
236 // Here we actually compare the name we got to the name
237 // we already set, they have to match exactly.
238 if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) )
239 {
240 closeNode();
241 break;
242 }
243 else
244 {
245 throw XmlException("Got a mismatched node close tag.");
246 }
247 }
248 else
249 {
250 fbName.appendData( chr );
251 usedChar();
252 }
253 }
254
255 gcall( ws() );
256 if( getChar() == '>' )
257 {
258 // Everything is cool.
259 usedChar();
260 }
261 else
262 {
263 throw XmlException("Got extra junk data instead of node close tag.");
264 }
265 }
266 else
267 {
268 // We're good, format is consistant
269 addNode();
270
271 // Skip extra whitespace
272 gcall( ws() );
273 gcall( name() );
274 gcall( ws() );
275 gcall( paramlist() );
276 gcall( ws() );
277 }
278 }
279 else
280 {
281 throw XmlException("Expected to find node opening char, '<'.");
282 }
283
284 return true;
285}
286
287bool XmlReader::name()
288{
289 FlexBuf fbName;
290
291 while( true )
292 {
293 char chr = getChar();
294 if( isws( chr ) || chr == '>' || chr == '/' )
295 {
296 setName( fbName.getData() );
297 return true;
298 }
299 else
300 {
301 fbName.appendData( chr );
302 usedChar();
303 }
304 }
305
306 return true;
307}
308
309bool XmlReader::paramlist()
310{
311 while( true )
312 {
313 char chr = getChar();
314 if( chr == '/' || chr == '>' )
315 {
316 return true;
317 }
318 else
319 {
320 gcall( param() );
321 gcall( ws() );
322 }
323 }
324
325 return true;
326}
327
328StaticString *XmlReader::getEscape()
329{
330 if( getChar( 1 ) == '#' )
331 {
332 // If the entity starts with a # it's a character escape code
333 int base = 10;
334 usedChar( 2 );
335 if( getChar() == 'x' )
336 {
337 base = 16;
338 usedChar();
339 }
340 char buf[4];
341 int j = 0;
342 for( j = 0; getChar() != ';'; j++ )
343 {
344 buf[j] = getChar();
345 usedChar();
346 }
347 usedChar();
348 buf[j] = '\0';
349 buf[0] = (char)strtol( buf, (char **)NULL, base );
350 buf[1] = '\0';
351
352 return new StaticString( buf );
353 }
354 else
355 {
356 // ...otherwise replace with the appropriate string...
357 std::string buf;
358 usedChar();
359 for(;;)
360 {
361 char cbuf = getChar();
362 usedChar();
363 if( cbuf == ';' ) break;
364 buf += cbuf;
365 }
366
367 StaticString *tmp = (StaticString *)htEntity[buf.c_str()];
368 if( tmp == NULL ) return NULL;
369
370 StaticString *ret = new StaticString( *tmp );
371 return ret;
372 }
373}
374
375bool XmlReader::param()
376{
377 FlexBuf fbName;
378 FlexBuf fbValue;
379
380 while( true )
381 {
382 char chr = getChar();
383 if( isws( chr ) || chr == '=' )
384 {
385 break;
386 }
387 else
388 {
389 fbName.appendData( chr );
390 usedChar();
391 }
392 }
393
394 gcall( ws() );
395
396 if( getChar() == '=' )
397 {
398 usedChar();
399
400 gcall( ws() );
401
402 char chr = getChar();
403 if( chr == '"' )
404 {
405 // Better quoted rhs
406 usedChar();
407
408 while( true )
409 {
410 chr = getChar();
411 if( chr == '"' )
412 {
413 usedChar();
414 addProperty( fbName.getData(), fbValue.getData() );
415 return true;
416 }
417 else
418 {
419 if( chr == '&' )
420 {
421 StaticString *tmp = getEscape();
422 if( tmp == NULL ) return false;
423 fbValue.appendData( tmp->getString() );
424 delete tmp;
425 }
426 else
427 {
428 fbValue.appendData( chr );
429 usedChar();
430 }
431 }
432 }
433 }
434 else
435 {
436 // Simple one-word rhs
437 while( true )
438 {
439 chr = getChar();
440 if( isws( chr ) || chr == '/' || chr == '>' )
441 {
442 addProperty( fbName.getData(), fbValue.getData() );
443 return true;
444 }
445 else
446 {
447 if( chr == '&' )
448 {
449 StaticString *tmp = getEscape();
450 if( tmp == NULL ) return false;
451 fbValue.appendData( tmp->getString() );
452 delete tmp;
453 }
454 else
455 {
456 fbValue.appendData( chr );
457 usedChar();
458 }
459 }
460 }
461 }
462 }
463 else
464 {
465 throw XmlException("Expected an equals to seperate the params.");
466 return false;
467 }
468
469 return true;
470}
471
472bool XmlReader::content()
473{
474 FlexBuf fbContent;
475
476 if( bStrip ) gcall( ws() );
477
478 while( true )
479 {
480 char chr = getChar();
481 if( chr == '<' )
482 {
483 if( getChar(1) == '/' )
484 {
485 if( fbContent.getLength() > 0 )
486 {
487 if( bStrip )
488 {
489 int j;
490 for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- );
491 ((char *)fbContent.getData())[j+1] = '\0';
492 }
493 setContent( fbContent.getData() );
494 }
495 usedChar( 2 );
496 gcall( ws() );
497 FlexBuf fbName;
498 while( true )
499 {
500 chr = getChar();
501 if( isws( chr ) || chr == '>' )
502 {
503 if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) )
504 {
505 closeNode();
506 break;
507 }
508 else
509 {
510 throw XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName(), fbName.getData() );
511 }
512 }
513 else
514 {
515 fbName.appendData( chr );
516 usedChar();
517 }
518 }
519 gcall( ws() );
520 if( getChar() == '>' )
521 {
522 usedChar();
523 return true;
524 }
525 else
526 {
527 throw XmlException("Malformed close tag.");
528 }
529 }
530 else if( getChar(1) == '!' )
531 {
532 // We know it's a comment, let's see if it's proper
533 if( getChar(2) != '-' ||
534 getChar(3) != '-' )
535 {
536 // Not a valid XML comment
537 throw XmlException("Malformed comment start tag found.");
538 }
539
540 usedChar( 4 );
541
542 // Now burn text until we find the close tag
543 for(;;)
544 {
545 if( getChar() == '-' )
546 {
547 if( getChar( 1 ) == '-' )
548 {
549 // The next one has to be a '>' now
550 if( getChar( 2 ) != '>' )
551 {
552 throw XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment.");
553 }
554 usedChar( 3 );
555 break;
556 }
557 else
558 {
559 // Found a dash followed by a non dash, that's ok...
560 usedChar( 2 );
561 }
562 }
563 else
564 {
565 // Burn comment chars
566 usedChar();
567 }
568 }
569 }
570 else
571 {
572 if( fbContent.getLength() > 0 )
573 {
574 if( bStrip )
575 {
576 int j;
577 for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- );
578 ((char *)fbContent.getData())[j+1] = '\0';
579 }
580 setContent( fbContent.getData() );
581 fbContent.clearData();
582 }
583 gcall( node() );
584 }
585
586 if( bStrip ) gcall( ws() );
587 }
588 else if( chr == '&' )
589 {
590 StaticString *tmp = getEscape();
591 if( tmp == NULL ) return false;
592 fbContent.appendData( tmp->getString() );
593 delete tmp;
594 }
595 else
596 {
597 fbContent.appendData( chr );
598 usedChar();
599 }
600 }
601}
602