diff options
Diffstat (limited to 'src/old/xmlreader.cpp')
-rw-r--r-- | src/old/xmlreader.cpp | 602 |
1 files changed, 602 insertions, 0 deletions
diff --git a/src/old/xmlreader.cpp b/src/old/xmlreader.cpp new file mode 100644 index 0000000..18df69c --- /dev/null +++ b/src/old/xmlreader.cpp | |||
@@ -0,0 +1,602 @@ | |||
1 | #include "xmlreader.h" | ||
2 | #include "exceptions.h" | ||
3 | #include <string.h> | ||
4 | #include "hashfunctionstring.h" | ||
5 | |||
6 | XmlReader::XmlReader( bool bStrip ) : | ||
7 | bStrip( bStrip ), | ||
8 | htEntity( new HashFunctionString(), 11 ) | ||
9 | { | ||
10 | } | ||
11 | |||
12 | XmlReader::~XmlReader() | ||
13 | { | ||
14 | void *i = htEntity.getFirstItemPos(); | ||
15 | while( (i = htEntity.getNextItemPos( i ) ) ) | ||
16 | { | ||
17 | free( (char *)(htEntity.getItemID( i )) ); | ||
18 | delete (StaticString *)htEntity.getItemData( i ); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | void XmlReader::addEntity( const char *name, const char *value ) | ||
23 | { | ||
24 | if( htEntity[name] ) return; | ||
25 | |||
26 | char *sName = strdup( name ); | ||
27 | StaticString *sValue = new StaticString( value ); | ||
28 | |||
29 | htEntity.insert( sName, sValue ); | ||
30 | } | ||
31 | |||
// Guarded call: evaluate x and return false from the enclosing function when
// it yields false. The argument is parenthesized so that expressions with
// operators of lower precedence than == (e.g. "a ? b : c", "a & b") are
// tested as a whole. The expansion deliberately keeps its trailing ';'
// because existing call sites invoke the macro without one.
#define gcall( x ) if( (x) == false ) return false;
33 | |||
34 | bool XmlReader::isws( char chr ) | ||
35 | { | ||
36 | return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' ); | ||
37 | } | ||
38 | |||
39 | bool XmlReader::ws() | ||
40 | { | ||
41 | while( true ) | ||
42 | { | ||
43 | char chr = getChar(); | ||
44 | if( isws( chr ) ) | ||
45 | { | ||
46 | usedChar(); | ||
47 | } | ||
48 | else | ||
49 | { | ||
50 | return true; | ||
51 | } | ||
52 | } | ||
53 | return true; | ||
54 | } | ||
55 | |||
56 | bool XmlReader::buildDoc() | ||
57 | { | ||
58 | // take care of initial whitespace | ||
59 | gcall( ws() ); | ||
60 | textDecl(); | ||
61 | entity(); | ||
62 | addEntity("gt", ">"); | ||
63 | addEntity("lt", "<"); | ||
64 | addEntity("amp", "&"); | ||
65 | addEntity("apos", "\'"); | ||
66 | addEntity("quot", "\""); | ||
67 | gcall( node() ); | ||
68 | |||
69 | return true; | ||
70 | } | ||
71 | |||
72 | void XmlReader::textDecl() | ||
73 | { | ||
74 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
75 | { | ||
76 | usedChar( 2 ); | ||
77 | for(;;) | ||
78 | { | ||
79 | if( getChar() == '?' ) | ||
80 | { | ||
81 | if( getChar( 1 ) == '>' ) | ||
82 | { | ||
83 | usedChar( 2 ); | ||
84 | return; | ||
85 | } | ||
86 | } | ||
87 | usedChar(); | ||
88 | } | ||
89 | } | ||
90 | } | ||
91 | |||
92 | void XmlReader::entity() | ||
93 | { | ||
94 | for(;;) | ||
95 | { | ||
96 | ws(); | ||
97 | |||
98 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
99 | { | ||
100 | usedChar( 2 ); | ||
101 | ws(); | ||
102 | std::string buf; | ||
103 | for(;;) | ||
104 | { | ||
105 | char chr = getChar(); | ||
106 | usedChar(); | ||
107 | if( isws( chr ) ) break; | ||
108 | buf += chr; | ||
109 | } | ||
110 | |||
111 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
112 | { | ||
113 | ws(); | ||
114 | std::string name; | ||
115 | for(;;) | ||
116 | { | ||
117 | char chr = getChar(); | ||
118 | usedChar(); | ||
119 | if( isws( chr ) ) break; | ||
120 | name += chr; | ||
121 | } | ||
122 | ws(); | ||
123 | char quot = getChar(); | ||
124 | usedChar(); | ||
125 | if( quot != '\'' && quot != '\"' ) | ||
126 | { | ||
127 | throw XmlException( | ||
128 | "Only quoted entity values are supported." | ||
129 | ); | ||
130 | } | ||
131 | std::string value; | ||
132 | for(;;) | ||
133 | { | ||
134 | char chr = getChar(); | ||
135 | usedChar(); | ||
136 | if( chr == '&' ) | ||
137 | { | ||
138 | StaticString *tmp = getEscape(); | ||
139 | if( tmp == NULL ) throw XmlException("Entity thing"); | ||
140 | value += tmp->getString(); | ||
141 | delete tmp; | ||
142 | } | ||
143 | else if( chr == quot ) | ||
144 | { | ||
145 | break; | ||
146 | } | ||
147 | else | ||
148 | { | ||
149 | value += chr; | ||
150 | } | ||
151 | } | ||
152 | ws(); | ||
153 | if( getChar() == '>' ) | ||
154 | { | ||
155 | usedChar(); | ||
156 | |||
157 | addEntity( name.c_str(), value.c_str() ); | ||
158 | } | ||
159 | else | ||
160 | { | ||
161 | throw XmlException( | ||
162 | "Malformed ENTITY: unexpected '%c' found.", | ||
163 | getChar() | ||
164 | ); | ||
165 | } | ||
166 | } | ||
167 | else | ||
168 | { | ||
169 | throw XmlException( | ||
170 | "Unsupported header symbol: %s", | ||
171 | buf.c_str() | ||
172 | ); | ||
173 | } | ||
174 | } | ||
175 | else | ||
176 | { | ||
177 | return; | ||
178 | } | ||
179 | } | ||
180 | } | ||
181 | |||
182 | bool XmlReader::node() | ||
183 | { | ||
184 | gcall( startNode() ) | ||
185 | |||
186 | // At this point, we are closing the startNode | ||
187 | char chr = getChar(); | ||
188 | if( chr == '>' ) | ||
189 | { | ||
190 | usedChar(); | ||
191 | |||
192 | // Now we process the guts of the node. | ||
193 | gcall( content() ); | ||
194 | } | ||
195 | else if( chr == '/' ) | ||
196 | { | ||
197 | // This is the tricky one, one more validation, then we close the node. | ||
198 | usedChar(); | ||
199 | if( getChar() == '>' ) | ||
200 | { | ||
201 | closeNode(); | ||
202 | usedChar(); | ||
203 | } | ||
204 | else | ||
205 | { | ||
206 | throw XmlException("Close node in singleNode malformed!"); | ||
207 | } | ||
208 | } | ||
209 | else | ||
210 | { | ||
211 | throw XmlException("Close node expected, but not found."); | ||
212 | return false; | ||
213 | } | ||
214 | |||
215 | return true; | ||
216 | } | ||
217 | |||
218 | bool XmlReader::startNode() | ||
219 | { | ||
220 | if( getChar() == '<' ) | ||
221 | { | ||
222 | usedChar(); | ||
223 | |||
224 | if( getChar() == '/' ) | ||
225 | { | ||
226 | // Heh, it's actually a close node, go figure | ||
227 | FlexBuf fbName; | ||
228 | usedChar(); | ||
229 | gcall( ws() ); | ||
230 | |||
231 | while( true ) | ||
232 | { | ||
233 | char chr = getChar(); | ||
234 | if( isws( chr ) || chr == '>' ) | ||
235 | { | ||
236 | // Here we actually compare the name we got to the name | ||
237 | // we already set, they have to match exactly. | ||
238 | if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) | ||
239 | { | ||
240 | closeNode(); | ||
241 | break; | ||
242 | } | ||
243 | else | ||
244 | { | ||
245 | throw XmlException("Got a mismatched node close tag."); | ||
246 | } | ||
247 | } | ||
248 | else | ||
249 | { | ||
250 | fbName.appendData( chr ); | ||
251 | usedChar(); | ||
252 | } | ||
253 | } | ||
254 | |||
255 | gcall( ws() ); | ||
256 | if( getChar() == '>' ) | ||
257 | { | ||
258 | // Everything is cool. | ||
259 | usedChar(); | ||
260 | } | ||
261 | else | ||
262 | { | ||
263 | throw XmlException("Got extra junk data instead of node close tag."); | ||
264 | } | ||
265 | } | ||
266 | else | ||
267 | { | ||
268 | // We're good, format is consistant | ||
269 | addNode(); | ||
270 | |||
271 | // Skip extra whitespace | ||
272 | gcall( ws() ); | ||
273 | gcall( name() ); | ||
274 | gcall( ws() ); | ||
275 | gcall( paramlist() ); | ||
276 | gcall( ws() ); | ||
277 | } | ||
278 | } | ||
279 | else | ||
280 | { | ||
281 | throw XmlException("Expected to find node opening char, '<'."); | ||
282 | } | ||
283 | |||
284 | return true; | ||
285 | } | ||
286 | |||
287 | bool XmlReader::name() | ||
288 | { | ||
289 | FlexBuf fbName; | ||
290 | |||
291 | while( true ) | ||
292 | { | ||
293 | char chr = getChar(); | ||
294 | if( isws( chr ) || chr == '>' || chr == '/' ) | ||
295 | { | ||
296 | setName( fbName.getData() ); | ||
297 | return true; | ||
298 | } | ||
299 | else | ||
300 | { | ||
301 | fbName.appendData( chr ); | ||
302 | usedChar(); | ||
303 | } | ||
304 | } | ||
305 | |||
306 | return true; | ||
307 | } | ||
308 | |||
309 | bool XmlReader::paramlist() | ||
310 | { | ||
311 | while( true ) | ||
312 | { | ||
313 | char chr = getChar(); | ||
314 | if( chr == '/' || chr == '>' ) | ||
315 | { | ||
316 | return true; | ||
317 | } | ||
318 | else | ||
319 | { | ||
320 | gcall( param() ); | ||
321 | gcall( ws() ); | ||
322 | } | ||
323 | } | ||
324 | |||
325 | return true; | ||
326 | } | ||
327 | |||
328 | StaticString *XmlReader::getEscape() | ||
329 | { | ||
330 | if( getChar( 1 ) == '#' ) | ||
331 | { | ||
332 | // If the entity starts with a # it's a character escape code | ||
333 | int base = 10; | ||
334 | usedChar( 2 ); | ||
335 | if( getChar() == 'x' ) | ||
336 | { | ||
337 | base = 16; | ||
338 | usedChar(); | ||
339 | } | ||
340 | char buf[4]; | ||
341 | int j = 0; | ||
342 | for( j = 0; getChar() != ';'; j++ ) | ||
343 | { | ||
344 | buf[j] = getChar(); | ||
345 | usedChar(); | ||
346 | } | ||
347 | usedChar(); | ||
348 | buf[j] = '\0'; | ||
349 | buf[0] = (char)strtol( buf, (char **)NULL, base ); | ||
350 | buf[1] = '\0'; | ||
351 | |||
352 | return new StaticString( buf ); | ||
353 | } | ||
354 | else | ||
355 | { | ||
356 | // ...otherwise replace with the appropriate string... | ||
357 | std::string buf; | ||
358 | usedChar(); | ||
359 | for(;;) | ||
360 | { | ||
361 | char cbuf = getChar(); | ||
362 | usedChar(); | ||
363 | if( cbuf == ';' ) break; | ||
364 | buf += cbuf; | ||
365 | } | ||
366 | |||
367 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | ||
368 | if( tmp == NULL ) return NULL; | ||
369 | |||
370 | StaticString *ret = new StaticString( *tmp ); | ||
371 | return ret; | ||
372 | } | ||
373 | } | ||
374 | |||
375 | bool XmlReader::param() | ||
376 | { | ||
377 | FlexBuf fbName; | ||
378 | FlexBuf fbValue; | ||
379 | |||
380 | while( true ) | ||
381 | { | ||
382 | char chr = getChar(); | ||
383 | if( isws( chr ) || chr == '=' ) | ||
384 | { | ||
385 | break; | ||
386 | } | ||
387 | else | ||
388 | { | ||
389 | fbName.appendData( chr ); | ||
390 | usedChar(); | ||
391 | } | ||
392 | } | ||
393 | |||
394 | gcall( ws() ); | ||
395 | |||
396 | if( getChar() == '=' ) | ||
397 | { | ||
398 | usedChar(); | ||
399 | |||
400 | gcall( ws() ); | ||
401 | |||
402 | char chr = getChar(); | ||
403 | if( chr == '"' ) | ||
404 | { | ||
405 | // Better quoted rhs | ||
406 | usedChar(); | ||
407 | |||
408 | while( true ) | ||
409 | { | ||
410 | chr = getChar(); | ||
411 | if( chr == '"' ) | ||
412 | { | ||
413 | usedChar(); | ||
414 | addProperty( fbName.getData(), fbValue.getData() ); | ||
415 | return true; | ||
416 | } | ||
417 | else | ||
418 | { | ||
419 | if( chr == '&' ) | ||
420 | { | ||
421 | StaticString *tmp = getEscape(); | ||
422 | if( tmp == NULL ) return false; | ||
423 | fbValue.appendData( tmp->getString() ); | ||
424 | delete tmp; | ||
425 | } | ||
426 | else | ||
427 | { | ||
428 | fbValue.appendData( chr ); | ||
429 | usedChar(); | ||
430 | } | ||
431 | } | ||
432 | } | ||
433 | } | ||
434 | else | ||
435 | { | ||
436 | // Simple one-word rhs | ||
437 | while( true ) | ||
438 | { | ||
439 | chr = getChar(); | ||
440 | if( isws( chr ) || chr == '/' || chr == '>' ) | ||
441 | { | ||
442 | addProperty( fbName.getData(), fbValue.getData() ); | ||
443 | return true; | ||
444 | } | ||
445 | else | ||
446 | { | ||
447 | if( chr == '&' ) | ||
448 | { | ||
449 | StaticString *tmp = getEscape(); | ||
450 | if( tmp == NULL ) return false; | ||
451 | fbValue.appendData( tmp->getString() ); | ||
452 | delete tmp; | ||
453 | } | ||
454 | else | ||
455 | { | ||
456 | fbValue.appendData( chr ); | ||
457 | usedChar(); | ||
458 | } | ||
459 | } | ||
460 | } | ||
461 | } | ||
462 | } | ||
463 | else | ||
464 | { | ||
465 | throw XmlException("Expected an equals to seperate the params."); | ||
466 | return false; | ||
467 | } | ||
468 | |||
469 | return true; | ||
470 | } | ||
471 | |||
472 | bool XmlReader::content() | ||
473 | { | ||
474 | FlexBuf fbContent; | ||
475 | |||
476 | if( bStrip ) gcall( ws() ); | ||
477 | |||
478 | while( true ) | ||
479 | { | ||
480 | char chr = getChar(); | ||
481 | if( chr == '<' ) | ||
482 | { | ||
483 | if( getChar(1) == '/' ) | ||
484 | { | ||
485 | if( fbContent.getLength() > 0 ) | ||
486 | { | ||
487 | if( bStrip ) | ||
488 | { | ||
489 | int j; | ||
490 | for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); | ||
491 | ((char *)fbContent.getData())[j+1] = '\0'; | ||
492 | } | ||
493 | setContent( fbContent.getData() ); | ||
494 | } | ||
495 | usedChar( 2 ); | ||
496 | gcall( ws() ); | ||
497 | FlexBuf fbName; | ||
498 | while( true ) | ||
499 | { | ||
500 | chr = getChar(); | ||
501 | if( isws( chr ) || chr == '>' ) | ||
502 | { | ||
503 | if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) | ||
504 | { | ||
505 | closeNode(); | ||
506 | break; | ||
507 | } | ||
508 | else | ||
509 | { | ||
510 | throw XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName(), fbName.getData() ); | ||
511 | } | ||
512 | } | ||
513 | else | ||
514 | { | ||
515 | fbName.appendData( chr ); | ||
516 | usedChar(); | ||
517 | } | ||
518 | } | ||
519 | gcall( ws() ); | ||
520 | if( getChar() == '>' ) | ||
521 | { | ||
522 | usedChar(); | ||
523 | return true; | ||
524 | } | ||
525 | else | ||
526 | { | ||
527 | throw XmlException("Malformed close tag."); | ||
528 | } | ||
529 | } | ||
530 | else if( getChar(1) == '!' ) | ||
531 | { | ||
532 | // We know it's a comment, let's see if it's proper | ||
533 | if( getChar(2) != '-' || | ||
534 | getChar(3) != '-' ) | ||
535 | { | ||
536 | // Not a valid XML comment | ||
537 | throw XmlException("Malformed comment start tag found."); | ||
538 | } | ||
539 | |||
540 | usedChar( 4 ); | ||
541 | |||
542 | // Now burn text until we find the close tag | ||
543 | for(;;) | ||
544 | { | ||
545 | if( getChar() == '-' ) | ||
546 | { | ||
547 | if( getChar( 1 ) == '-' ) | ||
548 | { | ||
549 | // The next one has to be a '>' now | ||
550 | if( getChar( 2 ) != '>' ) | ||
551 | { | ||
552 | throw XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment."); | ||
553 | } | ||
554 | usedChar( 3 ); | ||
555 | break; | ||
556 | } | ||
557 | else | ||
558 | { | ||
559 | // Found a dash followed by a non dash, that's ok... | ||
560 | usedChar( 2 ); | ||
561 | } | ||
562 | } | ||
563 | else | ||
564 | { | ||
565 | // Burn comment chars | ||
566 | usedChar(); | ||
567 | } | ||
568 | } | ||
569 | } | ||
570 | else | ||
571 | { | ||
572 | if( fbContent.getLength() > 0 ) | ||
573 | { | ||
574 | if( bStrip ) | ||
575 | { | ||
576 | int j; | ||
577 | for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); | ||
578 | ((char *)fbContent.getData())[j+1] = '\0'; | ||
579 | } | ||
580 | setContent( fbContent.getData() ); | ||
581 | fbContent.clearData(); | ||
582 | } | ||
583 | gcall( node() ); | ||
584 | } | ||
585 | |||
586 | if( bStrip ) gcall( ws() ); | ||
587 | } | ||
588 | else if( chr == '&' ) | ||
589 | { | ||
590 | StaticString *tmp = getEscape(); | ||
591 | if( tmp == NULL ) return false; | ||
592 | fbContent.appendData( tmp->getString() ); | ||
593 | delete tmp; | ||
594 | } | ||
595 | else | ||
596 | { | ||
597 | fbContent.appendData( chr ); | ||
598 | usedChar(); | ||
599 | } | ||
600 | } | ||
601 | } | ||
602 | |||