diff options
author | Mike Buland <eichlan@xagasoft.com> | 2007-07-03 00:28:59 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2007-07-03 00:28:59 +0000 |
commit | ac517a2b7625e0aa0862679e961c6349f859ea3b (patch) | |
tree | e3e27a6b9bd5e2be6150088495c91fc91786ad9d /src/old/xmlreader.cpp | |
parent | f8d4301e9fa4f3709258505941e37fab2eadadc6 (diff) | |
parent | bd865cee5f89116c1f054cd0e5c275e97c2d0a9b (diff) | |
download | libbu++-ac517a2b7625e0aa0862679e961c6349f859ea3b.tar.gz libbu++-ac517a2b7625e0aa0862679e961c6349f859ea3b.tar.bz2 libbu++-ac517a2b7625e0aa0862679e961c6349f859ea3b.tar.xz libbu++-ac517a2b7625e0aa0862679e961c6349f859ea3b.zip |
The reorg is being put in trunk, I think it's ready. Now we just get to find
out how many applications won't work anymore :)
Diffstat (limited to 'src/old/xmlreader.cpp')
-rw-r--r-- | src/old/xmlreader.cpp | 604 |
1 files changed, 604 insertions, 0 deletions
diff --git a/src/old/xmlreader.cpp b/src/old/xmlreader.cpp new file mode 100644 index 0000000..38cad5f --- /dev/null +++ b/src/old/xmlreader.cpp | |||
@@ -0,0 +1,604 @@ | |||
1 | #include "bu/xmlreader.h" | ||
2 | #include "bu/exceptions.h" | ||
3 | #include <string.h> | ||
4 | |||
5 | XmlReader::XmlReader( Bu::Stream &sIn, bool bStrip ) : | ||
6 | sIn( sIn ), | ||
7 | bStrip( bStrip ) | ||
8 | { | ||
9 | buildDoc(); | ||
10 | } | ||
11 | |||
12 | XmlReader::~XmlReader() | ||
13 | { | ||
14 | } | ||
15 | |||
16 | char XmlReader::getChar( int nIndex ) | ||
17 | { | ||
18 | if( sBuf.getSize() <= nIndex ) | ||
19 | { | ||
20 | int nInc = nIndex-sBuf.getSize()+1; | ||
21 | char *buf = new char[nInc]; | ||
22 | sIn.read( buf, nInc ); | ||
23 | sBuf.append( buf, nInc ); | ||
24 | delete[] buf; | ||
25 | } | ||
26 | |||
27 | return sBuf[nIndex]; | ||
28 | } | ||
29 | |||
30 | void XmlReader::usedChar( int nAmnt ) | ||
31 | { | ||
32 | if( nAmnt >= sBuf.getSize() ) | ||
33 | { | ||
34 | sBuf.clear(); | ||
35 | } | ||
36 | else | ||
37 | { | ||
38 | char *s = sBuf.getStr(); | ||
39 | memcpy( s, s+nAmnt, sBuf.getSize()-nAmnt ); | ||
40 | sBuf.resize( sBuf.getSize()-nAmnt ); | ||
41 | } | ||
42 | } | ||
43 | |||
44 | void XmlReader::addEntity( const Bu::FString &name, const Bu::FString &value ) | ||
45 | { | ||
46 | htEntity[name] = value; | ||
47 | } | ||
48 | |||
// Evaluate a parsing step and bail out of the calling function with false if
// the step reports failure.  The argument is parenthesised so compound
// expressions such as gcall( a && b ) are tested as a whole.  The trailing
// semicolon is kept because existing call sites rely on it.
#define gcall( x ) if( !( x ) ) return false;
50 | |||
51 | bool XmlReader::isws( char chr ) | ||
52 | { | ||
53 | return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' ); | ||
54 | } | ||
55 | |||
56 | bool XmlReader::ws() | ||
57 | { | ||
58 | while( true ) | ||
59 | { | ||
60 | char chr = getChar(); | ||
61 | if( isws( chr ) ) | ||
62 | { | ||
63 | usedChar(); | ||
64 | } | ||
65 | else | ||
66 | { | ||
67 | return true; | ||
68 | } | ||
69 | } | ||
70 | return true; | ||
71 | } | ||
72 | |||
73 | bool XmlReader::buildDoc() | ||
74 | { | ||
75 | // take care of initial whitespace | ||
76 | gcall( ws() ); | ||
77 | textDecl(); | ||
78 | entity(); | ||
79 | addEntity("gt", ">"); | ||
80 | addEntity("lt", "<"); | ||
81 | addEntity("amp", "&"); | ||
82 | addEntity("apos", "\'"); | ||
83 | addEntity("quot", "\""); | ||
84 | gcall( node() ); | ||
85 | |||
86 | return true; | ||
87 | } | ||
88 | |||
89 | void XmlReader::textDecl() | ||
90 | { | ||
91 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
92 | { | ||
93 | usedChar( 2 ); | ||
94 | for(;;) | ||
95 | { | ||
96 | if( getChar() == '?' ) | ||
97 | { | ||
98 | if( getChar( 1 ) == '>' ) | ||
99 | { | ||
100 | usedChar( 2 ); | ||
101 | return; | ||
102 | } | ||
103 | } | ||
104 | usedChar(); | ||
105 | } | ||
106 | } | ||
107 | } | ||
108 | |||
109 | void XmlReader::entity() | ||
110 | { | ||
111 | for(;;) | ||
112 | { | ||
113 | ws(); | ||
114 | |||
115 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
116 | { | ||
117 | usedChar( 2 ); | ||
118 | ws(); | ||
119 | Bu::FString buf; | ||
120 | for(;;) | ||
121 | { | ||
122 | char chr = getChar(); | ||
123 | usedChar(); | ||
124 | if( isws( chr ) ) break; | ||
125 | buf += chr; | ||
126 | } | ||
127 | |||
128 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
129 | { | ||
130 | ws(); | ||
131 | Bu::FString name; | ||
132 | for(;;) | ||
133 | { | ||
134 | char chr = getChar(); | ||
135 | usedChar(); | ||
136 | if( isws( chr ) ) break; | ||
137 | name += chr; | ||
138 | } | ||
139 | ws(); | ||
140 | char quot = getChar(); | ||
141 | usedChar(); | ||
142 | if( quot != '\'' && quot != '\"' ) | ||
143 | { | ||
144 | throw Bu::XmlException( | ||
145 | "Only quoted entity values are supported." | ||
146 | ); | ||
147 | } | ||
148 | Bu::FString value; | ||
149 | for(;;) | ||
150 | { | ||
151 | char chr = getChar(); | ||
152 | usedChar(); | ||
153 | if( chr == '&' ) | ||
154 | { | ||
155 | Bu::FString tmp = getEscape(); | ||
156 | value += tmp; | ||
157 | } | ||
158 | else if( chr == quot ) | ||
159 | { | ||
160 | break; | ||
161 | } | ||
162 | else | ||
163 | { | ||
164 | value += chr; | ||
165 | } | ||
166 | } | ||
167 | ws(); | ||
168 | if( getChar() == '>' ) | ||
169 | { | ||
170 | usedChar(); | ||
171 | |||
172 | addEntity( name.c_str(), value.c_str() ); | ||
173 | } | ||
174 | else | ||
175 | { | ||
176 | throw Bu::XmlException( | ||
177 | "Malformed ENTITY: unexpected '%c' found.", | ||
178 | getChar() | ||
179 | ); | ||
180 | } | ||
181 | } | ||
182 | else | ||
183 | { | ||
184 | throw Bu::XmlException( | ||
185 | "Unsupported header symbol: %s", | ||
186 | buf.c_str() | ||
187 | ); | ||
188 | } | ||
189 | } | ||
190 | else | ||
191 | { | ||
192 | return; | ||
193 | } | ||
194 | } | ||
195 | } | ||
196 | |||
197 | bool XmlReader::node() | ||
198 | { | ||
199 | gcall( startNode() ) | ||
200 | |||
201 | // At this point, we are closing the startNode | ||
202 | char chr = getChar(); | ||
203 | if( chr == '>' ) | ||
204 | { | ||
205 | usedChar(); | ||
206 | |||
207 | // Now we process the guts of the node. | ||
208 | gcall( content() ); | ||
209 | } | ||
210 | else if( chr == '/' ) | ||
211 | { | ||
212 | // This is the tricky one, one more validation, then we close the node. | ||
213 | usedChar(); | ||
214 | if( getChar() == '>' ) | ||
215 | { | ||
216 | closeNode(); | ||
217 | usedChar(); | ||
218 | } | ||
219 | else | ||
220 | { | ||
221 | throw Bu::XmlException("Close node in singleNode malformed!"); | ||
222 | } | ||
223 | } | ||
224 | else | ||
225 | { | ||
226 | throw Bu::XmlException("Close node expected, but not found."); | ||
227 | return false; | ||
228 | } | ||
229 | |||
230 | return true; | ||
231 | } | ||
232 | |||
233 | bool XmlReader::startNode() | ||
234 | { | ||
235 | if( getChar() == '<' ) | ||
236 | { | ||
237 | usedChar(); | ||
238 | |||
239 | if( getChar() == '/' ) | ||
240 | { | ||
241 | // Heh, it's actually a close node, go figure | ||
242 | Bu::FString sName; | ||
243 | usedChar(); | ||
244 | gcall( ws() ); | ||
245 | |||
246 | while( true ) | ||
247 | { | ||
248 | char chr = getChar(); | ||
249 | if( isws( chr ) || chr == '>' ) | ||
250 | { | ||
251 | // Here we actually compare the name we got to the name | ||
252 | // we already set, they have to match exactly. | ||
253 | if( getCurrent()->getName() == sName ) | ||
254 | { | ||
255 | closeNode(); | ||
256 | break; | ||
257 | } | ||
258 | else | ||
259 | { | ||
260 | throw Bu::XmlException("Got a mismatched node close tag."); | ||
261 | } | ||
262 | } | ||
263 | else | ||
264 | { | ||
265 | sName += chr; | ||
266 | usedChar(); | ||
267 | } | ||
268 | } | ||
269 | |||
270 | gcall( ws() ); | ||
271 | if( getChar() == '>' ) | ||
272 | { | ||
273 | // Everything is cool. | ||
274 | usedChar(); | ||
275 | } | ||
276 | else | ||
277 | { | ||
278 | throw Bu::XmlException("Got extra junk data instead of node close tag."); | ||
279 | } | ||
280 | } | ||
281 | else | ||
282 | { | ||
283 | // We're good, format is consistant | ||
284 | //addNode(); | ||
285 | |||
286 | // Skip extra whitespace | ||
287 | gcall( ws() ); | ||
288 | gcall( name() ); | ||
289 | gcall( ws() ); | ||
290 | gcall( paramlist() ); | ||
291 | gcall( ws() ); | ||
292 | } | ||
293 | } | ||
294 | else | ||
295 | { | ||
296 | throw Bu::XmlException("Expected to find node opening char, '<'."); | ||
297 | } | ||
298 | |||
299 | return true; | ||
300 | } | ||
301 | |||
302 | bool XmlReader::name() | ||
303 | { | ||
304 | Bu::FString sName; | ||
305 | |||
306 | while( true ) | ||
307 | { | ||
308 | char chr = getChar(); | ||
309 | if( isws( chr ) || chr == '>' || chr == '/' ) | ||
310 | { | ||
311 | addNode( sName ); | ||
312 | return true; | ||
313 | } | ||
314 | else | ||
315 | { | ||
316 | sName += chr; | ||
317 | usedChar(); | ||
318 | } | ||
319 | } | ||
320 | |||
321 | return true; | ||
322 | } | ||
323 | |||
324 | bool XmlReader::paramlist() | ||
325 | { | ||
326 | while( true ) | ||
327 | { | ||
328 | char chr = getChar(); | ||
329 | if( chr == '/' || chr == '>' ) | ||
330 | { | ||
331 | return true; | ||
332 | } | ||
333 | else | ||
334 | { | ||
335 | gcall( param() ); | ||
336 | gcall( ws() ); | ||
337 | } | ||
338 | } | ||
339 | |||
340 | return true; | ||
341 | } | ||
342 | |||
343 | Bu::FString XmlReader::getEscape() | ||
344 | { | ||
345 | if( getChar( 1 ) == '#' ) | ||
346 | { | ||
347 | // If the entity starts with a # it's a character escape code | ||
348 | int base = 10; | ||
349 | usedChar( 2 ); | ||
350 | if( getChar() == 'x' ) | ||
351 | { | ||
352 | base = 16; | ||
353 | usedChar(); | ||
354 | } | ||
355 | char buf[4]; | ||
356 | int j = 0; | ||
357 | for( j = 0; getChar() != ';'; j++ ) | ||
358 | { | ||
359 | buf[j] = getChar(); | ||
360 | usedChar(); | ||
361 | } | ||
362 | usedChar(); | ||
363 | buf[j] = '\0'; | ||
364 | buf[0] = (char)strtol( buf, (char **)NULL, base ); | ||
365 | buf[1] = '\0'; | ||
366 | |||
367 | return buf; | ||
368 | } | ||
369 | else | ||
370 | { | ||
371 | // ...otherwise replace with the appropriate string... | ||
372 | Bu::FString buf; | ||
373 | usedChar(); | ||
374 | for(;;) | ||
375 | { | ||
376 | char cbuf = getChar(); | ||
377 | usedChar(); | ||
378 | if( cbuf == ';' ) break; | ||
379 | buf += cbuf; | ||
380 | } | ||
381 | |||
382 | return htEntity[buf]; | ||
383 | } | ||
384 | } | ||
385 | |||
386 | bool XmlReader::param() | ||
387 | { | ||
388 | Bu::FString sName; | ||
389 | Bu::FString sValue; | ||
390 | |||
391 | while( true ) | ||
392 | { | ||
393 | char chr = getChar(); | ||
394 | if( isws( chr ) || chr == '=' ) | ||
395 | { | ||
396 | break; | ||
397 | } | ||
398 | else | ||
399 | { | ||
400 | sName.append( chr ); | ||
401 | usedChar(); | ||
402 | } | ||
403 | } | ||
404 | |||
405 | gcall( ws() ); | ||
406 | |||
407 | if( getChar() == '=' ) | ||
408 | { | ||
409 | usedChar(); | ||
410 | |||
411 | gcall( ws() ); | ||
412 | |||
413 | char chr = getChar(); | ||
414 | if( chr == '"' ) | ||
415 | { | ||
416 | // Better quoted rhs | ||
417 | usedChar(); | ||
418 | |||
419 | while( true ) | ||
420 | { | ||
421 | chr = getChar(); | ||
422 | if( chr == '"' ) | ||
423 | { | ||
424 | usedChar(); | ||
425 | addProperty( sName.getStr(), sValue.getStr() ); | ||
426 | return true; | ||
427 | } | ||
428 | else | ||
429 | { | ||
430 | if( chr == '&' ) | ||
431 | { | ||
432 | sValue += getEscape(); | ||
433 | } | ||
434 | else | ||
435 | { | ||
436 | sValue += chr; | ||
437 | usedChar(); | ||
438 | } | ||
439 | } | ||
440 | } | ||
441 | } | ||
442 | else | ||
443 | { | ||
444 | // Simple one-word rhs | ||
445 | while( true ) | ||
446 | { | ||
447 | chr = getChar(); | ||
448 | if( isws( chr ) || chr == '/' || chr == '>' ) | ||
449 | { | ||
450 | addProperty( sName.getStr(), sValue.getStr() ); | ||
451 | return true; | ||
452 | } | ||
453 | else | ||
454 | { | ||
455 | if( chr == '&' ) | ||
456 | { | ||
457 | sValue += getEscape(); | ||
458 | } | ||
459 | else | ||
460 | { | ||
461 | sValue += chr; | ||
462 | usedChar(); | ||
463 | } | ||
464 | } | ||
465 | } | ||
466 | } | ||
467 | } | ||
468 | else | ||
469 | { | ||
470 | throw Bu::XmlException("Expected an equals to seperate the params."); | ||
471 | return false; | ||
472 | } | ||
473 | |||
474 | return true; | ||
475 | } | ||
476 | |||
477 | bool XmlReader::content() | ||
478 | { | ||
479 | Bu::FString sContent; | ||
480 | |||
481 | if( bStrip ) gcall( ws() ); | ||
482 | |||
483 | while( true ) | ||
484 | { | ||
485 | char chr = getChar(); | ||
486 | if( chr == '<' ) | ||
487 | { | ||
488 | if( getChar(1) == '/' ) | ||
489 | { | ||
490 | if( sContent.getSize() > 0 ) | ||
491 | { | ||
492 | if( bStrip ) | ||
493 | { | ||
494 | int j; | ||
495 | for( j = sContent.getSize()-1; isws(sContent[j]); j-- ); | ||
496 | sContent[j+1] = '\0'; | ||
497 | } | ||
498 | setContent( sContent.getStr() ); | ||
499 | } | ||
500 | usedChar( 2 ); | ||
501 | gcall( ws() ); | ||
502 | Bu::FString sName; | ||
503 | while( true ) | ||
504 | { | ||
505 | chr = getChar(); | ||
506 | if( isws( chr ) || chr == '>' ) | ||
507 | { | ||
508 | if( !strcasecmp( getCurrent()->getName().getStr(), sName.getStr() ) ) | ||
509 | { | ||
510 | closeNode(); | ||
511 | break; | ||
512 | } | ||
513 | else | ||
514 | { | ||
515 | throw Bu::XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName().getStr(), sName.getStr() ); | ||
516 | } | ||
517 | } | ||
518 | else | ||
519 | { | ||
520 | sName += chr; | ||
521 | usedChar(); | ||
522 | } | ||
523 | } | ||
524 | gcall( ws() ); | ||
525 | if( getChar() == '>' ) | ||
526 | { | ||
527 | usedChar(); | ||
528 | return true; | ||
529 | } | ||
530 | else | ||
531 | { | ||
532 | throw Bu::XmlException("Malformed close tag."); | ||
533 | } | ||
534 | } | ||
535 | else if( getChar(1) == '!' ) | ||
536 | { | ||
537 | // We know it's a comment, let's see if it's proper | ||
538 | if( getChar(2) != '-' || | ||
539 | getChar(3) != '-' ) | ||
540 | { | ||
541 | // Not a valid XML comment | ||
542 | throw Bu::XmlException("Malformed comment start tag found."); | ||
543 | } | ||
544 | |||
545 | usedChar( 4 ); | ||
546 | |||
547 | // Now burn text until we find the close tag | ||
548 | for(;;) | ||
549 | { | ||
550 | if( getChar() == '-' ) | ||
551 | { | ||
552 | if( getChar( 1 ) == '-' ) | ||
553 | { | ||
554 | // The next one has to be a '>' now | ||
555 | if( getChar( 2 ) != '>' ) | ||
556 | { | ||
557 | throw Bu::XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment."); | ||
558 | } | ||
559 | usedChar( 3 ); | ||
560 | break; | ||
561 | } | ||
562 | else | ||
563 | { | ||
564 | // Found a dash followed by a non dash, that's ok... | ||
565 | usedChar( 2 ); | ||
566 | } | ||
567 | } | ||
568 | else | ||
569 | { | ||
570 | // Burn comment chars | ||
571 | usedChar(); | ||
572 | } | ||
573 | } | ||
574 | } | ||
575 | else | ||
576 | { | ||
577 | if( sContent.getSize() > 0 ) | ||
578 | { | ||
579 | if( bStrip ) | ||
580 | { | ||
581 | int j; | ||
582 | for( j = sContent.getSize()-1; isws(sContent[j]); j-- ); | ||
583 | sContent[j+1] = '\0'; | ||
584 | } | ||
585 | setContent( sContent.getStr() ); | ||
586 | sContent.clear(); | ||
587 | } | ||
588 | gcall( node() ); | ||
589 | } | ||
590 | |||
591 | if( bStrip ) gcall( ws() ); | ||
592 | } | ||
593 | else if( chr == '&' ) | ||
594 | { | ||
595 | sContent += getEscape(); | ||
596 | } | ||
597 | else | ||
598 | { | ||
599 | sContent += chr; | ||
600 | usedChar(); | ||
601 | } | ||
602 | } | ||
603 | } | ||
604 | |||