diff options
author | Mike Buland <eichlan@xagasoft.com> | 2006-06-28 07:28:17 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2006-06-28 07:28:17 +0000 |
commit | 789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05 (patch) | |
tree | 8eae4bb7d22e5553e130c513cc3e29347cfc28c2 /src/xmlreader.cpp | |
parent | 35274124dc95ec5d6094e71c18ac7b484d812f13 (diff) | |
download | libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.gz libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.bz2 libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.xz libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.zip |
Entities now work in the XML processor the way they should: you can use the 5
built-in ones (gt, lt, apos, quot, amp) and define your own.
The parser now skips any text declaration (<?...?>) at the top, which is fine
for most XML that you get these days. I think if we ever make the break to full
compliance, we'll need to make a new parser from scratch.
Diffstat (limited to '')
-rw-r--r-- | src/xmlreader.cpp | 194 |
1 files changed, 181 insertions, 13 deletions
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 70fd1d7..2a5f63f 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp | |||
@@ -1,14 +1,32 @@ | |||
1 | #include "xmlreader.h" | 1 | #include "xmlreader.h" |
2 | #include "xmlexception.h" | 2 | #include "xmlexception.h" |
3 | #include <string.h> | 3 | #include <string.h> |
4 | #include "hashfunctionstring.h" | ||
4 | 5 | ||
5 | XmlReader::XmlReader( bool bStrip ) : | 6 | XmlReader::XmlReader( bool bStrip ) : |
6 | bStrip( bStrip ) | 7 | bStrip( bStrip ), |
8 | htEntity( new HashFunctionString(), 11 ) | ||
7 | { | 9 | { |
8 | } | 10 | } |
9 | 11 | ||
10 | XmlReader::~XmlReader() | 12 | XmlReader::~XmlReader() |
11 | { | 13 | { |
14 | void *i = htEntity.getFirstItemPos(); | ||
15 | while( (i = htEntity.getNextItemPos( i ) ) ) | ||
16 | { | ||
17 | free( (char *)(htEntity.getItemID( i )) ); | ||
18 | delete (StaticString *)htEntity.getItemData( i ); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | void XmlReader::addEntity( const char *name, const char *value ) | ||
23 | { | ||
24 | if( htEntity[name] ) return; | ||
25 | |||
26 | char *sName = strdup( name ); | ||
27 | StaticString *sValue = new StaticString( value ); | ||
28 | |||
29 | htEntity.insert( sName, sValue ); | ||
12 | } | 30 | } |
13 | 31 | ||
14 | #define gcall( x ) if( x == false ) return false; | 32 | #define gcall( x ) if( x == false ) return false; |
@@ -39,11 +57,129 @@ bool XmlReader::buildDoc() | |||
39 | { | 57 | { |
40 | // take care of initial whitespace | 58 | // take care of initial whitespace |
41 | gcall( ws() ); | 59 | gcall( ws() ); |
60 | textDecl(); | ||
61 | entity(); | ||
62 | addEntity("gt", ">"); | ||
63 | addEntity("lt", "<"); | ||
64 | addEntity("amp", "&"); | ||
65 | addEntity("apos", "\'"); | ||
66 | addEntity("quot", "\""); | ||
42 | gcall( node() ); | 67 | gcall( node() ); |
43 | 68 | ||
44 | return true; | 69 | return true; |
45 | } | 70 | } |
46 | 71 | ||
72 | void XmlReader::textDecl() | ||
73 | { | ||
74 | char chr; | ||
75 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
76 | { | ||
77 | usedChar( 2 ); | ||
78 | for(;;) | ||
79 | { | ||
80 | if( getChar() == '?' ) | ||
81 | { | ||
82 | if( getChar( 1 ) == '>' ) | ||
83 | { | ||
84 | usedChar( 2 ); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
88 | usedChar(); | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | void XmlReader::entity() | ||
94 | { | ||
95 | for(;;) | ||
96 | { | ||
97 | ws(); | ||
98 | |||
99 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
100 | { | ||
101 | usedChar( 2 ); | ||
102 | ws(); | ||
103 | std::string buf; | ||
104 | for(;;) | ||
105 | { | ||
106 | char chr = getChar(); | ||
107 | usedChar(); | ||
108 | if( isws( chr ) ) break; | ||
109 | buf += chr; | ||
110 | } | ||
111 | |||
112 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
113 | { | ||
114 | ws(); | ||
115 | std::string name; | ||
116 | for(;;) | ||
117 | { | ||
118 | char chr = getChar(); | ||
119 | usedChar(); | ||
120 | if( isws( chr ) ) break; | ||
121 | name += chr; | ||
122 | } | ||
123 | ws(); | ||
124 | char quot = getChar(); | ||
125 | usedChar(); | ||
126 | if( quot != '\'' && quot != '\"' ) | ||
127 | { | ||
128 | throw XmlException( | ||
129 | "Only quoted entity values are supported." | ||
130 | ); | ||
131 | } | ||
132 | std::string value; | ||
133 | for(;;) | ||
134 | { | ||
135 | char chr = getChar(); | ||
136 | usedChar(); | ||
137 | if( chr == '&' ) | ||
138 | { | ||
139 | StaticString *tmp = getEscape(); | ||
140 | if( tmp == NULL ) throw XmlException("Entity thing"); | ||
141 | value += tmp->getString(); | ||
142 | delete tmp; | ||
143 | } | ||
144 | else if( chr == quot ) | ||
145 | { | ||
146 | break; | ||
147 | } | ||
148 | else | ||
149 | { | ||
150 | value += chr; | ||
151 | } | ||
152 | } | ||
153 | ws(); | ||
154 | if( getChar() == '>' ) | ||
155 | { | ||
156 | usedChar(); | ||
157 | |||
158 | addEntity( name.c_str(), value.c_str() ); | ||
159 | } | ||
160 | else | ||
161 | { | ||
162 | throw XmlException( | ||
163 | "Malformed ENTITY: unexpected '%c' found.", | ||
164 | getChar() | ||
165 | ); | ||
166 | } | ||
167 | } | ||
168 | else | ||
169 | { | ||
170 | throw XmlException( | ||
171 | "Unsupported header symbol: %s", | ||
172 | buf.c_str() | ||
173 | ); | ||
174 | } | ||
175 | } | ||
176 | else | ||
177 | { | ||
178 | return; | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | |||
47 | bool XmlReader::node() | 183 | bool XmlReader::node() |
48 | { | 184 | { |
49 | gcall( startNode() ) | 185 | gcall( startNode() ) |
@@ -190,13 +326,18 @@ bool XmlReader::paramlist() | |||
190 | return true; | 326 | return true; |
191 | } | 327 | } |
192 | 328 | ||
193 | char XmlReader::getEscape() | 329 | StaticString *XmlReader::getEscape() |
194 | { | 330 | { |
195 | // Right now, we just do # escapes... | ||
196 | if( getChar( 1 ) == '#' ) | 331 | if( getChar( 1 ) == '#' ) |
197 | { | 332 | { |
198 | usedChar(); | 333 | // If the entity starts with a # it's a character escape code |
199 | usedChar(); | 334 | int base = 10; |
335 | usedChar( 2 ); | ||
336 | if( getChar() == 'x' ) | ||
337 | { | ||
338 | base = 16; | ||
339 | usedChar(); | ||
340 | } | ||
200 | char buf[4]; | 341 | char buf[4]; |
201 | int j = 0; | 342 | int j = 0; |
202 | for( j = 0; getChar() != ';'; j++ ) | 343 | for( j = 0; getChar() != ';'; j++ ) |
@@ -206,11 +347,29 @@ char XmlReader::getEscape() | |||
206 | } | 347 | } |
207 | usedChar(); | 348 | usedChar(); |
208 | buf[j] = '\0'; | 349 | buf[j] = '\0'; |
209 | return (char)atoi( buf ); | 350 | buf[0] = (char)strtol( buf, (char **)NULL, base ); |
351 | buf[1] = '\0'; | ||
352 | |||
353 | return new StaticString( buf ); | ||
210 | } | 354 | } |
211 | else | 355 | else |
212 | { | 356 | { |
213 | return '\0'; | 357 | // ...otherwise replace with the appropriate string... |
358 | std::string buf; | ||
359 | usedChar(); | ||
360 | for(;;) | ||
361 | { | ||
362 | char cbuf = getChar(); | ||
363 | usedChar(); | ||
364 | if( cbuf == ';' ) break; | ||
365 | buf += cbuf; | ||
366 | } | ||
367 | |||
368 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | ||
369 | if( tmp == NULL ) return NULL; | ||
370 | |||
371 | StaticString *ret = new StaticString( *tmp ); | ||
372 | return ret; | ||
214 | } | 373 | } |
215 | } | 374 | } |
216 | 375 | ||
@@ -260,9 +419,10 @@ bool XmlReader::param() | |||
260 | { | 419 | { |
261 | if( chr == '&' ) | 420 | if( chr == '&' ) |
262 | { | 421 | { |
263 | chr = getEscape(); | 422 | StaticString *tmp = getEscape(); |
264 | if( chr == '\0' ) return false; | 423 | if( tmp == NULL ) return false; |
265 | fbValue.appendData( chr ); | 424 | fbValue.appendData( tmp->getString() ); |
425 | delete tmp; | ||
266 | } | 426 | } |
267 | else | 427 | else |
268 | { | 428 | { |
@@ -287,9 +447,10 @@ bool XmlReader::param() | |||
287 | { | 447 | { |
288 | if( chr == '&' ) | 448 | if( chr == '&' ) |
289 | { | 449 | { |
290 | chr = getEscape(); | 450 | StaticString *tmp = getEscape(); |
291 | if( chr == '\0' ) return false; | 451 | if( tmp == NULL ) return false; |
292 | fbValue.appendData( chr ); | 452 | fbValue.appendData( tmp->getString() ); |
453 | delete tmp; | ||
293 | } | 454 | } |
294 | else | 455 | else |
295 | { | 456 | { |
@@ -425,6 +586,13 @@ bool XmlReader::content() | |||
425 | 586 | ||
426 | if( bStrip ) gcall( ws() ); | 587 | if( bStrip ) gcall( ws() ); |
427 | } | 588 | } |
589 | else if( chr == '&' ) | ||
590 | { | ||
591 | StaticString *tmp = getEscape(); | ||
592 | if( tmp == NULL ) return false; | ||
593 | fbContent.appendData( tmp->getString() ); | ||
594 | delete tmp; | ||
595 | } | ||
428 | else | 596 | else |
429 | { | 597 | { |
430 | fbContent.appendData( chr ); | 598 | fbContent.appendData( chr ); |