diff options
Diffstat (limited to 'src/xmlreader.cpp')
-rw-r--r-- | src/xmlreader.cpp | 194 |
1 files changed, 181 insertions, 13 deletions
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 70fd1d7..2a5f63f 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp | |||
@@ -1,14 +1,32 @@ | |||
1 | #include "xmlreader.h" | 1 | #include "xmlreader.h" |
2 | #include "xmlexception.h" | 2 | #include "xmlexception.h" |
3 | #include <string.h> | 3 | #include <string.h> |
4 | #include "hashfunctionstring.h" | ||
4 | 5 | ||
5 | XmlReader::XmlReader( bool bStrip ) : | 6 | XmlReader::XmlReader( bool bStrip ) : |
6 | bStrip( bStrip ) | 7 | bStrip( bStrip ), |
8 | htEntity( new HashFunctionString(), 11 ) | ||
7 | { | 9 | { |
8 | } | 10 | } |
9 | 11 | ||
10 | XmlReader::~XmlReader() | 12 | XmlReader::~XmlReader() |
11 | { | 13 | { |
14 | void *i = htEntity.getFirstItemPos(); | ||
15 | while( (i = htEntity.getNextItemPos( i ) ) ) | ||
16 | { | ||
17 | free( (char *)(htEntity.getItemID( i )) ); | ||
18 | delete (StaticString *)htEntity.getItemData( i ); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | void XmlReader::addEntity( const char *name, const char *value ) | ||
23 | { | ||
24 | if( htEntity[name] ) return; | ||
25 | |||
26 | char *sName = strdup( name ); | ||
27 | StaticString *sValue = new StaticString( value ); | ||
28 | |||
29 | htEntity.insert( sName, sValue ); | ||
12 | } | 30 | } |
13 | 31 | ||
14 | #define gcall( x ) if( x == false ) return false; | 32 | #define gcall( x ) if( x == false ) return false; |
@@ -39,11 +57,129 @@ bool XmlReader::buildDoc() | |||
39 | { | 57 | { |
40 | // take care of initial whitespace | 58 | // take care of initial whitespace |
41 | gcall( ws() ); | 59 | gcall( ws() ); |
60 | textDecl(); | ||
61 | entity(); | ||
62 | addEntity("gt", ">"); | ||
63 | addEntity("lt", "<"); | ||
64 | addEntity("amp", "&"); | ||
65 | addEntity("apos", "\'"); | ||
66 | addEntity("quot", "\""); | ||
42 | gcall( node() ); | 67 | gcall( node() ); |
43 | 68 | ||
44 | return true; | 69 | return true; |
45 | } | 70 | } |
46 | 71 | ||
72 | void XmlReader::textDecl() | ||
73 | { | ||
74 | char chr; | ||
75 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
76 | { | ||
77 | usedChar( 2 ); | ||
78 | for(;;) | ||
79 | { | ||
80 | if( getChar() == '?' ) | ||
81 | { | ||
82 | if( getChar( 1 ) == '>' ) | ||
83 | { | ||
84 | usedChar( 2 ); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
88 | usedChar(); | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | void XmlReader::entity() | ||
94 | { | ||
95 | for(;;) | ||
96 | { | ||
97 | ws(); | ||
98 | |||
99 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
100 | { | ||
101 | usedChar( 2 ); | ||
102 | ws(); | ||
103 | std::string buf; | ||
104 | for(;;) | ||
105 | { | ||
106 | char chr = getChar(); | ||
107 | usedChar(); | ||
108 | if( isws( chr ) ) break; | ||
109 | buf += chr; | ||
110 | } | ||
111 | |||
112 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
113 | { | ||
114 | ws(); | ||
115 | std::string name; | ||
116 | for(;;) | ||
117 | { | ||
118 | char chr = getChar(); | ||
119 | usedChar(); | ||
120 | if( isws( chr ) ) break; | ||
121 | name += chr; | ||
122 | } | ||
123 | ws(); | ||
124 | char quot = getChar(); | ||
125 | usedChar(); | ||
126 | if( quot != '\'' && quot != '\"' ) | ||
127 | { | ||
128 | throw XmlException( | ||
129 | "Only quoted entity values are supported." | ||
130 | ); | ||
131 | } | ||
132 | std::string value; | ||
133 | for(;;) | ||
134 | { | ||
135 | char chr = getChar(); | ||
136 | usedChar(); | ||
137 | if( chr == '&' ) | ||
138 | { | ||
139 | StaticString *tmp = getEscape(); | ||
140 | if( tmp == NULL ) throw XmlException("Entity thing"); | ||
141 | value += tmp->getString(); | ||
142 | delete tmp; | ||
143 | } | ||
144 | else if( chr == quot ) | ||
145 | { | ||
146 | break; | ||
147 | } | ||
148 | else | ||
149 | { | ||
150 | value += chr; | ||
151 | } | ||
152 | } | ||
153 | ws(); | ||
154 | if( getChar() == '>' ) | ||
155 | { | ||
156 | usedChar(); | ||
157 | |||
158 | addEntity( name.c_str(), value.c_str() ); | ||
159 | } | ||
160 | else | ||
161 | { | ||
162 | throw XmlException( | ||
163 | "Malformed ENTITY: unexpected '%c' found.", | ||
164 | getChar() | ||
165 | ); | ||
166 | } | ||
167 | } | ||
168 | else | ||
169 | { | ||
170 | throw XmlException( | ||
171 | "Unsupported header symbol: %s", | ||
172 | buf.c_str() | ||
173 | ); | ||
174 | } | ||
175 | } | ||
176 | else | ||
177 | { | ||
178 | return; | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | |||
47 | bool XmlReader::node() | 183 | bool XmlReader::node() |
48 | { | 184 | { |
49 | gcall( startNode() ) | 185 | gcall( startNode() ) |
@@ -190,13 +326,18 @@ bool XmlReader::paramlist() | |||
190 | return true; | 326 | return true; |
191 | } | 327 | } |
192 | 328 | ||
193 | char XmlReader::getEscape() | 329 | StaticString *XmlReader::getEscape() |
194 | { | 330 | { |
195 | // Right now, we just do # escapes... | ||
196 | if( getChar( 1 ) == '#' ) | 331 | if( getChar( 1 ) == '#' ) |
197 | { | 332 | { |
198 | usedChar(); | 333 | // If the entity starts with a # it's a character escape code |
199 | usedChar(); | 334 | int base = 10; |
335 | usedChar( 2 ); | ||
336 | if( getChar() == 'x' ) | ||
337 | { | ||
338 | base = 16; | ||
339 | usedChar(); | ||
340 | } | ||
200 | char buf[4]; | 341 | char buf[4]; |
201 | int j = 0; | 342 | int j = 0; |
202 | for( j = 0; getChar() != ';'; j++ ) | 343 | for( j = 0; getChar() != ';'; j++ ) |
@@ -206,11 +347,29 @@ char XmlReader::getEscape() | |||
206 | } | 347 | } |
207 | usedChar(); | 348 | usedChar(); |
208 | buf[j] = '\0'; | 349 | buf[j] = '\0'; |
209 | return (char)atoi( buf ); | 350 | buf[0] = (char)strtol( buf, (char **)NULL, base ); |
351 | buf[1] = '\0'; | ||
352 | |||
353 | return new StaticString( buf ); | ||
210 | } | 354 | } |
211 | else | 355 | else |
212 | { | 356 | { |
213 | return '\0'; | 357 | // ...otherwise replace with the appropriate string... |
358 | std::string buf; | ||
359 | usedChar(); | ||
360 | for(;;) | ||
361 | { | ||
362 | char cbuf = getChar(); | ||
363 | usedChar(); | ||
364 | if( cbuf == ';' ) break; | ||
365 | buf += cbuf; | ||
366 | } | ||
367 | |||
368 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | ||
369 | if( tmp == NULL ) return NULL; | ||
370 | |||
371 | StaticString *ret = new StaticString( *tmp ); | ||
372 | return ret; | ||
214 | } | 373 | } |
215 | } | 374 | } |
216 | 375 | ||
@@ -260,9 +419,10 @@ bool XmlReader::param() | |||
260 | { | 419 | { |
261 | if( chr == '&' ) | 420 | if( chr == '&' ) |
262 | { | 421 | { |
263 | chr = getEscape(); | 422 | StaticString *tmp = getEscape(); |
264 | if( chr == '\0' ) return false; | 423 | if( tmp == NULL ) return false; |
265 | fbValue.appendData( chr ); | 424 | fbValue.appendData( tmp->getString() ); |
425 | delete tmp; | ||
266 | } | 426 | } |
267 | else | 427 | else |
268 | { | 428 | { |
@@ -287,9 +447,10 @@ bool XmlReader::param() | |||
287 | { | 447 | { |
288 | if( chr == '&' ) | 448 | if( chr == '&' ) |
289 | { | 449 | { |
290 | chr = getEscape(); | 450 | StaticString *tmp = getEscape(); |
291 | if( chr == '\0' ) return false; | 451 | if( tmp == NULL ) return false; |
292 | fbValue.appendData( chr ); | 452 | fbValue.appendData( tmp->getString() ); |
453 | delete tmp; | ||
293 | } | 454 | } |
294 | else | 455 | else |
295 | { | 456 | { |
@@ -425,6 +586,13 @@ bool XmlReader::content() | |||
425 | 586 | ||
426 | if( bStrip ) gcall( ws() ); | 587 | if( bStrip ) gcall( ws() ); |
427 | } | 588 | } |
589 | else if( chr == '&' ) | ||
590 | { | ||
591 | StaticString *tmp = getEscape(); | ||
592 | if( tmp == NULL ) return false; | ||
593 | fbContent.appendData( tmp->getString() ); | ||
594 | delete tmp; | ||
595 | } | ||
428 | else | 596 | else |
429 | { | 597 | { |
430 | fbContent.appendData( chr ); | 598 | fbContent.appendData( chr ); |