diff options
-rw-r--r-- | src/unit/xml.cpp | 16 | ||||
-rw-r--r-- | src/xmlreader.cpp | 194 | ||||
-rw-r--r-- | src/xmlreader.h | 24 |
3 files changed, 220 insertions, 14 deletions
diff --git a/src/unit/xml.cpp b/src/unit/xml.cpp index 559b2f4..e4d779c 100644 --- a/src/unit/xml.cpp +++ b/src/unit/xml.cpp | |||
@@ -15,6 +15,10 @@ public: | |||
15 | TEST_ADD( XmlCoreTestSuite::badXml01 ) | 15 | TEST_ADD( XmlCoreTestSuite::badXml01 ) |
16 | TEST_ADD( XmlCoreTestSuite::badXml02 ) | 16 | TEST_ADD( XmlCoreTestSuite::badXml02 ) |
17 | TEST_ADD( XmlCoreTestSuite::badXml03 ) | 17 | TEST_ADD( XmlCoreTestSuite::badXml03 ) |
18 | |||
19 | TEST_ADD( XmlCoreTestSuite::entityBuiltin01 ) | ||
20 | |||
21 | TEST_ADD( XmlCoreTestSuite::entityDoc01 ) | ||
18 | } | 22 | } |
19 | 23 | ||
20 | private: | 24 | private: |
@@ -32,6 +36,18 @@ private: | |||
32 | { | 36 | { |
33 | TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); | 37 | TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); |
34 | } | 38 | } |
39 | |||
40 | void entityBuiltin01() | ||
41 | { | ||
42 | XmlStringReader r("<hello>><&'"</hello>"); | ||
43 | TEST_ASSERT( strcmp( r.getRoot()->getContent(), "><&\'\"" ) == 0 ); | ||
44 | } | ||
45 | |||
46 | void entityDoc01() | ||
47 | { | ||
48 | XmlStringReader r("<!ENTITY name \"bob the man\"><hello>"&name;"</hello>"); | ||
49 | TEST_ASSERT( strcmp( r.getRoot()->getContent(), "\"bob the man\"" ) == 0 ); | ||
50 | } | ||
35 | }; | 51 | }; |
36 | 52 | ||
37 | int main( int argc, char *argv[] ) | 53 | int main( int argc, char *argv[] ) |
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 70fd1d7..2a5f63f 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp | |||
@@ -1,14 +1,32 @@ | |||
1 | #include "xmlreader.h" | 1 | #include "xmlreader.h" |
2 | #include "xmlexception.h" | 2 | #include "xmlexception.h" |
3 | #include <string.h> | 3 | #include <string.h> |
4 | #include "hashfunctionstring.h" | ||
4 | 5 | ||
5 | XmlReader::XmlReader( bool bStrip ) : | 6 | XmlReader::XmlReader( bool bStrip ) : |
6 | bStrip( bStrip ) | 7 | bStrip( bStrip ), |
8 | htEntity( new HashFunctionString(), 11 ) | ||
7 | { | 9 | { |
8 | } | 10 | } |
9 | 11 | ||
10 | XmlReader::~XmlReader() | 12 | XmlReader::~XmlReader() |
11 | { | 13 | { |
14 | void *i = htEntity.getFirstItemPos(); | ||
15 | while( (i = htEntity.getNextItemPos( i ) ) ) | ||
16 | { | ||
17 | free( (char *)(htEntity.getItemID( i )) ); | ||
18 | delete (StaticString *)htEntity.getItemData( i ); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | void XmlReader::addEntity( const char *name, const char *value ) | ||
23 | { | ||
24 | if( htEntity[name] ) return; | ||
25 | |||
26 | char *sName = strdup( name ); | ||
27 | StaticString *sValue = new StaticString( value ); | ||
28 | |||
29 | htEntity.insert( sName, sValue ); | ||
12 | } | 30 | } |
13 | 31 | ||
14 | #define gcall( x ) if( x == false ) return false; | 32 | #define gcall( x ) if( x == false ) return false; |
@@ -39,11 +57,129 @@ bool XmlReader::buildDoc() | |||
39 | { | 57 | { |
40 | // take care of initial whitespace | 58 | // take care of initial whitespace |
41 | gcall( ws() ); | 59 | gcall( ws() ); |
60 | textDecl(); | ||
61 | entity(); | ||
62 | addEntity("gt", ">"); | ||
63 | addEntity("lt", "<"); | ||
64 | addEntity("amp", "&"); | ||
65 | addEntity("apos", "\'"); | ||
66 | addEntity("quot", "\""); | ||
42 | gcall( node() ); | 67 | gcall( node() ); |
43 | 68 | ||
44 | return true; | 69 | return true; |
45 | } | 70 | } |
46 | 71 | ||
72 | void XmlReader::textDecl() | ||
73 | { | ||
74 | char chr; | ||
75 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
76 | { | ||
77 | usedChar( 2 ); | ||
78 | for(;;) | ||
79 | { | ||
80 | if( getChar() == '?' ) | ||
81 | { | ||
82 | if( getChar( 1 ) == '>' ) | ||
83 | { | ||
84 | usedChar( 2 ); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
88 | usedChar(); | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | void XmlReader::entity() | ||
94 | { | ||
95 | for(;;) | ||
96 | { | ||
97 | ws(); | ||
98 | |||
99 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
100 | { | ||
101 | usedChar( 2 ); | ||
102 | ws(); | ||
103 | std::string buf; | ||
104 | for(;;) | ||
105 | { | ||
106 | char chr = getChar(); | ||
107 | usedChar(); | ||
108 | if( isws( chr ) ) break; | ||
109 | buf += chr; | ||
110 | } | ||
111 | |||
112 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
113 | { | ||
114 | ws(); | ||
115 | std::string name; | ||
116 | for(;;) | ||
117 | { | ||
118 | char chr = getChar(); | ||
119 | usedChar(); | ||
120 | if( isws( chr ) ) break; | ||
121 | name += chr; | ||
122 | } | ||
123 | ws(); | ||
124 | char quot = getChar(); | ||
125 | usedChar(); | ||
126 | if( quot != '\'' && quot != '\"' ) | ||
127 | { | ||
128 | throw XmlException( | ||
129 | "Only quoted entity values are supported." | ||
130 | ); | ||
131 | } | ||
132 | std::string value; | ||
133 | for(;;) | ||
134 | { | ||
135 | char chr = getChar(); | ||
136 | usedChar(); | ||
137 | if( chr == '&' ) | ||
138 | { | ||
139 | StaticString *tmp = getEscape(); | ||
140 | if( tmp == NULL ) throw XmlException("Entity thing"); | ||
141 | value += tmp->getString(); | ||
142 | delete tmp; | ||
143 | } | ||
144 | else if( chr == quot ) | ||
145 | { | ||
146 | break; | ||
147 | } | ||
148 | else | ||
149 | { | ||
150 | value += chr; | ||
151 | } | ||
152 | } | ||
153 | ws(); | ||
154 | if( getChar() == '>' ) | ||
155 | { | ||
156 | usedChar(); | ||
157 | |||
158 | addEntity( name.c_str(), value.c_str() ); | ||
159 | } | ||
160 | else | ||
161 | { | ||
162 | throw XmlException( | ||
163 | "Malformed ENTITY: unexpected '%c' found.", | ||
164 | getChar() | ||
165 | ); | ||
166 | } | ||
167 | } | ||
168 | else | ||
169 | { | ||
170 | throw XmlException( | ||
171 | "Unsupported header symbol: %s", | ||
172 | buf.c_str() | ||
173 | ); | ||
174 | } | ||
175 | } | ||
176 | else | ||
177 | { | ||
178 | return; | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | |||
47 | bool XmlReader::node() | 183 | bool XmlReader::node() |
48 | { | 184 | { |
49 | gcall( startNode() ) | 185 | gcall( startNode() ) |
@@ -190,13 +326,18 @@ bool XmlReader::paramlist() | |||
190 | return true; | 326 | return true; |
191 | } | 327 | } |
192 | 328 | ||
193 | char XmlReader::getEscape() | 329 | StaticString *XmlReader::getEscape() |
194 | { | 330 | { |
195 | // Right now, we just do # escapes... | ||
196 | if( getChar( 1 ) == '#' ) | 331 | if( getChar( 1 ) == '#' ) |
197 | { | 332 | { |
198 | usedChar(); | 333 | // If the entity starts with a # it's a character escape code |
199 | usedChar(); | 334 | int base = 10; |
335 | usedChar( 2 ); | ||
336 | if( getChar() == 'x' ) | ||
337 | { | ||
338 | base = 16; | ||
339 | usedChar(); | ||
340 | } | ||
200 | char buf[4]; | 341 | char buf[4]; |
201 | int j = 0; | 342 | int j = 0; |
202 | for( j = 0; getChar() != ';'; j++ ) | 343 | for( j = 0; getChar() != ';'; j++ ) |
@@ -206,11 +347,29 @@ char XmlReader::getEscape() | |||
206 | } | 347 | } |
207 | usedChar(); | 348 | usedChar(); |
208 | buf[j] = '\0'; | 349 | buf[j] = '\0'; |
209 | return (char)atoi( buf ); | 350 | buf[0] = (char)strtol( buf, (char **)NULL, base ); |
351 | buf[1] = '\0'; | ||
352 | |||
353 | return new StaticString( buf ); | ||
210 | } | 354 | } |
211 | else | 355 | else |
212 | { | 356 | { |
213 | return '\0'; | 357 | // ...otherwise replace with the appropriate string... |
358 | std::string buf; | ||
359 | usedChar(); | ||
360 | for(;;) | ||
361 | { | ||
362 | char cbuf = getChar(); | ||
363 | usedChar(); | ||
364 | if( cbuf == ';' ) break; | ||
365 | buf += cbuf; | ||
366 | } | ||
367 | |||
368 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | ||
369 | if( tmp == NULL ) return NULL; | ||
370 | |||
371 | StaticString *ret = new StaticString( *tmp ); | ||
372 | return ret; | ||
214 | } | 373 | } |
215 | } | 374 | } |
216 | 375 | ||
@@ -260,9 +419,10 @@ bool XmlReader::param() | |||
260 | { | 419 | { |
261 | if( chr == '&' ) | 420 | if( chr == '&' ) |
262 | { | 421 | { |
263 | chr = getEscape(); | 422 | StaticString *tmp = getEscape(); |
264 | if( chr == '\0' ) return false; | 423 | if( tmp == NULL ) return false; |
265 | fbValue.appendData( chr ); | 424 | fbValue.appendData( tmp->getString() ); |
425 | delete tmp; | ||
266 | } | 426 | } |
267 | else | 427 | else |
268 | { | 428 | { |
@@ -287,9 +447,10 @@ bool XmlReader::param() | |||
287 | { | 447 | { |
288 | if( chr == '&' ) | 448 | if( chr == '&' ) |
289 | { | 449 | { |
290 | chr = getEscape(); | 450 | StaticString *tmp = getEscape(); |
291 | if( chr == '\0' ) return false; | 451 | if( tmp == NULL ) return false; |
292 | fbValue.appendData( chr ); | 452 | fbValue.appendData( tmp->getString() ); |
453 | delete tmp; | ||
293 | } | 454 | } |
294 | else | 455 | else |
295 | { | 456 | { |
@@ -425,6 +586,13 @@ bool XmlReader::content() | |||
425 | 586 | ||
426 | if( bStrip ) gcall( ws() ); | 587 | if( bStrip ) gcall( ws() ); |
427 | } | 588 | } |
589 | else if( chr == '&' ) | ||
590 | { | ||
591 | StaticString *tmp = getEscape(); | ||
592 | if( tmp == NULL ) return false; | ||
593 | fbContent.appendData( tmp->getString() ); | ||
594 | delete tmp; | ||
595 | } | ||
428 | else | 596 | else |
429 | { | 597 | { |
430 | fbContent.appendData( chr ); | 598 | fbContent.appendData( chr ); |
diff --git a/src/xmlreader.h b/src/xmlreader.h index 4117dfd..a9881cb 100644 --- a/src/xmlreader.h +++ b/src/xmlreader.h | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <stdio.h> | 4 | #include <stdio.h> |
5 | #include "xmldocument.h" | 5 | #include "xmldocument.h" |
6 | #include "flexbuf.h" | 6 | #include "flexbuf.h" |
7 | #include "hashtable.h" | ||
8 | #include "staticstring.h" | ||
7 | 9 | ||
8 | /** | 10 | /** |
9 | * Takes care of reading in xml formatted data from a file. This could/should | 11 | * Takes care of reading in xml formatted data from a file. This could/should |
@@ -90,7 +92,25 @@ private: | |||
90 | */ | 92 | */ |
91 | bool name(); | 93 | bool name(); |
92 | 94 | ||
93 | char getEscape(); | 95 | /** |
96 | * Automaton function: textDecl. Processes the xml text declaration, if | ||
97 | * there is one. | ||
98 | */ | ||
99 | void textDecl(); | ||
100 | |||
101 | /** | ||
102 | * Automaton function: entity. Processes an entity from the header. | ||
103 | */ | ||
104 | void entity(); | ||
105 | |||
106 | /** | ||
107 | * Adds an entity to the list, if it doesn't already exist. | ||
108 | *@param name The name of the entity | ||
109 | *@param value The value of the entity | ||
110 | */ | ||
111 | void addEntity( const char *name, const char *value ); | ||
112 | |||
113 | StaticString *getEscape(); | ||
94 | 114 | ||
95 | /** | 115 | /** |
96 | * Automoton function: paramlist. Processes a list of node params. | 116 | * Automoton function: paramlist. Processes a list of node params. |
@@ -114,6 +134,8 @@ private: | |||
114 | FlexBuf fbParamName; /**< buffer for the current param's name. */ | 134 | FlexBuf fbParamName; /**< buffer for the current param's name. */ |
115 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ | 135 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ |
116 | bool bStrip; /**< Are we stripping whitespace? */ | 136 | bool bStrip; /**< Are we stripping whitespace? */ |
137 | |||
138 | HashTable htEntity; /**< Entity type definitions. */ | ||
117 | }; | 139 | }; |
118 | 140 | ||
119 | #endif | 141 | #endif |