diff options
author | Mike Buland <eichlan@xagasoft.com> | 2006-06-28 07:28:17 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2006-06-28 07:28:17 +0000 |
commit | 789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05 (patch) | |
tree | 8eae4bb7d22e5553e130c513cc3e29347cfc28c2 | |
parent | 35274124dc95ec5d6094e71c18ac7b484d812f13 (diff) | |
download | libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.gz libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.bz2 libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.xz libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.zip |
Entities now work in the XML processor the way they should: you can use the
five built-in ones (gt, lt, apos, quot, amp) and define your own in the document.
The parser now skips any text declaration at the top, which is fine for most XML
that you get these days. I think if we ever make the break to full compliance,
we'll need to write a new parser from scratch.
-rw-r--r-- | src/unit/xml.cpp | 16 | ||||
-rw-r--r-- | src/xmlreader.cpp | 194 | ||||
-rw-r--r-- | src/xmlreader.h | 24 |
3 files changed, 220 insertions, 14 deletions
diff --git a/src/unit/xml.cpp b/src/unit/xml.cpp index 559b2f4..e4d779c 100644 --- a/src/unit/xml.cpp +++ b/src/unit/xml.cpp | |||
@@ -15,6 +15,10 @@ public: | |||
15 | TEST_ADD( XmlCoreTestSuite::badXml01 ) | 15 | TEST_ADD( XmlCoreTestSuite::badXml01 ) |
16 | TEST_ADD( XmlCoreTestSuite::badXml02 ) | 16 | TEST_ADD( XmlCoreTestSuite::badXml02 ) |
17 | TEST_ADD( XmlCoreTestSuite::badXml03 ) | 17 | TEST_ADD( XmlCoreTestSuite::badXml03 ) |
18 | |||
19 | TEST_ADD( XmlCoreTestSuite::entityBuiltin01 ) | ||
20 | |||
21 | TEST_ADD( XmlCoreTestSuite::entityDoc01 ) | ||
18 | } | 22 | } |
19 | 23 | ||
20 | private: | 24 | private: |
@@ -32,6 +36,18 @@ private: | |||
32 | { | 36 | { |
33 | TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); | 37 | TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); |
34 | } | 38 | } |
39 | |||
40 | void entityBuiltin01() | ||
41 | { | ||
42 | XmlStringReader r("<hello>><&'"</hello>"); | ||
43 | TEST_ASSERT( strcmp( r.getRoot()->getContent(), "><&\'\"" ) == 0 ); | ||
44 | } | ||
45 | |||
46 | void entityDoc01() | ||
47 | { | ||
48 | XmlStringReader r("<!ENTITY name \"bob the man\"><hello>"&name;"</hello>"); | ||
49 | TEST_ASSERT( strcmp( r.getRoot()->getContent(), "\"bob the man\"" ) == 0 ); | ||
50 | } | ||
35 | }; | 51 | }; |
36 | 52 | ||
37 | int main( int argc, char *argv[] ) | 53 | int main( int argc, char *argv[] ) |
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 70fd1d7..2a5f63f 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp | |||
@@ -1,14 +1,32 @@ | |||
1 | #include "xmlreader.h" | 1 | #include "xmlreader.h" |
2 | #include "xmlexception.h" | 2 | #include "xmlexception.h" |
3 | #include <string.h> | 3 | #include <string.h> |
4 | #include "hashfunctionstring.h" | ||
4 | 5 | ||
5 | XmlReader::XmlReader( bool bStrip ) : | 6 | XmlReader::XmlReader( bool bStrip ) : |
6 | bStrip( bStrip ) | 7 | bStrip( bStrip ), |
8 | htEntity( new HashFunctionString(), 11 ) | ||
7 | { | 9 | { |
8 | } | 10 | } |
9 | 11 | ||
10 | XmlReader::~XmlReader() | 12 | XmlReader::~XmlReader() |
11 | { | 13 | { |
14 | void *i = htEntity.getFirstItemPos(); | ||
15 | while( (i = htEntity.getNextItemPos( i ) ) ) | ||
16 | { | ||
17 | free( (char *)(htEntity.getItemID( i )) ); | ||
18 | delete (StaticString *)htEntity.getItemData( i ); | ||
19 | } | ||
20 | } | ||
21 | |||
22 | void XmlReader::addEntity( const char *name, const char *value ) | ||
23 | { | ||
24 | if( htEntity[name] ) return; | ||
25 | |||
26 | char *sName = strdup( name ); | ||
27 | StaticString *sValue = new StaticString( value ); | ||
28 | |||
29 | htEntity.insert( sName, sValue ); | ||
12 | } | 30 | } |
13 | 31 | ||
14 | #define gcall( x ) if( x == false ) return false; | 32 | #define gcall( x ) if( x == false ) return false; |
@@ -39,11 +57,129 @@ bool XmlReader::buildDoc() | |||
39 | { | 57 | { |
40 | // take care of initial whitespace | 58 | // take care of initial whitespace |
41 | gcall( ws() ); | 59 | gcall( ws() ); |
60 | textDecl(); | ||
61 | entity(); | ||
62 | addEntity("gt", ">"); | ||
63 | addEntity("lt", "<"); | ||
64 | addEntity("amp", "&"); | ||
65 | addEntity("apos", "\'"); | ||
66 | addEntity("quot", "\""); | ||
42 | gcall( node() ); | 67 | gcall( node() ); |
43 | 68 | ||
44 | return true; | 69 | return true; |
45 | } | 70 | } |
46 | 71 | ||
72 | void XmlReader::textDecl() | ||
73 | { | ||
74 | char chr; | ||
75 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
76 | { | ||
77 | usedChar( 2 ); | ||
78 | for(;;) | ||
79 | { | ||
80 | if( getChar() == '?' ) | ||
81 | { | ||
82 | if( getChar( 1 ) == '>' ) | ||
83 | { | ||
84 | usedChar( 2 ); | ||
85 | return; | ||
86 | } | ||
87 | } | ||
88 | usedChar(); | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | void XmlReader::entity() | ||
94 | { | ||
95 | for(;;) | ||
96 | { | ||
97 | ws(); | ||
98 | |||
99 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
100 | { | ||
101 | usedChar( 2 ); | ||
102 | ws(); | ||
103 | std::string buf; | ||
104 | for(;;) | ||
105 | { | ||
106 | char chr = getChar(); | ||
107 | usedChar(); | ||
108 | if( isws( chr ) ) break; | ||
109 | buf += chr; | ||
110 | } | ||
111 | |||
112 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
113 | { | ||
114 | ws(); | ||
115 | std::string name; | ||
116 | for(;;) | ||
117 | { | ||
118 | char chr = getChar(); | ||
119 | usedChar(); | ||
120 | if( isws( chr ) ) break; | ||
121 | name += chr; | ||
122 | } | ||
123 | ws(); | ||
124 | char quot = getChar(); | ||
125 | usedChar(); | ||
126 | if( quot != '\'' && quot != '\"' ) | ||
127 | { | ||
128 | throw XmlException( | ||
129 | "Only quoted entity values are supported." | ||
130 | ); | ||
131 | } | ||
132 | std::string value; | ||
133 | for(;;) | ||
134 | { | ||
135 | char chr = getChar(); | ||
136 | usedChar(); | ||
137 | if( chr == '&' ) | ||
138 | { | ||
139 | StaticString *tmp = getEscape(); | ||
140 | if( tmp == NULL ) throw XmlException("Entity thing"); | ||
141 | value += tmp->getString(); | ||
142 | delete tmp; | ||
143 | } | ||
144 | else if( chr == quot ) | ||
145 | { | ||
146 | break; | ||
147 | } | ||
148 | else | ||
149 | { | ||
150 | value += chr; | ||
151 | } | ||
152 | } | ||
153 | ws(); | ||
154 | if( getChar() == '>' ) | ||
155 | { | ||
156 | usedChar(); | ||
157 | |||
158 | addEntity( name.c_str(), value.c_str() ); | ||
159 | } | ||
160 | else | ||
161 | { | ||
162 | throw XmlException( | ||
163 | "Malformed ENTITY: unexpected '%c' found.", | ||
164 | getChar() | ||
165 | ); | ||
166 | } | ||
167 | } | ||
168 | else | ||
169 | { | ||
170 | throw XmlException( | ||
171 | "Unsupported header symbol: %s", | ||
172 | buf.c_str() | ||
173 | ); | ||
174 | } | ||
175 | } | ||
176 | else | ||
177 | { | ||
178 | return; | ||
179 | } | ||
180 | } | ||
181 | } | ||
182 | |||
47 | bool XmlReader::node() | 183 | bool XmlReader::node() |
48 | { | 184 | { |
49 | gcall( startNode() ) | 185 | gcall( startNode() ) |
@@ -190,13 +326,18 @@ bool XmlReader::paramlist() | |||
190 | return true; | 326 | return true; |
191 | } | 327 | } |
192 | 328 | ||
193 | char XmlReader::getEscape() | 329 | StaticString *XmlReader::getEscape() |
194 | { | 330 | { |
195 | // Right now, we just do # escapes... | ||
196 | if( getChar( 1 ) == '#' ) | 331 | if( getChar( 1 ) == '#' ) |
197 | { | 332 | { |
198 | usedChar(); | 333 | // If the entity starts with a # it's a character escape code |
199 | usedChar(); | 334 | int base = 10; |
335 | usedChar( 2 ); | ||
336 | if( getChar() == 'x' ) | ||
337 | { | ||
338 | base = 16; | ||
339 | usedChar(); | ||
340 | } | ||
200 | char buf[4]; | 341 | char buf[4]; |
201 | int j = 0; | 342 | int j = 0; |
202 | for( j = 0; getChar() != ';'; j++ ) | 343 | for( j = 0; getChar() != ';'; j++ ) |
@@ -206,11 +347,29 @@ char XmlReader::getEscape() | |||
206 | } | 347 | } |
207 | usedChar(); | 348 | usedChar(); |
208 | buf[j] = '\0'; | 349 | buf[j] = '\0'; |
209 | return (char)atoi( buf ); | 350 | buf[0] = (char)strtol( buf, (char **)NULL, base ); |
351 | buf[1] = '\0'; | ||
352 | |||
353 | return new StaticString( buf ); | ||
210 | } | 354 | } |
211 | else | 355 | else |
212 | { | 356 | { |
213 | return '\0'; | 357 | // ...otherwise replace with the appropriate string... |
358 | std::string buf; | ||
359 | usedChar(); | ||
360 | for(;;) | ||
361 | { | ||
362 | char cbuf = getChar(); | ||
363 | usedChar(); | ||
364 | if( cbuf == ';' ) break; | ||
365 | buf += cbuf; | ||
366 | } | ||
367 | |||
368 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | ||
369 | if( tmp == NULL ) return NULL; | ||
370 | |||
371 | StaticString *ret = new StaticString( *tmp ); | ||
372 | return ret; | ||
214 | } | 373 | } |
215 | } | 374 | } |
216 | 375 | ||
@@ -260,9 +419,10 @@ bool XmlReader::param() | |||
260 | { | 419 | { |
261 | if( chr == '&' ) | 420 | if( chr == '&' ) |
262 | { | 421 | { |
263 | chr = getEscape(); | 422 | StaticString *tmp = getEscape(); |
264 | if( chr == '\0' ) return false; | 423 | if( tmp == NULL ) return false; |
265 | fbValue.appendData( chr ); | 424 | fbValue.appendData( tmp->getString() ); |
425 | delete tmp; | ||
266 | } | 426 | } |
267 | else | 427 | else |
268 | { | 428 | { |
@@ -287,9 +447,10 @@ bool XmlReader::param() | |||
287 | { | 447 | { |
288 | if( chr == '&' ) | 448 | if( chr == '&' ) |
289 | { | 449 | { |
290 | chr = getEscape(); | 450 | StaticString *tmp = getEscape(); |
291 | if( chr == '\0' ) return false; | 451 | if( tmp == NULL ) return false; |
292 | fbValue.appendData( chr ); | 452 | fbValue.appendData( tmp->getString() ); |
453 | delete tmp; | ||
293 | } | 454 | } |
294 | else | 455 | else |
295 | { | 456 | { |
@@ -425,6 +586,13 @@ bool XmlReader::content() | |||
425 | 586 | ||
426 | if( bStrip ) gcall( ws() ); | 587 | if( bStrip ) gcall( ws() ); |
427 | } | 588 | } |
589 | else if( chr == '&' ) | ||
590 | { | ||
591 | StaticString *tmp = getEscape(); | ||
592 | if( tmp == NULL ) return false; | ||
593 | fbContent.appendData( tmp->getString() ); | ||
594 | delete tmp; | ||
595 | } | ||
428 | else | 596 | else |
429 | { | 597 | { |
430 | fbContent.appendData( chr ); | 598 | fbContent.appendData( chr ); |
diff --git a/src/xmlreader.h b/src/xmlreader.h index 4117dfd..a9881cb 100644 --- a/src/xmlreader.h +++ b/src/xmlreader.h | |||
@@ -4,6 +4,8 @@ | |||
4 | #include <stdio.h> | 4 | #include <stdio.h> |
5 | #include "xmldocument.h" | 5 | #include "xmldocument.h" |
6 | #include "flexbuf.h" | 6 | #include "flexbuf.h" |
7 | #include "hashtable.h" | ||
8 | #include "staticstring.h" | ||
7 | 9 | ||
8 | /** | 10 | /** |
9 | * Takes care of reading in xml formatted data from a file. This could/should | 11 | * Takes care of reading in xml formatted data from a file. This could/should |
@@ -90,7 +92,25 @@ private: | |||
90 | */ | 92 | */ |
91 | bool name(); | 93 | bool name(); |
92 | 94 | ||
93 | char getEscape(); | 95 | /** |
96 | * Automoton function: textDecl. Processes the xml text decleration, if | ||
97 | * there is one. | ||
98 | */ | ||
99 | void textDecl(); | ||
100 | |||
101 | /** | ||
102 | * Automoton function: entity. Processes an entity from the header. | ||
103 | */ | ||
104 | void entity(); | ||
105 | |||
106 | /** | ||
107 | * Adds an entity to the list, if it doesn't already exist. | ||
108 | *@param name The name of the entity | ||
109 | *@param value The value of the entity | ||
110 | */ | ||
111 | void addEntity( const char *name, const char *value ); | ||
112 | |||
113 | StaticString *getEscape(); | ||
94 | 114 | ||
95 | /** | 115 | /** |
96 | * Automoton function: paramlist. Processes a list of node params. | 116 | * Automoton function: paramlist. Processes a list of node params. |
@@ -114,6 +134,8 @@ private: | |||
114 | FlexBuf fbParamName; /**< buffer for the current param's name. */ | 134 | FlexBuf fbParamName; /**< buffer for the current param's name. */ |
115 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ | 135 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ |
116 | bool bStrip; /**< Are we stripping whitespace? */ | 136 | bool bStrip; /**< Are we stripping whitespace? */ |
137 | |||
138 | HashTable htEntity; /**< Entity type definitions. */ | ||
117 | }; | 139 | }; |
118 | 140 | ||
119 | #endif | 141 | #endif |