summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/unit/xml.cpp16
-rw-r--r--src/xmlreader.cpp194
-rw-r--r--src/xmlreader.h24
3 files changed, 220 insertions, 14 deletions
diff --git a/src/unit/xml.cpp b/src/unit/xml.cpp
index 559b2f4..e4d779c 100644
--- a/src/unit/xml.cpp
+++ b/src/unit/xml.cpp
@@ -15,6 +15,10 @@ public:
15 TEST_ADD( XmlCoreTestSuite::badXml01 ) 15 TEST_ADD( XmlCoreTestSuite::badXml01 )
16 TEST_ADD( XmlCoreTestSuite::badXml02 ) 16 TEST_ADD( XmlCoreTestSuite::badXml02 )
17 TEST_ADD( XmlCoreTestSuite::badXml03 ) 17 TEST_ADD( XmlCoreTestSuite::badXml03 )
18
19 TEST_ADD( XmlCoreTestSuite::entityBuiltin01 )
20
21 TEST_ADD( XmlCoreTestSuite::entityDoc01 )
18 } 22 }
19 23
20private: 24private:
@@ -32,6 +36,18 @@ private:
32 { 36 {
33 TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); 37 TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & );
34 } 38 }
39
40 void entityBuiltin01()
41 {
42 XmlStringReader r("<hello>&gt;&lt;&amp;&apos;&quot;</hello>");
43 TEST_ASSERT( strcmp( r.getRoot()->getContent(), "><&\'\"" ) == 0 );
44 }
45
46 void entityDoc01()
47 {
48 XmlStringReader r("<!ENTITY name \"bob the man\"><hello>&quot;&name;&quot;</hello>");
49 TEST_ASSERT( strcmp( r.getRoot()->getContent(), "\"bob the man\"" ) == 0 );
50 }
35}; 51};
36 52
37int main( int argc, char *argv[] ) 53int main( int argc, char *argv[] )
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp
index 70fd1d7..2a5f63f 100644
--- a/src/xmlreader.cpp
+++ b/src/xmlreader.cpp
@@ -1,14 +1,32 @@
1#include "xmlreader.h" 1#include "xmlreader.h"
2#include "xmlexception.h" 2#include "xmlexception.h"
3#include <string.h> 3#include <string.h>
4#include "hashfunctionstring.h"
4 5
5XmlReader::XmlReader( bool bStrip ) : 6XmlReader::XmlReader( bool bStrip ) :
6 bStrip( bStrip ) 7 bStrip( bStrip ),
8 htEntity( new HashFunctionString(), 11 )
7{ 9{
8} 10}
9 11
10XmlReader::~XmlReader() 12XmlReader::~XmlReader()
11{ 13{
14 void *i = htEntity.getFirstItemPos();
15 while( (i = htEntity.getNextItemPos( i ) ) )
16 {
17 free( (char *)(htEntity.getItemID( i )) );
18 delete (StaticString *)htEntity.getItemData( i );
19 }
20}
21
22void XmlReader::addEntity( const char *name, const char *value )
23{
24 if( htEntity[name] ) return;
25
26 char *sName = strdup( name );
27 StaticString *sValue = new StaticString( value );
28
29 htEntity.insert( sName, sValue );
12} 30}
13 31
14#define gcall( x ) if( x == false ) return false; 32#define gcall( x ) if( x == false ) return false;
@@ -39,11 +57,129 @@ bool XmlReader::buildDoc()
39{ 57{
40 // take care of initial whitespace 58 // take care of initial whitespace
41 gcall( ws() ); 59 gcall( ws() );
60 textDecl();
61 entity();
62 addEntity("gt", ">");
63 addEntity("lt", "<");
64 addEntity("amp", "&");
65 addEntity("apos", "\'");
66 addEntity("quot", "\"");
42 gcall( node() ); 67 gcall( node() );
43 68
44 return true; 69 return true;
45} 70}
46 71
72void XmlReader::textDecl()
73{
74 char chr;
75 if( getChar() == '<' && getChar( 1 ) == '?' )
76 {
77 usedChar( 2 );
78 for(;;)
79 {
80 if( getChar() == '?' )
81 {
82 if( getChar( 1 ) == '>' )
83 {
84 usedChar( 2 );
85 return;
86 }
87 }
88 usedChar();
89 }
90 }
91}
92
93void XmlReader::entity()
94{
95 for(;;)
96 {
97 ws();
98
99 if( getChar() == '<' && getChar( 1 ) == '!' )
100 {
101 usedChar( 2 );
102 ws();
103 std::string buf;
104 for(;;)
105 {
106 char chr = getChar();
107 usedChar();
108 if( isws( chr ) ) break;
109 buf += chr;
110 }
111
112 if( strcmp( buf.c_str(), "ENTITY") == 0 )
113 {
114 ws();
115 std::string name;
116 for(;;)
117 {
118 char chr = getChar();
119 usedChar();
120 if( isws( chr ) ) break;
121 name += chr;
122 }
123 ws();
124 char quot = getChar();
125 usedChar();
126 if( quot != '\'' && quot != '\"' )
127 {
128 throw XmlException(
129 "Only quoted entity values are supported."
130 );
131 }
132 std::string value;
133 for(;;)
134 {
135 char chr = getChar();
136 usedChar();
137 if( chr == '&' )
138 {
139 StaticString *tmp = getEscape();
140 if( tmp == NULL ) throw XmlException("Entity thing");
141 value += tmp->getString();
142 delete tmp;
143 }
144 else if( chr == quot )
145 {
146 break;
147 }
148 else
149 {
150 value += chr;
151 }
152 }
153 ws();
154 if( getChar() == '>' )
155 {
156 usedChar();
157
158 addEntity( name.c_str(), value.c_str() );
159 }
160 else
161 {
162 throw XmlException(
163 "Malformed ENTITY: unexpected '%c' found.",
164 getChar()
165 );
166 }
167 }
168 else
169 {
170 throw XmlException(
171 "Unsupported header symbol: %s",
172 buf.c_str()
173 );
174 }
175 }
176 else
177 {
178 return;
179 }
180 }
181}
182
47bool XmlReader::node() 183bool XmlReader::node()
48{ 184{
49 gcall( startNode() ) 185 gcall( startNode() )
@@ -190,13 +326,18 @@ bool XmlReader::paramlist()
190 return true; 326 return true;
191} 327}
192 328
193char XmlReader::getEscape() 329StaticString *XmlReader::getEscape()
194{ 330{
195 // Right now, we just do # escapes...
196 if( getChar( 1 ) == '#' ) 331 if( getChar( 1 ) == '#' )
197 { 332 {
198 usedChar(); 333 // If the entity starts with a # it's a character escape code
199 usedChar(); 334 int base = 10;
335 usedChar( 2 );
336 if( getChar() == 'x' )
337 {
338 base = 16;
339 usedChar();
340 }
200 char buf[4]; 341 char buf[4];
201 int j = 0; 342 int j = 0;
202 for( j = 0; getChar() != ';'; j++ ) 343 for( j = 0; getChar() != ';'; j++ )
@@ -206,11 +347,29 @@ char XmlReader::getEscape()
206 } 347 }
207 usedChar(); 348 usedChar();
208 buf[j] = '\0'; 349 buf[j] = '\0';
209 return (char)atoi( buf ); 350 buf[0] = (char)strtol( buf, (char **)NULL, base );
351 buf[1] = '\0';
352
353 return new StaticString( buf );
210 } 354 }
211 else 355 else
212 { 356 {
213 return '\0'; 357 // ...otherwise replace with the appropriate string...
358 std::string buf;
359 usedChar();
360 for(;;)
361 {
362 char cbuf = getChar();
363 usedChar();
364 if( cbuf == ';' ) break;
365 buf += cbuf;
366 }
367
368 StaticString *tmp = (StaticString *)htEntity[buf.c_str()];
369 if( tmp == NULL ) return NULL;
370
371 StaticString *ret = new StaticString( *tmp );
372 return ret;
214 } 373 }
215} 374}
216 375
@@ -260,9 +419,10 @@ bool XmlReader::param()
260 { 419 {
261 if( chr == '&' ) 420 if( chr == '&' )
262 { 421 {
263 chr = getEscape(); 422 StaticString *tmp = getEscape();
264 if( chr == '\0' ) return false; 423 if( tmp == NULL ) return false;
265 fbValue.appendData( chr ); 424 fbValue.appendData( tmp->getString() );
425 delete tmp;
266 } 426 }
267 else 427 else
268 { 428 {
@@ -287,9 +447,10 @@ bool XmlReader::param()
287 { 447 {
288 if( chr == '&' ) 448 if( chr == '&' )
289 { 449 {
290 chr = getEscape(); 450 StaticString *tmp = getEscape();
291 if( chr == '\0' ) return false; 451 if( tmp == NULL ) return false;
292 fbValue.appendData( chr ); 452 fbValue.appendData( tmp->getString() );
453 delete tmp;
293 } 454 }
294 else 455 else
295 { 456 {
@@ -425,6 +586,13 @@ bool XmlReader::content()
425 586
426 if( bStrip ) gcall( ws() ); 587 if( bStrip ) gcall( ws() );
427 } 588 }
589 else if( chr == '&' )
590 {
591 StaticString *tmp = getEscape();
592 if( tmp == NULL ) return false;
593 fbContent.appendData( tmp->getString() );
594 delete tmp;
595 }
428 else 596 else
429 { 597 {
430 fbContent.appendData( chr ); 598 fbContent.appendData( chr );
diff --git a/src/xmlreader.h b/src/xmlreader.h
index 4117dfd..a9881cb 100644
--- a/src/xmlreader.h
+++ b/src/xmlreader.h
@@ -4,6 +4,8 @@
4#include <stdio.h> 4#include <stdio.h>
5#include "xmldocument.h" 5#include "xmldocument.h"
6#include "flexbuf.h" 6#include "flexbuf.h"
7#include "hashtable.h"
8#include "staticstring.h"
7 9
8/** 10/**
9 * Takes care of reading in xml formatted data from a file. This could/should 11 * Takes care of reading in xml formatted data from a file. This could/should
@@ -90,7 +92,25 @@ private:
90 */ 92 */
91 bool name(); 93 bool name();
92 94
93 char getEscape(); 95 /**
96 * Automaton function: textDecl. Processes the xml text declaration, if
97 * there is one.
98 */
99 void textDecl();
100
101 /**
102 * Automaton function: entity. Processes the entity declarations in the header.
103 */
104 void entity();
105
106 /**
107 * Adds an entity to the list, if it doesn't already exist.
108 *@param name The name of the entity
109 *@param value The value of the entity
110 */
111 void addEntity( const char *name, const char *value );
112
113 StaticString *getEscape();
94 114
95 /** 115 /**
96 * Automoton function: paramlist. Processes a list of node params. 116 * Automoton function: paramlist. Processes a list of node params.
@@ -114,6 +134,8 @@ private:
114 FlexBuf fbParamName; /**< buffer for the current param's name. */ 134 FlexBuf fbParamName; /**< buffer for the current param's name. */
115 FlexBuf fbParamValue; /**< buffer for the current param's value. */ 135 FlexBuf fbParamValue; /**< buffer for the current param's value. */
116 bool bStrip; /**< Are we stripping whitespace? */ 136 bool bStrip; /**< Are we stripping whitespace? */
137
138 HashTable htEntity; /**< Entity type definitions. */
117}; 139};
118 140
119#endif 141#endif