summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Mike Buland <eichlan@xagasoft.com> 2006-06-28 07:28:17 +0000
committer Mike Buland <eichlan@xagasoft.com> 2006-06-28 07:28:17 +0000
commit 789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05 (patch)
tree 8eae4bb7d22e5553e130c513cc3e29347cfc28c2
parent 35274124dc95ec5d6094e71c18ac7b484d812f13 (diff)
downloadlibbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.gz
libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.bz2
libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.tar.xz
libbu++-789eaff64b6dcdf920eb3f5a5d64ab4f1f33aa05.zip
Entities now work in the XML processor the way they should: you can define your
own in the document header and use the five built-in ones (gt, lt, apos, quot, amp). The parser now skips any text declaration at the top, which is fine for most XML that you get these days. I think if we ever make the break to full compliance, we'll need to write a new parser from scratch.
-rw-r--r-- src/unit/xml.cpp 16
-rw-r--r-- src/xmlreader.cpp 194
-rw-r--r-- src/xmlreader.h 24
3 files changed, 220 insertions, 14 deletions
diff --git a/src/unit/xml.cpp b/src/unit/xml.cpp
index 559b2f4..e4d779c 100644
--- a/src/unit/xml.cpp
+++ b/src/unit/xml.cpp
@@ -15,6 +15,10 @@ public:
15 TEST_ADD( XmlCoreTestSuite::badXml01 ) 15 TEST_ADD( XmlCoreTestSuite::badXml01 )
16 TEST_ADD( XmlCoreTestSuite::badXml02 ) 16 TEST_ADD( XmlCoreTestSuite::badXml02 )
17 TEST_ADD( XmlCoreTestSuite::badXml03 ) 17 TEST_ADD( XmlCoreTestSuite::badXml03 )
18
19 TEST_ADD( XmlCoreTestSuite::entityBuiltin01 )
20
21 TEST_ADD( XmlCoreTestSuite::entityDoc01 )
18 } 22 }
19 23
20private: 24private:
@@ -32,6 +36,18 @@ private:
32 { 36 {
33 TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); 37 TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & );
34 } 38 }
39
40 void entityBuiltin01()
41 {
42 XmlStringReader r("<hello>&gt;&lt;&amp;&apos;&quot;</hello>");
43 TEST_ASSERT( strcmp( r.getRoot()->getContent(), "><&\'\"" ) == 0 );
44 }
45
46 void entityDoc01()
47 {
48 XmlStringReader r("<!ENTITY name \"bob the man\"><hello>&quot;&name;&quot;</hello>");
49 TEST_ASSERT( strcmp( r.getRoot()->getContent(), "\"bob the man\"" ) == 0 );
50 }
35}; 51};
36 52
37int main( int argc, char *argv[] ) 53int main( int argc, char *argv[] )
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp
index 70fd1d7..2a5f63f 100644
--- a/src/xmlreader.cpp
+++ b/src/xmlreader.cpp
@@ -1,14 +1,32 @@
1#include "xmlreader.h" 1#include "xmlreader.h"
2#include "xmlexception.h" 2#include "xmlexception.h"
3#include <string.h> 3#include <string.h>
4#include "hashfunctionstring.h"
4 5
5XmlReader::XmlReader( bool bStrip ) : 6XmlReader::XmlReader( bool bStrip ) :
6 bStrip( bStrip ) 7 bStrip( bStrip ),
8 htEntity( new HashFunctionString(), 11 )
7{ 9{
8} 10}
9 11
10XmlReader::~XmlReader() 12XmlReader::~XmlReader()
11{ 13{
14 void *i = htEntity.getFirstItemPos();
15 while( (i = htEntity.getNextItemPos( i ) ) )
16 {
17 free( (char *)(htEntity.getItemID( i )) );
18 delete (StaticString *)htEntity.getItemData( i );
19 }
20}
21
22void XmlReader::addEntity( const char *name, const char *value )
23{
24 if( htEntity[name] ) return;
25
26 char *sName = strdup( name );
27 StaticString *sValue = new StaticString( value );
28
29 htEntity.insert( sName, sValue );
12} 30}
13 31
14#define gcall( x ) if( x == false ) return false; 32#define gcall( x ) if( x == false ) return false;
@@ -39,11 +57,129 @@ bool XmlReader::buildDoc()
39{ 57{
40 // take care of initial whitespace 58 // take care of initial whitespace
41 gcall( ws() ); 59 gcall( ws() );
60 textDecl();
61 entity();
62 addEntity("gt", ">");
63 addEntity("lt", "<");
64 addEntity("amp", "&");
65 addEntity("apos", "\'");
66 addEntity("quot", "\"");
42 gcall( node() ); 67 gcall( node() );
43 68
44 return true; 69 return true;
45} 70}
46 71
72void XmlReader::textDecl()
73{
74 char chr;
75 if( getChar() == '<' && getChar( 1 ) == '?' )
76 {
77 usedChar( 2 );
78 for(;;)
79 {
80 if( getChar() == '?' )
81 {
82 if( getChar( 1 ) == '>' )
83 {
84 usedChar( 2 );
85 return;
86 }
87 }
88 usedChar();
89 }
90 }
91}
92
93void XmlReader::entity()
94{
95 for(;;)
96 {
97 ws();
98
99 if( getChar() == '<' && getChar( 1 ) == '!' )
100 {
101 usedChar( 2 );
102 ws();
103 std::string buf;
104 for(;;)
105 {
106 char chr = getChar();
107 usedChar();
108 if( isws( chr ) ) break;
109 buf += chr;
110 }
111
112 if( strcmp( buf.c_str(), "ENTITY") == 0 )
113 {
114 ws();
115 std::string name;
116 for(;;)
117 {
118 char chr = getChar();
119 usedChar();
120 if( isws( chr ) ) break;
121 name += chr;
122 }
123 ws();
124 char quot = getChar();
125 usedChar();
126 if( quot != '\'' && quot != '\"' )
127 {
128 throw XmlException(
129 "Only quoted entity values are supported."
130 );
131 }
132 std::string value;
133 for(;;)
134 {
135 char chr = getChar();
136 usedChar();
137 if( chr == '&' )
138 {
139 StaticString *tmp = getEscape();
140 if( tmp == NULL ) throw XmlException("Entity thing");
141 value += tmp->getString();
142 delete tmp;
143 }
144 else if( chr == quot )
145 {
146 break;
147 }
148 else
149 {
150 value += chr;
151 }
152 }
153 ws();
154 if( getChar() == '>' )
155 {
156 usedChar();
157
158 addEntity( name.c_str(), value.c_str() );
159 }
160 else
161 {
162 throw XmlException(
163 "Malformed ENTITY: unexpected '%c' found.",
164 getChar()
165 );
166 }
167 }
168 else
169 {
170 throw XmlException(
171 "Unsupported header symbol: %s",
172 buf.c_str()
173 );
174 }
175 }
176 else
177 {
178 return;
179 }
180 }
181}
182
47bool XmlReader::node() 183bool XmlReader::node()
48{ 184{
49 gcall( startNode() ) 185 gcall( startNode() )
@@ -190,13 +326,18 @@ bool XmlReader::paramlist()
190 return true; 326 return true;
191} 327}
192 328
193char XmlReader::getEscape() 329StaticString *XmlReader::getEscape()
194{ 330{
195 // Right now, we just do # escapes...
196 if( getChar( 1 ) == '#' ) 331 if( getChar( 1 ) == '#' )
197 { 332 {
198 usedChar(); 333 // If the entity starts with a # it's a character escape code
199 usedChar(); 334 int base = 10;
335 usedChar( 2 );
336 if( getChar() == 'x' )
337 {
338 base = 16;
339 usedChar();
340 }
200 char buf[4]; 341 char buf[4];
201 int j = 0; 342 int j = 0;
202 for( j = 0; getChar() != ';'; j++ ) 343 for( j = 0; getChar() != ';'; j++ )
@@ -206,11 +347,29 @@ char XmlReader::getEscape()
206 } 347 }
207 usedChar(); 348 usedChar();
208 buf[j] = '\0'; 349 buf[j] = '\0';
209 return (char)atoi( buf ); 350 buf[0] = (char)strtol( buf, (char **)NULL, base );
351 buf[1] = '\0';
352
353 return new StaticString( buf );
210 } 354 }
211 else 355 else
212 { 356 {
213 return '\0'; 357 // ...otherwise replace with the appropriate string...
358 std::string buf;
359 usedChar();
360 for(;;)
361 {
362 char cbuf = getChar();
363 usedChar();
364 if( cbuf == ';' ) break;
365 buf += cbuf;
366 }
367
368 StaticString *tmp = (StaticString *)htEntity[buf.c_str()];
369 if( tmp == NULL ) return NULL;
370
371 StaticString *ret = new StaticString( *tmp );
372 return ret;
214 } 373 }
215} 374}
216 375
@@ -260,9 +419,10 @@ bool XmlReader::param()
260 { 419 {
261 if( chr == '&' ) 420 if( chr == '&' )
262 { 421 {
263 chr = getEscape(); 422 StaticString *tmp = getEscape();
264 if( chr == '\0' ) return false; 423 if( tmp == NULL ) return false;
265 fbValue.appendData( chr ); 424 fbValue.appendData( tmp->getString() );
425 delete tmp;
266 } 426 }
267 else 427 else
268 { 428 {
@@ -287,9 +447,10 @@ bool XmlReader::param()
287 { 447 {
288 if( chr == '&' ) 448 if( chr == '&' )
289 { 449 {
290 chr = getEscape(); 450 StaticString *tmp = getEscape();
291 if( chr == '\0' ) return false; 451 if( tmp == NULL ) return false;
292 fbValue.appendData( chr ); 452 fbValue.appendData( tmp->getString() );
453 delete tmp;
293 } 454 }
294 else 455 else
295 { 456 {
@@ -425,6 +586,13 @@ bool XmlReader::content()
425 586
426 if( bStrip ) gcall( ws() ); 587 if( bStrip ) gcall( ws() );
427 } 588 }
589 else if( chr == '&' )
590 {
591 StaticString *tmp = getEscape();
592 if( tmp == NULL ) return false;
593 fbContent.appendData( tmp->getString() );
594 delete tmp;
595 }
428 else 596 else
429 { 597 {
430 fbContent.appendData( chr ); 598 fbContent.appendData( chr );
diff --git a/src/xmlreader.h b/src/xmlreader.h
index 4117dfd..a9881cb 100644
--- a/src/xmlreader.h
+++ b/src/xmlreader.h
@@ -4,6 +4,8 @@
4#include <stdio.h> 4#include <stdio.h>
5#include "xmldocument.h" 5#include "xmldocument.h"
6#include "flexbuf.h" 6#include "flexbuf.h"
7#include "hashtable.h"
8#include "staticstring.h"
7 9
8/** 10/**
9 * Takes care of reading in xml formatted data from a file. This could/should 11 * Takes care of reading in xml formatted data from a file. This could/should
@@ -90,7 +92,25 @@ private:
90 */ 92 */
91 bool name(); 93 bool name();
92 94
93 char getEscape(); 95 /**
96 * Automoton function: textDecl. Processes the xml text decleration, if
97 * there is one.
98 */
99 void textDecl();
100
101 /**
102 * Automoton function: entity. Processes an entity from the header.
103 */
104 void entity();
105
106 /**
107 * Adds an entity to the list, if it doesn't already exist.
108 *@param name The name of the entity
109 *@param value The value of the entity
110 */
111 void addEntity( const char *name, const char *value );
112
113 StaticString *getEscape();
94 114
95 /** 115 /**
96 * Automoton function: paramlist. Processes a list of node params. 116 * Automoton function: paramlist. Processes a list of node params.
@@ -114,6 +134,8 @@ private:
114 FlexBuf fbParamName; /**< buffer for the current param's name. */ 134 FlexBuf fbParamName; /**< buffer for the current param's name. */
115 FlexBuf fbParamValue; /**< buffer for the current param's value. */ 135 FlexBuf fbParamValue; /**< buffer for the current param's value. */
116 bool bStrip; /**< Are we stripping whitespace? */ 136 bool bStrip; /**< Are we stripping whitespace? */
137
138 HashTable htEntity; /**< Entity type definitions. */
117}; 139};
118 140
119#endif 141#endif