diff options
| -rw-r--r-- | src/unit/xml.cpp | 16 | ||||
| -rw-r--r-- | src/xmlreader.cpp | 194 | ||||
| -rw-r--r-- | src/xmlreader.h | 24 |
3 files changed, 220 insertions, 14 deletions
diff --git a/src/unit/xml.cpp b/src/unit/xml.cpp index 559b2f4..e4d779c 100644 --- a/src/unit/xml.cpp +++ b/src/unit/xml.cpp | |||
| @@ -15,6 +15,10 @@ public: | |||
| 15 | TEST_ADD( XmlCoreTestSuite::badXml01 ) | 15 | TEST_ADD( XmlCoreTestSuite::badXml01 ) |
| 16 | TEST_ADD( XmlCoreTestSuite::badXml02 ) | 16 | TEST_ADD( XmlCoreTestSuite::badXml02 ) |
| 17 | TEST_ADD( XmlCoreTestSuite::badXml03 ) | 17 | TEST_ADD( XmlCoreTestSuite::badXml03 ) |
| 18 | |||
| 19 | TEST_ADD( XmlCoreTestSuite::entityBuiltin01 ) | ||
| 20 | |||
| 21 | TEST_ADD( XmlCoreTestSuite::entityDoc01 ) | ||
| 18 | } | 22 | } |
| 19 | 23 | ||
| 20 | private: | 24 | private: |
| @@ -32,6 +36,18 @@ private: | |||
| 32 | { | 36 | { |
| 33 | TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); | 37 | TEST_THROWS( XmlStringReader r("<hello param=\"stuff?"), XmlException & ); |
| 34 | } | 38 | } |
| 39 | |||
| 40 | void entityBuiltin01() | ||
| 41 | { | ||
| 42 | XmlStringReader r("<hello>><&'"</hello>"); | ||
| 43 | TEST_ASSERT( strcmp( r.getRoot()->getContent(), "><&\'\"" ) == 0 ); | ||
| 44 | } | ||
| 45 | |||
| 46 | void entityDoc01() | ||
| 47 | { | ||
| 48 | XmlStringReader r("<!ENTITY name \"bob the man\"><hello>"&name;"</hello>"); | ||
| 49 | TEST_ASSERT( strcmp( r.getRoot()->getContent(), "\"bob the man\"" ) == 0 ); | ||
| 50 | } | ||
| 35 | }; | 51 | }; |
| 36 | 52 | ||
| 37 | int main( int argc, char *argv[] ) | 53 | int main( int argc, char *argv[] ) |
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 70fd1d7..2a5f63f 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp | |||
| @@ -1,14 +1,32 @@ | |||
| 1 | #include "xmlreader.h" | 1 | #include "xmlreader.h" |
| 2 | #include "xmlexception.h" | 2 | #include "xmlexception.h" |
| 3 | #include <string.h> | 3 | #include <string.h> |
| 4 | #include "hashfunctionstring.h" | ||
| 4 | 5 | ||
| 5 | XmlReader::XmlReader( bool bStrip ) : | 6 | XmlReader::XmlReader( bool bStrip ) : |
| 6 | bStrip( bStrip ) | 7 | bStrip( bStrip ), |
| 8 | htEntity( new HashFunctionString(), 11 ) | ||
| 7 | { | 9 | { |
| 8 | } | 10 | } |
| 9 | 11 | ||
| 10 | XmlReader::~XmlReader() | 12 | XmlReader::~XmlReader() |
| 11 | { | 13 | { |
| 14 | void *i = htEntity.getFirstItemPos(); | ||
| 15 | while( (i = htEntity.getNextItemPos( i ) ) ) | ||
| 16 | { | ||
| 17 | free( (char *)(htEntity.getItemID( i )) ); | ||
| 18 | delete (StaticString *)htEntity.getItemData( i ); | ||
| 19 | } | ||
| 20 | } | ||
| 21 | |||
| 22 | void XmlReader::addEntity( const char *name, const char *value ) | ||
| 23 | { | ||
| 24 | if( htEntity[name] ) return; | ||
| 25 | |||
| 26 | char *sName = strdup( name ); | ||
| 27 | StaticString *sValue = new StaticString( value ); | ||
| 28 | |||
| 29 | htEntity.insert( sName, sValue ); | ||
| 12 | } | 30 | } |
| 13 | 31 | ||
| 14 | #define gcall( x ) if( x == false ) return false; | 32 | #define gcall( x ) if( x == false ) return false; |
| @@ -39,11 +57,129 @@ bool XmlReader::buildDoc() | |||
| 39 | { | 57 | { |
| 40 | // take care of initial whitespace | 58 | // take care of initial whitespace |
| 41 | gcall( ws() ); | 59 | gcall( ws() ); |
| 60 | textDecl(); | ||
| 61 | entity(); | ||
| 62 | addEntity("gt", ">"); | ||
| 63 | addEntity("lt", "<"); | ||
| 64 | addEntity("amp", "&"); | ||
| 65 | addEntity("apos", "\'"); | ||
| 66 | addEntity("quot", "\""); | ||
| 42 | gcall( node() ); | 67 | gcall( node() ); |
| 43 | 68 | ||
| 44 | return true; | 69 | return true; |
| 45 | } | 70 | } |
| 46 | 71 | ||
| 72 | void XmlReader::textDecl() | ||
| 73 | { | ||
| 74 | char chr; | ||
| 75 | if( getChar() == '<' && getChar( 1 ) == '?' ) | ||
| 76 | { | ||
| 77 | usedChar( 2 ); | ||
| 78 | for(;;) | ||
| 79 | { | ||
| 80 | if( getChar() == '?' ) | ||
| 81 | { | ||
| 82 | if( getChar( 1 ) == '>' ) | ||
| 83 | { | ||
| 84 | usedChar( 2 ); | ||
| 85 | return; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | usedChar(); | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | void XmlReader::entity() | ||
| 94 | { | ||
| 95 | for(;;) | ||
| 96 | { | ||
| 97 | ws(); | ||
| 98 | |||
| 99 | if( getChar() == '<' && getChar( 1 ) == '!' ) | ||
| 100 | { | ||
| 101 | usedChar( 2 ); | ||
| 102 | ws(); | ||
| 103 | std::string buf; | ||
| 104 | for(;;) | ||
| 105 | { | ||
| 106 | char chr = getChar(); | ||
| 107 | usedChar(); | ||
| 108 | if( isws( chr ) ) break; | ||
| 109 | buf += chr; | ||
| 110 | } | ||
| 111 | |||
| 112 | if( strcmp( buf.c_str(), "ENTITY") == 0 ) | ||
| 113 | { | ||
| 114 | ws(); | ||
| 115 | std::string name; | ||
| 116 | for(;;) | ||
| 117 | { | ||
| 118 | char chr = getChar(); | ||
| 119 | usedChar(); | ||
| 120 | if( isws( chr ) ) break; | ||
| 121 | name += chr; | ||
| 122 | } | ||
| 123 | ws(); | ||
| 124 | char quot = getChar(); | ||
| 125 | usedChar(); | ||
| 126 | if( quot != '\'' && quot != '\"' ) | ||
| 127 | { | ||
| 128 | throw XmlException( | ||
| 129 | "Only quoted entity values are supported." | ||
| 130 | ); | ||
| 131 | } | ||
| 132 | std::string value; | ||
| 133 | for(;;) | ||
| 134 | { | ||
| 135 | char chr = getChar(); | ||
| 136 | usedChar(); | ||
| 137 | if( chr == '&' ) | ||
| 138 | { | ||
| 139 | StaticString *tmp = getEscape(); | ||
| 140 | if( tmp == NULL ) throw XmlException("Entity thing"); | ||
| 141 | value += tmp->getString(); | ||
| 142 | delete tmp; | ||
| 143 | } | ||
| 144 | else if( chr == quot ) | ||
| 145 | { | ||
| 146 | break; | ||
| 147 | } | ||
| 148 | else | ||
| 149 | { | ||
| 150 | value += chr; | ||
| 151 | } | ||
| 152 | } | ||
| 153 | ws(); | ||
| 154 | if( getChar() == '>' ) | ||
| 155 | { | ||
| 156 | usedChar(); | ||
| 157 | |||
| 158 | addEntity( name.c_str(), value.c_str() ); | ||
| 159 | } | ||
| 160 | else | ||
| 161 | { | ||
| 162 | throw XmlException( | ||
| 163 | "Malformed ENTITY: unexpected '%c' found.", | ||
| 164 | getChar() | ||
| 165 | ); | ||
| 166 | } | ||
| 167 | } | ||
| 168 | else | ||
| 169 | { | ||
| 170 | throw XmlException( | ||
| 171 | "Unsupported header symbol: %s", | ||
| 172 | buf.c_str() | ||
| 173 | ); | ||
| 174 | } | ||
| 175 | } | ||
| 176 | else | ||
| 177 | { | ||
| 178 | return; | ||
| 179 | } | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 47 | bool XmlReader::node() | 183 | bool XmlReader::node() |
| 48 | { | 184 | { |
| 49 | gcall( startNode() ) | 185 | gcall( startNode() ) |
| @@ -190,13 +326,18 @@ bool XmlReader::paramlist() | |||
| 190 | return true; | 326 | return true; |
| 191 | } | 327 | } |
| 192 | 328 | ||
| 193 | char XmlReader::getEscape() | 329 | StaticString *XmlReader::getEscape() |
| 194 | { | 330 | { |
| 195 | // Right now, we just do # escapes... | ||
| 196 | if( getChar( 1 ) == '#' ) | 331 | if( getChar( 1 ) == '#' ) |
| 197 | { | 332 | { |
| 198 | usedChar(); | 333 | // If the entity starts with a # it's a character escape code |
| 199 | usedChar(); | 334 | int base = 10; |
| 335 | usedChar( 2 ); | ||
| 336 | if( getChar() == 'x' ) | ||
| 337 | { | ||
| 338 | base = 16; | ||
| 339 | usedChar(); | ||
| 340 | } | ||
| 200 | char buf[4]; | 341 | char buf[4]; |
| 201 | int j = 0; | 342 | int j = 0; |
| 202 | for( j = 0; getChar() != ';'; j++ ) | 343 | for( j = 0; getChar() != ';'; j++ ) |
| @@ -206,11 +347,29 @@ char XmlReader::getEscape() | |||
| 206 | } | 347 | } |
| 207 | usedChar(); | 348 | usedChar(); |
| 208 | buf[j] = '\0'; | 349 | buf[j] = '\0'; |
| 209 | return (char)atoi( buf ); | 350 | buf[0] = (char)strtol( buf, (char **)NULL, base ); |
| 351 | buf[1] = '\0'; | ||
| 352 | |||
| 353 | return new StaticString( buf ); | ||
| 210 | } | 354 | } |
| 211 | else | 355 | else |
| 212 | { | 356 | { |
| 213 | return '\0'; | 357 | // ...otherwise replace with the appropriate string... |
| 358 | std::string buf; | ||
| 359 | usedChar(); | ||
| 360 | for(;;) | ||
| 361 | { | ||
| 362 | char cbuf = getChar(); | ||
| 363 | usedChar(); | ||
| 364 | if( cbuf == ';' ) break; | ||
| 365 | buf += cbuf; | ||
| 366 | } | ||
| 367 | |||
| 368 | StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; | ||
| 369 | if( tmp == NULL ) return NULL; | ||
| 370 | |||
| 371 | StaticString *ret = new StaticString( *tmp ); | ||
| 372 | return ret; | ||
| 214 | } | 373 | } |
| 215 | } | 374 | } |
| 216 | 375 | ||
| @@ -260,9 +419,10 @@ bool XmlReader::param() | |||
| 260 | { | 419 | { |
| 261 | if( chr == '&' ) | 420 | if( chr == '&' ) |
| 262 | { | 421 | { |
| 263 | chr = getEscape(); | 422 | StaticString *tmp = getEscape(); |
| 264 | if( chr == '\0' ) return false; | 423 | if( tmp == NULL ) return false; |
| 265 | fbValue.appendData( chr ); | 424 | fbValue.appendData( tmp->getString() ); |
| 425 | delete tmp; | ||
| 266 | } | 426 | } |
| 267 | else | 427 | else |
| 268 | { | 428 | { |
| @@ -287,9 +447,10 @@ bool XmlReader::param() | |||
| 287 | { | 447 | { |
| 288 | if( chr == '&' ) | 448 | if( chr == '&' ) |
| 289 | { | 449 | { |
| 290 | chr = getEscape(); | 450 | StaticString *tmp = getEscape(); |
| 291 | if( chr == '\0' ) return false; | 451 | if( tmp == NULL ) return false; |
| 292 | fbValue.appendData( chr ); | 452 | fbValue.appendData( tmp->getString() ); |
| 453 | delete tmp; | ||
| 293 | } | 454 | } |
| 294 | else | 455 | else |
| 295 | { | 456 | { |
| @@ -425,6 +586,13 @@ bool XmlReader::content() | |||
| 425 | 586 | ||
| 426 | if( bStrip ) gcall( ws() ); | 587 | if( bStrip ) gcall( ws() ); |
| 427 | } | 588 | } |
| 589 | else if( chr == '&' ) | ||
| 590 | { | ||
| 591 | StaticString *tmp = getEscape(); | ||
| 592 | if( tmp == NULL ) return false; | ||
| 593 | fbContent.appendData( tmp->getString() ); | ||
| 594 | delete tmp; | ||
| 595 | } | ||
| 428 | else | 596 | else |
| 429 | { | 597 | { |
| 430 | fbContent.appendData( chr ); | 598 | fbContent.appendData( chr ); |
diff --git a/src/xmlreader.h b/src/xmlreader.h index 4117dfd..a9881cb 100644 --- a/src/xmlreader.h +++ b/src/xmlreader.h | |||
| @@ -4,6 +4,8 @@ | |||
| 4 | #include <stdio.h> | 4 | #include <stdio.h> |
| 5 | #include "xmldocument.h" | 5 | #include "xmldocument.h" |
| 6 | #include "flexbuf.h" | 6 | #include "flexbuf.h" |
| 7 | #include "hashtable.h" | ||
| 8 | #include "staticstring.h" | ||
| 7 | 9 | ||
| 8 | /** | 10 | /** |
| 9 | * Takes care of reading in xml formatted data from a file. This could/should | 11 | * Takes care of reading in xml formatted data from a file. This could/should |
| @@ -90,7 +92,25 @@ private: | |||
| 90 | */ | 92 | */ |
| 91 | bool name(); | 93 | bool name(); |
| 92 | 94 | ||
| 93 | char getEscape(); | 95 | /** |
| 96 | * Automoton function: textDecl. Processes the xml text declaration, if | ||
| 97 | * there is one. | ||
| 98 | */ | ||
| 99 | void textDecl(); | ||
| 100 | |||
| 101 | /** | ||
| 102 | * Automoton function: entity. Processes an entity from the header. | ||
| 103 | */ | ||
| 104 | void entity(); | ||
| 105 | |||
| 106 | /** | ||
| 107 | * Adds an entity to the list, if it doesn't already exist. | ||
| 108 | *@param name The name of the entity | ||
| 109 | *@param value The value of the entity | ||
| 110 | */ | ||
| 111 | void addEntity( const char *name, const char *value ); | ||
| 112 | |||
| 113 | StaticString *getEscape(); | ||
| 94 | 114 | ||
| 95 | /** | 115 | /** |
| 96 | * Automoton function: paramlist. Processes a list of node params. | 116 | * Automoton function: paramlist. Processes a list of node params. |
| @@ -114,6 +134,8 @@ private: | |||
| 114 | FlexBuf fbParamName; /**< buffer for the current param's name. */ | 134 | FlexBuf fbParamName; /**< buffer for the current param's name. */ |
| 115 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ | 135 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ |
| 116 | bool bStrip; /**< Are we stripping whitespace? */ | 136 | bool bStrip; /**< Are we stripping whitespace? */ |
| 137 | |||
| 138 | HashTable htEntity; /**< Entity type definitions. */ | ||
| 117 | }; | 139 | }; |
| 118 | 140 | ||
| 119 | #endif | 141 | #endif |
