From ad92dc50b7cdf7cfe086f21d19442d03a90fd05d Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Wed, 9 May 2007 15:04:31 +0000 Subject: Just a few things re-arranged, moved the new taf/xml systems to the inprogress directory, and moved the old xml system in, so it will require heavy changes. --- src/xmlreader.cpp | 665 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 500 insertions(+), 165 deletions(-) (limited to 'src/xmlreader.cpp') diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index bd241cf..18df69c 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp @@ -1,82 +1,176 @@ #include "xmlreader.h" +#include "exceptions.h" +#include +#include "hashfunctionstring.h" -Bu::XmlReader::XmlReader( Bu::Stream &sIn ) : - sIn( sIn ) +XmlReader::XmlReader( bool bStrip ) : + bStrip( bStrip ), + htEntity( new HashFunctionString(), 11 ) { } -Bu::XmlReader::~XmlReader() +XmlReader::~XmlReader() { + void *i = htEntity.getFirstItemPos(); + while( (i = htEntity.getNextItemPos( i ) ) ) + { + free( (char *)(htEntity.getItemID( i )) ); + delete (StaticString *)htEntity.getItemData( i ); + } } -const char *Bu::XmlReader::lookahead( int nAmnt ) +void XmlReader::addEntity( const char *name, const char *value ) { - if( sBuf.getSize() >= nAmnt ) - return sBuf.getStr(); + if( htEntity[name] ) return; - int nNew = nAmnt - sBuf.getSize(); - char *buf = new char[nNew]; - sIn.read( buf, nNew ); - sBuf.append( buf ); + char *sName = strdup( name ); + StaticString *sValue = new StaticString( value ); - return sBuf.getStr(); + htEntity.insert( sName, sValue ); } -void Bu::XmlReader::burn( int nAmnt ) -{ - if( sBuf.getSize() < nAmnt ) - { - lookahead( nAmnt ); - } +#define gcall( x ) if( x == false ) return false; - //sBuf.remove( nAmnt ); +bool XmlReader::isws( char chr ) +{ + return ( chr == ' ' || chr == '\t' || chr == '\n' || chr == '\r' ); } -void Bu::XmlReader::checkString( const char *str, int nLen ) +bool XmlReader::ws() { - if( !strncmp( str, lookahead( nLen ), nLen ) ) + while( true ) { - burn( nLen ); - return; + char chr = getChar(); + if( isws( chr ) ) + { + usedChar(); + } + else + { + return true; + } } - - throw Bu::ExceptionBase("Expected string '%s'", str ); + return true; } -Bu::XmlNode *Bu::XmlReader::read() +bool XmlReader::buildDoc() { - prolog(); -} + // take care of initial whitespace + gcall( ws() ); + textDecl(); + entity(); + addEntity("gt", ">"); + addEntity("lt", "<"); + addEntity("amp", "&"); + addEntity("apos", "\'"); + addEntity("quot", "\""); + gcall( node() ); -void Bu::XmlReader::prolog() -{ - XMLDecl(); - Misc(); + return true; } -void Bu::XmlReader::XMLDecl() +void XmlReader::textDecl() { - checkString("", 2 ); + if( getChar() == '<' && getChar( 1 ) == '?' ) + { + usedChar( 2 ); + for(;;) + { + if( getChar() == '?' ) + { + if( getChar( 1 ) == '>' ) + { + usedChar( 2 ); + return; + } + } + usedChar(); + } + } } -void Bu::XmlReader::Misc() +void XmlReader::entity() { for(;;) { - S(); - if( !strncmp("", 3 ); - return; - } + closeNode(); + usedChar(); + } + else + { + throw XmlException("Close node in singleNode malformed!"); } - burn( 1 ); } + else + { + throw XmlException("Close node expected, but not found."); + return false; + } + + return true; } -void Bu::XmlReader::PI() +bool XmlReader::startNode() { - checkString("", lookahead(j+2)+j, 2 ) ) + usedChar(); + + if( getChar() == '/' ) { - burn( j+2 ); - return; + // Heh, it's actually a close node, go figure + FlexBuf fbName; + usedChar(); + gcall( ws() ); + + while( true ) + { + char chr = getChar(); + if( isws( chr ) || chr == '>' ) + { + // Here we actually compare the name we got to the name + // we already set, they have to match exactly. + if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) + { + closeNode(); + break; + } + else + { + throw XmlException("Got a mismatched node close tag."); + } + } + else + { + fbName.appendData( chr ); + usedChar(); + } + } + + gcall( ws() ); + if( getChar() == '>' ) + { + // Everything is cool. + usedChar(); + } + else + { + throw XmlException("Got extra junk data instead of node close tag."); + } } + else + { + // We're good, format is consistant + addNode(); + + // Skip extra whitespace + gcall( ws() ); + gcall( name() ); + gcall( ws() ); + gcall( paramlist() ); + gcall( ws() ); + } + } + else + { + throw XmlException("Expected to find node opening char, '<'."); } + + return true; } -void Bu::XmlReader::S() +bool XmlReader::name() { - for( int j = 0;; j++ ) + FlexBuf fbName; + + while( true ) { - char c = *lookahead( 1 ); - if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ) - continue; - if( j == 0 ) - throw ExceptionBase("Expected whitespace."); - return; + char chr = getChar(); + if( isws( chr ) || chr == '>' || chr == '/' ) + { + setName( fbName.getData() ); + return true; + } + else + { + fbName.appendData( chr ); + usedChar(); + } } + + return true; } -void Bu::XmlReader::Sq() +bool XmlReader::paramlist() { - for(;;) + while( true ) { - char c = *lookahead( 1 ); - if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ) - continue; - return; + char chr = getChar(); + if( chr == '/' || chr == '>' ) + { + return true; + } + else + { + gcall( param() ); + gcall( ws() ); + } } + + return true; } -void Bu::XmlReader::VersionInfo() +StaticString *XmlReader::getEscape() { - try + if( getChar( 1 ) == '#' ) { - S(); - checkString("version", 7 ); + // If the entity starts with a # it's a character escape code + int base = 10; + usedChar( 2 ); + if( getChar() == 'x' ) + { + base = 16; + usedChar(); + } + char buf[4]; + int j = 0; + for( j = 0; getChar() != ';'; j++ ) + { + buf[j] = getChar(); + usedChar(); + } + usedChar(); + buf[j] = '\0'; + buf[0] = (char)strtol( buf, (char **)NULL, base ); + buf[1] = '\0'; + + return new StaticString( buf ); } - catch( ExceptionBase &e ) + else { - return; + // ...otherwise replace with the appropriate string... + std::string buf; + usedChar(); + for(;;) + { + char cbuf = getChar(); + usedChar(); + if( cbuf == ';' ) break; + buf += cbuf; + } + + StaticString *tmp = (StaticString *)htEntity[buf.c_str()]; + if( tmp == NULL ) return NULL; + + StaticString *ret = new StaticString( *tmp ); + return ret; } - Eq(); - Bu::FString ver = AttValue(); - if( ver != "1.1" ) - throw ExceptionBase("Currently we only support xml version 1.1\n"); } -void Bu::XmlReader::Eq() +bool XmlReader::param() { - Sq(); - checkString("=", 1 ); - Sq(); -} + FlexBuf fbName; + FlexBuf fbValue; -void Bu::XmlReader::EncodingDecl() -{ - S(); - try - { - checkString("encoding", 8 ); - } - catch( ExceptionBase &e ) + while( true ) { - return; + char chr = getChar(); + if( isws( chr ) || chr == '=' ) + { + break; + } + else + { + fbName.appendData( chr ); + usedChar(); + } } - Eq(); - AttValue(); -} + gcall( ws() ); -void Bu::XmlReader::SDDecl() -{ - S(); - try - { - checkString("standalone", 10 ); - } - catch( ExceptionBase &e ) + if( getChar() == '=' ) { - return; - } + usedChar(); - Eq(); - AttValue(); -} + gcall( ws() ); -Bu::FString Bu::XmlReader::AttValue() -{ - char q = *lookahead(1); - if( q == '\"' ) - { - for( int j = 2;; j++ ) + char chr = getChar(); + if( chr == '"' ) { - if( lookahead(j)[j-1] == '\"' ) + // Better quoted rhs + usedChar(); + + while( true ) { - Bu::FString ret( lookahead(j)+1, j-2 ); - burn( j ); - return ret; + chr = getChar(); + if( chr == '"' ) + { + usedChar(); + addProperty( fbName.getData(), fbValue.getData() ); + return true; + } + else + { + if( chr == '&' ) + { + StaticString *tmp = getEscape(); + if( tmp == NULL ) return false; + fbValue.appendData( tmp->getString() ); + delete tmp; + } + else + { + fbValue.appendData( chr ); + usedChar(); + } + } } } - } - else if( q == '\'' ) - { - for( int j = 2;; j++ ) + else { - if( lookahead(j)[j-1] == '\'' ) + // Simple one-word rhs + while( true ) { - Bu::FString ret( lookahead(j)+1, j-2 ); - burn( j ); - return ret; + chr = getChar(); + if( isws( chr ) || chr == '/' || chr == '>' ) + { + addProperty( fbName.getData(), fbValue.getData() ); + return true; + } + else + { + if( chr == '&' ) + { + StaticString *tmp = getEscape(); + if( tmp == NULL ) return false; + fbValue.appendData( tmp->getString() ); + delete tmp; + } + else + { + fbValue.appendData( chr ); + usedChar(); + } + } } } } + else + { + throw XmlException("Expected an equals to seperate the params."); + return false; + } - throw ExceptionBase("Excpected either \' or \".\n"); + return true; } -Bu::FString Bu::XmlReader::Name() +bool XmlReader::content() { - unsigned char c = *lookahead( 1 ); - if( c != ':' && c != '_' && - (c < 'A' || c > 'Z') && - (c < 'a' || c > 'z') && - (c < 0xC0 || c > 0xD6 ) && - (c < 0xD8 || c > 0xF6 ) && - (c < 0xF8)) - { - throw ExceptionBase("Invalid entity name starting character."); - } + FlexBuf fbContent; - for( int j = 1;; j++ ) + if( bStrip ) gcall( ws() ); + + while( true ) { - unsigned char c = lookahead(j+1)[j]; - if( isS( c ) ) + char chr = getChar(); + if( chr == '<' ) + { + if( getChar(1) == '/' ) + { + if( fbContent.getLength() > 0 ) + { + if( bStrip ) + { + int j; + for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); + ((char *)fbContent.getData())[j+1] = '\0'; + } + setContent( fbContent.getData() ); + } + usedChar( 2 ); + gcall( ws() ); + FlexBuf fbName; + while( true ) + { + chr = getChar(); + if( isws( chr ) || chr == '>' ) + { + if( !strcasecmp( getCurrent()->getName(), fbName.getData() ) ) + { + closeNode(); + break; + } + else + { + throw XmlException("Mismatched close tag found: <%s> to <%s>.", getCurrent()->getName(), fbName.getData() ); + } + } + else + { + fbName.appendData( chr ); + usedChar(); + } + } + gcall( ws() ); + if( getChar() == '>' ) + { + usedChar(); + return true; + } + else + { + throw XmlException("Malformed close tag."); + } + } + else if( getChar(1) == '!' ) + { + // We know it's a comment, let's see if it's proper + if( getChar(2) != '-' || + getChar(3) != '-' ) + { + // Not a valid XML comment + throw XmlException("Malformed comment start tag found."); + } + + usedChar( 4 ); + + // Now burn text until we find the close tag + for(;;) + { + if( getChar() == '-' ) + { + if( getChar( 1 ) == '-' ) + { + // The next one has to be a '>' now + if( getChar( 2 ) != '>' ) + { + throw XmlException("Malformed comment close tag found. You cannot have a '--' that isn't followed by a '>' in a comment."); + } + usedChar( 3 ); + break; + } + else + { + // Found a dash followed by a non dash, that's ok... + usedChar( 2 ); + } + } + else + { + // Burn comment chars + usedChar(); + } + } + } + else + { + if( fbContent.getLength() > 0 ) + { + if( bStrip ) + { + int j; + for( j = fbContent.getLength()-1; isws(fbContent.getData()[j]); j-- ); + ((char *)fbContent.getData())[j+1] = '\0'; + } + setContent( fbContent.getData() ); + fbContent.clearData(); + } + gcall( node() ); + } + + if( bStrip ) gcall( ws() ); + } + else if( chr == '&' ) { - FString ret( lookahead(j+1), j+1 ); - burn( j+1 ); - return ret; + StaticString *tmp = getEscape(); + if( tmp == NULL ) return false; + fbContent.appendData( tmp->getString() ); + delete tmp; } - if( c != ':' && c != '_' && c != '-' && c != '.' && c != 0xB7 && - (c < 'A' || c > 'Z') && - (c < 'a' || c > 'z') && - (c < '0' || c > '9') && - (c < 0xC0 || c > 0xD6 ) && - (c < 0xD8 || c > 0xF6 ) && - (c < 0xF8)) + else { - throw ExceptionBase("Invalid character in name."); + fbContent.appendData( chr ); + usedChar(); } } } -- cgit v1.2.3