From 2e035fee36768e3c765b7f5dc10bf0a3b7d2448b Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Wed, 9 May 2007 15:01:03 +0000 Subject: Minor changes to both the taf and xml readers. I'm thinking I'm going to archive these for now and resurect/fix the old xml reader, just to have something working. --- misc/taf | 26 ++++++++ src/tafreader.cpp | 4 +- src/tafreader.h | 7 ++- src/xmlreader.cpp | 175 +++++++++++++++++++++++++++++++++++++++++++++++++++--- src/xmlreader.h | 53 ++++++++++++++++- 5 files changed, 253 insertions(+), 12 deletions(-) create mode 100644 misc/taf diff --git a/misc/taf b/misc/taf new file mode 100644 index 0000000..5ffcdcf --- /dev/null +++ b/misc/taf @@ -0,0 +1,26 @@ +{player: + password = "aoeuaoeuao" + userclass = "implementor" + species = "human" + sex = "male" + active + startroom = "Salourn::Xagafinelle's Room" + {stats: str=14 dex=12 spd=12 enr=7 rea=12 wil=10 int=13 cha=14} + {hp: cur = 100 max = 100} + {en: cur = 100 max = 100} + attackrate = 30 + gold = 0 + {inventory: + {: count=1 id="Salourn::Dark Blade"} + {: count=1 id="Salourn::Dark Suit"} + {: count=3 id="Salourn::Small Fig"} + } + {aliases: + {: key="." 
value="say"} + {: key="," value="yell"} + {: key="li" value="lightning"} + } + description = """They appear to be rather average looking, not particularly + tall or short, with facial features that are difficult to remember even + seconds after witnessing them.""" +} diff --git a/src/tafreader.cpp b/src/tafreader.cpp index 4f2890a..f94fe44 100644 --- a/src/tafreader.cpp +++ b/src/tafreader.cpp @@ -1,9 +1,11 @@ #include "tafreader.h" -Bu::TafReader::TafReader() +Bu::TafReader::TafReader( Bu::Stream &sIn ) : + sIn( sIn ) { } Bu::TafReader::~TafReader() { } + diff --git a/src/tafreader.h b/src/tafreader.h index d9f1dfd..2dbb9ea 100644 --- a/src/tafreader.h +++ b/src/tafreader.h @@ -2,19 +2,22 @@ #define BU_TAF_READER_H #include +#include "bu/tafdocument.h" +#include "bu/stream.h" namespace Bu { /** * */ - class TafReader + class TafReader : public Bu::TafDocument { public: - TafReader(); + TafReader( Bu::Stream &sIn ); virtual ~TafReader(); private: + Stream &sIn; }; } diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp index 432ecc1..bd241cf 100644 --- a/src/xmlreader.cpp +++ b/src/xmlreader.cpp @@ -29,10 +29,10 @@ void Bu::XmlReader::burn( int nAmnt ) lookahead( nAmnt ); } - sBuf.remove( nAmnt ); + //sBuf.remove( nAmnt ); } -void Bu::XmlNode::checkString( const char *str, int nLen ) +void Bu::XmlReader::checkString( const char *str, int nLen ) { if( !strncmp( str, lookahead( nLen ), nLen ) ) { @@ -57,14 +57,66 @@ void Bu::XmlReader::prolog() void Bu::XmlReader::XMLDecl() { checkString("", 2 ); } void Bu::XmlReader::Misc() { + for(;;) + { + S(); + if( !strncmp("", 3 ); + return; + } + } + burn( 1 ); + } +} + +void Bu::XmlReader::PI() +{ + checkString("", lookahead(j+2)+j, 2 ) ) + { + burn( j+2 ); + return; + } + } } void Bu::XmlReader::S() @@ -75,12 +127,12 @@ void Bu::XmlReader::S() if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ) continue; if( j == 0 ) - printf("Error, expected whitespace!\n"); + throw ExceptionBase("Expected whitespace."); return; } } -void 
Bu::XmlReader::S() +void Bu::XmlReader::Sq() { for(;;) { @@ -93,9 +145,19 @@ void Bu::XmlReader::S() void Bu::XmlReader::VersionInfo() { - S(); - checkString("version", 7 ); - + try + { + S(); + checkString("version", 7 ); + } + catch( ExceptionBase &e ) + { + return; + } + Eq(); + Bu::FString ver = AttValue(); + if( ver != "1.1" ) + throw ExceptionBase("Currently we only support xml version 1.1\n"); } void Bu::XmlReader::Eq() @@ -105,4 +167,101 @@ void Bu::XmlReader::Eq() Sq(); } +void Bu::XmlReader::EncodingDecl() +{ + S(); + try + { + checkString("encoding", 8 ); + } + catch( ExceptionBase &e ) + { + return; + } + + Eq(); + AttValue(); +} + +void Bu::XmlReader::SDDecl() +{ + S(); + try + { + checkString("standalone", 10 ); + } + catch( ExceptionBase &e ) + { + return; + } + + Eq(); + AttValue(); +} + +Bu::FString Bu::XmlReader::AttValue() +{ + char q = *lookahead(1); + if( q == '\"' ) + { + for( int j = 2;; j++ ) + { + if( lookahead(j)[j-1] == '\"' ) + { + Bu::FString ret( lookahead(j)+1, j-2 ); + burn( j ); + return ret; + } + } + } + else if( q == '\'' ) + { + for( int j = 2;; j++ ) + { + if( lookahead(j)[j-1] == '\'' ) + { + Bu::FString ret( lookahead(j)+1, j-2 ); + burn( j ); + return ret; + } + } + } + + throw ExceptionBase("Excpected either \' or \".\n"); +} + +Bu::FString Bu::XmlReader::Name() +{ + unsigned char c = *lookahead( 1 ); + if( c != ':' && c != '_' && + (c < 'A' || c > 'Z') && + (c < 'a' || c > 'z') && + (c < 0xC0 || c > 0xD6 ) && + (c < 0xD8 || c > 0xF6 ) && + (c < 0xF8)) + { + throw ExceptionBase("Invalid entity name starting character."); + } + + for( int j = 1;; j++ ) + { + unsigned char c = lookahead(j+1)[j]; + if( isS( c ) ) + { + FString ret( lookahead(j+1), j+1 ); + burn( j+1 ); + return ret; + } + if( c != ':' && c != '_' && c != '-' && c != '.' 
&& c != 0xB7 && + (c < 'A' || c > 'Z') && + (c < 'a' || c > 'z') && + (c < '0' || c > '9') && + (c < 0xC0 || c > 0xD6 ) && + (c < 0xD8 || c > 0xF6 ) && + (c < 0xF8)) + { + throw ExceptionBase("Invalid character in name."); + } + } +} diff --git a/src/xmlreader.h b/src/xmlreader.h index 19791c4..708a386 100644 --- a/src/xmlreader.h +++ b/src/xmlreader.h @@ -9,7 +9,24 @@ namespace Bu { /** + * An Xml 1.1 reader. I've decided to write this, this time, based on the + * official W3C recommendation, now included with the source code. I've + * named the productions in the parser states the same as in that document, + * which may make them easier to find, etc, although possibly slightly less + * optimized than writing my own reduced grammar. * + * Below I will list differences between my parser and the official standard + * as I come up with them. + * - Encoding and Standalone headings are ignored for the moment. (4.3.3, + * 2.9) + * - The standalone heading attribute can have any standard whitespace + * before it (the specs say only spaces, no newlines). (2.9) + * - Since standalone is ignored, it is currently allowed to have any + * value (should be restricted to "yes" or "no"). (2.9) + * - Currently only UTF-8 / ascii are parsed. + * - [optional] The content of comments is thrown away. (2.5) + * - The content of processing instruction blocks is parsed properly, but + * thrown away. (2.6) */ class XmlReader { @@ -40,10 +57,20 @@ namespace Bu void XMLDecl(); /** - * Misc things...? + * Misc things, Includes Comments and PIData (Processing Instructions). */ void Misc(); + /** + * Comments + */ + void Comment(); + + /** + * Processing Instructions + */ + void PI(); + /** * Whitespace eater. */ @@ -64,6 +91,30 @@ namespace Bu */ void Eq(); + /** + * Read in an attribute value. + */ + FString AttValue(); + + /** + * Read in the name of something. 
+ */ + FString Name(); + + /** + * Encoding declaration in the header + */ + void EncodingDecl(); + + /** + * Standalone declaration in the header + */ + void SDDecl(); + + bool isS( unsigned char c ) + { + return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ); + } }; } -- cgit v1.2.3