From ad92dc50b7cdf7cfe086f21d19442d03a90fd05d Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Wed, 9 May 2007 15:04:31 +0000 Subject: Just a few things re-arranged, moved the new taf/xml systems to the inprogress directory, and moved the old xml system in, so it will require heavy changes. --- src/inprogress/xmlreader.h | 121 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 src/inprogress/xmlreader.h (limited to 'src/inprogress/xmlreader.h') diff --git a/src/inprogress/xmlreader.h b/src/inprogress/xmlreader.h new file mode 100644 index 0000000..708a386 --- /dev/null +++ b/src/inprogress/xmlreader.h @@ -0,0 +1,121 @@ +#ifndef XML_READER_H +#define XML_READER_H + +#include +#include "bu/stream.h" +#include "bu/fstring.h" +#include "bu/xmlnode.h" + +namespace Bu +{ + /** + * An XML 1.1 reader. I've decided to write this, this time, based on the + * official W3C recommendation, now included with the source code. I've + * named the productions in the parser states the same as in that document, + * which may make them easier to find, etc, although possibly slightly less + * optimized than writing my own reduced grammar. + * + * Below I will list differences between my parser and the official standard + * as I come up with them. + * - Encoding and Standalone headings are ignored for the moment. (4.3.3, + * 2.9) + * - The standalone heading attribute can have any standard whitespace + * before it (the specs say only spaces, no newlines). (2.9) + * - Since standalone is ignored, it is currently allowed to have any + * value (should be restricted to "yes" or "no"). (2.9) + * - Currently only UTF-8 / ASCII are parsed. + * - [optional] The content of comments is thrown away. (2.5) + * - The content of processing instruction blocks is parsed properly, but + * thrown away. 
(2.6) + */ + class XmlReader + { + public: + XmlReader( Bu::Stream &sIn ); /**< Takes the stream to read from. The class keeps a Bu::Stream reference member, so the stream presumably has to outlive the reader -- TODO confirm against the definition. NOTE(review): this single-argument constructor is not marked explicit. */ + virtual ~XmlReader(); + + XmlNode *read(); /**< Public entry point; returns a pointer to an XmlNode. NOTE(review): who owns the returned node cannot be told from this header -- confirm. */ + + private: + Bu::Stream &sIn; /**< Input stream, held by reference. */ + Bu::FString sBuf; /**< String buffer; presumably holds input pulled from sIn for lookahead()/burn() -- TODO confirm. */ + + private: // Helpers + const char *lookahead( int nAmnt ); /**< Presumably peeks at the next nAmnt bytes of input without consuming them -- definition not visible here, confirm. */ + void burn( int nAmnt ); /**< Presumably discards nAmnt bytes of already-buffered input -- confirm. */ + void checkString( const char *str, int nLen ); /**< Presumably checks the upcoming input against the nLen bytes at str -- confirm, including how a mismatch is reported. */ + + private: // States + /** + * The prolog: the headers, etc. that come before the document content. + */ + void prolog(); + + /** + * The XML declaration (version, encoding, etc). + */ + void XMLDecl(); + + /** + * Misc things; includes Comments and PIData (Processing Instructions). + */ + void Misc(); + + /** + * Comments. (2.5) + */ + void Comment(); + + /** + * Processing Instructions. (2.6) + */ + void PI(); + + /** + * Whitespace eater. + */ + void S(); + + /** + * Optional whitespace eater. + */ + void Sq(); + + /** + * XML version spec. + */ + void VersionInfo(); + + /** + * Your basic equals sign with surrounding whitespace. + */ + void Eq(); + + /** + * Read in an attribute value. + * @returns an FString; presumably the text of the value -- confirm. + */ + FString AttValue(); + + /** + * Read in the name of something. + * @returns an FString; presumably the name that was read -- confirm. + */ + FString Name(); + + /** + * Encoding declaration in the header. (4.3.3) + */ + void EncodingDecl(); + + /** + * Standalone declaration in the header. (2.9) + */ + void SDDecl(); + + /** Whitespace test: true when c is a space (0x20), tab (0x9), carriage return (0xD) or line feed (0xA). */ bool isS( unsigned char c ) + { + return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ); + } + }; +} + +#endif -- cgit v1.2.3