diff options
Diffstat (limited to '')
| -rw-r--r-- | src/xmlreader.h | 252 |
1 files changed, 136 insertions, 116 deletions
diff --git a/src/xmlreader.h b/src/xmlreader.h index 708a386..c8f7202 100644 --- a/src/xmlreader.h +++ b/src/xmlreader.h | |||
| @@ -1,121 +1,141 @@ | |||
| 1 | #ifndef XML_READER_H | 1 | #ifndef XMLREADER |
| 2 | #define XML_READER_H | 2 | #define XMLREADER |
| 3 | |||
| 4 | #include <stdio.h> | ||
| 5 | #include "xmldocument.h" | ||
| 6 | #include "flexbuf.h" | ||
| 7 | #include "hashtable.h" | ||
| 8 | #include "staticstring.h" | ||
| 9 | |||
| 10 | /** | ||
| 11 | * Takes care of reading in xml formatted data from a file. This could/should | ||
| 12 | * be made more arbitrary in the future so that we can read the data from any | ||
| 13 | * source. This is actually made quite simple already since all data read in | ||
| 14 | * is handled by one single helper function and then placed into a FlexBuf for | ||
| 15 | * easy access by the other functions. The FlexBuf also allows for block | ||
| 16 | * reading from disk, which improves speed by a noticeable amount. | ||
| 17 | * <br> | ||
| 18 | * There are also some extra features implemented that allow you to break the | ||
| 19 | * standard XML reader specs and eliminate leading and trailing whitespace in | ||
| 20 | * all read content. This is useful in situations where you allow additional | ||
| 21 | * whitespace in the files to make them easily human readable. The returned | ||
| 22 | * content will be NULL in situations where all content between nodes was | ||
| 23 | * stripped. | ||
| 24 | *@author Mike Buland | ||
| 25 | */ | ||
| 26 | class XmlReader : public XmlDocument | ||
| 27 | { | ||
| 28 | public: | ||
| 29 | /** | ||
| 30 | * Create a standard XmlReader. The optional parameter bStrip allows you to | ||
| 31 | * create a reader that will strip out all leading and trailing whitespace | ||
| 32 | * in content, a-la html. | ||
| 33 | *@param bStrip Strip out leading and trailing whitespace? | ||
| 34 | */ | ||
| 35 | XmlReader( bool bStrip=false ); | ||
| 3 | 36 | ||
| 4 | #include <stdint.h> | 37 | /** |
| 5 | #include "bu/stream.h" | 38 | * Destroy this XmlReader. |
| 6 | #include "bu/fstring.h" | 39 | */ |
| 7 | #include "bu/xmlnode.h" | 40 | virtual ~XmlReader(); |
| 41 | |||
| 42 | /** | ||
| 43 | * Build a document based on some kind of input. This is called | ||
| 44 | * automatically by the constructor. | ||
| 45 | */ | ||
| 46 | bool buildDoc(); | ||
| 47 | |||
| 48 | private: | ||
| 49 | /** | ||
| 50 | * This is called by the low level automaton in order to get the next | ||
| 51 | * character. This function should return a character at the current | ||
| 52 | * position plus nIndex, but does not increment the current character. | ||
| 53 | *@param nIndex The index of the character from the current stream position. | ||
| 54 | *@returns A single character at the requested position, or 0 for end of | ||
| 55 | * stream. | ||
| 56 | */ | ||
| 57 | virtual char getChar( int nIndex = 0 ) = 0; | ||
| 58 | |||
| 59 | /** | ||
| 60 | * Called to increment the current stream position by nAmnt characters (a single character by default). | ||
| 61 | */ | ||
| 62 | virtual void usedChar( int nAmnt = 1) = 0; | ||
| 63 | |||
| 64 | /** | ||
| 65 | * Automaton function: is whitespace. | ||
| 66 | *@param chr A character | ||
| 67 | *@returns True if chr is whitespace, false otherwise. | ||
| 68 | */ | ||
| 69 | bool isws( char chr ); | ||
| 70 | |||
| 71 | /** | ||
| 72 | * Automaton function: ws. Skips sections of whitespace. | ||
| 73 | *@returns True if everything was ok, False for end of stream. | ||
| 74 | */ | ||
| 75 | bool ws(); | ||
| 76 | |||
| 77 | /** | ||
| 78 | * Automaton function: node. Processes an XmlNode | ||
| 79 | *@returns True if everything was ok, False for end of stream. | ||
| 80 | */ | ||
| 81 | bool node(); | ||
| 8 | 82 | ||
| 9 | namespace Bu | ||
| 10 | { | ||
| 11 | /** | 83 | /** |
| 12 | * An Xml 1.1 reader. I've decided to write this, this time, based on the | 84 | * Automaton function: startNode. Processes the beginning of a node. |
| 13 | * official W3C recommendation, now included with the source code. I've | 85 | *@returns True if everything was ok, False for end of stream. |
| 14 | * named the productions in the parser states the same as in that document, | 86 | */ |
| 15 | * which may make them easier to find, etc, although possibly slightly less | 87 | bool startNode(); |
| 16 | * optimized than writing my own reduced grammar. | 88 | |
| 17 | * | 89 | /** |
| 18 | * Below I will list differences between my parser and the official standard | 90 | * Automaton function: name. Processes the name of a node. |
| 19 | * as I come up with them. | 91 | *@returns True if everything was ok, False for end of stream. |
| 20 | * - Encoding and Standalone headings are ignored for the moment. (4.3.3, | 92 | */ |
| 21 | * 2.9) | 93 | bool name(); |
| 22 | * - The standalone heading attribute can have any standard whitespace | 94 | |
| 23 | * before it (the specs say only spaces, no newlines). (2.9) | 95 | /** |
| 24 | * Since standalone is ignored, it is currently allowed to have any | 96 | * Automaton function: textDecl. Processes the xml text declaration, if |
| 25 | * value (should be restricted to "yes" or "no"). (2.9) | 97 | * there is one. |
| 26 | * - Currently only UTF-8 / ascii are parsed. | 98 | */ |
| 27 | * - [optional] The content of comments is thrown away. (2.5) | 99 | void textDecl(); |
| 28 | * - The content of processing instruction blocks is parsed properly, but | 100 | |
| 29 | * thrown away. (2.6) | 101 | /** |
| 30 | */ | 102 | * Automaton function: entity. Processes an entity from the header. |
| 31 | class XmlReader | 103 | */ |
| 32 | { | 104 | void entity(); |
| 33 | public: | 105 | |
| 34 | XmlReader( Bu::Stream &sIn ); | 106 | /** |
| 35 | virtual ~XmlReader(); | 107 | * Adds an entity to the list, if it doesn't already exist. |
| 36 | 108 | *@param name The name of the entity | |
| 37 | XmlNode *read(); | 109 | *@param value The value of the entity |
| 38 | 110 | */ | |
| 39 | private: | 111 | void addEntity( const char *name, const char *value ); |
| 40 | Bu::Stream &sIn; | 112 | |
| 41 | Bu::FString sBuf; | 113 | StaticString *getEscape(); |
| 42 | 114 | ||
| 43 | private: // Helpers | 115 | /** |
| 44 | const char *lookahead( int nAmnt ); | 116 | * Automaton function: paramlist. Processes a list of node params. |
| 45 | void burn( int nAmnt ); | 117 | *@returns True if everything was ok, False for end of stream. |
| 46 | void checkString( const char *str, int nLen ); | 118 | */ |
| 47 | 119 | bool paramlist(); | |
| 48 | private: // States | 120 | |
| 49 | /** | 121 | /** |
| 50 | * The headers, etc. | 122 | * Automaton function: param. Processes a single parameter. |
| 51 | */ | 123 | *@returns True if everything was ok, False for end of stream. |
| 52 | void prolog(); | 124 | */ |
| 53 | 125 | bool param(); | |
| 54 | /** | 126 | |
| 55 | * The xml declaration (version, encoding, etc). | 127 | /** |
| 56 | */ | 128 | * Automaton function: content. Processes node content. |
| 57 | void XMLDecl(); | 129 | *@returns True if everything was ok, False for end of stream. |
| 58 | 130 | */ | |
| 59 | /** | 131 | bool content(); |
| 60 | * Misc things, Includes Comments and PIData (Processing Instructions). | 132 | |
| 61 | */ | 133 | FlexBuf fbContent; /**< buffer for the current node's content. */ |
| 62 | void Misc(); | 134 | FlexBuf fbParamName; /**< buffer for the current param's name. */ |
| 63 | 135 | FlexBuf fbParamValue; /**< buffer for the current param's value. */ | |
| 64 | /** | 136 | bool bStrip; /**< Are we stripping whitespace? */ |
| 65 | * Comments | 137 | |
| 66 | */ | 138 | HashTable htEntity; /**< Entity type definitions. */ |
| 67 | void Comment(); | 139 | }; |
| 68 | |||
| 69 | /** | ||
| 70 | * Processing Instructions | ||
| 71 | */ | ||
| 72 | void PI(); | ||
| 73 | |||
| 74 | /** | ||
| 75 | * Whitespace eater. | ||
| 76 | */ | ||
| 77 | void S(); | ||
| 78 | |||
| 79 | /** | ||
| 80 | * Optional whitespace eater. | ||
| 81 | */ | ||
| 82 | void Sq(); | ||
| 83 | |||
| 84 | /** | ||
| 85 | * XML Version spec | ||
| 86 | */ | ||
| 87 | void VersionInfo(); | ||
| 88 | |||
| 89 | /** | ||
| 90 | * Your basic equals sign with surrounding whitespace. | ||
| 91 | */ | ||
| 92 | void Eq(); | ||
| 93 | |||
| 94 | /** | ||
| 95 | * Read in an attribute value. | ||
| 96 | */ | ||
| 97 | FString AttValue(); | ||
| 98 | |||
| 99 | /** | ||
| 100 | * Read in the name of something. | ||
| 101 | */ | ||
| 102 | FString Name(); | ||
| 103 | |||
| 104 | /** | ||
| 105 | * Encoding declaration in the header | ||
| 106 | */ | ||
| 107 | void EncodingDecl(); | ||
| 108 | |||
| 109 | /** | ||
| 110 | * Standalone declaration in the header | ||
| 111 | */ | ||
| 112 | void SDDecl(); | ||
| 113 | |||
| 114 | bool isS( unsigned char c ) | ||
| 115 | { | ||
| 116 | return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ); | ||
| 117 | } | ||
| 118 | }; | ||
| 119 | } | ||
| 120 | 140 | ||
| 121 | #endif | 141 | #endif |
