summaryrefslogtreecommitdiff
path: root/src/xmlreader.h
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/xmlreader.h252
1 files changed, 136 insertions, 116 deletions
diff --git a/src/xmlreader.h b/src/xmlreader.h
index 708a386..c8f7202 100644
--- a/src/xmlreader.h
+++ b/src/xmlreader.h
@@ -1,121 +1,141 @@
1#ifndef XML_READER_H 1#ifndef XMLREADER
2#define XML_READER_H 2#define XMLREADER
3
4#include <stdio.h>
5#include "xmldocument.h"
6#include "flexbuf.h"
7#include "hashtable.h"
8#include "staticstring.h"
9
10/**
11 * Takes care of reading in xml formatted data from a file. This could/should
12 * be made more arbitrary in the future so that we can read the data from any
13 * source. This is actually made quite simple already since all data read in
14 * is handled by one single helper function and then placed into a FlexBuf for
15 * easy access by the other functions. The FlexBuf also allows for block
16 * reading from disk, which improves speed by a noticeable amount.
17 * <br>
18 * There are also some extra features implemented that allow you to break the
19 * standard XML reader specs and eliminate leading and trailing whitespace in
20 * all read content. This is useful in situations where you allow additional
21 * whitespace in the files to make them easily human readable. The returned
22 * content will be NULL in situations where all content between nodes was
23 * stripped.
24 *@author Mike Buland
25 */
26class XmlReader : public XmlDocument
27{
28public:
29 /**
30 * Create a standard XmlReader. The optional parameter bStrip allows you to
31 * create a reader that will strip out all leading and trailing whitespace
32 * in content, a-la html.
33 *@param bStrip Strip out leading and trailing whitespace?
34 */
35 XmlReader( bool bStrip=false );
3 36
4#include <stdint.h> 37 /**
5#include "bu/stream.h" 38 * Destroy this XmlReader.
6#include "bu/fstring.h" 39 */
7#include "bu/xmlnode.h" 40 virtual ~XmlReader();
41
42 /**
43 * Build a document based on some kind of input. This is called
44 * automatically by the constructor.
45 */
46 bool buildDoc();
47
48private:
49 /**
50 * This is called by the low level automaton in order to get the next
51 * character. This function should return a character at the current
52 * position plus nIndex, but does not increment the current character.
53 *@param nIndex The index of the character from the current stream position.
54 *@returns A single character at the requested position, or 0 for end of
55 * stream.
56 */
57 virtual char getChar( int nIndex = 0 ) = 0;
58
59 /**
60 * Called to increment the current stream position by a single character.
61 */
62 virtual void usedChar( int nAmnt = 1) = 0;
63
64 /**
65 * Automaton function: is whitespace.
66 *@param chr A character
67 *@returns True if chr is whitespace, false otherwise.
68 */
69 bool isws( char chr );
70
71 /**
72 * Automaton function: ws. Skips sections of whitespace.
73 *@returns True if everything was ok, False for end of stream.
74 */
75 bool ws();
76
77 /**
78 * Automaton function: node. Processes an XmlNode
79 *@returns True if everything was ok, False for end of stream.
80 */
81 bool node();
8 82
9namespace Bu
10{
11 /** 83 /**
12 * An Xml 1.1 reader. I've decided to write this, this time, based on the 84 * Automaton function: startNode. Processes the beginning of a node.
13 * official W3C recommendation, now included with the source code. I've 85 *@returns True if everything was ok, False for end of stream.
14 * named the productions in the parser states the same as in that document, 86 */
15 * which may make them easier to find, etc, although possibly slightly less 87 bool startNode();
16 * optimized than writing my own reduced grammar. 88
17 * 89 /**
18 * Below I will list differences between my parser and the official standard 90 * Automaton function: name. Processes the name of a node.
19 * as I come up with them. 91 *@returns True if everything was ok, False for end of stream.
20 * - Encoding and Standalone headings are ignored for the moment. (4.3.3, 92 */
21 * 2.9) 93 bool name();
22 * - The standalone heading attribute can have any standard whitespace 94
23 * before it (the specs say only spaces, no newlines). (2.9) 95 /**
24 * Since standalone is ignored, it is currently allowed to have any 96 * Automaton function: textDecl. Processes the xml text declaration, if
25 * value (should be restricted to "yes" or "no"). (2.9) 97 * there is one.
26 * - Currently only UTF-8 / ascii are parsed. 98 */
27 * - [optional] The content of comments is thrown away. (2.5) 99 void textDecl();
28 * - The content of processing instruction blocks is parsed properly, but 100
29 * thrown away. (2.6) 101 /**
30 */ 102 * Automaton function: entity. Processes an entity from the header.
31 class XmlReader 103 */
32 { 104 void entity();
33 public: 105
34 XmlReader( Bu::Stream &sIn ); 106 /**
35 virtual ~XmlReader(); 107 * Adds an entity to the list, if it doesn't already exist.
36 108 *@param name The name of the entity
37 XmlNode *read(); 109 *@param value The value of the entity
38 110 */
39 private: 111 void addEntity( const char *name, const char *value );
40 Bu::Stream &sIn; 112
41 Bu::FString sBuf; 113 StaticString *getEscape();
42 114
43 private: // Helpers 115 /**
44 const char *lookahead( int nAmnt ); 116 * Automaton function: paramlist. Processes a list of node params.
45 void burn( int nAmnt ); 117 *@returns True if everything was ok, False for end of stream.
46 void checkString( const char *str, int nLen ); 118 */
47 119 bool paramlist();
48 private: // States 120
49 /** 121 /**
50 * The headers, etc. 122 * Automaton function: param. Processes a single parameter.
51 */ 123 *@returns True if everything was ok, False for end of stream.
52 void prolog(); 124 */
53 125 bool param();
54 /** 126
55 * The xml declaration (version, encoding, etc). 127 /**
56 */ 128 * Automaton function: content. Processes node content.
57 void XMLDecl(); 129 *@returns True if everything was ok, False for end of stream.
58 130 */
59 /** 131 bool content();
60 * Misc things, Includes Comments and PIData (Processing Instructions). 132
61 */ 133 FlexBuf fbContent; /**< buffer for the current node's content. */
62 void Misc(); 134 FlexBuf fbParamName; /**< buffer for the current param's name. */
63 135 FlexBuf fbParamValue; /**< buffer for the current param's value. */
64 /** 136 bool bStrip; /**< Are we stripping whitespace? */
65 * Comments 137
66 */ 138 HashTable htEntity; /**< Entity type definitions. */
67 void Comment(); 139};
68
69 /**
70 * Processing Instructions
71 */
72 void PI();
73
74 /**
75 * Whitespace eater.
76 */
77 void S();
78
79 /**
80 * Optional whitespace eater.
81 */
82 void Sq();
83
84 /**
85 * XML Version spec
86 */
87 void VersionInfo();
88
89 /**
90 * Your basic equals sign with surrounding whitespace.
91 */
92 void Eq();
93
94 /**
95 * Read in an attribute value.
96 */
97 FString AttValue();
98
99 /**
100 * Read in the name of something.
101 */
102 FString Name();
103
104 /**
105 * Encoding declaration in the header
106 */
107 void EncodingDecl();
108
109 /**
110 * Standalone declaration in the header
111 */
112 void SDDecl();
113
114 bool isS( unsigned char c )
115 {
116 return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA );
117 }
118 };
119}
120 140
121#endif 141#endif