diff options
Diffstat (limited to 'src/inprogress/xmlreader.h')
-rw-r--r-- | src/inprogress/xmlreader.h | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/src/inprogress/xmlreader.h b/src/inprogress/xmlreader.h new file mode 100644 index 0000000..708a386 --- /dev/null +++ b/src/inprogress/xmlreader.h | |||
@@ -0,0 +1,121 @@ | |||
1 | #ifndef XML_READER_H | ||
2 | #define XML_READER_H | ||
3 | |||
4 | #include <stdint.h> | ||
5 | #include "bu/stream.h" | ||
6 | #include "bu/fstring.h" | ||
7 | #include "bu/xmlnode.h" | ||
8 | |||
9 | namespace Bu | ||
10 | { | ||
11 | /** | ||
12 | * An Xml 1.1 reader. I've decided to write this, this time, based on the | ||
13 | * official W3C reccomendation, now included with the source code. I've | ||
14 | * named the productions in the parser states the same as in that document, | ||
15 | * which may make them easier to find, etc, although possibly slightly less | ||
16 | * optimized than writing my own reduced grammer. | ||
17 | * | ||
18 | * Below I will list differences between my parser and the official standard | ||
19 | * as I come up with them. | ||
20 | * - Encoding and Standalone headings are ignored for the moment. (4.3.3, | ||
21 | * 2.9) | ||
22 | * - The standalone heading attribute can have any standard whitespace | ||
23 | * before it (the specs say only spaces, no newlines). (2.9) | ||
24 | * - Since standalone is ignored, it is currently allowed to have any | ||
25 | * value (should be restricted to "yes" or "no"). (2.9) | ||
26 | * - Currently only UTF-8 / ascii are parsed. | ||
27 | * - [optional] The content of comments is thrown away. (2.5) | ||
28 | * - The content of processing instruction blocks is parsed properly, but | ||
29 | * thrown away. (2.6) | ||
30 | */ | ||
31 | class XmlReader | ||
32 | { | ||
33 | public: | ||
34 | XmlReader( Bu::Stream &sIn ); | ||
35 | virtual ~XmlReader(); | ||
36 | |||
37 | XmlNode *read(); | ||
38 | |||
39 | private: | ||
40 | Bu::Stream &sIn; | ||
41 | Bu::FString sBuf; | ||
42 | |||
43 | private: // Helpers | ||
44 | const char *lookahead( int nAmnt ); | ||
45 | void burn( int nAmnt ); | ||
46 | void checkString( const char *str, int nLen ); | ||
47 | |||
48 | private: // States | ||
49 | /** | ||
50 | * The headers, etc. | ||
51 | */ | ||
52 | void prolog(); | ||
53 | |||
54 | /** | ||
55 | * The xml decleration (version, encoding, etc). | ||
56 | */ | ||
57 | void XMLDecl(); | ||
58 | |||
59 | /** | ||
60 | * Misc things, Includes Comments and PIData (Processing Instructions). | ||
61 | */ | ||
62 | void Misc(); | ||
63 | |||
64 | /** | ||
65 | * Comments | ||
66 | */ | ||
67 | void Comment(); | ||
68 | |||
69 | /** | ||
70 | * Processing Instructions | ||
71 | */ | ||
72 | void PI(); | ||
73 | |||
74 | /** | ||
75 | * Whitespace eater. | ||
76 | */ | ||
77 | void S(); | ||
78 | |||
79 | /** | ||
80 | * Optional whitespace eater. | ||
81 | */ | ||
82 | void Sq(); | ||
83 | |||
84 | /** | ||
85 | * XML Version spec | ||
86 | */ | ||
87 | void VersionInfo(); | ||
88 | |||
89 | /** | ||
90 | * Your basic equals sign with surrounding whitespace. | ||
91 | */ | ||
92 | void Eq(); | ||
93 | |||
94 | /** | ||
95 | * Read in an attribute value. | ||
96 | */ | ||
97 | FString AttValue(); | ||
98 | |||
99 | /** | ||
100 | * Read in the name of something. | ||
101 | */ | ||
102 | FString Name(); | ||
103 | |||
104 | /** | ||
105 | * Encoding decleration in the header | ||
106 | */ | ||
107 | void EncodingDecl(); | ||
108 | |||
109 | /** | ||
110 | * Standalone decleration in the header | ||
111 | */ | ||
112 | void SDDecl(); | ||
113 | |||
114 | bool isS( unsigned char c ) | ||
115 | { | ||
116 | return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ); | ||
117 | } | ||
118 | }; | ||
119 | } | ||
120 | |||
121 | #endif | ||