aboutsummaryrefslogtreecommitdiff
path: root/src/inprogress/xmlreader.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/inprogress/xmlreader.h')
-rw-r--r--src/inprogress/xmlreader.h121
1 files changed, 121 insertions, 0 deletions
diff --git a/src/inprogress/xmlreader.h b/src/inprogress/xmlreader.h
new file mode 100644
index 0000000..708a386
--- /dev/null
+++ b/src/inprogress/xmlreader.h
@@ -0,0 +1,121 @@
1#ifndef XML_READER_H
2#define XML_READER_H
3
4#include <stdint.h>
5#include "bu/stream.h"
6#include "bu/fstring.h"
7#include "bu/xmlnode.h"
8
9namespace Bu
10{
11 /**
12 * An Xml 1.1 reader. I've decided to write this, this time, based on the
13 * official W3C reccomendation, now included with the source code. I've
14 * named the productions in the parser states the same as in that document,
15 * which may make them easier to find, etc, although possibly slightly less
16 * optimized than writing my own reduced grammer.
17 *
18 * Below I will list differences between my parser and the official standard
19 * as I come up with them.
20 * - Encoding and Standalone headings are ignored for the moment. (4.3.3,
21 * 2.9)
22 * - The standalone heading attribute can have any standard whitespace
23 * before it (the specs say only spaces, no newlines). (2.9)
24 * - Since standalone is ignored, it is currently allowed to have any
25 * value (should be restricted to "yes" or "no"). (2.9)
26 * - Currently only UTF-8 / ascii are parsed.
27 * - [optional] The content of comments is thrown away. (2.5)
28 * - The content of processing instruction blocks is parsed properly, but
29 * thrown away. (2.6)
30 */
31 class XmlReader
32 {
33 public:
34 XmlReader( Bu::Stream &sIn );
35 virtual ~XmlReader();
36
37 XmlNode *read();
38
39 private:
40 Bu::Stream &sIn;
41 Bu::FString sBuf;
42
43 private: // Helpers
44 const char *lookahead( int nAmnt );
45 void burn( int nAmnt );
46 void checkString( const char *str, int nLen );
47
48 private: // States
49 /**
50 * The headers, etc.
51 */
52 void prolog();
53
54 /**
55 * The xml decleration (version, encoding, etc).
56 */
57 void XMLDecl();
58
59 /**
60 * Misc things, Includes Comments and PIData (Processing Instructions).
61 */
62 void Misc();
63
64 /**
65 * Comments
66 */
67 void Comment();
68
69 /**
70 * Processing Instructions
71 */
72 void PI();
73
74 /**
75 * Whitespace eater.
76 */
77 void S();
78
79 /**
80 * Optional whitespace eater.
81 */
82 void Sq();
83
84 /**
85 * XML Version spec
86 */
87 void VersionInfo();
88
89 /**
90 * Your basic equals sign with surrounding whitespace.
91 */
92 void Eq();
93
94 /**
95 * Read in an attribute value.
96 */
97 FString AttValue();
98
99 /**
100 * Read in the name of something.
101 */
102 FString Name();
103
104 /**
105 * Encoding decleration in the header
106 */
107 void EncodingDecl();
108
109 /**
110 * Standalone decleration in the header
111 */
112 void SDDecl();
113
114 bool isS( unsigned char c )
115 {
116 return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA );
117 }
118 };
119}
120
121#endif