1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
/*
* Copyright (C) 2007-2008 Xagasoft, All rights reserved.
*
* This file is part of the libbu++ library and is released under the
* terms of the license contained in the file LICENSE.
*/
#ifndef XML_READER_H
#define XML_READER_H
#include <stdint.h>
#include "bu/stream.h"
#include "bu/fstring.h"
#include "bu/xmlnode.h"
namespace Bu
{
/**
* An Xml 1.1 reader. I've decided to write this, this time, based on the
* official W3C recommendation, now included with the source code. I've
* named the productions in the parser states the same as in that document,
* which may make them easier to find, etc, although possibly slightly less
* optimized than writing my own reduced grammar.
*
* Below I will list differences between my parser and the official standard
* as I come up with them.
* - Encoding and Standalone headings are ignored for the moment. (4.3.3,
* 2.9)
* - The standalone heading attribute can have any standard whitespace
* before it (the specs say only spaces, no newlines). (2.9)
* - Since standalone is ignored, it is currently allowed to have any
* value (should be restricted to "yes" or "no"). (2.9)
* - Currently only UTF-8 / ascii are parsed.
* - [optional] The content of comments is thrown away. (2.5)
* - The content of processing instruction blocks is parsed properly, but
* thrown away. (2.6)
*/
class XmlReader
{
public:
    /**
     * Construct a reader on top of an input stream.
     * NOTE(review): the class stores a Bu::Stream reference member, so the
     * stream presumably has to outlive the reader -- confirm against the
     * constructor's implementation.
     *@param sIn The stream handed to the reader.
     */
    XmlReader( Bu::Stream &sIn );

    virtual ~XmlReader();

    /**
     * Entry point of the reader.
     * NOTE(review): who owns the returned node is not visible from this
     * header -- check the implementation before deleting or keeping it.
     *@returns A pointer to an XmlNode.
     */
    XmlNode *read();

private:
    Bu::Stream &sIn;    ///< Stream reference held by the reader.
    Bu::FString sBuf;   ///< String buffer owned by the reader.

private: // Input helpers
    /**
     * NOTE(review): presumably exposes the next nAmnt bytes of input
     * without consuming them -- confirm in the implementation.
     */
    const char *lookahead( int nAmnt );

    /**
     * NOTE(review): presumably discards nAmnt bytes of already buffered
     * input -- confirm in the implementation.
     */
    void burn( int nAmnt );

    /**
     * NOTE(review): presumably verifies that the upcoming input matches the
     * nLen bytes at str -- confirm in the implementation, including what
     * happens on a mismatch.
     */
    void checkString( const char *str, int nLen );

private: // Parser states, named after the productions they handle
    /**
     * Prolog: the headers and related leading material.
     */
    void prolog();

    /**
     * XMLDecl: the xml declaration (version, encoding, etc).
     */
    void XMLDecl();

    /**
     * Misc: miscellaneous items, which covers Comments and PIData
     * (Processing Instructions).
     */
    void Misc();

    /**
     * Comment: handles comments.
     */
    void Comment();

    /**
     * PI: handles processing instructions.
     */
    void PI();

    /**
     * S: eats whitespace.
     */
    void S();

    /**
     * Sq: eats whitespace when there is some; the whitespace is optional.
     */
    void Sq();

    /**
     * VersionInfo: the XML version specification.
     */
    void VersionInfo();

    /**
     * Eq: a plain equals sign together with the whitespace around it.
     */
    void Eq();

    /**
     * AttValue: reads in the value of an attribute.
     *@returns The value as an FString.
     */
    FString AttValue();

    /**
     * Name: reads in the name of something.
     *@returns The name as an FString.
     */
    FString Name();

    /**
     * EncodingDecl: the encoding declaration found in the header.
     */
    void EncodingDecl();

    /**
     * SDDecl: the standalone declaration found in the header.
     */
    void SDDecl();

    /**
     * Whitespace test for a single character.
     *@param c The character to classify.
     *@returns true when c is a space (0x20), horizontal tab (0x9),
     * carriage return (0xD) or line feed (0xA); false for anything else.
     */
    bool isS( unsigned char c )
    {
        switch( c )
        {
            case 0x20:  // space
            case 0x9:   // horizontal tab
            case 0xD:   // carriage return
            case 0xA:   // line feed
                return true;

            default:
                return false;
        }
    }
};
}
#endif
|