summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2007-05-09 15:01:03 +0000
committerMike Buland <eichlan@xagasoft.com>2007-05-09 15:01:03 +0000
commit2e035fee36768e3c765b7f5dc10bf0a3b7d2448b (patch)
treefde606c514a9321cd4bf0d8c075b3b1c22d3a960 /src
parent6e7e30b5fd87766566620d48e785072e0b5c52cd (diff)
downloadlibbu++-2e035fee36768e3c765b7f5dc10bf0a3b7d2448b.tar.gz
libbu++-2e035fee36768e3c765b7f5dc10bf0a3b7d2448b.tar.bz2
libbu++-2e035fee36768e3c765b7f5dc10bf0a3b7d2448b.tar.xz
libbu++-2e035fee36768e3c765b7f5dc10bf0a3b7d2448b.zip
Minor changes to both the taf and xml readers. I'm thinking I'm going to
archive these for now and resurrect/fix the old xml reader, just to have something working.
Diffstat (limited to '')
-rw-r--r--src/tafreader.cpp4
-rw-r--r--src/tafreader.h7
-rw-r--r--src/xmlreader.cpp175
-rw-r--r--src/xmlreader.h53
4 files changed, 227 insertions, 12 deletions
diff --git a/src/tafreader.cpp b/src/tafreader.cpp
index 4f2890a..f94fe44 100644
--- a/src/tafreader.cpp
+++ b/src/tafreader.cpp
@@ -1,9 +1,11 @@
1#include "tafreader.h" 1#include "tafreader.h"
2 2
3Bu::TafReader::TafReader() 3Bu::TafReader::TafReader( Bu::Stream &sIn ) :
4 sIn( sIn )
4{ 5{
5} 6}
6 7
7Bu::TafReader::~TafReader() 8Bu::TafReader::~TafReader()
8{ 9{
9} 10}
11
diff --git a/src/tafreader.h b/src/tafreader.h
index d9f1dfd..2dbb9ea 100644
--- a/src/tafreader.h
+++ b/src/tafreader.h
@@ -2,19 +2,22 @@
2#define BU_TAF_READER_H 2#define BU_TAF_READER_H
3 3
4#include <stdint.h> 4#include <stdint.h>
5#include "bu/tafdocument.h"
6#include "bu/stream.h"
5 7
6namespace Bu 8namespace Bu
7{ 9{
8 /** 10 /**
9 * 11 *
10 */ 12 */
11 class TafReader 13 class TafReader : public Bu::TafDocument
12 { 14 {
13 public: 15 public:
14 TafReader(); 16 TafReader( Bu::Stream &sIn );
15 virtual ~TafReader(); 17 virtual ~TafReader();
16 18
17 private: 19 private:
20 Stream &sIn;
18 21
19 }; 22 };
20} 23}
diff --git a/src/xmlreader.cpp b/src/xmlreader.cpp
index 432ecc1..bd241cf 100644
--- a/src/xmlreader.cpp
+++ b/src/xmlreader.cpp
@@ -29,10 +29,10 @@ void Bu::XmlReader::burn( int nAmnt )
29 lookahead( nAmnt ); 29 lookahead( nAmnt );
30 } 30 }
31 31
32 sBuf.remove( nAmnt ); 32 //sBuf.remove( nAmnt );
33} 33}
34 34
35void Bu::XmlNode::checkString( const char *str, int nLen ) 35void Bu::XmlReader::checkString( const char *str, int nLen )
36{ 36{
37 if( !strncmp( str, lookahead( nLen ), nLen ) ) 37 if( !strncmp( str, lookahead( nLen ), nLen ) )
38 { 38 {
@@ -57,14 +57,66 @@ void Bu::XmlReader::prolog()
57void Bu::XmlReader::XMLDecl() 57void Bu::XmlReader::XMLDecl()
58{ 58{
59 checkString("<?xml", 5 ); 59 checkString("<?xml", 5 );
60 S();
60 VersionInfo(); 61 VersionInfo();
61 EncodingDecl(); 62 EncodingDecl();
62 SDDecl(); 63 SDDecl();
63 S(); 64 Sq();
65 checkString("?>", 2 );
64} 66}
65 67
66void Bu::XmlReader::Misc() 68void Bu::XmlReader::Misc()
67{ 69{
70 for(;;)
71 {
72 S();
73 if( !strncmp("<!--", lookahead( 4 ), 4 ) )
74 {
75 Comment();
76 }
77 else if( !strncmp("<?", lookahead( 2 ), 2 ) )
78 {
79 PI();
80 }
81 else
82 {
83 return;
84 }
85 }
86}
87
88void Bu::XmlReader::Comment()
89{
90 checkString("<!--", 4 );
91 for(;;)
92 {
93 unsigned char c = *lookahead(1);
94 if( c == '-' )
95 {
96 if( lookahead(2)[1] == '-' )
97 {
98 checkString("-->", 3 );
99 return;
100 }
101 }
102 burn( 1 );
103 }
104}
105
106void Bu::XmlReader::PI()
107{
108 checkString("<?", 2 );
109 FString sName = Name();
110 printf("PI: %s\n---\n", sName.getStr() );
111 S();
112 for(int j = 0;; j++ )
113 {
114 if( !strncmp( "?>", lookahead(j+2)+j, 2 ) )
115 {
116 burn( j+2 );
117 return;
118 }
119 }
68} 120}
69 121
70void Bu::XmlReader::S() 122void Bu::XmlReader::S()
@@ -75,12 +127,12 @@ void Bu::XmlReader::S()
75 if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA ) 127 if( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA )
76 continue; 128 continue;
77 if( j == 0 ) 129 if( j == 0 )
78 printf("Error, expected whitespace!\n"); 130 throw ExceptionBase("Expected whitespace.");
79 return; 131 return;
80 } 132 }
81} 133}
82 134
83void Bu::XmlReader::S() 135void Bu::XmlReader::Sq()
84{ 136{
85 for(;;) 137 for(;;)
86 { 138 {
@@ -93,9 +145,19 @@ void Bu::XmlReader::S()
93 145
94void Bu::XmlReader::VersionInfo() 146void Bu::XmlReader::VersionInfo()
95{ 147{
96 S(); 148 try
97 checkString("version", 7 ); 149 {
98 150 S();
151 checkString("version", 7 );
152 }
153 catch( ExceptionBase &e )
154 {
155 return;
156 }
157 Eq();
158 Bu::FString ver = AttValue();
159 if( ver != "1.1" )
160 throw ExceptionBase("Currently we only support xml version 1.1\n");
99} 161}
100 162
101void Bu::XmlReader::Eq() 163void Bu::XmlReader::Eq()
@@ -105,4 +167,101 @@ void Bu::XmlReader::Eq()
105 Sq(); 167 Sq();
106} 168}
107 169
170void Bu::XmlReader::EncodingDecl()
171{
172 S();
173 try
174 {
175 checkString("encoding", 8 );
176 }
177 catch( ExceptionBase &e )
178 {
179 return;
180 }
181
182 Eq();
183 AttValue();
184}
185
186void Bu::XmlReader::SDDecl()
187{
188 S();
189 try
190 {
191 checkString("standalone", 10 );
192 }
193 catch( ExceptionBase &e )
194 {
195 return;
196 }
197
198 Eq();
199 AttValue();
200}
201
202Bu::FString Bu::XmlReader::AttValue()
203{
204 char q = *lookahead(1);
205 if( q == '\"' )
206 {
207 for( int j = 2;; j++ )
208 {
209 if( lookahead(j)[j-1] == '\"' )
210 {
211 Bu::FString ret( lookahead(j)+1, j-2 );
212 burn( j );
213 return ret;
214 }
215 }
216 }
217 else if( q == '\'' )
218 {
219 for( int j = 2;; j++ )
220 {
221 if( lookahead(j)[j-1] == '\'' )
222 {
223 Bu::FString ret( lookahead(j)+1, j-2 );
224 burn( j );
225 return ret;
226 }
227 }
228 }
229
230 throw ExceptionBase("Excpected either \' or \".\n");
231}
232
233Bu::FString Bu::XmlReader::Name()
234{
235 unsigned char c = *lookahead( 1 );
236 if( c != ':' && c != '_' &&
237 (c < 'A' || c > 'Z') &&
238 (c < 'a' || c > 'z') &&
239 (c < 0xC0 || c > 0xD6 ) &&
240 (c < 0xD8 || c > 0xF6 ) &&
241 (c < 0xF8))
242 {
243 throw ExceptionBase("Invalid entity name starting character.");
244 }
245
246 for( int j = 1;; j++ )
247 {
248 unsigned char c = lookahead(j+1)[j];
249 if( isS( c ) )
250 {
251 FString ret( lookahead(j+1), j+1 );
252 burn( j+1 );
253 return ret;
254 }
255 if( c != ':' && c != '_' && c != '-' && c != '.' && c != 0xB7 &&
256 (c < 'A' || c > 'Z') &&
257 (c < 'a' || c > 'z') &&
258 (c < '0' || c > '9') &&
259 (c < 0xC0 || c > 0xD6 ) &&
260 (c < 0xD8 || c > 0xF6 ) &&
261 (c < 0xF8))
262 {
263 throw ExceptionBase("Invalid character in name.");
264 }
265 }
266}
108 267
diff --git a/src/xmlreader.h b/src/xmlreader.h
index 19791c4..708a386 100644
--- a/src/xmlreader.h
+++ b/src/xmlreader.h
@@ -9,7 +9,24 @@
9namespace Bu 9namespace Bu
10{ 10{
11 /** 11 /**
12 * An Xml 1.1 reader. I've decided to write this, this time, based on the
13 * official W3C recommendation, now included with the source code. I've
14 * named the productions in the parser states the same as in that document,
15 * which may make them easier to find, etc, although possibly slightly less
16 * optimized than writing my own reduced grammar.
12 * 17 *
18 * Below I will list differences between my parser and the official standard
19 * as I come up with them.
20 * - Encoding and Standalone headings are ignored for the moment. (4.3.3,
21 * 2.9)
22 * - The standalone heading attribute can have any standard whitespace
23 * before it (the specs say only spaces, no newlines). (2.9)
24 * - Since standalone is ignored, it is currently allowed to have any
25 * value (should be restricted to "yes" or "no"). (2.9)
26 * - Currently only UTF-8 / ascii are parsed.
27 * - [optional] The content of comments is thrown away. (2.5)
28 * - The content of processing instruction blocks is parsed properly, but
29 * thrown away. (2.6)
13 */ 30 */
14 class XmlReader 31 class XmlReader
15 { 32 {
@@ -40,11 +57,21 @@ namespace Bu
40 void XMLDecl(); 57 void XMLDecl();
41 58
42 /** 59 /**
43 * Misc things...? 60 * Misc things, Includes Comments and PIData (Processing Instructions).
44 */ 61 */
45 void Misc(); 62 void Misc();
46 63
47 /** 64 /**
65 * Comments
66 */
67 void Comment();
68
69 /**
70 * Processing Instructions
71 */
72 void PI();
73
74 /**
48 * Whitespace eater. 75 * Whitespace eater.
49 */ 76 */
50 void S(); 77 void S();
@@ -64,6 +91,30 @@ namespace Bu
64 */ 91 */
65 void Eq(); 92 void Eq();
66 93
94 /**
95 * Read in an attribute value.
96 */
97 FString AttValue();
98
99 /**
100 * Read in the name of something.
101 */
102 FString Name();
103
104 /**
105 * Encoding declaration in the header
106 */
107 void EncodingDecl();
108
109 /**
110 * Standalone declaration in the header
111 */
112 void SDDecl();
113
114 bool isS( unsigned char c )
115 {
116 return ( c == 0x20 || c == 0x9 || c == 0xD || c == 0xA );
117 }
67 }; 118 };
68} 119}
69 120