diff options
Diffstat (limited to 'src/tests/parser.cpp')
-rw-r--r-- | src/tests/parser.cpp | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/src/tests/parser.cpp b/src/tests/parser.cpp new file mode 100644 index 0000000..af53bc8 --- /dev/null +++ b/src/tests/parser.cpp | |||
@@ -0,0 +1,369 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007-2012 Xagasoft, All rights reserved. | ||
3 | * | ||
4 | * This file is part of the libbu++ library and is released under the | ||
5 | * terms of the license contained in the file LICENSE. | ||
6 | */ | ||
7 | |||
8 | #include <bu/parser.h> | ||
9 | #include <bu/lexer.h> | ||
10 | #include <bu/file.h> | ||
11 | #include <bu/sio.h> | ||
12 | #include <bu/queuebuf.h> | ||
13 | #include <stdlib.h> | ||
14 | |||
15 | using namespace Bu; | ||
16 | |||
/**
 * Token identifiers produced by the lexer and matched by the parser rules.
 * The numeric values are spelled out so they are obvious when they show up as
 * plain integers in parser states.
 */
enum Tok
{
    tokEndOfInput = -1,
    tokNumber     = 0,
    tokPlus       = 1,
    tokMinus      = 2,
    tokDivide     = 3,
    tokMultiply   = 4,
    tokOpenParen  = 5,
    tokCloseParen = 6,
    tokCompute    = 7
};
29 | |||
30 | Bu::Formatter &operator<<( Bu::Formatter &f, Tok e ) | ||
31 | { | ||
32 | switch( e ) | ||
33 | { | ||
34 | case tokNumber: return f << "tokNumber"; | ||
35 | case tokPlus: return f << "tokPlus"; | ||
36 | case tokMinus: return f << "tokMinus"; | ||
37 | case tokDivide: return f << "tokDivide"; | ||
38 | case tokMultiply: return f << "tokMultiply"; | ||
39 | case tokOpenParen: return f << "tokOpenParen"; | ||
40 | case tokCloseParen: return f << "tokCloseParen"; | ||
41 | case tokCompute: return f << "tokCompute"; | ||
42 | case tokEndOfInput: return f << "tokEndOfInput"; | ||
43 | } | ||
44 | |||
45 | return f << "***error***"; | ||
46 | } | ||
47 | |||
48 | class MathLexer : public Lexer | ||
49 | { | ||
50 | public: | ||
51 | MathLexer( Bu::Stream &rSrc ) : | ||
52 | rSrc( rSrc ) | ||
53 | { | ||
54 | } | ||
55 | |||
56 | virtual ~MathLexer() | ||
57 | { | ||
58 | } | ||
59 | |||
60 | enum TokenTypes | ||
61 | { | ||
62 | tokStuff | ||
63 | }; | ||
64 | |||
65 | virtual Token *nextToken() | ||
66 | { | ||
67 | for(;;) | ||
68 | { | ||
69 | if( qbIn.getSize() == 0 ) | ||
70 | { | ||
71 | char buf[4096]; | ||
72 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
73 | |||
74 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
75 | return new Token( tokEndOfInput ); | ||
76 | } | ||
77 | |||
78 | char b; | ||
79 | qbIn.peek( &b, 1 ); | ||
80 | switch( b ) | ||
81 | { | ||
82 | case '+': | ||
83 | qbIn.seek( 1 ); | ||
84 | return new Token( tokPlus ); | ||
85 | |||
86 | case '-': | ||
87 | qbIn.seek( 1 ); | ||
88 | return new Token( tokMinus ); | ||
89 | |||
90 | case '/': | ||
91 | qbIn.seek( 1 ); | ||
92 | return new Token( tokDivide ); | ||
93 | |||
94 | case '*': | ||
95 | qbIn.seek( 1 ); | ||
96 | return new Token( tokMultiply ); | ||
97 | |||
98 | case ' ': | ||
99 | case '\t': | ||
100 | case '\n': | ||
101 | qbIn.seek( 1 ); | ||
102 | break; | ||
103 | |||
104 | case '=': | ||
105 | qbIn.seek( 1 ); | ||
106 | return new Token( tokCompute ); | ||
107 | |||
108 | case '(': | ||
109 | qbIn.seek( 1 ); | ||
110 | return new Token( tokOpenParen ); | ||
111 | |||
112 | case ')': | ||
113 | qbIn.seek( 1 ); | ||
114 | return new Token( tokCloseParen ); | ||
115 | |||
116 | case '.': | ||
117 | case '0': | ||
118 | case '1': | ||
119 | case '2': | ||
120 | case '3': | ||
121 | case '4': | ||
122 | case '5': | ||
123 | case '6': | ||
124 | case '7': | ||
125 | case '8': | ||
126 | case '9': | ||
127 | { | ||
128 | Bu::String sTmp; | ||
129 | sTmp += b; | ||
130 | qbIn.seek( 1 ); | ||
131 | for(;;) | ||
132 | { | ||
133 | qbIn.peek( &b, 1 ); | ||
134 | if( b != '.' && (b < '0' || b > '9') ) | ||
135 | { | ||
136 | sio << "!! Convert '" << sTmp << "' to " | ||
137 | << strtod( sTmp.getStr(), NULL ) << sio.nl; | ||
138 | return new Token( | ||
139 | tokNumber, strtod( sTmp.getStr(), NULL ) | ||
140 | ); | ||
141 | } | ||
142 | qbIn.seek( 1 ); | ||
143 | sTmp += b; | ||
144 | } | ||
145 | } | ||
146 | break; | ||
147 | |||
148 | default: | ||
149 | throw Bu::ExceptionBase("Unexpected character '%c'.", b ); | ||
150 | } | ||
151 | } | ||
152 | } | ||
153 | |||
154 | private: | ||
155 | Bu::Stream &rSrc; | ||
156 | QueueBuf qbIn; | ||
157 | }; | ||
158 | |||
159 | void redAdd( Bu::Parser &p ) | ||
160 | { | ||
161 | Lexer::Token *a = p.popToken(); | ||
162 | Lexer::Token *b = p.popToken(); | ||
163 | |||
164 | sio << "Add! " << b->vExtra.get<double>() << " + " | ||
165 | << a->vExtra.get<double>() << sio.nl; | ||
166 | |||
167 | Lexer::Token *c = new Lexer::Token( tokNumber, | ||
168 | b->vExtra.get<double>() + a->vExtra.get<double>() | ||
169 | ); | ||
170 | p.pushToken( c ); | ||
171 | |||
172 | delete a; | ||
173 | delete b; | ||
174 | } | ||
175 | |||
176 | void redSubtract( Bu::Parser &p ) | ||
177 | { | ||
178 | Lexer::Token *a = p.popToken(); | ||
179 | Lexer::Token *b = p.popToken(); | ||
180 | |||
181 | sio << "Subtract! " << b->vExtra.get<double>() << " - " | ||
182 | << a->vExtra.get<double>() << sio.nl; | ||
183 | |||
184 | Lexer::Token *c = new Lexer::Token( tokNumber, | ||
185 | b->vExtra.get<double>() - a->vExtra.get<double>() | ||
186 | ); | ||
187 | p.pushToken( c ); | ||
188 | |||
189 | delete a; | ||
190 | delete b; | ||
191 | } | ||
192 | |||
193 | void redPrint( Bu::Parser &p ) | ||
194 | { | ||
195 | Lexer::Token *a = p.popToken(); | ||
196 | sio << "Print! = " << a->vExtra.get<double>() << sio.nl; | ||
197 | delete a; | ||
198 | } | ||
199 | |||
200 | /* Basic grammar example: | ||
201 | * | ||
202 | * input: expr '=' | ||
203 | * ; | ||
204 | * | ||
205 | * expr: expr '+' expr | ||
206 | * | '(' expr ')' | ||
207 | * | NUMBER | ||
208 | * ; | ||
209 | * | ||
210 | * The problem is that we can't actually express a left-recursive rule, | ||
211 | * so we break expr into two helper rules: | ||
212 | * | ||
213 | * expr-sub1: '(' expr ')' | ||
214 | * | NUMBER | ||
215 | * ; | ||
216 | * | ||
217 | * expr: expr-sub1 expr-sub2 | ||
218 | * ; | ||
219 | * | ||
220 | * expr-sub2: '+' expr | ||
221 | * | '-' expr | ||
222 | * | | ||
223 | * ; | ||
224 | * | ||
225 | * 5 + 5 + 5 = | ||
226 | */ | ||
227 | |||
228 | int main( int argc, char *argv[] ) | ||
229 | { | ||
230 | if( argc < 2 ) | ||
231 | { | ||
232 | println("Provide an input filename as the first parameter."); | ||
233 | return 0; | ||
234 | } | ||
235 | File fIn( argv[1], File::Read ); | ||
236 | |||
237 | Parser p; | ||
238 | |||
239 | p.addNonTerminal("expr"); | ||
240 | p.addNonTerminal("expr-sub1"); | ||
241 | p.addNonTerminal("expr-sub2"); | ||
242 | { | ||
243 | Parser::NonTerminal nt; | ||
244 | nt.addProduction( | ||
245 | Parser::Production( | ||
246 | Parser::State( | ||
247 | Parser::State::typeTerminal, | ||
248 | tokPlus | ||
249 | ) | ||
250 | ).append( | ||
251 | Parser::State( | ||
252 | Parser::State::typeNonTerminal, | ||
253 | p.getNonTerminalId("expr") | ||
254 | ) | ||
255 | ).append( | ||
256 | Parser::State( | ||
257 | Parser::State::typeReduction, | ||
258 | p.addReduction("add") | ||
259 | ) | ||
260 | ) | ||
261 | ); | ||
262 | nt.addProduction( | ||
263 | Parser::Production( | ||
264 | Parser::State( | ||
265 | Parser::State::typeTerminal, | ||
266 | tokMinus | ||
267 | ) | ||
268 | ).append( | ||
269 | Parser::State( | ||
270 | Parser::State::typeNonTerminal, | ||
271 | p.getNonTerminalId("expr") | ||
272 | ) | ||
273 | ).append( | ||
274 | Parser::State( | ||
275 | Parser::State::typeReduction, | ||
276 | p.addReduction("subtract") | ||
277 | ) | ||
278 | ) | ||
279 | ); | ||
280 | nt.addProduction( | ||
281 | Parser::Production( | ||
282 | ) | ||
283 | ); | ||
284 | nt.setCanSkip(); | ||
285 | p.setNonTerminal("expr-sub2", nt ); | ||
286 | } | ||
287 | { | ||
288 | Parser::NonTerminal nt; | ||
289 | nt.addProduction( | ||
290 | Parser::Production( | ||
291 | Parser::State( | ||
292 | Parser::State::typeTerminalPush, | ||
293 | tokNumber | ||
294 | ) | ||
295 | ) | ||
296 | ); | ||
297 | nt.addProduction( | ||
298 | Parser::Production( | ||
299 | Parser::State( | ||
300 | Parser::State::typeTerminal, | ||
301 | tokOpenParen | ||
302 | ) | ||
303 | ).append( | ||
304 | Parser::State( | ||
305 | Parser::State::typeNonTerminal, | ||
306 | p.getNonTerminalId("expr") | ||
307 | ) | ||
308 | ).append( | ||
309 | Parser::State( | ||
310 | Parser::State::typeTerminal, | ||
311 | tokCloseParen | ||
312 | ) | ||
313 | ) | ||
314 | ); | ||
315 | p.setNonTerminal("expr-sub1", nt ); | ||
316 | } | ||
317 | { | ||
318 | Parser::NonTerminal nt; | ||
319 | nt.addProduction( | ||
320 | Parser::Production( | ||
321 | Parser::State( | ||
322 | Parser::State::typeNonTerminal, | ||
323 | p.getNonTerminalId("expr-sub1") | ||
324 | ) | ||
325 | ).append( | ||
326 | Parser::State( | ||
327 | Parser::State::typeNonTerminal, | ||
328 | p.getNonTerminalId("expr-sub2") | ||
329 | ) | ||
330 | ) | ||
331 | ); | ||
332 | p.setNonTerminal("expr", nt ); | ||
333 | } | ||
334 | { | ||
335 | Parser::NonTerminal nt; | ||
336 | nt.addProduction( | ||
337 | Parser::Production( | ||
338 | Parser::State( | ||
339 | Parser::State::typeNonTerminal, | ||
340 | p.getNonTerminalId("expr") | ||
341 | ) | ||
342 | ).append( | ||
343 | Parser::State( | ||
344 | Parser::State::typeTerminal, | ||
345 | tokCompute | ||
346 | ) | ||
347 | ).append( | ||
348 | Parser::State( | ||
349 | Parser::State::typeReduction, | ||
350 | p.addReduction("print") | ||
351 | ) | ||
352 | ) | ||
353 | ); | ||
354 | p.addNonTerminal("input", nt ); | ||
355 | } | ||
356 | |||
357 | p.setRootNonTerminal("input"); | ||
358 | |||
359 | p.setReduction("add", Bu::slot( &redAdd ) ); | ||
360 | p.setReduction("subtract", Bu::slot( &redSubtract ) ); | ||
361 | p.setReduction("print", Bu::slot( &redPrint ) ); | ||
362 | |||
363 | p.pushLexer( new MathLexer( fIn ) ); | ||
364 | |||
365 | p.parse(); | ||
366 | |||
367 | return 0; | ||
368 | } | ||
369 | |||