summaryrefslogtreecommitdiff
path: root/src/tests
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2012-05-03 06:30:36 +0000
committerMike Buland <eichlan@xagasoft.com>2012-05-03 06:30:36 +0000
commitdf6a899ad6c9a1c792582ef1a3b838c7eb6d330c (patch)
treeff9e17d0fd00585bc1e329a63c20f90d155b4ea8 /src/tests
parent343bffafecd4329ce5ae6625fc6b2c1ea9759f36 (diff)
downloadlibbu++-df6a899ad6c9a1c792582ef1a3b838c7eb6d330c.tar.gz
libbu++-df6a899ad6c9a1c792582ef1a3b838c7eb6d330c.tar.bz2
libbu++-df6a899ad6c9a1c792582ef1a3b838c7eb6d330c.tar.xz
libbu++-df6a899ad6c9a1c792582ef1a3b838c7eb6d330c.zip
Rearranging some stuff.
Diffstat (limited to 'src/tests')
-rw-r--r--src/tests/bnfcompile.cpp422
-rw-r--r--src/tests/parser.cpp369
2 files changed, 791 insertions, 0 deletions
diff --git a/src/tests/bnfcompile.cpp b/src/tests/bnfcompile.cpp
new file mode 100644
index 0000000..af7a0eb
--- /dev/null
+++ b/src/tests/bnfcompile.cpp
@@ -0,0 +1,422 @@
1/*
2 * Copyright (C) 2007-2012 Xagasoft, All rights reserved.
3 *
4 * This file is part of the libbu++ library and is released under the
5 * terms of the license contained in the file LICENSE.
6 */
7
8#include <bu/sio.h>
9#include <bu/lexer.h>
10#include <bu/parser.h>
11#include <bu/file.h>
12#include <bu/queuebuf.h>
13
14using namespace Bu;
15
16enum TokenType
17{
18 tokIdentifier,
19 tokColon,
20 tokOr,
21 tokSemiColon,
22 tokTokens,
23 tokEquals,
24 tokOpenCurly,
25 tokCloseCurly,
26 tokOpenSquare,
27 tokCloseSquare,
28
29 tokEos=-1
30};
31
32class BnfLexer : public Lexer
33{
34public:
35 BnfLexer( Stream &rSrc ) :
36 rSrc( rSrc )
37 {
38 }
39
40 virtual ~BnfLexer()
41 {
42 }
43
44 virtual Token *nextToken()
45 {
46 char cBuf;
47
48 for(;;)
49 {
50 if( qbIn.getSize() == 0 )
51 {
52 char buf[4096];
53 qbIn.write( buf, rSrc.read( buf, 4096 ) );
54
55 if( rSrc.isEos() && qbIn.getSize() == 0 )
56 return new Token( tokEos );
57 }
58 qbIn.peek( &cBuf, 1 );
59 if( (cBuf >= 'a' && cBuf <= 'z') ||
60 (cBuf >= 'A' && cBuf <= 'Z') ||
61 (cBuf >= '0' && cBuf <= '9') ||
62 cBuf == '_' )
63 {
64 sBuf.append( cBuf );
65 qbIn.seek( 1 );
66 }
67 else if( sBuf.isSet() )
68 {
69 if( sBuf == "tokens" )
70 {
71 sBuf.clear();
72 return new Token( tokTokens );
73 }
74 else
75 {
76 Token *pRet = new Token( tokIdentifier, sBuf );
77 sBuf.clear();
78 return pRet;
79 }
80 }
81 else
82 {
83 switch( cBuf )
84 {
85 case ' ':
86 case '\t':
87 case '\n':
88 case '\r':
89 qbIn.seek( 1 );
90 continue;
91
92 case ':':
93 qbIn.seek( 1 );
94 return new Token( tokColon );
95
96 case ';':
97 qbIn.seek( 1 );
98 return new Token( tokSemiColon );
99
100 case '|':
101 qbIn.seek( 1 );
102 return new Token( tokOr );
103
104 case '=':
105 qbIn.seek( 1 );
106 return new Token( tokEquals );
107
108 case '[':
109 qbIn.seek( 1 );
110 return new Token( tokOpenSquare );
111
112 case ']':
113 qbIn.seek( 1 );
114 return new Token( tokCloseSquare );
115
116 case '{':
117 qbIn.seek( 1 );
118 return new Token( tokOpenCurly );
119
120 case '}':
121 qbIn.seek( 1 );
122 return new Token( tokCloseCurly );
123
124 default:
125 throw ExceptionBase("Unexpected character '%c'.",
126 cBuf );
127 break;
128 }
129 }
130 }
131 }
132
133 virtual String tokenToString( const Token &t )
134 {
135 switch( (TokenType)t.iToken )
136 {
137 case tokIdentifier: return "tokIdentifier";
138 case tokColon: return "tokColon";
139 case tokOr: return "tokOr";
140 case tokSemiColon: return "tokSemiColon";
141 case tokTokens: return "tokTokens";
142 case tokEquals: return "tokEquals";
143 case tokOpenCurly: return "tokOpenCurly";
144 case tokCloseCurly: return "tokCloseCurly";
145 case tokOpenSquare: return "tokOpenSquare";
146 case tokCloseSquare: return "tokCloseSquare";
147 case tokEos: return "tokEos";
148 }
149
150 return "???";
151 }
152
153private:
154 Stream &rSrc;
155 QueueBuf qbIn;
156 String sBuf;
157};
158
159class BnfParser
160{
161public:
162 BnfParser( BnfLexer &l ) :
163 l( l ),
164 pCur( NULL ),
165 iLastToken( 0 )
166 {
167 }
168
169 virtual ~BnfParser()
170 {
171 delete pCur;
172 pCur = NULL;
173 }
174
175 void parse()
176 {
177 for(;;)
178 {
179 next();
180 switch( pCur->iToken )
181 {
182 case tokTokens:
183 tokens();
184 break;
185
186 case tokIdentifier:
187 nonTerminal();
188 break;
189
190 case tokEos:
191 return;
192 break;
193
194 default:
195 tokenError("tokTokens, tokIdentifier, or tokEos");
196 }
197 }
198 }
199
200private:
201 void tokens()
202 {
203 next();
204 if( pCur->iToken != tokEquals )
205 tokenError("tokEquals");
206 for(;;)
207 {
208 next();
209 if( pCur->iToken == tokIdentifier )
210 {
211 hTokens.insert( pCur->vExtra.get<Bu::String>(), ++iLastToken );
212 sio << "Added token[" << iLastToken << "]: "
213 << pCur->vExtra.get<Bu::String>() << sio.nl;
214 }
215 else if( pCur->iToken == tokSemiColon )
216 break;
217 else
218 tokenError("tokIdentifier or tokSemiColon");
219 }
220 }
221
222 void nonTerminal()
223 {
224 Bu::String sNtName = pCur->vExtra.get<Bu::String>();
225 Parser::NonTerminal nt;
226 p.addNonTerminal( sNtName );
227 sio.incIndent();
228 sio << "Created non-terminal: " << sNtName << sio.nl;
229
230 next();
231 if( pCur->iToken != tokColon )
232 tokenError("tokColon");
233 production( nt );
234 for(;;)
235 {
236 switch( pCur->iToken )
237 {
238 case tokOr:
239 production( nt );
240 break;
241
242 case tokSemiColon:
243 p.setNonTerminal( sNtName, nt );
244 sio.decIndent();
245 sio << "Closing non-terminal." << sio.nl;
246 return;
247
248 default:
249 tokenError("tkOr or tokSemiColon");
250 break;
251 }
252 }
253 }
254
255 void production( Parser::NonTerminal &nt )
256 {
257 sio.incIndent();
258 sio << "Adding new production:" << sio.nl;
259 Parser::Production pr;
260 bool bAnything = false;
261 for(;;)
262 {
263 next();
264 switch( pCur->iToken )
265 {
266 case tokIdentifier:
267 {
268 const Bu::String &sName =
269 pCur->vExtra.get<Bu::String>();
270 if( hTokens.has( sName ) )
271 {
272 pr.append(
273 Parser::State(
274 Parser::State::typeTerminal,
275 hTokens.get( sName )
276 )
277 );
278 sio << "Added terminal " << sName << sio.nl;
279 }
280 else
281 {
282 if( !p.hasNonTerminal( sName ) )
283 {
284 p.addNonTerminal( sName );
285 }
286 pr.append(
287 Parser::State(
288 Parser::State::typeNonTerminal,
289 p.getNonTerminalId( sName )
290 )
291 );
292 sio << "Added non-terminal " << sName << sio.nl;
293 }
294 }
295 break;
296
297 case tokOpenSquare:
298 {
299 next();
300 if( pCur->iToken != tokIdentifier )
301 tokenError("tokIdentifier");
302 Bu::String sName =
303 pCur->vExtra.get<Bu::String>();
304 next();
305 if( pCur->iToken != tokCloseSquare )
306 tokenError("tokCloseSquare");
307
308 if( !hTokens.has( sName ) )
309 throw ExceptionBase("Only token names may be "
310 "enclosed in square brackets.");
311
312 pr.append(
313 Parser::State(
314 Parser::State::typeTerminalPush,
315 hTokens.get( sName )
316 )
317 );
318 sio << "Added terminal-push " << sName << sio.nl;
319 }
320 break;
321
322 case tokOpenCurly:
323 {
324 next();
325 if( pCur->iToken != tokIdentifier )
326 tokenError("tokIdentifier");
327 Bu::String sName =
328 pCur->vExtra.get<Bu::String>();
329 next();
330 if( pCur->iToken != tokCloseCurly )
331 tokenError("tokCloseCurly");
332
333 if( !p.hasReduction( sName ) )
334 p.addReduction( sName );
335
336 pr.append(
337 Parser::State(
338 Parser::State::typeReduction,
339 p.getReductionId( sName )
340 )
341 );
342 sio << "Added reduction " << sName << sio.nl;
343 }
344 break;
345
346 case tokOr:
347 case tokSemiColon:
348 if( bAnything )
349 {
350 nt.addProduction( pr );
351 sio.decIndent();
352 sio << "Closing production." << sio.nl;
353 }
354 else
355 {
356 nt.setCanSkip();
357 sio.decIndent();
358 sio << "Closing empty production." << sio.nl;
359 }
360 return;
361
362 default:
363 tokenError("tokIdentifier, tokOpenSquare, tokOr, "
364 "tokOpenCurly, or tokSemiColon");
365 }
366 }
367 }
368
369private:
370 void next()
371 {
372 delete pCur;
373 pCur = l.nextToken();
374 }
375
376 void tokenError( const String &s )
377 {
378 throw ExceptionBase( ("Expected " + s + " but found "
379 + l.tokenToString( *pCur ) + ".").getStr() );
380 }
381
382private:
383 typedef Bu::Hash<Bu::String, int> TokenHash;
384 TokenHash hTokens;
385 BnfLexer &l;
386 BnfLexer::Token *pCur;
387 int iLastToken;
388 Parser p;
389};
390
391int main( int argc, char *argv[] )
392{
393 if( argc < 2 )
394 {
395 println("Provide an input filename as the first parameter.");
396 return 0;
397 }
398 File fIn( argv[1], File::Read );
399
400 BnfLexer bl( fIn );
401 BnfParser parser( bl );
402
403 parser.parse();
404
405/*
406 for(;;)
407 {
408 Lexer::Token *pTok = bl.nextToken();
409 sio << bl.tokenToString(*pTok);
410 if( pTok->vExtra.isSet() )
411 {
412 sio << " - " << pTok->vExtra;
413 }
414 sio << sio.nl;
415 if( pTok->iToken == tokEos )
416 break;
417 }
418*/
419
420 return 0;
421}
422
diff --git a/src/tests/parser.cpp b/src/tests/parser.cpp
new file mode 100644
index 0000000..af53bc8
--- /dev/null
+++ b/src/tests/parser.cpp
@@ -0,0 +1,369 @@
1/*
2 * Copyright (C) 2007-2012 Xagasoft, All rights reserved.
3 *
4 * This file is part of the libbu++ library and is released under the
5 * terms of the license contained in the file LICENSE.
6 */
7
8#include <bu/parser.h>
9#include <bu/lexer.h>
10#include <bu/file.h>
11#include <bu/sio.h>
12#include <bu/queuebuf.h>
13#include <stdlib.h>
14
15using namespace Bu;
16
17enum Tok
18{
19 tokNumber,
20 tokPlus,
21 tokMinus,
22 tokDivide,
23 tokMultiply,
24 tokOpenParen,
25 tokCloseParen,
26 tokCompute,
27 tokEndOfInput=-1
28};
29
30Bu::Formatter &operator<<( Bu::Formatter &f, Tok e )
31{
32 switch( e )
33 {
34 case tokNumber: return f << "tokNumber";
35 case tokPlus: return f << "tokPlus";
36 case tokMinus: return f << "tokMinus";
37 case tokDivide: return f << "tokDivide";
38 case tokMultiply: return f << "tokMultiply";
39 case tokOpenParen: return f << "tokOpenParen";
40 case tokCloseParen: return f << "tokCloseParen";
41 case tokCompute: return f << "tokCompute";
42 case tokEndOfInput: return f << "tokEndOfInput";
43 }
44
45 return f << "***error***";
46}
47
48class MathLexer : public Lexer
49{
50public:
51 MathLexer( Bu::Stream &rSrc ) :
52 rSrc( rSrc )
53 {
54 }
55
56 virtual ~MathLexer()
57 {
58 }
59
60 enum TokenTypes
61 {
62 tokStuff
63 };
64
65 virtual Token *nextToken()
66 {
67 for(;;)
68 {
69 if( qbIn.getSize() == 0 )
70 {
71 char buf[4096];
72 qbIn.write( buf, rSrc.read( buf, 4096 ) );
73
74 if( rSrc.isEos() && qbIn.getSize() == 0 )
75 return new Token( tokEndOfInput );
76 }
77
78 char b;
79 qbIn.peek( &b, 1 );
80 switch( b )
81 {
82 case '+':
83 qbIn.seek( 1 );
84 return new Token( tokPlus );
85
86 case '-':
87 qbIn.seek( 1 );
88 return new Token( tokMinus );
89
90 case '/':
91 qbIn.seek( 1 );
92 return new Token( tokDivide );
93
94 case '*':
95 qbIn.seek( 1 );
96 return new Token( tokMultiply );
97
98 case ' ':
99 case '\t':
100 case '\n':
101 qbIn.seek( 1 );
102 break;
103
104 case '=':
105 qbIn.seek( 1 );
106 return new Token( tokCompute );
107
108 case '(':
109 qbIn.seek( 1 );
110 return new Token( tokOpenParen );
111
112 case ')':
113 qbIn.seek( 1 );
114 return new Token( tokCloseParen );
115
116 case '.':
117 case '0':
118 case '1':
119 case '2':
120 case '3':
121 case '4':
122 case '5':
123 case '6':
124 case '7':
125 case '8':
126 case '9':
127 {
128 Bu::String sTmp;
129 sTmp += b;
130 qbIn.seek( 1 );
131 for(;;)
132 {
133 qbIn.peek( &b, 1 );
134 if( b != '.' && (b < '0' || b > '9') )
135 {
136 sio << "!! Convert '" << sTmp << "' to "
137 << strtod( sTmp.getStr(), NULL ) << sio.nl;
138 return new Token(
139 tokNumber, strtod( sTmp.getStr(), NULL )
140 );
141 }
142 qbIn.seek( 1 );
143 sTmp += b;
144 }
145 }
146 break;
147
148 default:
149 throw Bu::ExceptionBase("Unexpected character '%c'.", b );
150 }
151 }
152 }
153
154private:
155 Bu::Stream &rSrc;
156 QueueBuf qbIn;
157};
158
159void redAdd( Bu::Parser &p )
160{
161 Lexer::Token *a = p.popToken();
162 Lexer::Token *b = p.popToken();
163
164 sio << "Add! " << b->vExtra.get<double>() << " + "
165 << a->vExtra.get<double>() << sio.nl;
166
167 Lexer::Token *c = new Lexer::Token( tokNumber,
168 b->vExtra.get<double>() + a->vExtra.get<double>()
169 );
170 p.pushToken( c );
171
172 delete a;
173 delete b;
174}
175
176void redSubtract( Bu::Parser &p )
177{
178 Lexer::Token *a = p.popToken();
179 Lexer::Token *b = p.popToken();
180
181 sio << "Subtract! " << b->vExtra.get<double>() << " - "
182 << a->vExtra.get<double>() << sio.nl;
183
184 Lexer::Token *c = new Lexer::Token( tokNumber,
185 b->vExtra.get<double>() - a->vExtra.get<double>()
186 );
187 p.pushToken( c );
188
189 delete a;
190 delete b;
191}
192
193void redPrint( Bu::Parser &p )
194{
195 Lexer::Token *a = p.popToken();
196 sio << "Print! = " << a->vExtra.get<double>() << sio.nl;
197 delete a;
198}
199
200/* Basic grammer example:
201 *
202 * input: expr '='
203 * ;
204 *
205 * expr: expr '+' expr
206 * | '(' expr ')'
207 * | NUMBER
208 * ;
209 *
210 * The problem is, that we can't actually make something left hand recursive,
211 * so we break it into two exprs:
212 *
213 * expr-sub1: '(' expr ')'
214 * | NUMBER
215 * ;
216 *
217 * expr: expr-sub1 expr-sub2
218 * ;
219 *
220 * expr-sub2: '+' expr
221 * | '-' expr
222 * |
223 * ;
224 *
225 * 5 + 5 + 5 =
226 */
227
228int main( int argc, char *argv[] )
229{
230 if( argc < 2 )
231 {
232 println("Provide an input filename as the first parameter.");
233 return 0;
234 }
235 File fIn( argv[1], File::Read );
236
237 Parser p;
238
239 p.addNonTerminal("expr");
240 p.addNonTerminal("expr-sub1");
241 p.addNonTerminal("expr-sub2");
242 {
243 Parser::NonTerminal nt;
244 nt.addProduction(
245 Parser::Production(
246 Parser::State(
247 Parser::State::typeTerminal,
248 tokPlus
249 )
250 ).append(
251 Parser::State(
252 Parser::State::typeNonTerminal,
253 p.getNonTerminalId("expr")
254 )
255 ).append(
256 Parser::State(
257 Parser::State::typeReduction,
258 p.addReduction("add")
259 )
260 )
261 );
262 nt.addProduction(
263 Parser::Production(
264 Parser::State(
265 Parser::State::typeTerminal,
266 tokMinus
267 )
268 ).append(
269 Parser::State(
270 Parser::State::typeNonTerminal,
271 p.getNonTerminalId("expr")
272 )
273 ).append(
274 Parser::State(
275 Parser::State::typeReduction,
276 p.addReduction("subtract")
277 )
278 )
279 );
280 nt.addProduction(
281 Parser::Production(
282 )
283 );
284 nt.setCanSkip();
285 p.setNonTerminal("expr-sub2", nt );
286 }
287 {
288 Parser::NonTerminal nt;
289 nt.addProduction(
290 Parser::Production(
291 Parser::State(
292 Parser::State::typeTerminalPush,
293 tokNumber
294 )
295 )
296 );
297 nt.addProduction(
298 Parser::Production(
299 Parser::State(
300 Parser::State::typeTerminal,
301 tokOpenParen
302 )
303 ).append(
304 Parser::State(
305 Parser::State::typeNonTerminal,
306 p.getNonTerminalId("expr")
307 )
308 ).append(
309 Parser::State(
310 Parser::State::typeTerminal,
311 tokCloseParen
312 )
313 )
314 );
315 p.setNonTerminal("expr-sub1", nt );
316 }
317 {
318 Parser::NonTerminal nt;
319 nt.addProduction(
320 Parser::Production(
321 Parser::State(
322 Parser::State::typeNonTerminal,
323 p.getNonTerminalId("expr-sub1")
324 )
325 ).append(
326 Parser::State(
327 Parser::State::typeNonTerminal,
328 p.getNonTerminalId("expr-sub2")
329 )
330 )
331 );
332 p.setNonTerminal("expr", nt );
333 }
334 {
335 Parser::NonTerminal nt;
336 nt.addProduction(
337 Parser::Production(
338 Parser::State(
339 Parser::State::typeNonTerminal,
340 p.getNonTerminalId("expr")
341 )
342 ).append(
343 Parser::State(
344 Parser::State::typeTerminal,
345 tokCompute
346 )
347 ).append(
348 Parser::State(
349 Parser::State::typeReduction,
350 p.addReduction("print")
351 )
352 )
353 );
354 p.addNonTerminal("input", nt );
355 }
356
357 p.setRootNonTerminal("input");
358
359 p.setReduction("add", Bu::slot( &redAdd ) );
360 p.setReduction("subtract", Bu::slot( &redSubtract ) );
361 p.setReduction("print", Bu::slot( &redPrint ) );
362
363 p.pushLexer( new MathLexer( fIn ) );
364
365 p.parse();
366
367 return 0;
368}
369