summaryrefslogtreecommitdiff
path: root/src/tests/bnfcompile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/tests/bnfcompile.cpp')
-rw-r--r--src/tests/bnfcompile.cpp422
1 files changed, 422 insertions, 0 deletions
diff --git a/src/tests/bnfcompile.cpp b/src/tests/bnfcompile.cpp
new file mode 100644
index 0000000..af7a0eb
--- /dev/null
+++ b/src/tests/bnfcompile.cpp
@@ -0,0 +1,422 @@
1/*
2 * Copyright (C) 2007-2012 Xagasoft, All rights reserved.
3 *
4 * This file is part of the libbu++ library and is released under the
5 * terms of the license contained in the file LICENSE.
6 */
7
8#include <bu/sio.h>
9#include <bu/lexer.h>
10#include <bu/parser.h>
11#include <bu/file.h>
12#include <bu/queuebuf.h>
13
14using namespace Bu;
15
16enum TokenType
17{
18 tokIdentifier,
19 tokColon,
20 tokOr,
21 tokSemiColon,
22 tokTokens,
23 tokEquals,
24 tokOpenCurly,
25 tokCloseCurly,
26 tokOpenSquare,
27 tokCloseSquare,
28
29 tokEos=-1
30};
31
32class BnfLexer : public Lexer
33{
34public:
35 BnfLexer( Stream &rSrc ) :
36 rSrc( rSrc )
37 {
38 }
39
40 virtual ~BnfLexer()
41 {
42 }
43
44 virtual Token *nextToken()
45 {
46 char cBuf;
47
48 for(;;)
49 {
50 if( qbIn.getSize() == 0 )
51 {
52 char buf[4096];
53 qbIn.write( buf, rSrc.read( buf, 4096 ) );
54
55 if( rSrc.isEos() && qbIn.getSize() == 0 )
56 return new Token( tokEos );
57 }
58 qbIn.peek( &cBuf, 1 );
59 if( (cBuf >= 'a' && cBuf <= 'z') ||
60 (cBuf >= 'A' && cBuf <= 'Z') ||
61 (cBuf >= '0' && cBuf <= '9') ||
62 cBuf == '_' )
63 {
64 sBuf.append( cBuf );
65 qbIn.seek( 1 );
66 }
67 else if( sBuf.isSet() )
68 {
69 if( sBuf == "tokens" )
70 {
71 sBuf.clear();
72 return new Token( tokTokens );
73 }
74 else
75 {
76 Token *pRet = new Token( tokIdentifier, sBuf );
77 sBuf.clear();
78 return pRet;
79 }
80 }
81 else
82 {
83 switch( cBuf )
84 {
85 case ' ':
86 case '\t':
87 case '\n':
88 case '\r':
89 qbIn.seek( 1 );
90 continue;
91
92 case ':':
93 qbIn.seek( 1 );
94 return new Token( tokColon );
95
96 case ';':
97 qbIn.seek( 1 );
98 return new Token( tokSemiColon );
99
100 case '|':
101 qbIn.seek( 1 );
102 return new Token( tokOr );
103
104 case '=':
105 qbIn.seek( 1 );
106 return new Token( tokEquals );
107
108 case '[':
109 qbIn.seek( 1 );
110 return new Token( tokOpenSquare );
111
112 case ']':
113 qbIn.seek( 1 );
114 return new Token( tokCloseSquare );
115
116 case '{':
117 qbIn.seek( 1 );
118 return new Token( tokOpenCurly );
119
120 case '}':
121 qbIn.seek( 1 );
122 return new Token( tokCloseCurly );
123
124 default:
125 throw ExceptionBase("Unexpected character '%c'.",
126 cBuf );
127 break;
128 }
129 }
130 }
131 }
132
133 virtual String tokenToString( const Token &t )
134 {
135 switch( (TokenType)t.iToken )
136 {
137 case tokIdentifier: return "tokIdentifier";
138 case tokColon: return "tokColon";
139 case tokOr: return "tokOr";
140 case tokSemiColon: return "tokSemiColon";
141 case tokTokens: return "tokTokens";
142 case tokEquals: return "tokEquals";
143 case tokOpenCurly: return "tokOpenCurly";
144 case tokCloseCurly: return "tokCloseCurly";
145 case tokOpenSquare: return "tokOpenSquare";
146 case tokCloseSquare: return "tokCloseSquare";
147 case tokEos: return "tokEos";
148 }
149
150 return "???";
151 }
152
153private:
154 Stream &rSrc;
155 QueueBuf qbIn;
156 String sBuf;
157};
158
159class BnfParser
160{
161public:
162 BnfParser( BnfLexer &l ) :
163 l( l ),
164 pCur( NULL ),
165 iLastToken( 0 )
166 {
167 }
168
169 virtual ~BnfParser()
170 {
171 delete pCur;
172 pCur = NULL;
173 }
174
175 void parse()
176 {
177 for(;;)
178 {
179 next();
180 switch( pCur->iToken )
181 {
182 case tokTokens:
183 tokens();
184 break;
185
186 case tokIdentifier:
187 nonTerminal();
188 break;
189
190 case tokEos:
191 return;
192 break;
193
194 default:
195 tokenError("tokTokens, tokIdentifier, or tokEos");
196 }
197 }
198 }
199
200private:
201 void tokens()
202 {
203 next();
204 if( pCur->iToken != tokEquals )
205 tokenError("tokEquals");
206 for(;;)
207 {
208 next();
209 if( pCur->iToken == tokIdentifier )
210 {
211 hTokens.insert( pCur->vExtra.get<Bu::String>(), ++iLastToken );
212 sio << "Added token[" << iLastToken << "]: "
213 << pCur->vExtra.get<Bu::String>() << sio.nl;
214 }
215 else if( pCur->iToken == tokSemiColon )
216 break;
217 else
218 tokenError("tokIdentifier or tokSemiColon");
219 }
220 }
221
222 void nonTerminal()
223 {
224 Bu::String sNtName = pCur->vExtra.get<Bu::String>();
225 Parser::NonTerminal nt;
226 p.addNonTerminal( sNtName );
227 sio.incIndent();
228 sio << "Created non-terminal: " << sNtName << sio.nl;
229
230 next();
231 if( pCur->iToken != tokColon )
232 tokenError("tokColon");
233 production( nt );
234 for(;;)
235 {
236 switch( pCur->iToken )
237 {
238 case tokOr:
239 production( nt );
240 break;
241
242 case tokSemiColon:
243 p.setNonTerminal( sNtName, nt );
244 sio.decIndent();
245 sio << "Closing non-terminal." << sio.nl;
246 return;
247
248 default:
249 tokenError("tkOr or tokSemiColon");
250 break;
251 }
252 }
253 }
254
255 void production( Parser::NonTerminal &nt )
256 {
257 sio.incIndent();
258 sio << "Adding new production:" << sio.nl;
259 Parser::Production pr;
260 bool bAnything = false;
261 for(;;)
262 {
263 next();
264 switch( pCur->iToken )
265 {
266 case tokIdentifier:
267 {
268 const Bu::String &sName =
269 pCur->vExtra.get<Bu::String>();
270 if( hTokens.has( sName ) )
271 {
272 pr.append(
273 Parser::State(
274 Parser::State::typeTerminal,
275 hTokens.get( sName )
276 )
277 );
278 sio << "Added terminal " << sName << sio.nl;
279 }
280 else
281 {
282 if( !p.hasNonTerminal( sName ) )
283 {
284 p.addNonTerminal( sName );
285 }
286 pr.append(
287 Parser::State(
288 Parser::State::typeNonTerminal,
289 p.getNonTerminalId( sName )
290 )
291 );
292 sio << "Added non-terminal " << sName << sio.nl;
293 }
294 }
295 break;
296
297 case tokOpenSquare:
298 {
299 next();
300 if( pCur->iToken != tokIdentifier )
301 tokenError("tokIdentifier");
302 Bu::String sName =
303 pCur->vExtra.get<Bu::String>();
304 next();
305 if( pCur->iToken != tokCloseSquare )
306 tokenError("tokCloseSquare");
307
308 if( !hTokens.has( sName ) )
309 throw ExceptionBase("Only token names may be "
310 "enclosed in square brackets.");
311
312 pr.append(
313 Parser::State(
314 Parser::State::typeTerminalPush,
315 hTokens.get( sName )
316 )
317 );
318 sio << "Added terminal-push " << sName << sio.nl;
319 }
320 break;
321
322 case tokOpenCurly:
323 {
324 next();
325 if( pCur->iToken != tokIdentifier )
326 tokenError("tokIdentifier");
327 Bu::String sName =
328 pCur->vExtra.get<Bu::String>();
329 next();
330 if( pCur->iToken != tokCloseCurly )
331 tokenError("tokCloseCurly");
332
333 if( !p.hasReduction( sName ) )
334 p.addReduction( sName );
335
336 pr.append(
337 Parser::State(
338 Parser::State::typeReduction,
339 p.getReductionId( sName )
340 )
341 );
342 sio << "Added reduction " << sName << sio.nl;
343 }
344 break;
345
346 case tokOr:
347 case tokSemiColon:
348 if( bAnything )
349 {
350 nt.addProduction( pr );
351 sio.decIndent();
352 sio << "Closing production." << sio.nl;
353 }
354 else
355 {
356 nt.setCanSkip();
357 sio.decIndent();
358 sio << "Closing empty production." << sio.nl;
359 }
360 return;
361
362 default:
363 tokenError("tokIdentifier, tokOpenSquare, tokOr, "
364 "tokOpenCurly, or tokSemiColon");
365 }
366 }
367 }
368
369private:
370 void next()
371 {
372 delete pCur;
373 pCur = l.nextToken();
374 }
375
376 void tokenError( const String &s )
377 {
378 throw ExceptionBase( ("Expected " + s + " but found "
379 + l.tokenToString( *pCur ) + ".").getStr() );
380 }
381
382private:
383 typedef Bu::Hash<Bu::String, int> TokenHash;
384 TokenHash hTokens;
385 BnfLexer &l;
386 BnfLexer::Token *pCur;
387 int iLastToken;
388 Parser p;
389};
390
391int main( int argc, char *argv[] )
392{
393 if( argc < 2 )
394 {
395 println("Provide an input filename as the first parameter.");
396 return 0;
397 }
398 File fIn( argv[1], File::Read );
399
400 BnfLexer bl( fIn );
401 BnfParser parser( bl );
402
403 parser.parse();
404
405/*
406 for(;;)
407 {
408 Lexer::Token *pTok = bl.nextToken();
409 sio << bl.tokenToString(*pTok);
410 if( pTok->vExtra.isSet() )
411 {
412 sio << " - " << pTok->vExtra;
413 }
414 sio << sio.nl;
415 if( pTok->iToken == tokEos )
416 break;
417 }
418*/
419
420 return 0;
421}
422