diff options
Diffstat (limited to 'src/tests/bnfcompile.cpp')
-rw-r--r-- | src/tests/bnfcompile.cpp | 422 |
1 files changed, 422 insertions, 0 deletions
diff --git a/src/tests/bnfcompile.cpp b/src/tests/bnfcompile.cpp new file mode 100644 index 0000000..af7a0eb --- /dev/null +++ b/src/tests/bnfcompile.cpp | |||
@@ -0,0 +1,422 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2007-2012 Xagasoft, All rights reserved. | ||
3 | * | ||
4 | * This file is part of the libbu++ library and is released under the | ||
5 | * terms of the license contained in the file LICENSE. | ||
6 | */ | ||
7 | |||
8 | #include <bu/sio.h> | ||
9 | #include <bu/lexer.h> | ||
10 | #include <bu/parser.h> | ||
11 | #include <bu/file.h> | ||
12 | #include <bu/queuebuf.h> | ||
13 | |||
14 | using namespace Bu; | ||
15 | |||
16 | enum TokenType | ||
17 | { | ||
18 | tokIdentifier, | ||
19 | tokColon, | ||
20 | tokOr, | ||
21 | tokSemiColon, | ||
22 | tokTokens, | ||
23 | tokEquals, | ||
24 | tokOpenCurly, | ||
25 | tokCloseCurly, | ||
26 | tokOpenSquare, | ||
27 | tokCloseSquare, | ||
28 | |||
29 | tokEos=-1 | ||
30 | }; | ||
31 | |||
32 | class BnfLexer : public Lexer | ||
33 | { | ||
34 | public: | ||
35 | BnfLexer( Stream &rSrc ) : | ||
36 | rSrc( rSrc ) | ||
37 | { | ||
38 | } | ||
39 | |||
40 | virtual ~BnfLexer() | ||
41 | { | ||
42 | } | ||
43 | |||
44 | virtual Token *nextToken() | ||
45 | { | ||
46 | char cBuf; | ||
47 | |||
48 | for(;;) | ||
49 | { | ||
50 | if( qbIn.getSize() == 0 ) | ||
51 | { | ||
52 | char buf[4096]; | ||
53 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
54 | |||
55 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
56 | return new Token( tokEos ); | ||
57 | } | ||
58 | qbIn.peek( &cBuf, 1 ); | ||
59 | if( (cBuf >= 'a' && cBuf <= 'z') || | ||
60 | (cBuf >= 'A' && cBuf <= 'Z') || | ||
61 | (cBuf >= '0' && cBuf <= '9') || | ||
62 | cBuf == '_' ) | ||
63 | { | ||
64 | sBuf.append( cBuf ); | ||
65 | qbIn.seek( 1 ); | ||
66 | } | ||
67 | else if( sBuf.isSet() ) | ||
68 | { | ||
69 | if( sBuf == "tokens" ) | ||
70 | { | ||
71 | sBuf.clear(); | ||
72 | return new Token( tokTokens ); | ||
73 | } | ||
74 | else | ||
75 | { | ||
76 | Token *pRet = new Token( tokIdentifier, sBuf ); | ||
77 | sBuf.clear(); | ||
78 | return pRet; | ||
79 | } | ||
80 | } | ||
81 | else | ||
82 | { | ||
83 | switch( cBuf ) | ||
84 | { | ||
85 | case ' ': | ||
86 | case '\t': | ||
87 | case '\n': | ||
88 | case '\r': | ||
89 | qbIn.seek( 1 ); | ||
90 | continue; | ||
91 | |||
92 | case ':': | ||
93 | qbIn.seek( 1 ); | ||
94 | return new Token( tokColon ); | ||
95 | |||
96 | case ';': | ||
97 | qbIn.seek( 1 ); | ||
98 | return new Token( tokSemiColon ); | ||
99 | |||
100 | case '|': | ||
101 | qbIn.seek( 1 ); | ||
102 | return new Token( tokOr ); | ||
103 | |||
104 | case '=': | ||
105 | qbIn.seek( 1 ); | ||
106 | return new Token( tokEquals ); | ||
107 | |||
108 | case '[': | ||
109 | qbIn.seek( 1 ); | ||
110 | return new Token( tokOpenSquare ); | ||
111 | |||
112 | case ']': | ||
113 | qbIn.seek( 1 ); | ||
114 | return new Token( tokCloseSquare ); | ||
115 | |||
116 | case '{': | ||
117 | qbIn.seek( 1 ); | ||
118 | return new Token( tokOpenCurly ); | ||
119 | |||
120 | case '}': | ||
121 | qbIn.seek( 1 ); | ||
122 | return new Token( tokCloseCurly ); | ||
123 | |||
124 | default: | ||
125 | throw ExceptionBase("Unexpected character '%c'.", | ||
126 | cBuf ); | ||
127 | break; | ||
128 | } | ||
129 | } | ||
130 | } | ||
131 | } | ||
132 | |||
133 | virtual String tokenToString( const Token &t ) | ||
134 | { | ||
135 | switch( (TokenType)t.iToken ) | ||
136 | { | ||
137 | case tokIdentifier: return "tokIdentifier"; | ||
138 | case tokColon: return "tokColon"; | ||
139 | case tokOr: return "tokOr"; | ||
140 | case tokSemiColon: return "tokSemiColon"; | ||
141 | case tokTokens: return "tokTokens"; | ||
142 | case tokEquals: return "tokEquals"; | ||
143 | case tokOpenCurly: return "tokOpenCurly"; | ||
144 | case tokCloseCurly: return "tokCloseCurly"; | ||
145 | case tokOpenSquare: return "tokOpenSquare"; | ||
146 | case tokCloseSquare: return "tokCloseSquare"; | ||
147 | case tokEos: return "tokEos"; | ||
148 | } | ||
149 | |||
150 | return "???"; | ||
151 | } | ||
152 | |||
153 | private: | ||
154 | Stream &rSrc; | ||
155 | QueueBuf qbIn; | ||
156 | String sBuf; | ||
157 | }; | ||
158 | |||
159 | class BnfParser | ||
160 | { | ||
161 | public: | ||
162 | BnfParser( BnfLexer &l ) : | ||
163 | l( l ), | ||
164 | pCur( NULL ), | ||
165 | iLastToken( 0 ) | ||
166 | { | ||
167 | } | ||
168 | |||
169 | virtual ~BnfParser() | ||
170 | { | ||
171 | delete pCur; | ||
172 | pCur = NULL; | ||
173 | } | ||
174 | |||
175 | void parse() | ||
176 | { | ||
177 | for(;;) | ||
178 | { | ||
179 | next(); | ||
180 | switch( pCur->iToken ) | ||
181 | { | ||
182 | case tokTokens: | ||
183 | tokens(); | ||
184 | break; | ||
185 | |||
186 | case tokIdentifier: | ||
187 | nonTerminal(); | ||
188 | break; | ||
189 | |||
190 | case tokEos: | ||
191 | return; | ||
192 | break; | ||
193 | |||
194 | default: | ||
195 | tokenError("tokTokens, tokIdentifier, or tokEos"); | ||
196 | } | ||
197 | } | ||
198 | } | ||
199 | |||
200 | private: | ||
201 | void tokens() | ||
202 | { | ||
203 | next(); | ||
204 | if( pCur->iToken != tokEquals ) | ||
205 | tokenError("tokEquals"); | ||
206 | for(;;) | ||
207 | { | ||
208 | next(); | ||
209 | if( pCur->iToken == tokIdentifier ) | ||
210 | { | ||
211 | hTokens.insert( pCur->vExtra.get<Bu::String>(), ++iLastToken ); | ||
212 | sio << "Added token[" << iLastToken << "]: " | ||
213 | << pCur->vExtra.get<Bu::String>() << sio.nl; | ||
214 | } | ||
215 | else if( pCur->iToken == tokSemiColon ) | ||
216 | break; | ||
217 | else | ||
218 | tokenError("tokIdentifier or tokSemiColon"); | ||
219 | } | ||
220 | } | ||
221 | |||
222 | void nonTerminal() | ||
223 | { | ||
224 | Bu::String sNtName = pCur->vExtra.get<Bu::String>(); | ||
225 | Parser::NonTerminal nt; | ||
226 | p.addNonTerminal( sNtName ); | ||
227 | sio.incIndent(); | ||
228 | sio << "Created non-terminal: " << sNtName << sio.nl; | ||
229 | |||
230 | next(); | ||
231 | if( pCur->iToken != tokColon ) | ||
232 | tokenError("tokColon"); | ||
233 | production( nt ); | ||
234 | for(;;) | ||
235 | { | ||
236 | switch( pCur->iToken ) | ||
237 | { | ||
238 | case tokOr: | ||
239 | production( nt ); | ||
240 | break; | ||
241 | |||
242 | case tokSemiColon: | ||
243 | p.setNonTerminal( sNtName, nt ); | ||
244 | sio.decIndent(); | ||
245 | sio << "Closing non-terminal." << sio.nl; | ||
246 | return; | ||
247 | |||
248 | default: | ||
249 | tokenError("tkOr or tokSemiColon"); | ||
250 | break; | ||
251 | } | ||
252 | } | ||
253 | } | ||
254 | |||
255 | void production( Parser::NonTerminal &nt ) | ||
256 | { | ||
257 | sio.incIndent(); | ||
258 | sio << "Adding new production:" << sio.nl; | ||
259 | Parser::Production pr; | ||
260 | bool bAnything = false; | ||
261 | for(;;) | ||
262 | { | ||
263 | next(); | ||
264 | switch( pCur->iToken ) | ||
265 | { | ||
266 | case tokIdentifier: | ||
267 | { | ||
268 | const Bu::String &sName = | ||
269 | pCur->vExtra.get<Bu::String>(); | ||
270 | if( hTokens.has( sName ) ) | ||
271 | { | ||
272 | pr.append( | ||
273 | Parser::State( | ||
274 | Parser::State::typeTerminal, | ||
275 | hTokens.get( sName ) | ||
276 | ) | ||
277 | ); | ||
278 | sio << "Added terminal " << sName << sio.nl; | ||
279 | } | ||
280 | else | ||
281 | { | ||
282 | if( !p.hasNonTerminal( sName ) ) | ||
283 | { | ||
284 | p.addNonTerminal( sName ); | ||
285 | } | ||
286 | pr.append( | ||
287 | Parser::State( | ||
288 | Parser::State::typeNonTerminal, | ||
289 | p.getNonTerminalId( sName ) | ||
290 | ) | ||
291 | ); | ||
292 | sio << "Added non-terminal " << sName << sio.nl; | ||
293 | } | ||
294 | } | ||
295 | break; | ||
296 | |||
297 | case tokOpenSquare: | ||
298 | { | ||
299 | next(); | ||
300 | if( pCur->iToken != tokIdentifier ) | ||
301 | tokenError("tokIdentifier"); | ||
302 | Bu::String sName = | ||
303 | pCur->vExtra.get<Bu::String>(); | ||
304 | next(); | ||
305 | if( pCur->iToken != tokCloseSquare ) | ||
306 | tokenError("tokCloseSquare"); | ||
307 | |||
308 | if( !hTokens.has( sName ) ) | ||
309 | throw ExceptionBase("Only token names may be " | ||
310 | "enclosed in square brackets."); | ||
311 | |||
312 | pr.append( | ||
313 | Parser::State( | ||
314 | Parser::State::typeTerminalPush, | ||
315 | hTokens.get( sName ) | ||
316 | ) | ||
317 | ); | ||
318 | sio << "Added terminal-push " << sName << sio.nl; | ||
319 | } | ||
320 | break; | ||
321 | |||
322 | case tokOpenCurly: | ||
323 | { | ||
324 | next(); | ||
325 | if( pCur->iToken != tokIdentifier ) | ||
326 | tokenError("tokIdentifier"); | ||
327 | Bu::String sName = | ||
328 | pCur->vExtra.get<Bu::String>(); | ||
329 | next(); | ||
330 | if( pCur->iToken != tokCloseCurly ) | ||
331 | tokenError("tokCloseCurly"); | ||
332 | |||
333 | if( !p.hasReduction( sName ) ) | ||
334 | p.addReduction( sName ); | ||
335 | |||
336 | pr.append( | ||
337 | Parser::State( | ||
338 | Parser::State::typeReduction, | ||
339 | p.getReductionId( sName ) | ||
340 | ) | ||
341 | ); | ||
342 | sio << "Added reduction " << sName << sio.nl; | ||
343 | } | ||
344 | break; | ||
345 | |||
346 | case tokOr: | ||
347 | case tokSemiColon: | ||
348 | if( bAnything ) | ||
349 | { | ||
350 | nt.addProduction( pr ); | ||
351 | sio.decIndent(); | ||
352 | sio << "Closing production." << sio.nl; | ||
353 | } | ||
354 | else | ||
355 | { | ||
356 | nt.setCanSkip(); | ||
357 | sio.decIndent(); | ||
358 | sio << "Closing empty production." << sio.nl; | ||
359 | } | ||
360 | return; | ||
361 | |||
362 | default: | ||
363 | tokenError("tokIdentifier, tokOpenSquare, tokOr, " | ||
364 | "tokOpenCurly, or tokSemiColon"); | ||
365 | } | ||
366 | } | ||
367 | } | ||
368 | |||
369 | private: | ||
370 | void next() | ||
371 | { | ||
372 | delete pCur; | ||
373 | pCur = l.nextToken(); | ||
374 | } | ||
375 | |||
376 | void tokenError( const String &s ) | ||
377 | { | ||
378 | throw ExceptionBase( ("Expected " + s + " but found " | ||
379 | + l.tokenToString( *pCur ) + ".").getStr() ); | ||
380 | } | ||
381 | |||
382 | private: | ||
383 | typedef Bu::Hash<Bu::String, int> TokenHash; | ||
384 | TokenHash hTokens; | ||
385 | BnfLexer &l; | ||
386 | BnfLexer::Token *pCur; | ||
387 | int iLastToken; | ||
388 | Parser p; | ||
389 | }; | ||
390 | |||
391 | int main( int argc, char *argv[] ) | ||
392 | { | ||
393 | if( argc < 2 ) | ||
394 | { | ||
395 | println("Provide an input filename as the first parameter."); | ||
396 | return 0; | ||
397 | } | ||
398 | File fIn( argv[1], File::Read ); | ||
399 | |||
400 | BnfLexer bl( fIn ); | ||
401 | BnfParser parser( bl ); | ||
402 | |||
403 | parser.parse(); | ||
404 | |||
405 | /* | ||
406 | for(;;) | ||
407 | { | ||
408 | Lexer::Token *pTok = bl.nextToken(); | ||
409 | sio << bl.tokenToString(*pTok); | ||
410 | if( pTok->vExtra.isSet() ) | ||
411 | { | ||
412 | sio << " - " << pTok->vExtra; | ||
413 | } | ||
414 | sio << sio.nl; | ||
415 | if( pTok->iToken == tokEos ) | ||
416 | break; | ||
417 | } | ||
418 | */ | ||
419 | |||
420 | return 0; | ||
421 | } | ||
422 | |||