summaryrefslogtreecommitdiff
path: root/src/tools/bnfcompile.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/tools/bnfcompile.cpp')
-rw-r--r--src/tools/bnfcompile.cpp410
1 files changed, 410 insertions, 0 deletions
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp
new file mode 100644
index 0000000..16e75a5
--- /dev/null
+++ b/src/tools/bnfcompile.cpp
@@ -0,0 +1,410 @@
1#include <bu/sio.h>
2#include <bu/lexer.h>
3#include <bu/parser.h>
4#include <bu/file.h>
5#include <bu/queuebuf.h>
6
7using namespace Bu;
8
9enum TokenType
10{
11 tokIdentifier,
12 tokColon,
13 tokOr,
14 tokSemiColon,
15 tokTokens,
16 tokEquals,
17 tokOpenCurly,
18 tokCloseCurly,
19 tokOpenSquare,
20 tokCloseSquare,
21
22 tokEos=-1
23};
24
25class BnfLexer : public Lexer
26{
27public:
28 BnfLexer( Stream &rSrc ) :
29 rSrc( rSrc )
30 {
31 }
32
33 virtual ~BnfLexer()
34 {
35 }
36
37 virtual Token *nextToken()
38 {
39 char cBuf;
40
41 for(;;)
42 {
43 if( qbIn.getSize() == 0 )
44 {
45 char buf[4096];
46 qbIn.write( buf, rSrc.read( buf, 4096 ) );
47
48 if( rSrc.isEos() && qbIn.getSize() == 0 )
49 return new Token( tokEos );
50 }
51 qbIn.peek( &cBuf, 1 );
52 if( (cBuf >= 'a' && cBuf <= 'z') ||
53 (cBuf >= 'A' && cBuf <= 'Z') ||
54 (cBuf >= '0' && cBuf <= '9') ||
55 cBuf == '_' )
56 {
57 sBuf.append( cBuf );
58 qbIn.seek( 1 );
59 }
60 else if( sBuf.isSet() )
61 {
62 if( sBuf == "tokens" )
63 {
64 sBuf.clear();
65 return new Token( tokTokens );
66 }
67 else
68 {
69 Token *pRet = new Token( tokIdentifier, sBuf );
70 sBuf.clear();
71 return pRet;
72 }
73 }
74 else
75 {
76 switch( cBuf )
77 {
78 case ' ':
79 case '\t':
80 case '\n':
81 case '\r':
82 qbIn.seek( 1 );
83 continue;
84
85 case ':':
86 qbIn.seek( 1 );
87 return new Token( tokColon );
88
89 case ';':
90 qbIn.seek( 1 );
91 return new Token( tokSemiColon );
92
93 case '|':
94 qbIn.seek( 1 );
95 return new Token( tokOr );
96
97 case '=':
98 qbIn.seek( 1 );
99 return new Token( tokEquals );
100
101 case '[':
102 qbIn.seek( 1 );
103 return new Token( tokOpenSquare );
104
105 case ']':
106 qbIn.seek( 1 );
107 return new Token( tokCloseSquare );
108
109 case '{':
110 qbIn.seek( 1 );
111 return new Token( tokOpenCurly );
112
113 case '}':
114 qbIn.seek( 1 );
115 return new Token( tokCloseCurly );
116
117 default:
118 throw ExceptionBase("Unexpected character '%c'.",
119 cBuf );
120 break;
121 }
122 }
123 }
124 }
125
126 virtual FString tokenToString( const Token &t )
127 {
128 switch( (TokenType)t.iToken )
129 {
130 case tokIdentifier: return "tokIdentifier";
131 case tokColon: return "tokColon";
132 case tokOr: return "tokOr";
133 case tokSemiColon: return "tokSemiColon";
134 case tokTokens: return "tokTokens";
135 case tokEquals: return "tokEquals";
136 case tokOpenCurly: return "tokOpenCurly";
137 case tokCloseCurly: return "tokCloseCurly";
138 case tokOpenSquare: return "tokOpenSquare";
139 case tokCloseSquare: return "tokCloseSquare";
140 case tokEos: return "tokEos";
141 }
142
143 return "???";
144 }
145
146private:
147 Stream &rSrc;
148 QueueBuf qbIn;
149 FString sBuf;
150};
151
152class BnfParser
153{
154public:
155 BnfParser( BnfLexer &l ) :
156 l( l ),
157 pCur( NULL ),
158 iLastToken( 0 )
159 {
160 }
161
162 virtual ~BnfParser()
163 {
164 delete pCur;
165 pCur = NULL;
166 }
167
168 void parse()
169 {
170 for(;;)
171 {
172 next();
173 switch( pCur->iToken )
174 {
175 case tokTokens:
176 tokens();
177 break;
178
179 case tokIdentifier:
180 nonTerminal();
181 break;
182
183 case tokEos:
184 return;
185 break;
186
187 default:
188 tokenError("tokTokens, tokIdentifier, or tokEos");
189 }
190 }
191 }
192
193private:
194 void tokens()
195 {
196 next();
197 if( pCur->iToken != tokEquals )
198 tokenError("tokEquals");
199 for(;;)
200 {
201 next();
202 if( pCur->iToken == tokIdentifier )
203 {
204 hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken );
205 sio << "Added token[" << iLastToken << "]: "
206 << pCur->vExtra.get<Bu::FString>() << sio.nl;
207 }
208 else if( pCur->iToken == tokSemiColon )
209 break;
210 else
211 tokenError("tokIdentifier or tokSemiColon");
212 }
213 }
214
215 void nonTerminal()
216 {
217 Bu::FString sNtName = pCur->vExtra.get<Bu::FString>();
218 Parser::NonTerminal nt;
219 p.addNonTerminal( sNtName );
220 sio.incIndent();
221 sio << "Created non-terminal: " << sNtName << sio.nl;
222
223 next();
224 if( pCur->iToken != tokColon )
225 tokenError("tokColon");
226 production( nt );
227 for(;;)
228 {
229 switch( pCur->iToken )
230 {
231 case tokOr:
232 production( nt );
233 break;
234
235 case tokSemiColon:
236 p.setNonTerminal( sNtName, nt );
237 sio.decIndent();
238 sio << "Closing non-terminal." << sio.nl;
239 return;
240
241 default:
242 tokenError("tkOr or tokSemiColon");
243 break;
244 }
245 }
246 }
247
248 void production( Parser::NonTerminal &nt )
249 {
250 sio.incIndent();
251 sio << "Adding new production:" << sio.nl;
252 Parser::Production pr;
253 bool bAnything = false;
254 for(;;)
255 {
256 next();
257 switch( pCur->iToken )
258 {
259 case tokIdentifier:
260 {
261 const Bu::FString &sName =
262 pCur->vExtra.get<Bu::FString>();
263 if( hTokens.has( sName ) )
264 {
265 pr.append(
266 Parser::State(
267 Parser::State::typeTerminal,
268 hTokens.get( sName )
269 )
270 );
271 sio << "Added terminal " << sName << sio.nl;
272 }
273 else
274 {
275 if( !p.hasNonTerminal( sName ) )
276 {
277 p.addNonTerminal( sName );
278 }
279 pr.append(
280 Parser::State(
281 Parser::State::typeNonTerminal,
282 p.getNonTerminalId( sName )
283 )
284 );
285 sio << "Added non-terminal " << sName << sio.nl;
286 }
287 }
288 break;
289
290 case tokOpenSquare:
291 {
292 next();
293 if( pCur->iToken != tokIdentifier )
294 tokenError("tokIdentifier");
295 Bu::FString sName =
296 pCur->vExtra.get<Bu::FString>();
297 next();
298 if( pCur->iToken != tokCloseSquare )
299 tokenError("tokCloseSquare");
300
301 if( !hTokens.has( sName ) )
302 throw ExceptionBase("Only token names may be "
303 "enclosed in square brackets.");
304
305 pr.append(
306 Parser::State(
307 Parser::State::typeTerminalPush,
308 hTokens.get( sName )
309 )
310 );
311 sio << "Added terminal-push " << sName << sio.nl;
312 }
313 break;
314
315 case tokOpenCurly:
316 {
317 next();
318 if( pCur->iToken != tokIdentifier )
319 tokenError("tokIdentifier");
320 Bu::FString sName =
321 pCur->vExtra.get<Bu::FString>();
322 next();
323 if( pCur->iToken != tokCloseCurly )
324 tokenError("tokCloseCurly");
325
326 if( !p.hasReduction( sName ) )
327 p.addReduction( sName );
328
329 pr.append(
330 Parser::State(
331 Parser::State::typeReduction,
332 p.getReductionId( sName )
333 )
334 );
335 sio << "Added reduction " << sName << sio.nl;
336 }
337 break;
338
339 case tokOr:
340 case tokSemiColon:
341 if( bAnything )
342 {
343 nt.addProduction( pr );
344 sio.decIndent();
345 sio << "Closing production." << sio.nl;
346 }
347 else
348 {
349 nt.setCanSkip();
350 sio.decIndent();
351 sio << "Closing empty production." << sio.nl;
352 }
353 return;
354
355 default:
356 tokenError("tokIdentifier, tokOpenSquare, tokOr, "
357 "tokOpenCurly, or tokSemiColon");
358 }
359 }
360 }
361
362private:
363 void next()
364 {
365 delete pCur;
366 pCur = l.nextToken();
367 }
368
369 void tokenError( const FString &s )
370 {
371 throw ExceptionBase( ("Expected " + s + " but found "
372 + l.tokenToString( *pCur ) + ".").getStr() );
373 }
374
375private:
376 typedef Bu::Hash<Bu::FString, int> TokenHash;
377 TokenHash hTokens;
378 BnfLexer &l;
379 BnfLexer::Token *pCur;
380 int iLastToken;
381 Parser p;
382};
383
384int main( int argc, char *argv[] )
385{
386 File fIn( argv[1], File::Read );
387
388 BnfLexer bl( fIn );
389 BnfParser parser( bl );
390
391 parser.parse();
392
393/*
394 for(;;)
395 {
396 Lexer::Token *pTok = bl.nextToken();
397 sio << bl.tokenToString(*pTok);
398 if( pTok->vExtra.isSet() )
399 {
400 sio << " - " << pTok->vExtra;
401 }
402 sio << sio.nl;
403 if( pTok->iToken == tokEos )
404 break;
405 }
406*/
407
408 return 0;
409}
410