diff options
author | Mike Buland <eichlan@xagasoft.com> | 2010-10-18 04:38:19 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2010-10-18 04:38:19 +0000 |
commit | 78463c30031a478936b21168a6fc93ae6eeaaeb9 (patch) | |
tree | 2216344a91f23088dcb4cf93492802c75577fad1 /src/tools/bnfcompile.cpp | |
parent | 00ecd458ced768b3b8752cdd421a22244b7adc01 (diff) | |
download | libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.gz libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.bz2 libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.xz libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.zip |
Several of these new files will go away, but I didn't want to lose them for now.
The parser works! The parser compiler works! It makes parsers!
Now we just have to implement post processing, token lookup tables, and storage.
Diffstat (limited to '')
-rw-r--r-- | src/tools/bnfcompile.cpp | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp new file mode 100644 index 0000000..16e75a5 --- /dev/null +++ b/src/tools/bnfcompile.cpp | |||
@@ -0,0 +1,410 @@ | |||
1 | #include <bu/sio.h> | ||
2 | #include <bu/lexer.h> | ||
3 | #include <bu/parser.h> | ||
4 | #include <bu/file.h> | ||
5 | #include <bu/queuebuf.h> | ||
6 | |||
7 | using namespace Bu; | ||
8 | |||
/// Token types produced by BnfLexer for the BNF grammar language.
enum TokenType
{
	tokIdentifier,   // terminal or non-terminal name: [A-Za-z0-9_]+
	tokColon,        // ':'  separates a non-terminal from its productions
	tokOr,           // '|'  separates alternative productions
	tokSemiColon,    // ';'  ends a token list or a non-terminal definition
	tokTokens,       // the keyword "tokens" (starts the token declaration list)
	tokEquals,       // '='  follows the "tokens" keyword
	tokOpenCurly,    // '{'  opens a reduction name
	tokCloseCurly,   // '}'  closes a reduction name
	tokOpenSquare,   // '['  opens a terminal-push name
	tokCloseSquare,  // ']'  closes a terminal-push name

	tokEos=-1        // end of input stream
};
24 | |||
25 | class BnfLexer : public Lexer | ||
26 | { | ||
27 | public: | ||
28 | BnfLexer( Stream &rSrc ) : | ||
29 | rSrc( rSrc ) | ||
30 | { | ||
31 | } | ||
32 | |||
33 | virtual ~BnfLexer() | ||
34 | { | ||
35 | } | ||
36 | |||
37 | virtual Token *nextToken() | ||
38 | { | ||
39 | char cBuf; | ||
40 | |||
41 | for(;;) | ||
42 | { | ||
43 | if( qbIn.getSize() == 0 ) | ||
44 | { | ||
45 | char buf[4096]; | ||
46 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
47 | |||
48 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
49 | return new Token( tokEos ); | ||
50 | } | ||
51 | qbIn.peek( &cBuf, 1 ); | ||
52 | if( (cBuf >= 'a' && cBuf <= 'z') || | ||
53 | (cBuf >= 'A' && cBuf <= 'Z') || | ||
54 | (cBuf >= '0' && cBuf <= '9') || | ||
55 | cBuf == '_' ) | ||
56 | { | ||
57 | sBuf.append( cBuf ); | ||
58 | qbIn.seek( 1 ); | ||
59 | } | ||
60 | else if( sBuf.isSet() ) | ||
61 | { | ||
62 | if( sBuf == "tokens" ) | ||
63 | { | ||
64 | sBuf.clear(); | ||
65 | return new Token( tokTokens ); | ||
66 | } | ||
67 | else | ||
68 | { | ||
69 | Token *pRet = new Token( tokIdentifier, sBuf ); | ||
70 | sBuf.clear(); | ||
71 | return pRet; | ||
72 | } | ||
73 | } | ||
74 | else | ||
75 | { | ||
76 | switch( cBuf ) | ||
77 | { | ||
78 | case ' ': | ||
79 | case '\t': | ||
80 | case '\n': | ||
81 | case '\r': | ||
82 | qbIn.seek( 1 ); | ||
83 | continue; | ||
84 | |||
85 | case ':': | ||
86 | qbIn.seek( 1 ); | ||
87 | return new Token( tokColon ); | ||
88 | |||
89 | case ';': | ||
90 | qbIn.seek( 1 ); | ||
91 | return new Token( tokSemiColon ); | ||
92 | |||
93 | case '|': | ||
94 | qbIn.seek( 1 ); | ||
95 | return new Token( tokOr ); | ||
96 | |||
97 | case '=': | ||
98 | qbIn.seek( 1 ); | ||
99 | return new Token( tokEquals ); | ||
100 | |||
101 | case '[': | ||
102 | qbIn.seek( 1 ); | ||
103 | return new Token( tokOpenSquare ); | ||
104 | |||
105 | case ']': | ||
106 | qbIn.seek( 1 ); | ||
107 | return new Token( tokCloseSquare ); | ||
108 | |||
109 | case '{': | ||
110 | qbIn.seek( 1 ); | ||
111 | return new Token( tokOpenCurly ); | ||
112 | |||
113 | case '}': | ||
114 | qbIn.seek( 1 ); | ||
115 | return new Token( tokCloseCurly ); | ||
116 | |||
117 | default: | ||
118 | throw ExceptionBase("Unexpected character '%c'.", | ||
119 | cBuf ); | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | } | ||
124 | } | ||
125 | |||
126 | virtual FString tokenToString( const Token &t ) | ||
127 | { | ||
128 | switch( (TokenType)t.iToken ) | ||
129 | { | ||
130 | case tokIdentifier: return "tokIdentifier"; | ||
131 | case tokColon: return "tokColon"; | ||
132 | case tokOr: return "tokOr"; | ||
133 | case tokSemiColon: return "tokSemiColon"; | ||
134 | case tokTokens: return "tokTokens"; | ||
135 | case tokEquals: return "tokEquals"; | ||
136 | case tokOpenCurly: return "tokOpenCurly"; | ||
137 | case tokCloseCurly: return "tokCloseCurly"; | ||
138 | case tokOpenSquare: return "tokOpenSquare"; | ||
139 | case tokCloseSquare: return "tokCloseSquare"; | ||
140 | case tokEos: return "tokEos"; | ||
141 | } | ||
142 | |||
143 | return "???"; | ||
144 | } | ||
145 | |||
146 | private: | ||
147 | Stream &rSrc; | ||
148 | QueueBuf qbIn; | ||
149 | FString sBuf; | ||
150 | }; | ||
151 | |||
152 | class BnfParser | ||
153 | { | ||
154 | public: | ||
155 | BnfParser( BnfLexer &l ) : | ||
156 | l( l ), | ||
157 | pCur( NULL ), | ||
158 | iLastToken( 0 ) | ||
159 | { | ||
160 | } | ||
161 | |||
162 | virtual ~BnfParser() | ||
163 | { | ||
164 | delete pCur; | ||
165 | pCur = NULL; | ||
166 | } | ||
167 | |||
168 | void parse() | ||
169 | { | ||
170 | for(;;) | ||
171 | { | ||
172 | next(); | ||
173 | switch( pCur->iToken ) | ||
174 | { | ||
175 | case tokTokens: | ||
176 | tokens(); | ||
177 | break; | ||
178 | |||
179 | case tokIdentifier: | ||
180 | nonTerminal(); | ||
181 | break; | ||
182 | |||
183 | case tokEos: | ||
184 | return; | ||
185 | break; | ||
186 | |||
187 | default: | ||
188 | tokenError("tokTokens, tokIdentifier, or tokEos"); | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | |||
193 | private: | ||
194 | void tokens() | ||
195 | { | ||
196 | next(); | ||
197 | if( pCur->iToken != tokEquals ) | ||
198 | tokenError("tokEquals"); | ||
199 | for(;;) | ||
200 | { | ||
201 | next(); | ||
202 | if( pCur->iToken == tokIdentifier ) | ||
203 | { | ||
204 | hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken ); | ||
205 | sio << "Added token[" << iLastToken << "]: " | ||
206 | << pCur->vExtra.get<Bu::FString>() << sio.nl; | ||
207 | } | ||
208 | else if( pCur->iToken == tokSemiColon ) | ||
209 | break; | ||
210 | else | ||
211 | tokenError("tokIdentifier or tokSemiColon"); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | void nonTerminal() | ||
216 | { | ||
217 | Bu::FString sNtName = pCur->vExtra.get<Bu::FString>(); | ||
218 | Parser::NonTerminal nt; | ||
219 | p.addNonTerminal( sNtName ); | ||
220 | sio.incIndent(); | ||
221 | sio << "Created non-terminal: " << sNtName << sio.nl; | ||
222 | |||
223 | next(); | ||
224 | if( pCur->iToken != tokColon ) | ||
225 | tokenError("tokColon"); | ||
226 | production( nt ); | ||
227 | for(;;) | ||
228 | { | ||
229 | switch( pCur->iToken ) | ||
230 | { | ||
231 | case tokOr: | ||
232 | production( nt ); | ||
233 | break; | ||
234 | |||
235 | case tokSemiColon: | ||
236 | p.setNonTerminal( sNtName, nt ); | ||
237 | sio.decIndent(); | ||
238 | sio << "Closing non-terminal." << sio.nl; | ||
239 | return; | ||
240 | |||
241 | default: | ||
242 | tokenError("tkOr or tokSemiColon"); | ||
243 | break; | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | void production( Parser::NonTerminal &nt ) | ||
249 | { | ||
250 | sio.incIndent(); | ||
251 | sio << "Adding new production:" << sio.nl; | ||
252 | Parser::Production pr; | ||
253 | bool bAnything = false; | ||
254 | for(;;) | ||
255 | { | ||
256 | next(); | ||
257 | switch( pCur->iToken ) | ||
258 | { | ||
259 | case tokIdentifier: | ||
260 | { | ||
261 | const Bu::FString &sName = | ||
262 | pCur->vExtra.get<Bu::FString>(); | ||
263 | if( hTokens.has( sName ) ) | ||
264 | { | ||
265 | pr.append( | ||
266 | Parser::State( | ||
267 | Parser::State::typeTerminal, | ||
268 | hTokens.get( sName ) | ||
269 | ) | ||
270 | ); | ||
271 | sio << "Added terminal " << sName << sio.nl; | ||
272 | } | ||
273 | else | ||
274 | { | ||
275 | if( !p.hasNonTerminal( sName ) ) | ||
276 | { | ||
277 | p.addNonTerminal( sName ); | ||
278 | } | ||
279 | pr.append( | ||
280 | Parser::State( | ||
281 | Parser::State::typeNonTerminal, | ||
282 | p.getNonTerminalId( sName ) | ||
283 | ) | ||
284 | ); | ||
285 | sio << "Added non-terminal " << sName << sio.nl; | ||
286 | } | ||
287 | } | ||
288 | break; | ||
289 | |||
290 | case tokOpenSquare: | ||
291 | { | ||
292 | next(); | ||
293 | if( pCur->iToken != tokIdentifier ) | ||
294 | tokenError("tokIdentifier"); | ||
295 | Bu::FString sName = | ||
296 | pCur->vExtra.get<Bu::FString>(); | ||
297 | next(); | ||
298 | if( pCur->iToken != tokCloseSquare ) | ||
299 | tokenError("tokCloseSquare"); | ||
300 | |||
301 | if( !hTokens.has( sName ) ) | ||
302 | throw ExceptionBase("Only token names may be " | ||
303 | "enclosed in square brackets."); | ||
304 | |||
305 | pr.append( | ||
306 | Parser::State( | ||
307 | Parser::State::typeTerminalPush, | ||
308 | hTokens.get( sName ) | ||
309 | ) | ||
310 | ); | ||
311 | sio << "Added terminal-push " << sName << sio.nl; | ||
312 | } | ||
313 | break; | ||
314 | |||
315 | case tokOpenCurly: | ||
316 | { | ||
317 | next(); | ||
318 | if( pCur->iToken != tokIdentifier ) | ||
319 | tokenError("tokIdentifier"); | ||
320 | Bu::FString sName = | ||
321 | pCur->vExtra.get<Bu::FString>(); | ||
322 | next(); | ||
323 | if( pCur->iToken != tokCloseCurly ) | ||
324 | tokenError("tokCloseCurly"); | ||
325 | |||
326 | if( !p.hasReduction( sName ) ) | ||
327 | p.addReduction( sName ); | ||
328 | |||
329 | pr.append( | ||
330 | Parser::State( | ||
331 | Parser::State::typeReduction, | ||
332 | p.getReductionId( sName ) | ||
333 | ) | ||
334 | ); | ||
335 | sio << "Added reduction " << sName << sio.nl; | ||
336 | } | ||
337 | break; | ||
338 | |||
339 | case tokOr: | ||
340 | case tokSemiColon: | ||
341 | if( bAnything ) | ||
342 | { | ||
343 | nt.addProduction( pr ); | ||
344 | sio.decIndent(); | ||
345 | sio << "Closing production." << sio.nl; | ||
346 | } | ||
347 | else | ||
348 | { | ||
349 | nt.setCanSkip(); | ||
350 | sio.decIndent(); | ||
351 | sio << "Closing empty production." << sio.nl; | ||
352 | } | ||
353 | return; | ||
354 | |||
355 | default: | ||
356 | tokenError("tokIdentifier, tokOpenSquare, tokOr, " | ||
357 | "tokOpenCurly, or tokSemiColon"); | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | |||
362 | private: | ||
363 | void next() | ||
364 | { | ||
365 | delete pCur; | ||
366 | pCur = l.nextToken(); | ||
367 | } | ||
368 | |||
369 | void tokenError( const FString &s ) | ||
370 | { | ||
371 | throw ExceptionBase( ("Expected " + s + " but found " | ||
372 | + l.tokenToString( *pCur ) + ".").getStr() ); | ||
373 | } | ||
374 | |||
375 | private: | ||
376 | typedef Bu::Hash<Bu::FString, int> TokenHash; | ||
377 | TokenHash hTokens; | ||
378 | BnfLexer &l; | ||
379 | BnfLexer::Token *pCur; | ||
380 | int iLastToken; | ||
381 | Parser p; | ||
382 | }; | ||
383 | |||
384 | int main( int argc, char *argv[] ) | ||
385 | { | ||
386 | File fIn( argv[1], File::Read ); | ||
387 | |||
388 | BnfLexer bl( fIn ); | ||
389 | BnfParser parser( bl ); | ||
390 | |||
391 | parser.parse(); | ||
392 | |||
393 | /* | ||
394 | for(;;) | ||
395 | { | ||
396 | Lexer::Token *pTok = bl.nextToken(); | ||
397 | sio << bl.tokenToString(*pTok); | ||
398 | if( pTok->vExtra.isSet() ) | ||
399 | { | ||
400 | sio << " - " << pTok->vExtra; | ||
401 | } | ||
402 | sio << sio.nl; | ||
403 | if( pTok->iToken == tokEos ) | ||
404 | break; | ||
405 | } | ||
406 | */ | ||
407 | |||
408 | return 0; | ||
409 | } | ||
410 | |||