diff options
author | Mike Buland <eichlan@xagasoft.com> | 2010-10-18 04:38:19 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2010-10-18 04:38:19 +0000 |
commit | 78463c30031a478936b21168a6fc93ae6eeaaeb9 (patch) | |
tree | 2216344a91f23088dcb4cf93492802c75577fad1 | |
parent | 00ecd458ced768b3b8752cdd421a22244b7adc01 (diff) | |
download | libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.gz libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.bz2 libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.xz libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.zip |
Several of these new files will go away, but I didn't want to lose them for now.
The parser works! The parser compiler works! It makes parsers!
Now we just have to implement post processing, token lookup tables, and storage.
-rw-r--r-- | bnftest | 18 | ||||
-rw-r--r-- | bnftest.2 | 24 | ||||
-rw-r--r-- | src/lexer.h | 4 | ||||
-rw-r--r-- | src/parser.cpp | 10 | ||||
-rw-r--r-- | src/parser.h | 2 | ||||
-rw-r--r-- | src/tools/bnfcompile.cpp | 410 |
6 files changed, 468 insertions, 0 deletions
@@ -0,0 +1,18 @@ | |||
1 | tokens = tokPlus tokMinus tokMult tokDivide tokOpenParen tokCloseParen | ||
2 | tokEquals tokNumber; | ||
3 | |||
4 | input: input line | ||
5 | | | ||
6 | ; | ||
7 | |||
8 | line: expr tokEquals {print} | ||
9 | ; | ||
10 | |||
11 | expr: expr tokPlus expr {add} | ||
12 | | expr tokMinus expr {subtract} | ||
13 | | expr tokMult expr {multiply} | ||
14 | | expr tokDivide expr {divide} | ||
15 | | tokOpenParen expr tokCloseParen | ||
16 | | [tokNumber] | ||
17 | ; | ||
18 | |||
diff --git a/bnftest.2 b/bnftest.2 new file mode 100644 index 0000000..229943b --- /dev/null +++ b/bnftest.2 | |||
@@ -0,0 +1,24 @@ | |||
1 | tokens = tokPlus tokMinus tokMult tokDivide tokOpenParen tokCloseParen | ||
2 | tokEquals tokNumber; | ||
3 | |||
4 | input: line input# | ||
5 | | | ||
6 | ; | ||
7 | |||
8 | input#: line input# | ||
9 | | | ||
10 | ; | ||
11 | |||
12 | line: expr tokEquals {print} | ||
13 | ; | ||
14 | |||
15 | expr: tokOpenParen expr tokCloseParen expr# | ||
16 | | [tokNumber] expr# | ||
17 | ; | ||
18 | |||
19 | expr#: tokPlus expr {add} expr# | ||
20 | | tokMinus expr {subtract} expr# | ||
21 | | tokMult expr {multiply} expr# | ||
22 | | tokDivide expr {divide} expr# | ||
23 | | | ||
24 | ; | ||
diff --git a/src/lexer.h b/src/lexer.h index 2cb5f51..9840afe 100644 --- a/src/lexer.h +++ b/src/lexer.h | |||
@@ -36,6 +36,10 @@ namespace Bu | |||
36 | } | 36 | } |
37 | TokenType iToken; | 37 | TokenType iToken; |
38 | Bu::Variant vExtra; | 38 | Bu::Variant vExtra; |
39 | int iStartCol; | ||
40 | int iStartRow; | ||
41 | int iEndCol; | ||
42 | int iEndRow; | ||
39 | }; | 43 | }; |
40 | 44 | ||
41 | virtual Token *nextToken()=0; | 45 | virtual Token *nextToken()=0; |
diff --git a/src/parser.cpp b/src/parser.cpp index 3ce73b6..4ad4ff9 100644 --- a/src/parser.cpp +++ b/src/parser.cpp | |||
@@ -205,6 +205,11 @@ int Bu::Parser::getNonTerminalId( const Bu::FString &sName ) | |||
205 | return hNonTerminalName.get( sName ); | 205 | return hNonTerminalName.get( sName ); |
206 | } | 206 | } |
207 | 207 | ||
208 | bool Bu::Parser::hasNonTerminal( const Bu::FString &sName ) | ||
209 | { | ||
210 | return hNonTerminalName.has( sName ); | ||
211 | } | ||
212 | |||
208 | int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) | 213 | int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) |
209 | { | 214 | { |
210 | int iId = aReduction.getSize(); | 215 | int iId = aReduction.getSize(); |
@@ -231,6 +236,11 @@ int Bu::Parser::getReductionId( const Bu::FString &sName ) | |||
231 | return hReductionName.get( sName ); | 236 | return hReductionName.get( sName ); |
232 | } | 237 | } |
233 | 238 | ||
239 | bool Bu::Parser::hasReduction( const Bu::FString &sName ) | ||
240 | { | ||
241 | return hReductionName.has( sName ); | ||
242 | } | ||
243 | |||
234 | // | 244 | // |
235 | // Bu::Parser::State | 245 | // Bu::Parser::State |
236 | // | 246 | // |
diff --git a/src/parser.h b/src/parser.h index 1679c7f..a925188 100644 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -91,11 +91,13 @@ namespace Bu | |||
91 | int addNonTerminal( const Bu::FString &sName ); | 91 | int addNonTerminal( const Bu::FString &sName ); |
92 | void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); | 92 | void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); |
93 | int getNonTerminalId( const Bu::FString &sName ); | 93 | int getNonTerminalId( const Bu::FString &sName ); |
94 | bool hasNonTerminal( const Bu::FString &sName ); | ||
94 | 95 | ||
95 | int addReduction( const Bu::FString &sName, const Reduction &r ); | 96 | int addReduction( const Bu::FString &sName, const Reduction &r ); |
96 | int addReduction( const Bu::FString &sName ); | 97 | int addReduction( const Bu::FString &sName ); |
97 | void setReduction( const Bu::FString &sName, const Reduction &r ); | 98 | void setReduction( const Bu::FString &sName, const Reduction &r ); |
98 | int getReductionId( const Bu::FString &sName ); | 99 | int getReductionId( const Bu::FString &sName ); |
100 | bool hasReduction( const Bu::FString &sName ); | ||
99 | 101 | ||
100 | private: | 102 | private: |
101 | bool selectProduction( int iNt, Lexer::Token *ptCur ); | 103 | bool selectProduction( int iNt, Lexer::Token *ptCur ); |
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp new file mode 100644 index 0000000..16e75a5 --- /dev/null +++ b/src/tools/bnfcompile.cpp | |||
@@ -0,0 +1,410 @@ | |||
1 | #include <bu/sio.h> | ||
2 | #include <bu/lexer.h> | ||
3 | #include <bu/parser.h> | ||
4 | #include <bu/file.h> | ||
5 | #include <bu/queuebuf.h> | ||
6 | |||
7 | using namespace Bu; | ||
8 | |||
9 | enum TokenType | ||
10 | { | ||
11 | tokIdentifier, | ||
12 | tokColon, | ||
13 | tokOr, | ||
14 | tokSemiColon, | ||
15 | tokTokens, | ||
16 | tokEquals, | ||
17 | tokOpenCurly, | ||
18 | tokCloseCurly, | ||
19 | tokOpenSquare, | ||
20 | tokCloseSquare, | ||
21 | |||
22 | tokEos=-1 | ||
23 | }; | ||
24 | |||
25 | class BnfLexer : public Lexer | ||
26 | { | ||
27 | public: | ||
28 | BnfLexer( Stream &rSrc ) : | ||
29 | rSrc( rSrc ) | ||
30 | { | ||
31 | } | ||
32 | |||
33 | virtual ~BnfLexer() | ||
34 | { | ||
35 | } | ||
36 | |||
37 | virtual Token *nextToken() | ||
38 | { | ||
39 | char cBuf; | ||
40 | |||
41 | for(;;) | ||
42 | { | ||
43 | if( qbIn.getSize() == 0 ) | ||
44 | { | ||
45 | char buf[4096]; | ||
46 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
47 | |||
48 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
49 | return new Token( tokEos ); | ||
50 | } | ||
51 | qbIn.peek( &cBuf, 1 ); | ||
52 | if( (cBuf >= 'a' && cBuf <= 'z') || | ||
53 | (cBuf >= 'A' && cBuf <= 'Z') || | ||
54 | (cBuf >= '0' && cBuf <= '9') || | ||
55 | cBuf == '_' ) | ||
56 | { | ||
57 | sBuf.append( cBuf ); | ||
58 | qbIn.seek( 1 ); | ||
59 | } | ||
60 | else if( sBuf.isSet() ) | ||
61 | { | ||
62 | if( sBuf == "tokens" ) | ||
63 | { | ||
64 | sBuf.clear(); | ||
65 | return new Token( tokTokens ); | ||
66 | } | ||
67 | else | ||
68 | { | ||
69 | Token *pRet = new Token( tokIdentifier, sBuf ); | ||
70 | sBuf.clear(); | ||
71 | return pRet; | ||
72 | } | ||
73 | } | ||
74 | else | ||
75 | { | ||
76 | switch( cBuf ) | ||
77 | { | ||
78 | case ' ': | ||
79 | case '\t': | ||
80 | case '\n': | ||
81 | case '\r': | ||
82 | qbIn.seek( 1 ); | ||
83 | continue; | ||
84 | |||
85 | case ':': | ||
86 | qbIn.seek( 1 ); | ||
87 | return new Token( tokColon ); | ||
88 | |||
89 | case ';': | ||
90 | qbIn.seek( 1 ); | ||
91 | return new Token( tokSemiColon ); | ||
92 | |||
93 | case '|': | ||
94 | qbIn.seek( 1 ); | ||
95 | return new Token( tokOr ); | ||
96 | |||
97 | case '=': | ||
98 | qbIn.seek( 1 ); | ||
99 | return new Token( tokEquals ); | ||
100 | |||
101 | case '[': | ||
102 | qbIn.seek( 1 ); | ||
103 | return new Token( tokOpenSquare ); | ||
104 | |||
105 | case ']': | ||
106 | qbIn.seek( 1 ); | ||
107 | return new Token( tokCloseSquare ); | ||
108 | |||
109 | case '{': | ||
110 | qbIn.seek( 1 ); | ||
111 | return new Token( tokOpenCurly ); | ||
112 | |||
113 | case '}': | ||
114 | qbIn.seek( 1 ); | ||
115 | return new Token( tokCloseCurly ); | ||
116 | |||
117 | default: | ||
118 | throw ExceptionBase("Unexpected character '%c'.", | ||
119 | cBuf ); | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | } | ||
124 | } | ||
125 | |||
126 | virtual FString tokenToString( const Token &t ) | ||
127 | { | ||
128 | switch( (TokenType)t.iToken ) | ||
129 | { | ||
130 | case tokIdentifier: return "tokIdentifier"; | ||
131 | case tokColon: return "tokColon"; | ||
132 | case tokOr: return "tokOr"; | ||
133 | case tokSemiColon: return "tokSemiColon"; | ||
134 | case tokTokens: return "tokTokens"; | ||
135 | case tokEquals: return "tokEquals"; | ||
136 | case tokOpenCurly: return "tokOpenCurly"; | ||
137 | case tokCloseCurly: return "tokCloseCurly"; | ||
138 | case tokOpenSquare: return "tokOpenSquare"; | ||
139 | case tokCloseSquare: return "tokCloseSquare"; | ||
140 | case tokEos: return "tokEos"; | ||
141 | } | ||
142 | |||
143 | return "???"; | ||
144 | } | ||
145 | |||
146 | private: | ||
147 | Stream &rSrc; | ||
148 | QueueBuf qbIn; | ||
149 | FString sBuf; | ||
150 | }; | ||
151 | |||
152 | class BnfParser | ||
153 | { | ||
154 | public: | ||
155 | BnfParser( BnfLexer &l ) : | ||
156 | l( l ), | ||
157 | pCur( NULL ), | ||
158 | iLastToken( 0 ) | ||
159 | { | ||
160 | } | ||
161 | |||
162 | virtual ~BnfParser() | ||
163 | { | ||
164 | delete pCur; | ||
165 | pCur = NULL; | ||
166 | } | ||
167 | |||
168 | void parse() | ||
169 | { | ||
170 | for(;;) | ||
171 | { | ||
172 | next(); | ||
173 | switch( pCur->iToken ) | ||
174 | { | ||
175 | case tokTokens: | ||
176 | tokens(); | ||
177 | break; | ||
178 | |||
179 | case tokIdentifier: | ||
180 | nonTerminal(); | ||
181 | break; | ||
182 | |||
183 | case tokEos: | ||
184 | return; | ||
185 | break; | ||
186 | |||
187 | default: | ||
188 | tokenError("tokTokens, tokIdentifier, or tokEos"); | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | |||
193 | private: | ||
194 | void tokens() | ||
195 | { | ||
196 | next(); | ||
197 | if( pCur->iToken != tokEquals ) | ||
198 | tokenError("tokEquals"); | ||
199 | for(;;) | ||
200 | { | ||
201 | next(); | ||
202 | if( pCur->iToken == tokIdentifier ) | ||
203 | { | ||
204 | hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken ); | ||
205 | sio << "Added token[" << iLastToken << "]: " | ||
206 | << pCur->vExtra.get<Bu::FString>() << sio.nl; | ||
207 | } | ||
208 | else if( pCur->iToken == tokSemiColon ) | ||
209 | break; | ||
210 | else | ||
211 | tokenError("tokIdentifier or tokSemiColon"); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | void nonTerminal() | ||
216 | { | ||
217 | Bu::FString sNtName = pCur->vExtra.get<Bu::FString>(); | ||
218 | Parser::NonTerminal nt; | ||
219 | p.addNonTerminal( sNtName ); | ||
220 | sio.incIndent(); | ||
221 | sio << "Created non-terminal: " << sNtName << sio.nl; | ||
222 | |||
223 | next(); | ||
224 | if( pCur->iToken != tokColon ) | ||
225 | tokenError("tokColon"); | ||
226 | production( nt ); | ||
227 | for(;;) | ||
228 | { | ||
229 | switch( pCur->iToken ) | ||
230 | { | ||
231 | case tokOr: | ||
232 | production( nt ); | ||
233 | break; | ||
234 | |||
235 | case tokSemiColon: | ||
236 | p.setNonTerminal( sNtName, nt ); | ||
237 | sio.decIndent(); | ||
238 | sio << "Closing non-terminal." << sio.nl; | ||
239 | return; | ||
240 | |||
241 | default: | ||
242 | tokenError("tkOr or tokSemiColon"); | ||
243 | break; | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | void production( Parser::NonTerminal &nt ) | ||
249 | { | ||
250 | sio.incIndent(); | ||
251 | sio << "Adding new production:" << sio.nl; | ||
252 | Parser::Production pr; | ||
253 | bool bAnything = false; | ||
254 | for(;;) | ||
255 | { | ||
256 | next(); | ||
257 | switch( pCur->iToken ) | ||
258 | { | ||
259 | case tokIdentifier: | ||
260 | { | ||
261 | const Bu::FString &sName = | ||
262 | pCur->vExtra.get<Bu::FString>(); | ||
263 | if( hTokens.has( sName ) ) | ||
264 | { | ||
265 | pr.append( | ||
266 | Parser::State( | ||
267 | Parser::State::typeTerminal, | ||
268 | hTokens.get( sName ) | ||
269 | ) | ||
270 | ); | ||
271 | sio << "Added terminal " << sName << sio.nl; | ||
272 | } | ||
273 | else | ||
274 | { | ||
275 | if( !p.hasNonTerminal( sName ) ) | ||
276 | { | ||
277 | p.addNonTerminal( sName ); | ||
278 | } | ||
279 | pr.append( | ||
280 | Parser::State( | ||
281 | Parser::State::typeNonTerminal, | ||
282 | p.getNonTerminalId( sName ) | ||
283 | ) | ||
284 | ); | ||
285 | sio << "Added non-terminal " << sName << sio.nl; | ||
286 | } | ||
287 | } | ||
288 | break; | ||
289 | |||
290 | case tokOpenSquare: | ||
291 | { | ||
292 | next(); | ||
293 | if( pCur->iToken != tokIdentifier ) | ||
294 | tokenError("tokIdentifier"); | ||
295 | Bu::FString sName = | ||
296 | pCur->vExtra.get<Bu::FString>(); | ||
297 | next(); | ||
298 | if( pCur->iToken != tokCloseSquare ) | ||
299 | tokenError("tokCloseSquare"); | ||
300 | |||
301 | if( !hTokens.has( sName ) ) | ||
302 | throw ExceptionBase("Only token names may be " | ||
303 | "enclosed in square brackets."); | ||
304 | |||
305 | pr.append( | ||
306 | Parser::State( | ||
307 | Parser::State::typeTerminalPush, | ||
308 | hTokens.get( sName ) | ||
309 | ) | ||
310 | ); | ||
311 | sio << "Added terminal-push " << sName << sio.nl; | ||
312 | } | ||
313 | break; | ||
314 | |||
315 | case tokOpenCurly: | ||
316 | { | ||
317 | next(); | ||
318 | if( pCur->iToken != tokIdentifier ) | ||
319 | tokenError("tokIdentifier"); | ||
320 | Bu::FString sName = | ||
321 | pCur->vExtra.get<Bu::FString>(); | ||
322 | next(); | ||
323 | if( pCur->iToken != tokCloseCurly ) | ||
324 | tokenError("tokCloseCurly"); | ||
325 | |||
326 | if( !p.hasReduction( sName ) ) | ||
327 | p.addReduction( sName ); | ||
328 | |||
329 | pr.append( | ||
330 | Parser::State( | ||
331 | Parser::State::typeReduction, | ||
332 | p.getReductionId( sName ) | ||
333 | ) | ||
334 | ); | ||
335 | sio << "Added reduction " << sName << sio.nl; | ||
336 | } | ||
337 | break; | ||
338 | |||
339 | case tokOr: | ||
340 | case tokSemiColon: | ||
341 | if( bAnything ) | ||
342 | { | ||
343 | nt.addProduction( pr ); | ||
344 | sio.decIndent(); | ||
345 | sio << "Closing production." << sio.nl; | ||
346 | } | ||
347 | else | ||
348 | { | ||
349 | nt.setCanSkip(); | ||
350 | sio.decIndent(); | ||
351 | sio << "Closing empty production." << sio.nl; | ||
352 | } | ||
353 | return; | ||
354 | |||
355 | default: | ||
356 | tokenError("tokIdentifier, tokOpenSquare, tokOr, " | ||
357 | "tokOpenCurly, or tokSemiColon"); | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | |||
362 | private: | ||
363 | void next() | ||
364 | { | ||
365 | delete pCur; | ||
366 | pCur = l.nextToken(); | ||
367 | } | ||
368 | |||
369 | void tokenError( const FString &s ) | ||
370 | { | ||
371 | throw ExceptionBase( ("Expected " + s + " but found " | ||
372 | + l.tokenToString( *pCur ) + ".").getStr() ); | ||
373 | } | ||
374 | |||
375 | private: | ||
376 | typedef Bu::Hash<Bu::FString, int> TokenHash; | ||
377 | TokenHash hTokens; | ||
378 | BnfLexer &l; | ||
379 | BnfLexer::Token *pCur; | ||
380 | int iLastToken; | ||
381 | Parser p; | ||
382 | }; | ||
383 | |||
384 | int main( int argc, char *argv[] ) | ||
385 | { | ||
386 | File fIn( argv[1], File::Read ); | ||
387 | |||
388 | BnfLexer bl( fIn ); | ||
389 | BnfParser parser( bl ); | ||
390 | |||
391 | parser.parse(); | ||
392 | |||
393 | /* | ||
394 | for(;;) | ||
395 | { | ||
396 | Lexer::Token *pTok = bl.nextToken(); | ||
397 | sio << bl.tokenToString(*pTok); | ||
398 | if( pTok->vExtra.isSet() ) | ||
399 | { | ||
400 | sio << " - " << pTok->vExtra; | ||
401 | } | ||
402 | sio << sio.nl; | ||
403 | if( pTok->iToken == tokEos ) | ||
404 | break; | ||
405 | } | ||
406 | */ | ||
407 | |||
408 | return 0; | ||
409 | } | ||
410 | |||