diff options
author | Mike Buland <eichlan@xagasoft.com> | 2010-10-18 04:38:19 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2010-10-18 04:38:19 +0000 |
commit | 78463c30031a478936b21168a6fc93ae6eeaaeb9 (patch) | |
tree | 2216344a91f23088dcb4cf93492802c75577fad1 /src | |
parent | 00ecd458ced768b3b8752cdd421a22244b7adc01 (diff) | |
download | libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.gz libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.bz2 libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.xz libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.zip |
Several of these new files will go away, but I didn't want to lose them for now.
The parser works! The parser compiler works! It makes parsers!
Now we just have to implement post processing, token lookup tables, and storage.
Diffstat (limited to 'src')
-rw-r--r-- | src/lexer.h | 4 | ||||
-rw-r--r-- | src/parser.cpp | 10 | ||||
-rw-r--r-- | src/parser.h | 2 | ||||
-rw-r--r-- | src/tools/bnfcompile.cpp | 410 |
4 files changed, 426 insertions, 0 deletions
diff --git a/src/lexer.h b/src/lexer.h index 2cb5f51..9840afe 100644 --- a/src/lexer.h +++ b/src/lexer.h | |||
@@ -36,6 +36,10 @@ namespace Bu | |||
36 | } | 36 | } |
37 | TokenType iToken; | 37 | TokenType iToken; |
38 | Bu::Variant vExtra; | 38 | Bu::Variant vExtra; |
39 | int iStartCol; | ||
40 | int iStartRow; | ||
41 | int iEndCol; | ||
42 | int iEndRow; | ||
39 | }; | 43 | }; |
40 | 44 | ||
41 | virtual Token *nextToken()=0; | 45 | virtual Token *nextToken()=0; |
diff --git a/src/parser.cpp b/src/parser.cpp index 3ce73b6..4ad4ff9 100644 --- a/src/parser.cpp +++ b/src/parser.cpp | |||
@@ -205,6 +205,11 @@ int Bu::Parser::getNonTerminalId( const Bu::FString &sName ) | |||
205 | return hNonTerminalName.get( sName ); | 205 | return hNonTerminalName.get( sName ); |
206 | } | 206 | } |
207 | 207 | ||
208 | bool Bu::Parser::hasNonTerminal( const Bu::FString &sName ) | ||
209 | { | ||
210 | return hNonTerminalName.has( sName ); | ||
211 | } | ||
212 | |||
208 | int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) | 213 | int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) |
209 | { | 214 | { |
210 | int iId = aReduction.getSize(); | 215 | int iId = aReduction.getSize(); |
@@ -231,6 +236,11 @@ int Bu::Parser::getReductionId( const Bu::FString &sName ) | |||
231 | return hReductionName.get( sName ); | 236 | return hReductionName.get( sName ); |
232 | } | 237 | } |
233 | 238 | ||
239 | bool Bu::Parser::hasReduction( const Bu::FString &sName ) | ||
240 | { | ||
241 | return hReductionName.has( sName ); | ||
242 | } | ||
243 | |||
234 | // | 244 | // |
235 | // Bu::Parser::State | 245 | // Bu::Parser::State |
236 | // | 246 | // |
diff --git a/src/parser.h b/src/parser.h index 1679c7f..a925188 100644 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -91,11 +91,13 @@ namespace Bu | |||
91 | int addNonTerminal( const Bu::FString &sName ); | 91 | int addNonTerminal( const Bu::FString &sName ); |
92 | void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); | 92 | void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); |
93 | int getNonTerminalId( const Bu::FString &sName ); | 93 | int getNonTerminalId( const Bu::FString &sName ); |
94 | bool hasNonTerminal( const Bu::FString &sName ); | ||
94 | 95 | ||
95 | int addReduction( const Bu::FString &sName, const Reduction &r ); | 96 | int addReduction( const Bu::FString &sName, const Reduction &r ); |
96 | int addReduction( const Bu::FString &sName ); | 97 | int addReduction( const Bu::FString &sName ); |
97 | void setReduction( const Bu::FString &sName, const Reduction &r ); | 98 | void setReduction( const Bu::FString &sName, const Reduction &r ); |
98 | int getReductionId( const Bu::FString &sName ); | 99 | int getReductionId( const Bu::FString &sName ); |
100 | bool hasReduction( const Bu::FString &sName ); | ||
99 | 101 | ||
100 | private: | 102 | private: |
101 | bool selectProduction( int iNt, Lexer::Token *ptCur ); | 103 | bool selectProduction( int iNt, Lexer::Token *ptCur ); |
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp new file mode 100644 index 0000000..16e75a5 --- /dev/null +++ b/src/tools/bnfcompile.cpp | |||
@@ -0,0 +1,410 @@ | |||
1 | #include <bu/sio.h> | ||
2 | #include <bu/lexer.h> | ||
3 | #include <bu/parser.h> | ||
4 | #include <bu/file.h> | ||
5 | #include <bu/queuebuf.h> | ||
6 | |||
7 | using namespace Bu; | ||
8 | |||
9 | enum TokenType | ||
10 | { | ||
11 | tokIdentifier, | ||
12 | tokColon, | ||
13 | tokOr, | ||
14 | tokSemiColon, | ||
15 | tokTokens, | ||
16 | tokEquals, | ||
17 | tokOpenCurly, | ||
18 | tokCloseCurly, | ||
19 | tokOpenSquare, | ||
20 | tokCloseSquare, | ||
21 | |||
22 | tokEos=-1 | ||
23 | }; | ||
24 | |||
25 | class BnfLexer : public Lexer | ||
26 | { | ||
27 | public: | ||
28 | BnfLexer( Stream &rSrc ) : | ||
29 | rSrc( rSrc ) | ||
30 | { | ||
31 | } | ||
32 | |||
33 | virtual ~BnfLexer() | ||
34 | { | ||
35 | } | ||
36 | |||
37 | virtual Token *nextToken() | ||
38 | { | ||
39 | char cBuf; | ||
40 | |||
41 | for(;;) | ||
42 | { | ||
43 | if( qbIn.getSize() == 0 ) | ||
44 | { | ||
45 | char buf[4096]; | ||
46 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
47 | |||
48 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
49 | return new Token( tokEos ); | ||
50 | } | ||
51 | qbIn.peek( &cBuf, 1 ); | ||
52 | if( (cBuf >= 'a' && cBuf <= 'z') || | ||
53 | (cBuf >= 'A' && cBuf <= 'Z') || | ||
54 | (cBuf >= '0' && cBuf <= '9') || | ||
55 | cBuf == '_' ) | ||
56 | { | ||
57 | sBuf.append( cBuf ); | ||
58 | qbIn.seek( 1 ); | ||
59 | } | ||
60 | else if( sBuf.isSet() ) | ||
61 | { | ||
62 | if( sBuf == "tokens" ) | ||
63 | { | ||
64 | sBuf.clear(); | ||
65 | return new Token( tokTokens ); | ||
66 | } | ||
67 | else | ||
68 | { | ||
69 | Token *pRet = new Token( tokIdentifier, sBuf ); | ||
70 | sBuf.clear(); | ||
71 | return pRet; | ||
72 | } | ||
73 | } | ||
74 | else | ||
75 | { | ||
76 | switch( cBuf ) | ||
77 | { | ||
78 | case ' ': | ||
79 | case '\t': | ||
80 | case '\n': | ||
81 | case '\r': | ||
82 | qbIn.seek( 1 ); | ||
83 | continue; | ||
84 | |||
85 | case ':': | ||
86 | qbIn.seek( 1 ); | ||
87 | return new Token( tokColon ); | ||
88 | |||
89 | case ';': | ||
90 | qbIn.seek( 1 ); | ||
91 | return new Token( tokSemiColon ); | ||
92 | |||
93 | case '|': | ||
94 | qbIn.seek( 1 ); | ||
95 | return new Token( tokOr ); | ||
96 | |||
97 | case '=': | ||
98 | qbIn.seek( 1 ); | ||
99 | return new Token( tokEquals ); | ||
100 | |||
101 | case '[': | ||
102 | qbIn.seek( 1 ); | ||
103 | return new Token( tokOpenSquare ); | ||
104 | |||
105 | case ']': | ||
106 | qbIn.seek( 1 ); | ||
107 | return new Token( tokCloseSquare ); | ||
108 | |||
109 | case '{': | ||
110 | qbIn.seek( 1 ); | ||
111 | return new Token( tokOpenCurly ); | ||
112 | |||
113 | case '}': | ||
114 | qbIn.seek( 1 ); | ||
115 | return new Token( tokCloseCurly ); | ||
116 | |||
117 | default: | ||
118 | throw ExceptionBase("Unexpected character '%c'.", | ||
119 | cBuf ); | ||
120 | break; | ||
121 | } | ||
122 | } | ||
123 | } | ||
124 | } | ||
125 | |||
126 | virtual FString tokenToString( const Token &t ) | ||
127 | { | ||
128 | switch( (TokenType)t.iToken ) | ||
129 | { | ||
130 | case tokIdentifier: return "tokIdentifier"; | ||
131 | case tokColon: return "tokColon"; | ||
132 | case tokOr: return "tokOr"; | ||
133 | case tokSemiColon: return "tokSemiColon"; | ||
134 | case tokTokens: return "tokTokens"; | ||
135 | case tokEquals: return "tokEquals"; | ||
136 | case tokOpenCurly: return "tokOpenCurly"; | ||
137 | case tokCloseCurly: return "tokCloseCurly"; | ||
138 | case tokOpenSquare: return "tokOpenSquare"; | ||
139 | case tokCloseSquare: return "tokCloseSquare"; | ||
140 | case tokEos: return "tokEos"; | ||
141 | } | ||
142 | |||
143 | return "???"; | ||
144 | } | ||
145 | |||
146 | private: | ||
147 | Stream &rSrc; | ||
148 | QueueBuf qbIn; | ||
149 | FString sBuf; | ||
150 | }; | ||
151 | |||
152 | class BnfParser | ||
153 | { | ||
154 | public: | ||
155 | BnfParser( BnfLexer &l ) : | ||
156 | l( l ), | ||
157 | pCur( NULL ), | ||
158 | iLastToken( 0 ) | ||
159 | { | ||
160 | } | ||
161 | |||
162 | virtual ~BnfParser() | ||
163 | { | ||
164 | delete pCur; | ||
165 | pCur = NULL; | ||
166 | } | ||
167 | |||
168 | void parse() | ||
169 | { | ||
170 | for(;;) | ||
171 | { | ||
172 | next(); | ||
173 | switch( pCur->iToken ) | ||
174 | { | ||
175 | case tokTokens: | ||
176 | tokens(); | ||
177 | break; | ||
178 | |||
179 | case tokIdentifier: | ||
180 | nonTerminal(); | ||
181 | break; | ||
182 | |||
183 | case tokEos: | ||
184 | return; | ||
185 | break; | ||
186 | |||
187 | default: | ||
188 | tokenError("tokTokens, tokIdentifier, or tokEos"); | ||
189 | } | ||
190 | } | ||
191 | } | ||
192 | |||
193 | private: | ||
194 | void tokens() | ||
195 | { | ||
196 | next(); | ||
197 | if( pCur->iToken != tokEquals ) | ||
198 | tokenError("tokEquals"); | ||
199 | for(;;) | ||
200 | { | ||
201 | next(); | ||
202 | if( pCur->iToken == tokIdentifier ) | ||
203 | { | ||
204 | hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken ); | ||
205 | sio << "Added token[" << iLastToken << "]: " | ||
206 | << pCur->vExtra.get<Bu::FString>() << sio.nl; | ||
207 | } | ||
208 | else if( pCur->iToken == tokSemiColon ) | ||
209 | break; | ||
210 | else | ||
211 | tokenError("tokIdentifier or tokSemiColon"); | ||
212 | } | ||
213 | } | ||
214 | |||
215 | void nonTerminal() | ||
216 | { | ||
217 | Bu::FString sNtName = pCur->vExtra.get<Bu::FString>(); | ||
218 | Parser::NonTerminal nt; | ||
219 | p.addNonTerminal( sNtName ); | ||
220 | sio.incIndent(); | ||
221 | sio << "Created non-terminal: " << sNtName << sio.nl; | ||
222 | |||
223 | next(); | ||
224 | if( pCur->iToken != tokColon ) | ||
225 | tokenError("tokColon"); | ||
226 | production( nt ); | ||
227 | for(;;) | ||
228 | { | ||
229 | switch( pCur->iToken ) | ||
230 | { | ||
231 | case tokOr: | ||
232 | production( nt ); | ||
233 | break; | ||
234 | |||
235 | case tokSemiColon: | ||
236 | p.setNonTerminal( sNtName, nt ); | ||
237 | sio.decIndent(); | ||
238 | sio << "Closing non-terminal." << sio.nl; | ||
239 | return; | ||
240 | |||
241 | default: | ||
242 | tokenError("tkOr or tokSemiColon"); | ||
243 | break; | ||
244 | } | ||
245 | } | ||
246 | } | ||
247 | |||
248 | void production( Parser::NonTerminal &nt ) | ||
249 | { | ||
250 | sio.incIndent(); | ||
251 | sio << "Adding new production:" << sio.nl; | ||
252 | Parser::Production pr; | ||
253 | bool bAnything = false; | ||
254 | for(;;) | ||
255 | { | ||
256 | next(); | ||
257 | switch( pCur->iToken ) | ||
258 | { | ||
259 | case tokIdentifier: | ||
260 | { | ||
261 | const Bu::FString &sName = | ||
262 | pCur->vExtra.get<Bu::FString>(); | ||
263 | if( hTokens.has( sName ) ) | ||
264 | { | ||
265 | pr.append( | ||
266 | Parser::State( | ||
267 | Parser::State::typeTerminal, | ||
268 | hTokens.get( sName ) | ||
269 | ) | ||
270 | ); | ||
271 | sio << "Added terminal " << sName << sio.nl; | ||
272 | } | ||
273 | else | ||
274 | { | ||
275 | if( !p.hasNonTerminal( sName ) ) | ||
276 | { | ||
277 | p.addNonTerminal( sName ); | ||
278 | } | ||
279 | pr.append( | ||
280 | Parser::State( | ||
281 | Parser::State::typeNonTerminal, | ||
282 | p.getNonTerminalId( sName ) | ||
283 | ) | ||
284 | ); | ||
285 | sio << "Added non-terminal " << sName << sio.nl; | ||
286 | } | ||
287 | } | ||
288 | break; | ||
289 | |||
290 | case tokOpenSquare: | ||
291 | { | ||
292 | next(); | ||
293 | if( pCur->iToken != tokIdentifier ) | ||
294 | tokenError("tokIdentifier"); | ||
295 | Bu::FString sName = | ||
296 | pCur->vExtra.get<Bu::FString>(); | ||
297 | next(); | ||
298 | if( pCur->iToken != tokCloseSquare ) | ||
299 | tokenError("tokCloseSquare"); | ||
300 | |||
301 | if( !hTokens.has( sName ) ) | ||
302 | throw ExceptionBase("Only token names may be " | ||
303 | "enclosed in square brackets."); | ||
304 | |||
305 | pr.append( | ||
306 | Parser::State( | ||
307 | Parser::State::typeTerminalPush, | ||
308 | hTokens.get( sName ) | ||
309 | ) | ||
310 | ); | ||
311 | sio << "Added terminal-push " << sName << sio.nl; | ||
312 | } | ||
313 | break; | ||
314 | |||
315 | case tokOpenCurly: | ||
316 | { | ||
317 | next(); | ||
318 | if( pCur->iToken != tokIdentifier ) | ||
319 | tokenError("tokIdentifier"); | ||
320 | Bu::FString sName = | ||
321 | pCur->vExtra.get<Bu::FString>(); | ||
322 | next(); | ||
323 | if( pCur->iToken != tokCloseCurly ) | ||
324 | tokenError("tokCloseCurly"); | ||
325 | |||
326 | if( !p.hasReduction( sName ) ) | ||
327 | p.addReduction( sName ); | ||
328 | |||
329 | pr.append( | ||
330 | Parser::State( | ||
331 | Parser::State::typeReduction, | ||
332 | p.getReductionId( sName ) | ||
333 | ) | ||
334 | ); | ||
335 | sio << "Added reduction " << sName << sio.nl; | ||
336 | } | ||
337 | break; | ||
338 | |||
339 | case tokOr: | ||
340 | case tokSemiColon: | ||
341 | if( bAnything ) | ||
342 | { | ||
343 | nt.addProduction( pr ); | ||
344 | sio.decIndent(); | ||
345 | sio << "Closing production." << sio.nl; | ||
346 | } | ||
347 | else | ||
348 | { | ||
349 | nt.setCanSkip(); | ||
350 | sio.decIndent(); | ||
351 | sio << "Closing empty production." << sio.nl; | ||
352 | } | ||
353 | return; | ||
354 | |||
355 | default: | ||
356 | tokenError("tokIdentifier, tokOpenSquare, tokOr, " | ||
357 | "tokOpenCurly, or tokSemiColon"); | ||
358 | } | ||
359 | } | ||
360 | } | ||
361 | |||
362 | private: | ||
363 | void next() | ||
364 | { | ||
365 | delete pCur; | ||
366 | pCur = l.nextToken(); | ||
367 | } | ||
368 | |||
369 | void tokenError( const FString &s ) | ||
370 | { | ||
371 | throw ExceptionBase( ("Expected " + s + " but found " | ||
372 | + l.tokenToString( *pCur ) + ".").getStr() ); | ||
373 | } | ||
374 | |||
375 | private: | ||
376 | typedef Bu::Hash<Bu::FString, int> TokenHash; | ||
377 | TokenHash hTokens; | ||
378 | BnfLexer &l; | ||
379 | BnfLexer::Token *pCur; | ||
380 | int iLastToken; | ||
381 | Parser p; | ||
382 | }; | ||
383 | |||
384 | int main( int argc, char *argv[] ) | ||
385 | { | ||
386 | File fIn( argv[1], File::Read ); | ||
387 | |||
388 | BnfLexer bl( fIn ); | ||
389 | BnfParser parser( bl ); | ||
390 | |||
391 | parser.parse(); | ||
392 | |||
393 | /* | ||
394 | for(;;) | ||
395 | { | ||
396 | Lexer::Token *pTok = bl.nextToken(); | ||
397 | sio << bl.tokenToString(*pTok); | ||
398 | if( pTok->vExtra.isSet() ) | ||
399 | { | ||
400 | sio << " - " << pTok->vExtra; | ||
401 | } | ||
402 | sio << sio.nl; | ||
403 | if( pTok->iToken == tokEos ) | ||
404 | break; | ||
405 | } | ||
406 | */ | ||
407 | |||
408 | return 0; | ||
409 | } | ||
410 | |||