summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2010-10-18 04:38:19 +0000
committerMike Buland <eichlan@xagasoft.com>2010-10-18 04:38:19 +0000
commit78463c30031a478936b21168a6fc93ae6eeaaeb9 (patch)
tree2216344a91f23088dcb4cf93492802c75577fad1
parent00ecd458ced768b3b8752cdd421a22244b7adc01 (diff)
downloadlibbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.gz
libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.bz2
libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.xz
libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.zip
Several of these new files will go away, but I didn't want to lose them for now.
The parser works! The parser compiler works! It makes parsers! Now we just have to implement post processing, token lookup tables, and storage.
-rw-r--r--bnftest18
-rw-r--r--bnftest.224
-rw-r--r--src/lexer.h4
-rw-r--r--src/parser.cpp10
-rw-r--r--src/parser.h2
-rw-r--r--src/tools/bnfcompile.cpp410
6 files changed, 468 insertions, 0 deletions
diff --git a/bnftest b/bnftest
new file mode 100644
index 0000000..7e61b1a
--- /dev/null
+++ b/bnftest
@@ -0,0 +1,18 @@
1tokens = tokPlus tokMinus tokMult tokDivide tokOpenParen tokCloseParen
2 tokEquals tokNumber;
3
4input: input line
5 |
6 ;
7
8line: expr tokEquals {print}
9 ;
10
11expr: expr tokPlus expr {add}
12 | expr tokMinus expr {subtract}
13 | expr tokMult expr {multiply}
14 | expr tokDivide expr {divide}
15 | tokOpenParen expr tokCloseParen
16 | [tokNumber]
17 ;
18
diff --git a/bnftest.2 b/bnftest.2
new file mode 100644
index 0000000..229943b
--- /dev/null
+++ b/bnftest.2
@@ -0,0 +1,24 @@
1tokens = tokPlus tokMinus tokMult tokDivide tokOpenParen tokCloseParen
2 tokEquals tokNumber;
3
4input: line input#
5 |
6 ;
7
8input#: line input#
9 |
10 ;
11
12line: expr tokEquals {print}
13 ;
14
15expr: tokOpenParen expr tokCloseParen expr#
16 | [tokNumber] expr#
17 ;
18
19expr#: tokPlus expr {add} expr#
20 | tokMinus expr {subtract} expr#
21 | tokMult expr {multiply} expr#
22 | tokDivide expr {divide} expr#
23 |
24 ;
diff --git a/src/lexer.h b/src/lexer.h
index 2cb5f51..9840afe 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -36,6 +36,10 @@ namespace Bu
36 } 36 }
37 TokenType iToken; 37 TokenType iToken;
38 Bu::Variant vExtra; 38 Bu::Variant vExtra;
39 int iStartCol;
40 int iStartRow;
41 int iEndCol;
42 int iEndRow;
39 }; 43 };
40 44
41 virtual Token *nextToken()=0; 45 virtual Token *nextToken()=0;
diff --git a/src/parser.cpp b/src/parser.cpp
index 3ce73b6..4ad4ff9 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -205,6 +205,11 @@ int Bu::Parser::getNonTerminalId( const Bu::FString &sName )
205 return hNonTerminalName.get( sName ); 205 return hNonTerminalName.get( sName );
206} 206}
207 207
208bool Bu::Parser::hasNonTerminal( const Bu::FString &sName )
209{
210 return hNonTerminalName.has( sName );
211}
212
208int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) 213int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r )
209{ 214{
210 int iId = aReduction.getSize(); 215 int iId = aReduction.getSize();
@@ -231,6 +236,11 @@ int Bu::Parser::getReductionId( const Bu::FString &sName )
231 return hReductionName.get( sName ); 236 return hReductionName.get( sName );
232} 237}
233 238
239bool Bu::Parser::hasReduction( const Bu::FString &sName )
240{
241 return hReductionName.has( sName );
242}
243
234// 244//
235// Bu::Parser::State 245// Bu::Parser::State
236// 246//
diff --git a/src/parser.h b/src/parser.h
index 1679c7f..a925188 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -91,11 +91,13 @@ namespace Bu
91 int addNonTerminal( const Bu::FString &sName ); 91 int addNonTerminal( const Bu::FString &sName );
92 void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); 92 void setNonTerminal( const Bu::FString &sName, NonTerminal &nt );
93 int getNonTerminalId( const Bu::FString &sName ); 93 int getNonTerminalId( const Bu::FString &sName );
94 bool hasNonTerminal( const Bu::FString &sName );
94 95
95 int addReduction( const Bu::FString &sName, const Reduction &r ); 96 int addReduction( const Bu::FString &sName, const Reduction &r );
96 int addReduction( const Bu::FString &sName ); 97 int addReduction( const Bu::FString &sName );
97 void setReduction( const Bu::FString &sName, const Reduction &r ); 98 void setReduction( const Bu::FString &sName, const Reduction &r );
98 int getReductionId( const Bu::FString &sName ); 99 int getReductionId( const Bu::FString &sName );
100 bool hasReduction( const Bu::FString &sName );
99 101
100 private: 102 private:
101 bool selectProduction( int iNt, Lexer::Token *ptCur ); 103 bool selectProduction( int iNt, Lexer::Token *ptCur );
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp
new file mode 100644
index 0000000..16e75a5
--- /dev/null
+++ b/src/tools/bnfcompile.cpp
@@ -0,0 +1,410 @@
1#include <bu/sio.h>
2#include <bu/lexer.h>
3#include <bu/parser.h>
4#include <bu/file.h>
5#include <bu/queuebuf.h>
6
7using namespace Bu;
8
9enum TokenType
10{
11 tokIdentifier,
12 tokColon,
13 tokOr,
14 tokSemiColon,
15 tokTokens,
16 tokEquals,
17 tokOpenCurly,
18 tokCloseCurly,
19 tokOpenSquare,
20 tokCloseSquare,
21
22 tokEos=-1
23};
24
25class BnfLexer : public Lexer
26{
27public:
28 BnfLexer( Stream &rSrc ) :
29 rSrc( rSrc )
30 {
31 }
32
33 virtual ~BnfLexer()
34 {
35 }
36
37 virtual Token *nextToken()
38 {
39 char cBuf;
40
41 for(;;)
42 {
43 if( qbIn.getSize() == 0 )
44 {
45 char buf[4096];
46 qbIn.write( buf, rSrc.read( buf, 4096 ) );
47
48 if( rSrc.isEos() && qbIn.getSize() == 0 )
49 return new Token( tokEos );
50 }
51 qbIn.peek( &cBuf, 1 );
52 if( (cBuf >= 'a' && cBuf <= 'z') ||
53 (cBuf >= 'A' && cBuf <= 'Z') ||
54 (cBuf >= '0' && cBuf <= '9') ||
55 cBuf == '_' )
56 {
57 sBuf.append( cBuf );
58 qbIn.seek( 1 );
59 }
60 else if( sBuf.isSet() )
61 {
62 if( sBuf == "tokens" )
63 {
64 sBuf.clear();
65 return new Token( tokTokens );
66 }
67 else
68 {
69 Token *pRet = new Token( tokIdentifier, sBuf );
70 sBuf.clear();
71 return pRet;
72 }
73 }
74 else
75 {
76 switch( cBuf )
77 {
78 case ' ':
79 case '\t':
80 case '\n':
81 case '\r':
82 qbIn.seek( 1 );
83 continue;
84
85 case ':':
86 qbIn.seek( 1 );
87 return new Token( tokColon );
88
89 case ';':
90 qbIn.seek( 1 );
91 return new Token( tokSemiColon );
92
93 case '|':
94 qbIn.seek( 1 );
95 return new Token( tokOr );
96
97 case '=':
98 qbIn.seek( 1 );
99 return new Token( tokEquals );
100
101 case '[':
102 qbIn.seek( 1 );
103 return new Token( tokOpenSquare );
104
105 case ']':
106 qbIn.seek( 1 );
107 return new Token( tokCloseSquare );
108
109 case '{':
110 qbIn.seek( 1 );
111 return new Token( tokOpenCurly );
112
113 case '}':
114 qbIn.seek( 1 );
115 return new Token( tokCloseCurly );
116
117 default:
118 throw ExceptionBase("Unexpected character '%c'.",
119 cBuf );
120 break;
121 }
122 }
123 }
124 }
125
126 virtual FString tokenToString( const Token &t )
127 {
128 switch( (TokenType)t.iToken )
129 {
130 case tokIdentifier: return "tokIdentifier";
131 case tokColon: return "tokColon";
132 case tokOr: return "tokOr";
133 case tokSemiColon: return "tokSemiColon";
134 case tokTokens: return "tokTokens";
135 case tokEquals: return "tokEquals";
136 case tokOpenCurly: return "tokOpenCurly";
137 case tokCloseCurly: return "tokCloseCurly";
138 case tokOpenSquare: return "tokOpenSquare";
139 case tokCloseSquare: return "tokCloseSquare";
140 case tokEos: return "tokEos";
141 }
142
143 return "???";
144 }
145
146private:
147 Stream &rSrc;
148 QueueBuf qbIn;
149 FString sBuf;
150};
151
152class BnfParser
153{
154public:
155 BnfParser( BnfLexer &l ) :
156 l( l ),
157 pCur( NULL ),
158 iLastToken( 0 )
159 {
160 }
161
162 virtual ~BnfParser()
163 {
164 delete pCur;
165 pCur = NULL;
166 }
167
168 void parse()
169 {
170 for(;;)
171 {
172 next();
173 switch( pCur->iToken )
174 {
175 case tokTokens:
176 tokens();
177 break;
178
179 case tokIdentifier:
180 nonTerminal();
181 break;
182
183 case tokEos:
184 return;
185 break;
186
187 default:
188 tokenError("tokTokens, tokIdentifier, or tokEos");
189 }
190 }
191 }
192
193private:
194 void tokens()
195 {
196 next();
197 if( pCur->iToken != tokEquals )
198 tokenError("tokEquals");
199 for(;;)
200 {
201 next();
202 if( pCur->iToken == tokIdentifier )
203 {
204 hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken );
205 sio << "Added token[" << iLastToken << "]: "
206 << pCur->vExtra.get<Bu::FString>() << sio.nl;
207 }
208 else if( pCur->iToken == tokSemiColon )
209 break;
210 else
211 tokenError("tokIdentifier or tokSemiColon");
212 }
213 }
214
215 void nonTerminal()
216 {
217 Bu::FString sNtName = pCur->vExtra.get<Bu::FString>();
218 Parser::NonTerminal nt;
219 p.addNonTerminal( sNtName );
220 sio.incIndent();
221 sio << "Created non-terminal: " << sNtName << sio.nl;
222
223 next();
224 if( pCur->iToken != tokColon )
225 tokenError("tokColon");
226 production( nt );
227 for(;;)
228 {
229 switch( pCur->iToken )
230 {
231 case tokOr:
232 production( nt );
233 break;
234
235 case tokSemiColon:
236 p.setNonTerminal( sNtName, nt );
237 sio.decIndent();
238 sio << "Closing non-terminal." << sio.nl;
239 return;
240
241 default:
242 tokenError("tkOr or tokSemiColon");
243 break;
244 }
245 }
246 }
247
248 void production( Parser::NonTerminal &nt )
249 {
250 sio.incIndent();
251 sio << "Adding new production:" << sio.nl;
252 Parser::Production pr;
253 bool bAnything = false;
254 for(;;)
255 {
256 next();
257 switch( pCur->iToken )
258 {
259 case tokIdentifier:
260 {
261 const Bu::FString &sName =
262 pCur->vExtra.get<Bu::FString>();
263 if( hTokens.has( sName ) )
264 {
265 pr.append(
266 Parser::State(
267 Parser::State::typeTerminal,
268 hTokens.get( sName )
269 )
270 );
271 sio << "Added terminal " << sName << sio.nl;
272 }
273 else
274 {
275 if( !p.hasNonTerminal( sName ) )
276 {
277 p.addNonTerminal( sName );
278 }
279 pr.append(
280 Parser::State(
281 Parser::State::typeNonTerminal,
282 p.getNonTerminalId( sName )
283 )
284 );
285 sio << "Added non-terminal " << sName << sio.nl;
286 }
287 }
288 break;
289
290 case tokOpenSquare:
291 {
292 next();
293 if( pCur->iToken != tokIdentifier )
294 tokenError("tokIdentifier");
295 Bu::FString sName =
296 pCur->vExtra.get<Bu::FString>();
297 next();
298 if( pCur->iToken != tokCloseSquare )
299 tokenError("tokCloseSquare");
300
301 if( !hTokens.has( sName ) )
302 throw ExceptionBase("Only token names may be "
303 "enclosed in square brackets.");
304
305 pr.append(
306 Parser::State(
307 Parser::State::typeTerminalPush,
308 hTokens.get( sName )
309 )
310 );
311 sio << "Added terminal-push " << sName << sio.nl;
312 }
313 break;
314
315 case tokOpenCurly:
316 {
317 next();
318 if( pCur->iToken != tokIdentifier )
319 tokenError("tokIdentifier");
320 Bu::FString sName =
321 pCur->vExtra.get<Bu::FString>();
322 next();
323 if( pCur->iToken != tokCloseCurly )
324 tokenError("tokCloseCurly");
325
326 if( !p.hasReduction( sName ) )
327 p.addReduction( sName );
328
329 pr.append(
330 Parser::State(
331 Parser::State::typeReduction,
332 p.getReductionId( sName )
333 )
334 );
335 sio << "Added reduction " << sName << sio.nl;
336 }
337 break;
338
339 case tokOr:
340 case tokSemiColon:
341 if( bAnything )
342 {
343 nt.addProduction( pr );
344 sio.decIndent();
345 sio << "Closing production." << sio.nl;
346 }
347 else
348 {
349 nt.setCanSkip();
350 sio.decIndent();
351 sio << "Closing empty production." << sio.nl;
352 }
353 return;
354
355 default:
356 tokenError("tokIdentifier, tokOpenSquare, tokOr, "
357 "tokOpenCurly, or tokSemiColon");
358 }
359 }
360 }
361
362private:
363 void next()
364 {
365 delete pCur;
366 pCur = l.nextToken();
367 }
368
369 void tokenError( const FString &s )
370 {
371 throw ExceptionBase( ("Expected " + s + " but found "
372 + l.tokenToString( *pCur ) + ".").getStr() );
373 }
374
375private:
376 typedef Bu::Hash<Bu::FString, int> TokenHash;
377 TokenHash hTokens;
378 BnfLexer &l;
379 BnfLexer::Token *pCur;
380 int iLastToken;
381 Parser p;
382};
383
384int main( int argc, char *argv[] )
385{
386 File fIn( argv[1], File::Read );
387
388 BnfLexer bl( fIn );
389 BnfParser parser( bl );
390
391 parser.parse();
392
393/*
394 for(;;)
395 {
396 Lexer::Token *pTok = bl.nextToken();
397 sio << bl.tokenToString(*pTok);
398 if( pTok->vExtra.isSet() )
399 {
400 sio << " - " << pTok->vExtra;
401 }
402 sio << sio.nl;
403 if( pTok->iToken == tokEos )
404 break;
405 }
406*/
407
408 return 0;
409}
410