summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2010-10-18 04:38:19 +0000
committerMike Buland <eichlan@xagasoft.com>2010-10-18 04:38:19 +0000
commit78463c30031a478936b21168a6fc93ae6eeaaeb9 (patch)
tree2216344a91f23088dcb4cf93492802c75577fad1 /src
parent00ecd458ced768b3b8752cdd421a22244b7adc01 (diff)
downloadlibbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.gz
libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.bz2
libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.tar.xz
libbu++-78463c30031a478936b21168a6fc93ae6eeaaeb9.zip
Several of these new files will go away, but I didn't want to lose them for now.
The parser works! The parser compiler works! It makes parsers! Now we just have to implement post processing, token lookup tables, and storage.
Diffstat (limited to 'src')
-rw-r--r--src/lexer.h4
-rw-r--r--src/parser.cpp10
-rw-r--r--src/parser.h2
-rw-r--r--src/tools/bnfcompile.cpp410
4 files changed, 426 insertions, 0 deletions
diff --git a/src/lexer.h b/src/lexer.h
index 2cb5f51..9840afe 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -36,6 +36,10 @@ namespace Bu
36 } 36 }
37 TokenType iToken; 37 TokenType iToken;
38 Bu::Variant vExtra; 38 Bu::Variant vExtra;
39 int iStartCol;
40 int iStartRow;
41 int iEndCol;
42 int iEndRow;
39 }; 43 };
40 44
41 virtual Token *nextToken()=0; 45 virtual Token *nextToken()=0;
diff --git a/src/parser.cpp b/src/parser.cpp
index 3ce73b6..4ad4ff9 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -205,6 +205,11 @@ int Bu::Parser::getNonTerminalId( const Bu::FString &sName )
205 return hNonTerminalName.get( sName ); 205 return hNonTerminalName.get( sName );
206} 206}
207 207
208bool Bu::Parser::hasNonTerminal( const Bu::FString &sName )
209{
210 return hNonTerminalName.has( sName );
211}
212
208int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) 213int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r )
209{ 214{
210 int iId = aReduction.getSize(); 215 int iId = aReduction.getSize();
@@ -231,6 +236,11 @@ int Bu::Parser::getReductionId( const Bu::FString &sName )
231 return hReductionName.get( sName ); 236 return hReductionName.get( sName );
232} 237}
233 238
239bool Bu::Parser::hasReduction( const Bu::FString &sName )
240{
241 return hReductionName.has( sName );
242}
243
234// 244//
235// Bu::Parser::State 245// Bu::Parser::State
236// 246//
diff --git a/src/parser.h b/src/parser.h
index 1679c7f..a925188 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -91,11 +91,13 @@ namespace Bu
91 int addNonTerminal( const Bu::FString &sName ); 91 int addNonTerminal( const Bu::FString &sName );
92 void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); 92 void setNonTerminal( const Bu::FString &sName, NonTerminal &nt );
93 int getNonTerminalId( const Bu::FString &sName ); 93 int getNonTerminalId( const Bu::FString &sName );
94 bool hasNonTerminal( const Bu::FString &sName );
94 95
95 int addReduction( const Bu::FString &sName, const Reduction &r ); 96 int addReduction( const Bu::FString &sName, const Reduction &r );
96 int addReduction( const Bu::FString &sName ); 97 int addReduction( const Bu::FString &sName );
97 void setReduction( const Bu::FString &sName, const Reduction &r ); 98 void setReduction( const Bu::FString &sName, const Reduction &r );
98 int getReductionId( const Bu::FString &sName ); 99 int getReductionId( const Bu::FString &sName );
100 bool hasReduction( const Bu::FString &sName );
99 101
100 private: 102 private:
101 bool selectProduction( int iNt, Lexer::Token *ptCur ); 103 bool selectProduction( int iNt, Lexer::Token *ptCur );
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp
new file mode 100644
index 0000000..16e75a5
--- /dev/null
+++ b/src/tools/bnfcompile.cpp
@@ -0,0 +1,410 @@
1#include <bu/sio.h>
2#include <bu/lexer.h>
3#include <bu/parser.h>
4#include <bu/file.h>
5#include <bu/queuebuf.h>
6
7using namespace Bu;
8
9enum TokenType
10{
11 tokIdentifier,
12 tokColon,
13 tokOr,
14 tokSemiColon,
15 tokTokens,
16 tokEquals,
17 tokOpenCurly,
18 tokCloseCurly,
19 tokOpenSquare,
20 tokCloseSquare,
21
22 tokEos=-1
23};
24
25class BnfLexer : public Lexer
26{
27public:
28 BnfLexer( Stream &rSrc ) :
29 rSrc( rSrc )
30 {
31 }
32
33 virtual ~BnfLexer()
34 {
35 }
36
37 virtual Token *nextToken()
38 {
39 char cBuf;
40
41 for(;;)
42 {
43 if( qbIn.getSize() == 0 )
44 {
45 char buf[4096];
46 qbIn.write( buf, rSrc.read( buf, 4096 ) );
47
48 if( rSrc.isEos() && qbIn.getSize() == 0 )
49 return new Token( tokEos );
50 }
51 qbIn.peek( &cBuf, 1 );
52 if( (cBuf >= 'a' && cBuf <= 'z') ||
53 (cBuf >= 'A' && cBuf <= 'Z') ||
54 (cBuf >= '0' && cBuf <= '9') ||
55 cBuf == '_' )
56 {
57 sBuf.append( cBuf );
58 qbIn.seek( 1 );
59 }
60 else if( sBuf.isSet() )
61 {
62 if( sBuf == "tokens" )
63 {
64 sBuf.clear();
65 return new Token( tokTokens );
66 }
67 else
68 {
69 Token *pRet = new Token( tokIdentifier, sBuf );
70 sBuf.clear();
71 return pRet;
72 }
73 }
74 else
75 {
76 switch( cBuf )
77 {
78 case ' ':
79 case '\t':
80 case '\n':
81 case '\r':
82 qbIn.seek( 1 );
83 continue;
84
85 case ':':
86 qbIn.seek( 1 );
87 return new Token( tokColon );
88
89 case ';':
90 qbIn.seek( 1 );
91 return new Token( tokSemiColon );
92
93 case '|':
94 qbIn.seek( 1 );
95 return new Token( tokOr );
96
97 case '=':
98 qbIn.seek( 1 );
99 return new Token( tokEquals );
100
101 case '[':
102 qbIn.seek( 1 );
103 return new Token( tokOpenSquare );
104
105 case ']':
106 qbIn.seek( 1 );
107 return new Token( tokCloseSquare );
108
109 case '{':
110 qbIn.seek( 1 );
111 return new Token( tokOpenCurly );
112
113 case '}':
114 qbIn.seek( 1 );
115 return new Token( tokCloseCurly );
116
117 default:
118 throw ExceptionBase("Unexpected character '%c'.",
119 cBuf );
120 break;
121 }
122 }
123 }
124 }
125
126 virtual FString tokenToString( const Token &t )
127 {
128 switch( (TokenType)t.iToken )
129 {
130 case tokIdentifier: return "tokIdentifier";
131 case tokColon: return "tokColon";
132 case tokOr: return "tokOr";
133 case tokSemiColon: return "tokSemiColon";
134 case tokTokens: return "tokTokens";
135 case tokEquals: return "tokEquals";
136 case tokOpenCurly: return "tokOpenCurly";
137 case tokCloseCurly: return "tokCloseCurly";
138 case tokOpenSquare: return "tokOpenSquare";
139 case tokCloseSquare: return "tokCloseSquare";
140 case tokEos: return "tokEos";
141 }
142
143 return "???";
144 }
145
146private:
147 Stream &rSrc;
148 QueueBuf qbIn;
149 FString sBuf;
150};
151
152class BnfParser
153{
154public:
155 BnfParser( BnfLexer &l ) :
156 l( l ),
157 pCur( NULL ),
158 iLastToken( 0 )
159 {
160 }
161
162 virtual ~BnfParser()
163 {
164 delete pCur;
165 pCur = NULL;
166 }
167
168 void parse()
169 {
170 for(;;)
171 {
172 next();
173 switch( pCur->iToken )
174 {
175 case tokTokens:
176 tokens();
177 break;
178
179 case tokIdentifier:
180 nonTerminal();
181 break;
182
183 case tokEos:
184 return;
185 break;
186
187 default:
188 tokenError("tokTokens, tokIdentifier, or tokEos");
189 }
190 }
191 }
192
193private:
194 void tokens()
195 {
196 next();
197 if( pCur->iToken != tokEquals )
198 tokenError("tokEquals");
199 for(;;)
200 {
201 next();
202 if( pCur->iToken == tokIdentifier )
203 {
204 hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken );
205 sio << "Added token[" << iLastToken << "]: "
206 << pCur->vExtra.get<Bu::FString>() << sio.nl;
207 }
208 else if( pCur->iToken == tokSemiColon )
209 break;
210 else
211 tokenError("tokIdentifier or tokSemiColon");
212 }
213 }
214
215 void nonTerminal()
216 {
217 Bu::FString sNtName = pCur->vExtra.get<Bu::FString>();
218 Parser::NonTerminal nt;
219 p.addNonTerminal( sNtName );
220 sio.incIndent();
221 sio << "Created non-terminal: " << sNtName << sio.nl;
222
223 next();
224 if( pCur->iToken != tokColon )
225 tokenError("tokColon");
226 production( nt );
227 for(;;)
228 {
229 switch( pCur->iToken )
230 {
231 case tokOr:
232 production( nt );
233 break;
234
235 case tokSemiColon:
236 p.setNonTerminal( sNtName, nt );
237 sio.decIndent();
238 sio << "Closing non-terminal." << sio.nl;
239 return;
240
241 default:
242 tokenError("tkOr or tokSemiColon");
243 break;
244 }
245 }
246 }
247
248 void production( Parser::NonTerminal &nt )
249 {
250 sio.incIndent();
251 sio << "Adding new production:" << sio.nl;
252 Parser::Production pr;
253 bool bAnything = false;
254 for(;;)
255 {
256 next();
257 switch( pCur->iToken )
258 {
259 case tokIdentifier:
260 {
261 const Bu::FString &sName =
262 pCur->vExtra.get<Bu::FString>();
263 if( hTokens.has( sName ) )
264 {
265 pr.append(
266 Parser::State(
267 Parser::State::typeTerminal,
268 hTokens.get( sName )
269 )
270 );
271 sio << "Added terminal " << sName << sio.nl;
272 }
273 else
274 {
275 if( !p.hasNonTerminal( sName ) )
276 {
277 p.addNonTerminal( sName );
278 }
279 pr.append(
280 Parser::State(
281 Parser::State::typeNonTerminal,
282 p.getNonTerminalId( sName )
283 )
284 );
285 sio << "Added non-terminal " << sName << sio.nl;
286 }
287 }
288 break;
289
290 case tokOpenSquare:
291 {
292 next();
293 if( pCur->iToken != tokIdentifier )
294 tokenError("tokIdentifier");
295 Bu::FString sName =
296 pCur->vExtra.get<Bu::FString>();
297 next();
298 if( pCur->iToken != tokCloseSquare )
299 tokenError("tokCloseSquare");
300
301 if( !hTokens.has( sName ) )
302 throw ExceptionBase("Only token names may be "
303 "enclosed in square brackets.");
304
305 pr.append(
306 Parser::State(
307 Parser::State::typeTerminalPush,
308 hTokens.get( sName )
309 )
310 );
311 sio << "Added terminal-push " << sName << sio.nl;
312 }
313 break;
314
315 case tokOpenCurly:
316 {
317 next();
318 if( pCur->iToken != tokIdentifier )
319 tokenError("tokIdentifier");
320 Bu::FString sName =
321 pCur->vExtra.get<Bu::FString>();
322 next();
323 if( pCur->iToken != tokCloseCurly )
324 tokenError("tokCloseCurly");
325
326 if( !p.hasReduction( sName ) )
327 p.addReduction( sName );
328
329 pr.append(
330 Parser::State(
331 Parser::State::typeReduction,
332 p.getReductionId( sName )
333 )
334 );
335 sio << "Added reduction " << sName << sio.nl;
336 }
337 break;
338
339 case tokOr:
340 case tokSemiColon:
341 if( bAnything )
342 {
343 nt.addProduction( pr );
344 sio.decIndent();
345 sio << "Closing production." << sio.nl;
346 }
347 else
348 {
349 nt.setCanSkip();
350 sio.decIndent();
351 sio << "Closing empty production." << sio.nl;
352 }
353 return;
354
355 default:
356 tokenError("tokIdentifier, tokOpenSquare, tokOr, "
357 "tokOpenCurly, or tokSemiColon");
358 }
359 }
360 }
361
362private:
363 void next()
364 {
365 delete pCur;
366 pCur = l.nextToken();
367 }
368
369 void tokenError( const FString &s )
370 {
371 throw ExceptionBase( ("Expected " + s + " but found "
372 + l.tokenToString( *pCur ) + ".").getStr() );
373 }
374
375private:
376 typedef Bu::Hash<Bu::FString, int> TokenHash;
377 TokenHash hTokens;
378 BnfLexer &l;
379 BnfLexer::Token *pCur;
380 int iLastToken;
381 Parser p;
382};
383
384int main( int argc, char *argv[] )
385{
386 File fIn( argv[1], File::Read );
387
388 BnfLexer bl( fIn );
389 BnfParser parser( bl );
390
391 parser.parse();
392
393/*
394 for(;;)
395 {
396 Lexer::Token *pTok = bl.nextToken();
397 sio << bl.tokenToString(*pTok);
398 if( pTok->vExtra.isSet() )
399 {
400 sio << " - " << pTok->vExtra;
401 }
402 sio << sio.nl;
403 if( pTok->iToken == tokEos )
404 break;
405 }
406*/
407
408 return 0;
409}
410