From 00ecd458ced768b3b8752cdd421a22244b7adc01 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Sun, 17 Oct 2010 05:50:57 +0000 Subject: Hey! The parser parses now! It's actually a little stupid, I didn't implement lookahead or precedence, but I should be able to do that easily with the next version. I'm treating this more as a proof of concept than a real working model. Although it can handle +, -, (), and = :) --- src/parser.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/parser.h') diff --git a/src/parser.h b/src/parser.h index 5b5d4a8..1679c7f 100644 --- a/src/parser.h +++ b/src/parser.h @@ -34,6 +34,9 @@ namespace Bu */ void popLexer(); + Lexer::Token *popToken(); + void pushToken( Lexer::Token *pTok ); + /** * Execute a parse. */ -- cgit v1.2.3 From 78463c30031a478936b21168a6fc93ae6eeaaeb9 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Mon, 18 Oct 2010 04:38:19 +0000 Subject: Several of these new files will go away, but I didn't want to lose them for now. The parser works! The parser compiler works! It makes parsers! Now we just have to implement post processing, token lookup tables, and storage. --- bnftest | 18 +++ bnftest.2 | 24 +++ src/lexer.h | 4 + src/parser.cpp | 10 ++ src/parser.h | 2 + src/tools/bnfcompile.cpp | 410 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 468 insertions(+) create mode 100644 bnftest create mode 100644 bnftest.2 create mode 100644 src/tools/bnfcompile.cpp (limited to 'src/parser.h') diff --git a/bnftest b/bnftest new file mode 100644 index 0000000..7e61b1a --- /dev/null +++ b/bnftest @@ -0,0 +1,18 @@ +tokens = tokPlus tokMinus tokMult tokDivide tokOpenParen tokCloseParen + tokEquals tokNumber; + +input: input line + | + ; + +line: expr tokEquals {print} + ; + +expr: expr tokPlus expr {add} + | expr tokMinus expr {subtract} + | expr tokMult expr {multiply} + | expr tokDivide expr {divide} + | tokOpenParen expr tokCloseParen + | [tokNumber] + ; + diff --git a/bnftest.2 b/bnftest.2 new file mode 100644 index 0000000..229943b --- /dev/null +++ b/bnftest.2 @@ -0,0 +1,24 @@ +tokens = tokPlus tokMinus tokMult tokDivide tokOpenParen tokCloseParen + tokEquals tokNumber; + +input: line input# + | + ; + +input#: line input# + | + ; + +line: expr tokEquals {print} + ; + +expr: tokOpenParen expr tokCloseParen expr# + | [tokNumber] expr# + ; + +expr#: tokPlus expr {add} expr# + | tokMinus expr {subtract} expr# + | tokMult expr {multiply} expr# + | tokDivide expr {divide} expr# + | + ; diff --git a/src/lexer.h b/src/lexer.h index 2cb5f51..9840afe 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -36,6 +36,10 @@ namespace Bu } TokenType iToken; Bu::Variant vExtra; + int iStartCol; + int iStartRow; + int iEndCol; + int iEndRow; }; virtual Token *nextToken()=0; diff --git a/src/parser.cpp b/src/parser.cpp index 3ce73b6..4ad4ff9 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -205,6 +205,11 @@ int Bu::Parser::getNonTerminalId( const Bu::FString &sName ) return hNonTerminalName.get( sName ); } +bool Bu::Parser::hasNonTerminal( const Bu::FString &sName ) +{ + return hNonTerminalName.has( sName ); +} + int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) { int iId = aReduction.getSize(); @@ -231,6 +236,11 @@ int Bu::Parser::getReductionId( const Bu::FString &sName ) return hReductionName.get( sName ); } +bool Bu::Parser::hasReduction( const Bu::FString &sName ) +{ + return hReductionName.has( sName ); +} + // // Bu::Parser::State // diff --git a/src/parser.h b/src/parser.h index 1679c7f..a925188 100644 --- a/src/parser.h 
+++ b/src/parser.h
@@ -91,11 +91,13 @@ namespace Bu
 		int addNonTerminal( const Bu::FString &sName );
 		void setNonTerminal( const Bu::FString &sName, NonTerminal &nt );
 		int getNonTerminalId( const Bu::FString &sName );
+		bool hasNonTerminal( const Bu::FString &sName );
 
 		int addReduction( const Bu::FString &sName, const Reduction &r );
 		int addReduction( const Bu::FString &sName );
 		void setReduction( const Bu::FString &sName, const Reduction &r );
 		int getReductionId( const Bu::FString &sName );
+		bool hasReduction( const Bu::FString &sName );
 
 	private:
 		bool selectProduction( int iNt, Lexer::Token *ptCur );
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp
new file mode 100644
index 0000000..16e75a5
--- /dev/null
+++ b/src/tools/bnfcompile.cpp
@@ -0,0 +1,410 @@
+#include <bu/sio.h>
+#include <bu/lexer.h>
+#include <bu/parser.h>
+#include <bu/file.h>
+#include <bu/queuebuf.h>
+
+using namespace Bu;
+
+enum TokenType
+{
+    tokIdentifier,
+    tokColon,
+    tokOr,
+    tokSemiColon,
+    tokTokens,
+    tokEquals,
+    tokOpenCurly,
+    tokCloseCurly,
+    tokOpenSquare,
+    tokCloseSquare,
+
+    tokEos=-1
+};
+
+class BnfLexer : public Lexer
+{
+public:
+    BnfLexer( Stream &rSrc ) :
+        rSrc( rSrc )
+    {
+    }
+
+    virtual ~BnfLexer()
+    {
+    }
+
+    virtual Token *nextToken()
+    {
+        char cBuf;
+
+        for(;;)
+        {
+            if( qbIn.getSize() == 0 )
+            {
+                char buf[4096];
+                qbIn.write( buf, rSrc.read( buf, 4096 ) );
+
+                if( rSrc.isEos() && qbIn.getSize() == 0 )
+                    return new Token( tokEos );
+            }
+            qbIn.peek( &cBuf, 1 );
+            if( (cBuf >= 'a' && cBuf <= 'z') ||
+                (cBuf >= 'A' && cBuf <= 'Z') ||
+                (cBuf >= '0' && cBuf <= '9') ||
+                cBuf == '_' )
+            {
+                sBuf.append( cBuf );
+                qbIn.seek( 1 );
+            }
+            else if( sBuf.isSet() )
+            {
+                if( sBuf == "tokens" )
+                {
+                    sBuf.clear();
+                    return new Token( tokTokens );
+                }
+                else
+                {
+                    Token *pRet = new Token( tokIdentifier, sBuf );
+                    sBuf.clear();
+                    return pRet;
+                }
+            }
+            else
+            {
+                switch( cBuf )
+                {
+                    case ' ':
+                    case '\t':
+                    case '\n':
+                    case '\r':
+                        qbIn.seek( 1 );
+                        continue;
+
+                    case ':':
+                        qbIn.seek( 1 );
+                        return new Token( tokColon );
+
+                    case ';':
+                        qbIn.seek( 1 );
+                        return new Token( tokSemiColon );
+
+                    case '|':
+                        qbIn.seek( 1 );
+                        return new Token( tokOr );
+
+                    case '=':
+                        qbIn.seek( 1 );
+                        return new Token( tokEquals );
+
+                    case '[':
+                        qbIn.seek( 1 );
+                        return new Token( tokOpenSquare );
+
+                    case ']':
+                        qbIn.seek( 1 );
+                        return new Token( tokCloseSquare );
+
+                    case '{':
+                        qbIn.seek( 1 );
+                        return new Token( tokOpenCurly );
+
+                    case '}':
+                        qbIn.seek( 1 );
+                        return new Token( tokCloseCurly );
+
+                    default:
+                        throw ExceptionBase("Unexpected character '%c'.",
+                            cBuf );
+                        break;
+                }
+            }
+        }
+    }
+
+    virtual FString tokenToString( const Token &t )
+    {
+        switch( (TokenType)t.iToken )
+        {
+            case tokIdentifier:     return "tokIdentifier";
+            case tokColon:          return "tokColon";
+            case tokOr:             return "tokOr";
+            case tokSemiColon:      return "tokSemiColon";
+            case tokTokens:         return "tokTokens";
+            case tokEquals:         return "tokEquals";
+            case tokOpenCurly:      return "tokOpenCurly";
+            case tokCloseCurly:     return "tokCloseCurly";
+            case tokOpenSquare:     return "tokOpenSquare";
+            case tokCloseSquare:    return "tokCloseSquare";
+            case tokEos:            return "tokEos";
+        }
+
+        return "???";
+    }
+
+private:
+    Stream &rSrc;
+    QueueBuf qbIn;
+    FString sBuf;
+};
+
+class BnfParser
+{
+public:
+    BnfParser( BnfLexer &l ) :
+        l( l ),
+        pCur( NULL ),
+        iLastToken( 0 )
+    {
+    }
+
+    virtual ~BnfParser()
+    {
+        delete pCur;
+        pCur = NULL;
+    }
+
+    void parse()
+    {
+        for(;;)
+        {
+            next();
+            switch( pCur->iToken )
+            {
+                case tokTokens:
+                    tokens();
+                    break;
+
+                case tokIdentifier:
+                    nonTerminal();
+                    break;
+
+                case tokEos:
+                    return;
+                    break;
+
+                default:
+                    tokenError("tokTokens, tokIdentifier, or tokEos");
+            }
+        }
+    }
+
+private:
+    void tokens()
+    {
+        next();
+        if( pCur->iToken != tokEquals )
+            tokenError("tokEquals");
+        for(;;)
+        {
+            next();
+            if( pCur->iToken == tokIdentifier )
+            {
+                hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken );
+                sio << "Added token[" << iLastToken << "]: "
+                    << pCur->vExtra.get<Bu::FString>() << sio.nl;
+            }
+            else if( pCur->iToken == tokSemiColon )
+                break;
+            else
+                tokenError("tokIdentifier or tokSemiColon");
+        }
+    }
+
+    void nonTerminal()
+    {
+        Bu::FString sNtName = pCur->vExtra.get<Bu::FString>();
+        Parser::NonTerminal nt;
+        p.addNonTerminal( sNtName );
+        sio.incIndent();
+        sio << "Created non-terminal: " << sNtName << sio.nl;
+
+        next();
+        if( pCur->iToken != tokColon )
+            tokenError("tokColon");
+        production( nt );
+        for(;;)
+        {
+            switch( pCur->iToken )
+            {
+                case tokOr:
+                    production( nt );
+                    break;
+
+                case tokSemiColon:
+                    p.setNonTerminal( sNtName, nt );
+                    sio.decIndent();
+                    sio << "Closing non-terminal." << sio.nl;
+                    return;
+
+                default:
+                    tokenError("tokOr or tokSemiColon");
+                    break;
+            }
+        }
+    }
+
+    void production( Parser::NonTerminal &nt )
+    {
+        sio.incIndent();
+        sio << "Adding new production:" << sio.nl;
+        Parser::Production pr;
+        bool bAnything = false;
+        for(;;)
+        {
+            next();
+            switch( pCur->iToken )
+            {
+                case tokIdentifier:
+                    {
+                        const Bu::FString &sName =
+                            pCur->vExtra.get<Bu::FString>();
+                        if( hTokens.has( sName ) )
+                        {
+                            pr.append(
+                                Parser::State(
+                                    Parser::State::typeTerminal,
+                                    hTokens.get( sName )
+                                    )
+                                );
+                            sio << "Added terminal " << sName << sio.nl;
+                        }
+                        else
+                        {
+                            if( !p.hasNonTerminal( sName ) )
+                            {
+                                p.addNonTerminal( sName );
+                            }
+                            pr.append(
+                                Parser::State(
+                                    Parser::State::typeNonTerminal,
+                                    p.getNonTerminalId( sName )
+                                    )
+                                );
+                            sio << "Added non-terminal " << sName << sio.nl;
+                        }
+                        bAnything = true;
+                    }
+                    break;
+
+                case tokOpenSquare:
+                    {
+                        next();
+                        if( pCur->iToken != tokIdentifier )
+                            tokenError("tokIdentifier");
+                        Bu::FString sName =
+                            pCur->vExtra.get<Bu::FString>();
+                        next();
+                        if( pCur->iToken != tokCloseSquare )
+                            tokenError("tokCloseSquare");
+
+                        if( !hTokens.has( sName ) )
+                            throw ExceptionBase("Only token names may be "
+                                "enclosed in square brackets.");
+
+                        pr.append(
+                            Parser::State(
+                                Parser::State::typeTerminalPush,
+                                hTokens.get( sName )
+                                )
+                            );
+                        sio << "Added terminal-push " << sName << sio.nl;
+                        bAnything = true;
+                    }
+                    break;
+
+                case tokOpenCurly:
+                    {
+                        next();
+                        if( pCur->iToken != tokIdentifier )
+                            tokenError("tokIdentifier");
+                        Bu::FString sName =
+                            pCur->vExtra.get<Bu::FString>();
+                        next();
+                        if( pCur->iToken != tokCloseCurly )
+                            tokenError("tokCloseCurly");
+
+                        if( !p.hasReduction( sName ) )
+                            p.addReduction( sName );
+
+                        pr.append(
+                            Parser::State(
+                                Parser::State::typeReduction,
+                                p.getReductionId( sName )
+                                )
+                            );
+                        sio << "Added reduction " << sName << sio.nl;
+                        bAnything = true;
+                    }
+                    break;
+
+                case tokOr:
+                case tokSemiColon:
+                    if( bAnything )
+                    {
+                        nt.addProduction( pr );
+                        sio.decIndent();
+                        sio << "Closing production." << sio.nl;
+                    }
+                    else
+                    {
+                        nt.setCanSkip();
+                        sio.decIndent();
+                        sio << "Closing empty production."
+                            << sio.nl;
+                    }
+                    return;
+
+                default:
+                    tokenError("tokIdentifier, tokOpenSquare, tokOr, "
+                        "tokOpenCurly, or tokSemiColon");
+            }
+        }
+    }
+
+private:
+    void next()
+    {
+        delete pCur;
+        pCur = l.nextToken();
+    }
+
+    void tokenError( const FString &s )
+    {
+        throw ExceptionBase( ("Expected " + s + " but found "
+            + l.tokenToString( *pCur ) + ".").getStr() );
+    }
+
+private:
+    typedef Bu::Hash<Bu::FString, int> TokenHash;
+    TokenHash hTokens;
+    BnfLexer &l;
+    BnfLexer::Token *pCur;
+    int iLastToken;
+    Parser p;
+};
+
+int main( int argc, char *argv[] )
+{
+    File fIn( argv[1], File::Read );
+
+    BnfLexer bl( fIn );
+    BnfParser parser( bl );
+
+    parser.parse();
+
+/*
+    for(;;)
+    {
+        Lexer::Token *pTok = bl.nextToken();
+        sio << bl.tokenToString(*pTok);
+        if( pTok->vExtra.isSet() )
+        {
+            sio << " - " << pTok->vExtra;
+        }
+        sio << sio.nl;
+        if( pTok->iToken == tokEos )
+            break;
+    }
+*/
+
+    return 0;
+}
-- cgit v1.2.3
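
A note on the first commit: the parser.h hunk only declares popToken() and pushToken( Lexer::Token *pTok ); their implementation is not part of this log. The following is a minimal, self-contained sketch, with hypothetical names and not the actual Bu::Parser code, of the technique such a pair normally supports: a pushback queue in front of the lexer, which is also the natural hook for the one-token lookahead the commit message says is still missing.

#include <cstdio>
#include <deque>

// Hypothetical stand-in for Bu::Lexer::Token.
struct Token { int iToken; };

class TokenSource
{
public:
    // popToken: hand back the most recently pushed token first,
    // otherwise pull a fresh one from the lexer.
    Token *popToken()
    {
        if( !dPushback.empty() )
        {
            Token *pTok = dPushback.front();
            dPushback.pop_front();
            return pTok;
        }
        return lexNext();
    }

    // pushToken: return a token to the stream so the next popToken()
    // sees it again; that is all a predictive parser needs to peek ahead.
    void pushToken( Token *pTok )
    {
        dPushback.push_front( pTok );
    }

private:
    Token *lexNext()
    {
        // The real tool would call Lexer::nextToken() here.
        static Token tEos = { -1 };
        return &tEos;
    }

    std::deque<Token *> dPushback;
};

int main()
{
    TokenSource ts;
    Token tPlus = { 1 };
    ts.pushToken( &tPlus );                         // put a token back
    std::printf( "%d\n", ts.popToken()->iToken );   // prints 1, not -1
    return 0;
}

A deque rather than a single slot keeps the sketch general; a single slot of pushback is enough for one token of lookahead.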
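On the second commit: bnftest and bnftest.2 are the same calculator grammar, the second with its left recursion removed. A rule like "expr: expr tokPlus expr" would make a top-down parser recurse into expr before consuming anything, so bnftest.2 rewrites each such rule as a non-recursive head plus a primed tail non-terminal (input#, expr#) that either consumes an operator and continues or matches the empty production. In a hand-written recursive-descent evaluator the tail rule is simply a loop. The standalone sketch below is plain C++, not libbu++ code; it reads characters where the real tool works on tokens, and the input string is illustrative. It shows that shape, including the left-to-right, precedence-free evaluation the first commit message mentions.

#include <cstdio>
#include <cstdlib>

static const char *p;   // cursor into the input expression

static double expr();

static void skipSpace() { while( *p == ' ' ) ++p; }

// expr: tokOpenParen expr tokCloseParen expr# | [tokNumber] expr#
// The non-recursive head of the rule: a parenthesized group or a number.
static double primary()
{
    skipSpace();
    if( *p == '(' )
    {
        ++p;
        double d = expr();
        skipSpace();
        if( *p == ')' ) ++p;
        return d;
    }
    char *pEnd;
    double d = std::strtod( p, &pEnd );
    p = pEnd;
    return d;
}

// expr#: tokPlus expr {add} expr# | ... | (empty)
// The primed tail non-terminal becomes a loop; each pass performs one
// reduction, left to right, with no operator precedence.
static double expr()
{
    double d = primary();
    for(;;)
    {
        skipSpace();
        switch( *p )
        {
            case '+': ++p; d += primary(); break;   // {add}
            case '-': ++p; d -= primary(); break;   // {subtract}
            case '*': ++p; d *= primary(); break;   // {multiply}
            case '/': ++p; d /= primary(); break;   // {divide}
            default: return d;                      // empty production
        }
    }
}

int main()
{
    // line: expr tokEquals {print}
    p = "(1 + 2) * 3 - 4 =";
    std::printf( "%g\n", expr() );   // prints 5
    return 0;
}

Each {reduction} in the grammar corresponds to one arm of the switch, and the empty production of expr# is the default arm that ends the loop.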