From fc16fc104a038146c8ab6c2e9fad38e18663a09f Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Tue, 12 Oct 2010 18:10:47 +0000 Subject: It's getting close. I'm not 100% sure abouth this method yet... --- src/lexer.h | 2 +- src/parser.cpp | 160 +++++++++++++++++++++++++++++++++++++++++++++++---- src/parser.h | 13 ++++- src/tools/parser.cpp | 57 +++++++++++++++--- 4 files changed, 211 insertions(+), 21 deletions(-) (limited to 'src') diff --git a/src/lexer.h b/src/lexer.h index 5847269..e8a17b5 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -19,7 +19,7 @@ namespace Bu Lexer(); virtual ~Lexer(); - typedef int32_t TokenType; + typedef int TokenType; class Token { diff --git a/src/parser.cpp b/src/parser.cpp index e8e8ff2..b563691 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -24,21 +24,127 @@ void Bu::Parser::popLexer() void Bu::Parser::parse() { - for(;;) + int iCurNt = iRootNonTerminal; + Lexer::Token *ptCur = sLexer.peek()->nextToken(); + //Lexer::Token *ptNext = sLexer.peek()->nextToken(); + selectProduction( iCurNt, ptCur ); + sio << "Token: " << *ptCur << sio.nl; + + while( !sState.isEmpty() ) { - Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); - sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; - if( pToken->iToken < 0 ) + sio << "Currently: " << *sState.peek() << sio.nl; + switch( (*sState.peek()).eType ) { - delete sLexer.peekPop(); - if( sLexer.isEmpty() ) + case State::typeTerminal: + sio << "terminal: " << ptCur->iToken << " == " + << (*sState.peek()).iIndex << sio.nl; + if( ptCur->iToken == (*sState.peek()).iIndex ) + { + advanceState(); + delete ptCur; + ptCur = sLexer.peek()->nextToken(); + sio << "Token: " << *ptCur << sio.nl; + } + else + { + throw Bu::ExceptionBase("Error parsing code."); + } + break; + + case State::typeTerminalPush: + sio << "terminalpush: " << ptCur->iToken << " == " + << (*sState.peek()).iIndex << sio.nl; + if( ptCur->iToken == (*sState.peek()).iIndex ) + { + advanceState(); + sToken.push( ptCur ); + + ptCur = sLexer.peek()->nextToken(); + sio << "Token: " << *ptCur << sio.nl; + } + else + { + throw Bu::ExceptionBase("Error parsing code."); + } + break; + + case State::typeNonTerminal: + sio << "nonterminal: " << ptCur->iToken << " --> " + << (*sState.peek()).iIndex << sio.nl; + { + int iNt = (*sState.peek()).iIndex; + advanceState(); + if( !selectProduction( iNt, ptCur ) ) + { + throw Bu::ExceptionBase("Error parsing code."); + } + } + break; + + case State::typeReduction: + sio << "reduction" << sio.nl; + aReduction[(*sState.peek()).iIndex]( *this ); + advanceState(); + break; + } + } +} + +bool Bu::Parser::selectProduction( int iNt, Lexer::Token *ptCur ) +{ + NonTerminal &nt = aNonTerminal[iNt]; + int j = 0; + for( NonTerminal::ProductionList::iterator i = nt.lProduction.begin(); + i; i++,j++ ) + { + if( (*i).isEmpty() ) + continue; + if( (*i).first().eType == State::typeTerminal || + (*i).first().eType == State::typeTerminalPush ) + { + if( (*i).first().iIndex == ptCur->iToken ) { - delete pToken; - return; + sState.push( (*i).begin() ); + sio << "Pushing production " << j << " from nt " << iNt + << sio.nl; + return true; } } - delete pToken; - } + else if( (*i).first().eType == State::typeNonTerminal ) + { + sState.push( (*i).begin() ); + sio << "Pushing production " << j << " from nt " << iNt + << " as test." << sio.nl; + if( !selectProduction( (*i).first().iIndex, ptCur ) ) + { + sState.pop(); + sio << "Production " << j << " from nt " << iNt + << " didn't work out." << sio.nl; + } + else + { + return true; + } + } + } + if( nt.bCanSkip ) + return true; + return false; +} + +void Bu::Parser::advanceState() +{ + if( sState.isEmpty() ) + return; + + sState.peek()++; + if( !sState.peek() ) + { + sState.pop(); + sio << "State advanced, End of production." << sio.nl; + return; + } + sio << "State advanced, now: " << *(sState.peek()) << sio.nl; } void Bu::Parser::setRootNonTerminal( int iRoot ) @@ -56,6 +162,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName, NonTerminal &nt ) int iId = aNonTerminal.getSize(); aNonTerminal.append( nt ); hNonTerminalName.insert( sName, iId ); + sio << "nt '" << sName << "' = " << iId << sio.nl; return iId; } @@ -64,6 +171,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName ) int iId = aNonTerminal.getSize(); aNonTerminal.append( NonTerminal() ); hNonTerminalName.insert( sName, iId ); + sio << "nt '" << sName << "' = " << iId << sio.nl; return iId; } @@ -121,7 +229,8 @@ Bu::Parser::State::~State() // Bu::Parser::NonTerminal // -Bu::Parser::NonTerminal::NonTerminal() +Bu::Parser::NonTerminal::NonTerminal() : + bCanSkip( false ) { } @@ -134,3 +243,32 @@ void Bu::Parser::NonTerminal::addProduction( Production p ) lProduction.append( p ); } +void Bu::Parser::NonTerminal::setCanSkip() +{ + bCanSkip = true; +} + +Bu::Formatter &Bu::operator<<( Bu::Formatter &f, Bu::Parser::State::Type t ) +{ + switch( t ) + { + case Bu::Parser::State::typeTerminal: + return f << "typeTerminal"; + + case Bu::Parser::State::typeTerminalPush: + return f << "typeTerminalPush"; + + case Bu::Parser::State::typeNonTerminal: + return f << "typeNonTerminal"; + + case Bu::Parser::State::typeReduction: + return f << "typeReduction"; + } + return f << "***error***"; +} + +Bu::Formatter &Bu::operator<<( Bu::Formatter &f, const Bu::Parser::State &s ) +{ + return f << "{" << s.eType << ": " << s.iIndex << "}"; +} + diff --git a/src/parser.h b/src/parser.h index e720e54..5b5d4a8 100644 --- a/src/parser.h +++ b/src/parser.h @@ -62,7 +62,7 @@ namespace Bu State( Type eType, int iIndex ); virtual ~State(); - private: + //private: Type eType; int iIndex; }; @@ -76,10 +76,12 @@ namespace Bu virtual ~NonTerminal(); void addProduction( Production p ); + void setCanSkip(); - private: +// private: typedef Bu::List ProductionList; ProductionList lProduction; + bool bCanSkip; }; int addNonTerminal( const Bu::FString &sName, NonTerminal &nt ); @@ -92,6 +94,10 @@ namespace Bu void setReduction( const Bu::FString &sName, const Reduction &r ); int getReductionId( const Bu::FString &sName ); + private: + bool selectProduction( int iNt, Lexer::Token *ptCur ); + void advanceState(); + private: typedef Bu::List LexerStack; typedef Bu::List TokenStack; @@ -109,6 +115,9 @@ namespace Bu NameIndexHash hNonTerminalName; int iRootNonTerminal; }; +Bu::Formatter &operator<<( Bu::Formatter &f, Bu::Parser::State::Type t ); +Bu::Formatter &operator<<( Bu::Formatter &f, const Bu::Parser::State &s ); }; + #endif diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp index e4fc95f..76d4a72 100644 --- a/src/tools/parser.cpp +++ b/src/tools/parser.cpp @@ -157,15 +157,44 @@ void redPrint( Bu::Parser &p ) { } +/* Basic grammer example: + * + * input: expr '=' + * ; + * + * expr: expr '+' expr + * | '(' expr ')' + * | NUMBER + * ; + * + * The problem is, that we can't actually make something left hand recursive, + * so we break it into two exprs: + * + * expr': '(' expr ')' + * | NUMBER + * ; + * + * expr: expr' expr'' + * ; + * + * expr'': '+' expr + * | + * ; + * + * 5 + 5 + 5 = + */ + int main( int argc, char *argv[] ) { File fIn( argv[1], File::Read ); Parser p; + p.addNonTerminal("expr"); + p.addNonTerminal("expr'"); + p.addNonTerminal("expr''"); { Parser::NonTerminal nt; - int iSelf = p.addNonTerminal("expr"); nt.addProduction( Parser::Production( Parser::State( @@ -175,7 +204,7 @@ int main( int argc, char *argv[] ) ).append( Parser::State( Parser::State::typeNonTerminal, - iSelf + p.getNonTerminalId("expr") ) ).append( Parser::State( @@ -185,9 +214,11 @@ int main( int argc, char *argv[] ) ) ); nt.addProduction( - Parser::Production() + Parser::Production( + ) ); - p.addNonTerminal( "expr", nt ); + nt.setCanSkip(); + p.setNonTerminal("expr''", nt ); } { Parser::NonTerminal nt; @@ -197,14 +228,26 @@ int main( int argc, char *argv[] ) Parser::State::typeTerminalPush, tokNumber ) + ) + ); + p.setNonTerminal("expr'", nt ); + } + { + Parser::NonTerminal nt; + nt.addProduction( + Parser::Production( + Parser::State( + Parser::State::typeNonTerminal, + p.getNonTerminalId("expr'") + ) ).append( Parser::State( Parser::State::typeNonTerminal, - p.getNonTerminalId("expr") + p.getNonTerminalId("expr''") ) ) ); - p.addNonTerminal( "expr'", nt ); + p.setNonTerminal("expr", nt ); } { Parser::NonTerminal nt; @@ -212,7 +255,7 @@ int main( int argc, char *argv[] ) Parser::Production( Parser::State( Parser::State::typeNonTerminal, - p.getNonTerminalId("expr'") + p.getNonTerminalId("expr") ) ).append( Parser::State( -- cgit v1.2.3