diff options
| -rw-r--r-- | src/lexer.h | 2 | ||||
| -rw-r--r-- | src/parser.cpp | 160 | ||||
| -rw-r--r-- | src/parser.h | 13 | ||||
| -rw-r--r-- | src/tools/parser.cpp | 57 |
4 files changed, 211 insertions, 21 deletions
diff --git a/src/lexer.h b/src/lexer.h index 5847269..e8a17b5 100644 --- a/src/lexer.h +++ b/src/lexer.h | |||
| @@ -19,7 +19,7 @@ namespace Bu | |||
| 19 | Lexer(); | 19 | Lexer(); |
| 20 | virtual ~Lexer(); | 20 | virtual ~Lexer(); |
| 21 | 21 | ||
| 22 | typedef int32_t TokenType; | 22 | typedef int TokenType; |
| 23 | 23 | ||
| 24 | class Token | 24 | class Token |
| 25 | { | 25 | { |
diff --git a/src/parser.cpp b/src/parser.cpp index e8e8ff2..b563691 100644 --- a/src/parser.cpp +++ b/src/parser.cpp | |||
| @@ -24,21 +24,127 @@ void Bu::Parser::popLexer() | |||
| 24 | 24 | ||
| 25 | void Bu::Parser::parse() | 25 | void Bu::Parser::parse() |
| 26 | { | 26 | { |
| 27 | for(;;) | 27 | int iCurNt = iRootNonTerminal; |
| 28 | Lexer::Token *ptCur = sLexer.peek()->nextToken(); | ||
| 29 | //Lexer::Token *ptNext = sLexer.peek()->nextToken(); | ||
| 30 | selectProduction( iCurNt, ptCur ); | ||
| 31 | sio << "Token: " << *ptCur << sio.nl; | ||
| 32 | |||
| 33 | while( !sState.isEmpty() ) | ||
| 28 | { | 34 | { |
| 29 | Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); | 35 | sio << "Currently: " << *sState.peek() << sio.nl; |
| 30 | sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; | 36 | switch( (*sState.peek()).eType ) |
| 31 | if( pToken->iToken < 0 ) | ||
| 32 | { | 37 | { |
| 33 | delete sLexer.peekPop(); | 38 | case State::typeTerminal: |
| 34 | if( sLexer.isEmpty() ) | 39 | sio << "terminal: " << ptCur->iToken << " == " |
| 40 | << (*sState.peek()).iIndex << sio.nl; | ||
| 41 | if( ptCur->iToken == (*sState.peek()).iIndex ) | ||
| 42 | { | ||
| 43 | advanceState(); | ||
| 44 | delete ptCur; | ||
| 45 | ptCur = sLexer.peek()->nextToken(); | ||
| 46 | sio << "Token: " << *ptCur << sio.nl; | ||
| 47 | } | ||
| 48 | else | ||
| 49 | { | ||
| 50 | throw Bu::ExceptionBase("Error parsing code."); | ||
| 51 | } | ||
| 52 | break; | ||
| 53 | |||
| 54 | case State::typeTerminalPush: | ||
| 55 | sio << "terminalpush: " << ptCur->iToken << " == " | ||
| 56 | << (*sState.peek()).iIndex << sio.nl; | ||
| 57 | if( ptCur->iToken == (*sState.peek()).iIndex ) | ||
| 58 | { | ||
| 59 | advanceState(); | ||
| 60 | sToken.push( ptCur ); | ||
| 61 | |||
| 62 | ptCur = sLexer.peek()->nextToken(); | ||
| 63 | sio << "Token: " << *ptCur << sio.nl; | ||
| 64 | } | ||
| 65 | else | ||
| 66 | { | ||
| 67 | throw Bu::ExceptionBase("Error parsing code."); | ||
| 68 | } | ||
| 69 | break; | ||
| 70 | |||
| 71 | case State::typeNonTerminal: | ||
| 72 | sio << "nonterminal: " << ptCur->iToken << " --> " | ||
| 73 | << (*sState.peek()).iIndex << sio.nl; | ||
| 74 | { | ||
| 75 | int iNt = (*sState.peek()).iIndex; | ||
| 76 | advanceState(); | ||
| 77 | if( !selectProduction( iNt, ptCur ) ) | ||
| 78 | { | ||
| 79 | throw Bu::ExceptionBase("Error parsing code."); | ||
| 80 | } | ||
| 81 | } | ||
| 82 | break; | ||
| 83 | |||
| 84 | case State::typeReduction: | ||
| 85 | sio << "reduction" << sio.nl; | ||
| 86 | aReduction[(*sState.peek()).iIndex]( *this ); | ||
| 87 | advanceState(); | ||
| 88 | break; | ||
| 89 | } | ||
| 90 | } | ||
| 91 | } | ||
| 92 | |||
| 93 | bool Bu::Parser::selectProduction( int iNt, Lexer::Token *ptCur ) | ||
| 94 | { | ||
| 95 | NonTerminal &nt = aNonTerminal[iNt]; | ||
| 96 | int j = 0; | ||
| 97 | for( NonTerminal::ProductionList::iterator i = nt.lProduction.begin(); | ||
| 98 | i; i++,j++ ) | ||
| 99 | { | ||
| 100 | if( (*i).isEmpty() ) | ||
| 101 | continue; | ||
| 102 | if( (*i).first().eType == State::typeTerminal || | ||
| 103 | (*i).first().eType == State::typeTerminalPush ) | ||
| 104 | { | ||
| 105 | if( (*i).first().iIndex == ptCur->iToken ) | ||
| 35 | { | 106 | { |
| 36 | delete pToken; | 107 | sState.push( (*i).begin() ); |
| 37 | return; | 108 | sio << "Pushing production " << j << " from nt " << iNt |
| 109 | << sio.nl; | ||
| 110 | return true; | ||
| 38 | } | 111 | } |
| 39 | } | 112 | } |
| 40 | delete pToken; | 113 | else if( (*i).first().eType == State::typeNonTerminal ) |
| 41 | } | 114 | { |
| 115 | sState.push( (*i).begin() ); | ||
| 116 | sio << "Pushing production " << j << " from nt " << iNt | ||
| 117 | << " as test." << sio.nl; | ||
| 118 | if( !selectProduction( (*i).first().iIndex, ptCur ) ) | ||
| 119 | { | ||
| 120 | sState.pop(); | ||
| 121 | sio << "Production " << j << " from nt " << iNt | ||
| 122 | << " didn't work out." << sio.nl; | ||
| 123 | } | ||
| 124 | else | ||
| 125 | { | ||
| 126 | return true; | ||
| 127 | } | ||
| 128 | } | ||
| 129 | } | ||
| 130 | if( nt.bCanSkip ) | ||
| 131 | return true; | ||
| 132 | return false; | ||
| 133 | } | ||
| 134 | |||
| 135 | void Bu::Parser::advanceState() | ||
| 136 | { | ||
| 137 | if( sState.isEmpty() ) | ||
| 138 | return; | ||
| 139 | |||
| 140 | sState.peek()++; | ||
| 141 | if( !sState.peek() ) | ||
| 142 | { | ||
| 143 | sState.pop(); | ||
| 144 | sio << "State advanced, End of production." << sio.nl; | ||
| 145 | return; | ||
| 146 | } | ||
| 147 | sio << "State advanced, now: " << *(sState.peek()) << sio.nl; | ||
| 42 | } | 148 | } |
| 43 | 149 | ||
| 44 | void Bu::Parser::setRootNonTerminal( int iRoot ) | 150 | void Bu::Parser::setRootNonTerminal( int iRoot ) |
| @@ -56,6 +162,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName, NonTerminal &nt ) | |||
| 56 | int iId = aNonTerminal.getSize(); | 162 | int iId = aNonTerminal.getSize(); |
| 57 | aNonTerminal.append( nt ); | 163 | aNonTerminal.append( nt ); |
| 58 | hNonTerminalName.insert( sName, iId ); | 164 | hNonTerminalName.insert( sName, iId ); |
| 165 | sio << "nt '" << sName << "' = " << iId << sio.nl; | ||
| 59 | return iId; | 166 | return iId; |
| 60 | } | 167 | } |
| 61 | 168 | ||
| @@ -64,6 +171,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName ) | |||
| 64 | int iId = aNonTerminal.getSize(); | 171 | int iId = aNonTerminal.getSize(); |
| 65 | aNonTerminal.append( NonTerminal() ); | 172 | aNonTerminal.append( NonTerminal() ); |
| 66 | hNonTerminalName.insert( sName, iId ); | 173 | hNonTerminalName.insert( sName, iId ); |
| 174 | sio << "nt '" << sName << "' = " << iId << sio.nl; | ||
| 67 | return iId; | 175 | return iId; |
| 68 | } | 176 | } |
| 69 | 177 | ||
| @@ -121,7 +229,8 @@ Bu::Parser::State::~State() | |||
| 121 | // Bu::Parser::NonTerminal | 229 | // Bu::Parser::NonTerminal |
| 122 | // | 230 | // |
| 123 | 231 | ||
| 124 | Bu::Parser::NonTerminal::NonTerminal() | 232 | Bu::Parser::NonTerminal::NonTerminal() : |
| 233 | bCanSkip( false ) | ||
| 125 | { | 234 | { |
| 126 | } | 235 | } |
| 127 | 236 | ||
| @@ -134,3 +243,32 @@ void Bu::Parser::NonTerminal::addProduction( Production p ) | |||
| 134 | lProduction.append( p ); | 243 | lProduction.append( p ); |
| 135 | } | 244 | } |
| 136 | 245 | ||
| 246 | void Bu::Parser::NonTerminal::setCanSkip() | ||
| 247 | { | ||
| 248 | bCanSkip = true; | ||
| 249 | } | ||
| 250 | |||
| 251 | Bu::Formatter &Bu::operator<<( Bu::Formatter &f, Bu::Parser::State::Type t ) | ||
| 252 | { | ||
| 253 | switch( t ) | ||
| 254 | { | ||
| 255 | case Bu::Parser::State::typeTerminal: | ||
| 256 | return f << "typeTerminal"; | ||
| 257 | |||
| 258 | case Bu::Parser::State::typeTerminalPush: | ||
| 259 | return f << "typeTerminalPush"; | ||
| 260 | |||
| 261 | case Bu::Parser::State::typeNonTerminal: | ||
| 262 | return f << "typeNonTerminal"; | ||
| 263 | |||
| 264 | case Bu::Parser::State::typeReduction: | ||
| 265 | return f << "typeReduction"; | ||
| 266 | } | ||
| 267 | return f << "***error***"; | ||
| 268 | } | ||
| 269 | |||
| 270 | Bu::Formatter &Bu::operator<<( Bu::Formatter &f, const Bu::Parser::State &s ) | ||
| 271 | { | ||
| 272 | return f << "{" << s.eType << ": " << s.iIndex << "}"; | ||
| 273 | } | ||
| 274 | |||
diff --git a/src/parser.h b/src/parser.h index e720e54..5b5d4a8 100644 --- a/src/parser.h +++ b/src/parser.h | |||
| @@ -62,7 +62,7 @@ namespace Bu | |||
| 62 | State( Type eType, int iIndex ); | 62 | State( Type eType, int iIndex ); |
| 63 | virtual ~State(); | 63 | virtual ~State(); |
| 64 | 64 | ||
| 65 | private: | 65 | //private: |
| 66 | Type eType; | 66 | Type eType; |
| 67 | int iIndex; | 67 | int iIndex; |
| 68 | }; | 68 | }; |
| @@ -76,10 +76,12 @@ namespace Bu | |||
| 76 | virtual ~NonTerminal(); | 76 | virtual ~NonTerminal(); |
| 77 | 77 | ||
| 78 | void addProduction( Production p ); | 78 | void addProduction( Production p ); |
| 79 | void setCanSkip(); | ||
| 79 | 80 | ||
| 80 | private: | 81 | // private: |
| 81 | typedef Bu::List<Production> ProductionList; | 82 | typedef Bu::List<Production> ProductionList; |
| 82 | ProductionList lProduction; | 83 | ProductionList lProduction; |
| 84 | bool bCanSkip; | ||
| 83 | }; | 85 | }; |
| 84 | 86 | ||
| 85 | int addNonTerminal( const Bu::FString &sName, NonTerminal &nt ); | 87 | int addNonTerminal( const Bu::FString &sName, NonTerminal &nt ); |
| @@ -93,6 +95,10 @@ namespace Bu | |||
| 93 | int getReductionId( const Bu::FString &sName ); | 95 | int getReductionId( const Bu::FString &sName ); |
| 94 | 96 | ||
| 95 | private: | 97 | private: |
| 98 | bool selectProduction( int iNt, Lexer::Token *ptCur ); | ||
| 99 | void advanceState(); | ||
| 100 | |||
| 101 | private: | ||
| 96 | typedef Bu::List<Lexer *> LexerStack; | 102 | typedef Bu::List<Lexer *> LexerStack; |
| 97 | typedef Bu::List<Lexer::Token *> TokenStack; | 103 | typedef Bu::List<Lexer::Token *> TokenStack; |
| 98 | typedef Bu::List<Production::const_iterator> StateStack; | 104 | typedef Bu::List<Production::const_iterator> StateStack; |
| @@ -109,6 +115,9 @@ namespace Bu | |||
| 109 | NameIndexHash hNonTerminalName; | 115 | NameIndexHash hNonTerminalName; |
| 110 | int iRootNonTerminal; | 116 | int iRootNonTerminal; |
| 111 | }; | 117 | }; |
| 118 | Bu::Formatter &operator<<( Bu::Formatter &f, Bu::Parser::State::Type t ); | ||
| 119 | Bu::Formatter &operator<<( Bu::Formatter &f, const Bu::Parser::State &s ); | ||
| 112 | }; | 120 | }; |
| 113 | 121 | ||
| 122 | |||
| 114 | #endif | 123 | #endif |
diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp index e4fc95f..76d4a72 100644 --- a/src/tools/parser.cpp +++ b/src/tools/parser.cpp | |||
| @@ -157,15 +157,44 @@ void redPrint( Bu::Parser &p ) | |||
| 157 | { | 157 | { |
| 158 | } | 158 | } |
| 159 | 159 | ||
| 160 | /* Basic grammer example: | ||
| 161 | * | ||
| 162 | * input: expr '=' | ||
| 163 | * ; | ||
| 164 | * | ||
| 165 | * expr: expr '+' expr | ||
| 166 | * | '(' expr ')' | ||
| 167 | * | NUMBER | ||
| 168 | * ; | ||
| 169 | * | ||
| 170 | * The problem is, that we can't actually make something left hand recursive, | ||
| 171 | * so we break it into two exprs: | ||
| 172 | * | ||
| 173 | * expr': '(' expr ')' | ||
| 174 | * | NUMBER | ||
| 175 | * ; | ||
| 176 | * | ||
| 177 | * expr: expr' expr'' | ||
| 178 | * ; | ||
| 179 | * | ||
| 180 | * expr'': '+' expr | ||
| 181 | * | | ||
| 182 | * ; | ||
| 183 | * | ||
| 184 | * 5 + 5 + 5 = | ||
| 185 | */ | ||
| 186 | |||
| 160 | int main( int argc, char *argv[] ) | 187 | int main( int argc, char *argv[] ) |
| 161 | { | 188 | { |
| 162 | File fIn( argv[1], File::Read ); | 189 | File fIn( argv[1], File::Read ); |
| 163 | 190 | ||
| 164 | Parser p; | 191 | Parser p; |
| 165 | 192 | ||
| 193 | p.addNonTerminal("expr"); | ||
| 194 | p.addNonTerminal("expr'"); | ||
| 195 | p.addNonTerminal("expr''"); | ||
| 166 | { | 196 | { |
| 167 | Parser::NonTerminal nt; | 197 | Parser::NonTerminal nt; |
| 168 | int iSelf = p.addNonTerminal("expr"); | ||
| 169 | nt.addProduction( | 198 | nt.addProduction( |
| 170 | Parser::Production( | 199 | Parser::Production( |
| 171 | Parser::State( | 200 | Parser::State( |
| @@ -175,7 +204,7 @@ int main( int argc, char *argv[] ) | |||
| 175 | ).append( | 204 | ).append( |
| 176 | Parser::State( | 205 | Parser::State( |
| 177 | Parser::State::typeNonTerminal, | 206 | Parser::State::typeNonTerminal, |
| 178 | iSelf | 207 | p.getNonTerminalId("expr") |
| 179 | ) | 208 | ) |
| 180 | ).append( | 209 | ).append( |
| 181 | Parser::State( | 210 | Parser::State( |
| @@ -185,9 +214,11 @@ int main( int argc, char *argv[] ) | |||
| 185 | ) | 214 | ) |
| 186 | ); | 215 | ); |
| 187 | nt.addProduction( | 216 | nt.addProduction( |
| 188 | Parser::Production() | 217 | Parser::Production( |
| 218 | ) | ||
| 189 | ); | 219 | ); |
| 190 | p.addNonTerminal( "expr", nt ); | 220 | nt.setCanSkip(); |
| 221 | p.setNonTerminal("expr''", nt ); | ||
| 191 | } | 222 | } |
| 192 | { | 223 | { |
| 193 | Parser::NonTerminal nt; | 224 | Parser::NonTerminal nt; |
| @@ -197,14 +228,26 @@ int main( int argc, char *argv[] ) | |||
| 197 | Parser::State::typeTerminalPush, | 228 | Parser::State::typeTerminalPush, |
| 198 | tokNumber | 229 | tokNumber |
| 199 | ) | 230 | ) |
| 231 | ) | ||
| 232 | ); | ||
| 233 | p.setNonTerminal("expr'", nt ); | ||
| 234 | } | ||
| 235 | { | ||
| 236 | Parser::NonTerminal nt; | ||
| 237 | nt.addProduction( | ||
| 238 | Parser::Production( | ||
| 239 | Parser::State( | ||
| 240 | Parser::State::typeNonTerminal, | ||
| 241 | p.getNonTerminalId("expr'") | ||
| 242 | ) | ||
| 200 | ).append( | 243 | ).append( |
| 201 | Parser::State( | 244 | Parser::State( |
| 202 | Parser::State::typeNonTerminal, | 245 | Parser::State::typeNonTerminal, |
| 203 | p.getNonTerminalId("expr") | 246 | p.getNonTerminalId("expr''") |
| 204 | ) | 247 | ) |
| 205 | ) | 248 | ) |
| 206 | ); | 249 | ); |
| 207 | p.addNonTerminal( "expr'", nt ); | 250 | p.setNonTerminal("expr", nt ); |
| 208 | } | 251 | } |
| 209 | { | 252 | { |
| 210 | Parser::NonTerminal nt; | 253 | Parser::NonTerminal nt; |
| @@ -212,7 +255,7 @@ int main( int argc, char *argv[] ) | |||
| 212 | Parser::Production( | 255 | Parser::Production( |
| 213 | Parser::State( | 256 | Parser::State( |
| 214 | Parser::State::typeNonTerminal, | 257 | Parser::State::typeNonTerminal, |
| 215 | p.getNonTerminalId("expr'") | 258 | p.getNonTerminalId("expr") |
| 216 | ) | 259 | ) |
| 217 | ).append( | 260 | ).append( |
| 218 | Parser::State( | 261 | Parser::State( |
