diff options
author | Mike Buland <eichlan@xagasoft.com> | 2010-10-12 18:10:47 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2010-10-12 18:10:47 +0000 |
commit | fc16fc104a038146c8ab6c2e9fad38e18663a09f (patch) | |
tree | b8b50c7349ebd2869ee8599be89c82705235fd2a /src | |
parent | 0981b9d6a12bd7aadbf9286459e033ac1a2ba910 (diff) | |
download | libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.tar.gz libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.tar.bz2 libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.tar.xz libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.zip |
It's getting close. I'm not 100% sure abouth this method yet...
Diffstat (limited to 'src')
-rw-r--r-- | src/lexer.h | 2 | ||||
-rw-r--r-- | src/parser.cpp | 160 | ||||
-rw-r--r-- | src/parser.h | 13 | ||||
-rw-r--r-- | src/tools/parser.cpp | 57 |
4 files changed, 211 insertions, 21 deletions
diff --git a/src/lexer.h b/src/lexer.h index 5847269..e8a17b5 100644 --- a/src/lexer.h +++ b/src/lexer.h | |||
@@ -19,7 +19,7 @@ namespace Bu | |||
19 | Lexer(); | 19 | Lexer(); |
20 | virtual ~Lexer(); | 20 | virtual ~Lexer(); |
21 | 21 | ||
22 | typedef int32_t TokenType; | 22 | typedef int TokenType; |
23 | 23 | ||
24 | class Token | 24 | class Token |
25 | { | 25 | { |
diff --git a/src/parser.cpp b/src/parser.cpp index e8e8ff2..b563691 100644 --- a/src/parser.cpp +++ b/src/parser.cpp | |||
@@ -24,21 +24,127 @@ void Bu::Parser::popLexer() | |||
24 | 24 | ||
25 | void Bu::Parser::parse() | 25 | void Bu::Parser::parse() |
26 | { | 26 | { |
27 | for(;;) | 27 | int iCurNt = iRootNonTerminal; |
28 | Lexer::Token *ptCur = sLexer.peek()->nextToken(); | ||
29 | //Lexer::Token *ptNext = sLexer.peek()->nextToken(); | ||
30 | selectProduction( iCurNt, ptCur ); | ||
31 | sio << "Token: " << *ptCur << sio.nl; | ||
32 | |||
33 | while( !sState.isEmpty() ) | ||
28 | { | 34 | { |
29 | Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); | 35 | sio << "Currently: " << *sState.peek() << sio.nl; |
30 | sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; | 36 | switch( (*sState.peek()).eType ) |
31 | if( pToken->iToken < 0 ) | ||
32 | { | 37 | { |
33 | delete sLexer.peekPop(); | 38 | case State::typeTerminal: |
34 | if( sLexer.isEmpty() ) | 39 | sio << "terminal: " << ptCur->iToken << " == " |
40 | << (*sState.peek()).iIndex << sio.nl; | ||
41 | if( ptCur->iToken == (*sState.peek()).iIndex ) | ||
42 | { | ||
43 | advanceState(); | ||
44 | delete ptCur; | ||
45 | ptCur = sLexer.peek()->nextToken(); | ||
46 | sio << "Token: " << *ptCur << sio.nl; | ||
47 | } | ||
48 | else | ||
49 | { | ||
50 | throw Bu::ExceptionBase("Error parsing code."); | ||
51 | } | ||
52 | break; | ||
53 | |||
54 | case State::typeTerminalPush: | ||
55 | sio << "terminalpush: " << ptCur->iToken << " == " | ||
56 | << (*sState.peek()).iIndex << sio.nl; | ||
57 | if( ptCur->iToken == (*sState.peek()).iIndex ) | ||
58 | { | ||
59 | advanceState(); | ||
60 | sToken.push( ptCur ); | ||
61 | |||
62 | ptCur = sLexer.peek()->nextToken(); | ||
63 | sio << "Token: " << *ptCur << sio.nl; | ||
64 | } | ||
65 | else | ||
66 | { | ||
67 | throw Bu::ExceptionBase("Error parsing code."); | ||
68 | } | ||
69 | break; | ||
70 | |||
71 | case State::typeNonTerminal: | ||
72 | sio << "nonterminal: " << ptCur->iToken << " --> " | ||
73 | << (*sState.peek()).iIndex << sio.nl; | ||
74 | { | ||
75 | int iNt = (*sState.peek()).iIndex; | ||
76 | advanceState(); | ||
77 | if( !selectProduction( iNt, ptCur ) ) | ||
78 | { | ||
79 | throw Bu::ExceptionBase("Error parsing code."); | ||
80 | } | ||
81 | } | ||
82 | break; | ||
83 | |||
84 | case State::typeReduction: | ||
85 | sio << "reduction" << sio.nl; | ||
86 | aReduction[(*sState.peek()).iIndex]( *this ); | ||
87 | advanceState(); | ||
88 | break; | ||
89 | } | ||
90 | } | ||
91 | } | ||
92 | |||
93 | bool Bu::Parser::selectProduction( int iNt, Lexer::Token *ptCur ) | ||
94 | { | ||
95 | NonTerminal &nt = aNonTerminal[iNt]; | ||
96 | int j = 0; | ||
97 | for( NonTerminal::ProductionList::iterator i = nt.lProduction.begin(); | ||
98 | i; i++,j++ ) | ||
99 | { | ||
100 | if( (*i).isEmpty() ) | ||
101 | continue; | ||
102 | if( (*i).first().eType == State::typeTerminal || | ||
103 | (*i).first().eType == State::typeTerminalPush ) | ||
104 | { | ||
105 | if( (*i).first().iIndex == ptCur->iToken ) | ||
35 | { | 106 | { |
36 | delete pToken; | 107 | sState.push( (*i).begin() ); |
37 | return; | 108 | sio << "Pushing production " << j << " from nt " << iNt |
109 | << sio.nl; | ||
110 | return true; | ||
38 | } | 111 | } |
39 | } | 112 | } |
40 | delete pToken; | 113 | else if( (*i).first().eType == State::typeNonTerminal ) |
41 | } | 114 | { |
115 | sState.push( (*i).begin() ); | ||
116 | sio << "Pushing production " << j << " from nt " << iNt | ||
117 | << " as test." << sio.nl; | ||
118 | if( !selectProduction( (*i).first().iIndex, ptCur ) ) | ||
119 | { | ||
120 | sState.pop(); | ||
121 | sio << "Production " << j << " from nt " << iNt | ||
122 | << " didn't work out." << sio.nl; | ||
123 | } | ||
124 | else | ||
125 | { | ||
126 | return true; | ||
127 | } | ||
128 | } | ||
129 | } | ||
130 | if( nt.bCanSkip ) | ||
131 | return true; | ||
132 | return false; | ||
133 | } | ||
134 | |||
135 | void Bu::Parser::advanceState() | ||
136 | { | ||
137 | if( sState.isEmpty() ) | ||
138 | return; | ||
139 | |||
140 | sState.peek()++; | ||
141 | if( !sState.peek() ) | ||
142 | { | ||
143 | sState.pop(); | ||
144 | sio << "State advanced, End of production." << sio.nl; | ||
145 | return; | ||
146 | } | ||
147 | sio << "State advanced, now: " << *(sState.peek()) << sio.nl; | ||
42 | } | 148 | } |
43 | 149 | ||
44 | void Bu::Parser::setRootNonTerminal( int iRoot ) | 150 | void Bu::Parser::setRootNonTerminal( int iRoot ) |
@@ -56,6 +162,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName, NonTerminal &nt ) | |||
56 | int iId = aNonTerminal.getSize(); | 162 | int iId = aNonTerminal.getSize(); |
57 | aNonTerminal.append( nt ); | 163 | aNonTerminal.append( nt ); |
58 | hNonTerminalName.insert( sName, iId ); | 164 | hNonTerminalName.insert( sName, iId ); |
165 | sio << "nt '" << sName << "' = " << iId << sio.nl; | ||
59 | return iId; | 166 | return iId; |
60 | } | 167 | } |
61 | 168 | ||
@@ -64,6 +171,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName ) | |||
64 | int iId = aNonTerminal.getSize(); | 171 | int iId = aNonTerminal.getSize(); |
65 | aNonTerminal.append( NonTerminal() ); | 172 | aNonTerminal.append( NonTerminal() ); |
66 | hNonTerminalName.insert( sName, iId ); | 173 | hNonTerminalName.insert( sName, iId ); |
174 | sio << "nt '" << sName << "' = " << iId << sio.nl; | ||
67 | return iId; | 175 | return iId; |
68 | } | 176 | } |
69 | 177 | ||
@@ -121,7 +229,8 @@ Bu::Parser::State::~State() | |||
121 | // Bu::Parser::NonTerminal | 229 | // Bu::Parser::NonTerminal |
122 | // | 230 | // |
123 | 231 | ||
124 | Bu::Parser::NonTerminal::NonTerminal() | 232 | Bu::Parser::NonTerminal::NonTerminal() : |
233 | bCanSkip( false ) | ||
125 | { | 234 | { |
126 | } | 235 | } |
127 | 236 | ||
@@ -134,3 +243,32 @@ void Bu::Parser::NonTerminal::addProduction( Production p ) | |||
134 | lProduction.append( p ); | 243 | lProduction.append( p ); |
135 | } | 244 | } |
136 | 245 | ||
246 | void Bu::Parser::NonTerminal::setCanSkip() | ||
247 | { | ||
248 | bCanSkip = true; | ||
249 | } | ||
250 | |||
251 | Bu::Formatter &Bu::operator<<( Bu::Formatter &f, Bu::Parser::State::Type t ) | ||
252 | { | ||
253 | switch( t ) | ||
254 | { | ||
255 | case Bu::Parser::State::typeTerminal: | ||
256 | return f << "typeTerminal"; | ||
257 | |||
258 | case Bu::Parser::State::typeTerminalPush: | ||
259 | return f << "typeTerminalPush"; | ||
260 | |||
261 | case Bu::Parser::State::typeNonTerminal: | ||
262 | return f << "typeNonTerminal"; | ||
263 | |||
264 | case Bu::Parser::State::typeReduction: | ||
265 | return f << "typeReduction"; | ||
266 | } | ||
267 | return f << "***error***"; | ||
268 | } | ||
269 | |||
270 | Bu::Formatter &Bu::operator<<( Bu::Formatter &f, const Bu::Parser::State &s ) | ||
271 | { | ||
272 | return f << "{" << s.eType << ": " << s.iIndex << "}"; | ||
273 | } | ||
274 | |||
diff --git a/src/parser.h b/src/parser.h index e720e54..5b5d4a8 100644 --- a/src/parser.h +++ b/src/parser.h | |||
@@ -62,7 +62,7 @@ namespace Bu | |||
62 | State( Type eType, int iIndex ); | 62 | State( Type eType, int iIndex ); |
63 | virtual ~State(); | 63 | virtual ~State(); |
64 | 64 | ||
65 | private: | 65 | //private: |
66 | Type eType; | 66 | Type eType; |
67 | int iIndex; | 67 | int iIndex; |
68 | }; | 68 | }; |
@@ -76,10 +76,12 @@ namespace Bu | |||
76 | virtual ~NonTerminal(); | 76 | virtual ~NonTerminal(); |
77 | 77 | ||
78 | void addProduction( Production p ); | 78 | void addProduction( Production p ); |
79 | void setCanSkip(); | ||
79 | 80 | ||
80 | private: | 81 | // private: |
81 | typedef Bu::List<Production> ProductionList; | 82 | typedef Bu::List<Production> ProductionList; |
82 | ProductionList lProduction; | 83 | ProductionList lProduction; |
84 | bool bCanSkip; | ||
83 | }; | 85 | }; |
84 | 86 | ||
85 | int addNonTerminal( const Bu::FString &sName, NonTerminal &nt ); | 87 | int addNonTerminal( const Bu::FString &sName, NonTerminal &nt ); |
@@ -93,6 +95,10 @@ namespace Bu | |||
93 | int getReductionId( const Bu::FString &sName ); | 95 | int getReductionId( const Bu::FString &sName ); |
94 | 96 | ||
95 | private: | 97 | private: |
98 | bool selectProduction( int iNt, Lexer::Token *ptCur ); | ||
99 | void advanceState(); | ||
100 | |||
101 | private: | ||
96 | typedef Bu::List<Lexer *> LexerStack; | 102 | typedef Bu::List<Lexer *> LexerStack; |
97 | typedef Bu::List<Lexer::Token *> TokenStack; | 103 | typedef Bu::List<Lexer::Token *> TokenStack; |
98 | typedef Bu::List<Production::const_iterator> StateStack; | 104 | typedef Bu::List<Production::const_iterator> StateStack; |
@@ -109,6 +115,9 @@ namespace Bu | |||
109 | NameIndexHash hNonTerminalName; | 115 | NameIndexHash hNonTerminalName; |
110 | int iRootNonTerminal; | 116 | int iRootNonTerminal; |
111 | }; | 117 | }; |
118 | Bu::Formatter &operator<<( Bu::Formatter &f, Bu::Parser::State::Type t ); | ||
119 | Bu::Formatter &operator<<( Bu::Formatter &f, const Bu::Parser::State &s ); | ||
112 | }; | 120 | }; |
113 | 121 | ||
122 | |||
114 | #endif | 123 | #endif |
diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp index e4fc95f..76d4a72 100644 --- a/src/tools/parser.cpp +++ b/src/tools/parser.cpp | |||
@@ -157,15 +157,44 @@ void redPrint( Bu::Parser &p ) | |||
157 | { | 157 | { |
158 | } | 158 | } |
159 | 159 | ||
160 | /* Basic grammer example: | ||
161 | * | ||
162 | * input: expr '=' | ||
163 | * ; | ||
164 | * | ||
165 | * expr: expr '+' expr | ||
166 | * | '(' expr ')' | ||
167 | * | NUMBER | ||
168 | * ; | ||
169 | * | ||
170 | * The problem is, that we can't actually make something left hand recursive, | ||
171 | * so we break it into two exprs: | ||
172 | * | ||
173 | * expr': '(' expr ')' | ||
174 | * | NUMBER | ||
175 | * ; | ||
176 | * | ||
177 | * expr: expr' expr'' | ||
178 | * ; | ||
179 | * | ||
180 | * expr'': '+' expr | ||
181 | * | | ||
182 | * ; | ||
183 | * | ||
184 | * 5 + 5 + 5 = | ||
185 | */ | ||
186 | |||
160 | int main( int argc, char *argv[] ) | 187 | int main( int argc, char *argv[] ) |
161 | { | 188 | { |
162 | File fIn( argv[1], File::Read ); | 189 | File fIn( argv[1], File::Read ); |
163 | 190 | ||
164 | Parser p; | 191 | Parser p; |
165 | 192 | ||
193 | p.addNonTerminal("expr"); | ||
194 | p.addNonTerminal("expr'"); | ||
195 | p.addNonTerminal("expr''"); | ||
166 | { | 196 | { |
167 | Parser::NonTerminal nt; | 197 | Parser::NonTerminal nt; |
168 | int iSelf = p.addNonTerminal("expr"); | ||
169 | nt.addProduction( | 198 | nt.addProduction( |
170 | Parser::Production( | 199 | Parser::Production( |
171 | Parser::State( | 200 | Parser::State( |
@@ -175,7 +204,7 @@ int main( int argc, char *argv[] ) | |||
175 | ).append( | 204 | ).append( |
176 | Parser::State( | 205 | Parser::State( |
177 | Parser::State::typeNonTerminal, | 206 | Parser::State::typeNonTerminal, |
178 | iSelf | 207 | p.getNonTerminalId("expr") |
179 | ) | 208 | ) |
180 | ).append( | 209 | ).append( |
181 | Parser::State( | 210 | Parser::State( |
@@ -185,9 +214,11 @@ int main( int argc, char *argv[] ) | |||
185 | ) | 214 | ) |
186 | ); | 215 | ); |
187 | nt.addProduction( | 216 | nt.addProduction( |
188 | Parser::Production() | 217 | Parser::Production( |
218 | ) | ||
189 | ); | 219 | ); |
190 | p.addNonTerminal( "expr", nt ); | 220 | nt.setCanSkip(); |
221 | p.setNonTerminal("expr''", nt ); | ||
191 | } | 222 | } |
192 | { | 223 | { |
193 | Parser::NonTerminal nt; | 224 | Parser::NonTerminal nt; |
@@ -197,14 +228,26 @@ int main( int argc, char *argv[] ) | |||
197 | Parser::State::typeTerminalPush, | 228 | Parser::State::typeTerminalPush, |
198 | tokNumber | 229 | tokNumber |
199 | ) | 230 | ) |
231 | ) | ||
232 | ); | ||
233 | p.setNonTerminal("expr'", nt ); | ||
234 | } | ||
235 | { | ||
236 | Parser::NonTerminal nt; | ||
237 | nt.addProduction( | ||
238 | Parser::Production( | ||
239 | Parser::State( | ||
240 | Parser::State::typeNonTerminal, | ||
241 | p.getNonTerminalId("expr'") | ||
242 | ) | ||
200 | ).append( | 243 | ).append( |
201 | Parser::State( | 244 | Parser::State( |
202 | Parser::State::typeNonTerminal, | 245 | Parser::State::typeNonTerminal, |
203 | p.getNonTerminalId("expr") | 246 | p.getNonTerminalId("expr''") |
204 | ) | 247 | ) |
205 | ) | 248 | ) |
206 | ); | 249 | ); |
207 | p.addNonTerminal( "expr'", nt ); | 250 | p.setNonTerminal("expr", nt ); |
208 | } | 251 | } |
209 | { | 252 | { |
210 | Parser::NonTerminal nt; | 253 | Parser::NonTerminal nt; |
@@ -212,7 +255,7 @@ int main( int argc, char *argv[] ) | |||
212 | Parser::Production( | 255 | Parser::Production( |
213 | Parser::State( | 256 | Parser::State( |
214 | Parser::State::typeNonTerminal, | 257 | Parser::State::typeNonTerminal, |
215 | p.getNonTerminalId("expr'") | 258 | p.getNonTerminalId("expr") |
216 | ) | 259 | ) |
217 | ).append( | 260 | ).append( |
218 | Parser::State( | 261 | Parser::State( |