summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2010-10-12 18:10:47 +0000
committerMike Buland <eichlan@xagasoft.com>2010-10-12 18:10:47 +0000
commitfc16fc104a038146c8ab6c2e9fad38e18663a09f (patch)
treeb8b50c7349ebd2869ee8599be89c82705235fd2a /src
parent0981b9d6a12bd7aadbf9286459e033ac1a2ba910 (diff)
downloadlibbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.tar.gz
libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.tar.bz2
libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.tar.xz
libbu++-fc16fc104a038146c8ab6c2e9fad38e18663a09f.zip
It's getting close. I'm not 100% sure abouth this method yet...
Diffstat (limited to '')
-rw-r--r--src/lexer.h2
-rw-r--r--src/parser.cpp160
-rw-r--r--src/parser.h13
-rw-r--r--src/tools/parser.cpp57
4 files changed, 211 insertions, 21 deletions
diff --git a/src/lexer.h b/src/lexer.h
index 5847269..e8a17b5 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -19,7 +19,7 @@ namespace Bu
19 Lexer(); 19 Lexer();
20 virtual ~Lexer(); 20 virtual ~Lexer();
21 21
22 typedef int32_t TokenType; 22 typedef int TokenType;
23 23
24 class Token 24 class Token
25 { 25 {
diff --git a/src/parser.cpp b/src/parser.cpp
index e8e8ff2..b563691 100644
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -24,21 +24,127 @@ void Bu::Parser::popLexer()
24 24
25void Bu::Parser::parse() 25void Bu::Parser::parse()
26{ 26{
27 for(;;) 27 int iCurNt = iRootNonTerminal;
28 Lexer::Token *ptCur = sLexer.peek()->nextToken();
29 //Lexer::Token *ptNext = sLexer.peek()->nextToken();
30 selectProduction( iCurNt, ptCur );
31 sio << "Token: " << *ptCur << sio.nl;
32
33 while( !sState.isEmpty() )
28 { 34 {
29 Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); 35 sio << "Currently: " << *sState.peek() << sio.nl;
30 sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; 36 switch( (*sState.peek()).eType )
31 if( pToken->iToken < 0 )
32 { 37 {
33 delete sLexer.peekPop(); 38 case State::typeTerminal:
34 if( sLexer.isEmpty() ) 39 sio << "terminal: " << ptCur->iToken << " == "
40 << (*sState.peek()).iIndex << sio.nl;
41 if( ptCur->iToken == (*sState.peek()).iIndex )
42 {
43 advanceState();
44 delete ptCur;
45 ptCur = sLexer.peek()->nextToken();
46 sio << "Token: " << *ptCur << sio.nl;
47 }
48 else
49 {
50 throw Bu::ExceptionBase("Error parsing code.");
51 }
52 break;
53
54 case State::typeTerminalPush:
55 sio << "terminalpush: " << ptCur->iToken << " == "
56 << (*sState.peek()).iIndex << sio.nl;
57 if( ptCur->iToken == (*sState.peek()).iIndex )
58 {
59 advanceState();
60 sToken.push( ptCur );
61
62 ptCur = sLexer.peek()->nextToken();
63 sio << "Token: " << *ptCur << sio.nl;
64 }
65 else
66 {
67 throw Bu::ExceptionBase("Error parsing code.");
68 }
69 break;
70
71 case State::typeNonTerminal:
72 sio << "nonterminal: " << ptCur->iToken << " --> "
73 << (*sState.peek()).iIndex << sio.nl;
74 {
75 int iNt = (*sState.peek()).iIndex;
76 advanceState();
77 if( !selectProduction( iNt, ptCur ) )
78 {
79 throw Bu::ExceptionBase("Error parsing code.");
80 }
81 }
82 break;
83
84 case State::typeReduction:
85 sio << "reduction" << sio.nl;
86 aReduction[(*sState.peek()).iIndex]( *this );
87 advanceState();
88 break;
89 }
90 }
91}
92
93bool Bu::Parser::selectProduction( int iNt, Lexer::Token *ptCur )
94{
95 NonTerminal &nt = aNonTerminal[iNt];
96 int j = 0;
97 for( NonTerminal::ProductionList::iterator i = nt.lProduction.begin();
98 i; i++,j++ )
99 {
100 if( (*i).isEmpty() )
101 continue;
102 if( (*i).first().eType == State::typeTerminal ||
103 (*i).first().eType == State::typeTerminalPush )
104 {
105 if( (*i).first().iIndex == ptCur->iToken )
35 { 106 {
36 delete pToken; 107 sState.push( (*i).begin() );
37 return; 108 sio << "Pushing production " << j << " from nt " << iNt
109 << sio.nl;
110 return true;
38 } 111 }
39 } 112 }
40 delete pToken; 113 else if( (*i).first().eType == State::typeNonTerminal )
41 } 114 {
115 sState.push( (*i).begin() );
116 sio << "Pushing production " << j << " from nt " << iNt
117 << " as test." << sio.nl;
118 if( !selectProduction( (*i).first().iIndex, ptCur ) )
119 {
120 sState.pop();
121 sio << "Production " << j << " from nt " << iNt
122 << " didn't work out." << sio.nl;
123 }
124 else
125 {
126 return true;
127 }
128 }
129 }
130 if( nt.bCanSkip )
131 return true;
132 return false;
133}
134
135void Bu::Parser::advanceState()
136{
137 if( sState.isEmpty() )
138 return;
139
140 sState.peek()++;
141 if( !sState.peek() )
142 {
143 sState.pop();
144 sio << "State advanced, End of production." << sio.nl;
145 return;
146 }
147 sio << "State advanced, now: " << *(sState.peek()) << sio.nl;
42} 148}
43 149
44void Bu::Parser::setRootNonTerminal( int iRoot ) 150void Bu::Parser::setRootNonTerminal( int iRoot )
@@ -56,6 +162,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName, NonTerminal &nt )
56 int iId = aNonTerminal.getSize(); 162 int iId = aNonTerminal.getSize();
57 aNonTerminal.append( nt ); 163 aNonTerminal.append( nt );
58 hNonTerminalName.insert( sName, iId ); 164 hNonTerminalName.insert( sName, iId );
165 sio << "nt '" << sName << "' = " << iId << sio.nl;
59 return iId; 166 return iId;
60} 167}
61 168
@@ -64,6 +171,7 @@ int Bu::Parser::addNonTerminal( const Bu::FString &sName )
64 int iId = aNonTerminal.getSize(); 171 int iId = aNonTerminal.getSize();
65 aNonTerminal.append( NonTerminal() ); 172 aNonTerminal.append( NonTerminal() );
66 hNonTerminalName.insert( sName, iId ); 173 hNonTerminalName.insert( sName, iId );
174 sio << "nt '" << sName << "' = " << iId << sio.nl;
67 return iId; 175 return iId;
68} 176}
69 177
@@ -121,7 +229,8 @@ Bu::Parser::State::~State()
121// Bu::Parser::NonTerminal 229// Bu::Parser::NonTerminal
122// 230//
123 231
124Bu::Parser::NonTerminal::NonTerminal() 232Bu::Parser::NonTerminal::NonTerminal() :
233 bCanSkip( false )
125{ 234{
126} 235}
127 236
@@ -134,3 +243,32 @@ void Bu::Parser::NonTerminal::addProduction( Production p )
134 lProduction.append( p ); 243 lProduction.append( p );
135} 244}
136 245
246void Bu::Parser::NonTerminal::setCanSkip()
247{
248 bCanSkip = true;
249}
250
251Bu::Formatter &Bu::operator<<( Bu::Formatter &f, Bu::Parser::State::Type t )
252{
253 switch( t )
254 {
255 case Bu::Parser::State::typeTerminal:
256 return f << "typeTerminal";
257
258 case Bu::Parser::State::typeTerminalPush:
259 return f << "typeTerminalPush";
260
261 case Bu::Parser::State::typeNonTerminal:
262 return f << "typeNonTerminal";
263
264 case Bu::Parser::State::typeReduction:
265 return f << "typeReduction";
266 }
267 return f << "***error***";
268}
269
270Bu::Formatter &Bu::operator<<( Bu::Formatter &f, const Bu::Parser::State &s )
271{
272 return f << "{" << s.eType << ": " << s.iIndex << "}";
273}
274
diff --git a/src/parser.h b/src/parser.h
index e720e54..5b5d4a8 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -62,7 +62,7 @@ namespace Bu
62 State( Type eType, int iIndex ); 62 State( Type eType, int iIndex );
63 virtual ~State(); 63 virtual ~State();
64 64
65 private: 65 //private:
66 Type eType; 66 Type eType;
67 int iIndex; 67 int iIndex;
68 }; 68 };
@@ -76,10 +76,12 @@ namespace Bu
76 virtual ~NonTerminal(); 76 virtual ~NonTerminal();
77 77
78 void addProduction( Production p ); 78 void addProduction( Production p );
79 void setCanSkip();
79 80
80 private: 81// private:
81 typedef Bu::List<Production> ProductionList; 82 typedef Bu::List<Production> ProductionList;
82 ProductionList lProduction; 83 ProductionList lProduction;
84 bool bCanSkip;
83 }; 85 };
84 86
85 int addNonTerminal( const Bu::FString &sName, NonTerminal &nt ); 87 int addNonTerminal( const Bu::FString &sName, NonTerminal &nt );
@@ -93,6 +95,10 @@ namespace Bu
93 int getReductionId( const Bu::FString &sName ); 95 int getReductionId( const Bu::FString &sName );
94 96
95 private: 97 private:
98 bool selectProduction( int iNt, Lexer::Token *ptCur );
99 void advanceState();
100
101 private:
96 typedef Bu::List<Lexer *> LexerStack; 102 typedef Bu::List<Lexer *> LexerStack;
97 typedef Bu::List<Lexer::Token *> TokenStack; 103 typedef Bu::List<Lexer::Token *> TokenStack;
98 typedef Bu::List<Production::const_iterator> StateStack; 104 typedef Bu::List<Production::const_iterator> StateStack;
@@ -109,6 +115,9 @@ namespace Bu
109 NameIndexHash hNonTerminalName; 115 NameIndexHash hNonTerminalName;
110 int iRootNonTerminal; 116 int iRootNonTerminal;
111 }; 117 };
118Bu::Formatter &operator<<( Bu::Formatter &f, Bu::Parser::State::Type t );
119Bu::Formatter &operator<<( Bu::Formatter &f, const Bu::Parser::State &s );
112}; 120};
113 121
122
114#endif 123#endif
diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp
index e4fc95f..76d4a72 100644
--- a/src/tools/parser.cpp
+++ b/src/tools/parser.cpp
@@ -157,15 +157,44 @@ void redPrint( Bu::Parser &p )
157{ 157{
158} 158}
159 159
160/* Basic grammer example:
161 *
162 * input: expr '='
163 * ;
164 *
165 * expr: expr '+' expr
166 * | '(' expr ')'
167 * | NUMBER
168 * ;
169 *
170 * The problem is, that we can't actually make something left hand recursive,
171 * so we break it into two exprs:
172 *
173 * expr': '(' expr ')'
174 * | NUMBER
175 * ;
176 *
177 * expr: expr' expr''
178 * ;
179 *
180 * expr'': '+' expr
181 * |
182 * ;
183 *
184 * 5 + 5 + 5 =
185 */
186
160int main( int argc, char *argv[] ) 187int main( int argc, char *argv[] )
161{ 188{
162 File fIn( argv[1], File::Read ); 189 File fIn( argv[1], File::Read );
163 190
164 Parser p; 191 Parser p;
165 192
193 p.addNonTerminal("expr");
194 p.addNonTerminal("expr'");
195 p.addNonTerminal("expr''");
166 { 196 {
167 Parser::NonTerminal nt; 197 Parser::NonTerminal nt;
168 int iSelf = p.addNonTerminal("expr");
169 nt.addProduction( 198 nt.addProduction(
170 Parser::Production( 199 Parser::Production(
171 Parser::State( 200 Parser::State(
@@ -175,7 +204,7 @@ int main( int argc, char *argv[] )
175 ).append( 204 ).append(
176 Parser::State( 205 Parser::State(
177 Parser::State::typeNonTerminal, 206 Parser::State::typeNonTerminal,
178 iSelf 207 p.getNonTerminalId("expr")
179 ) 208 )
180 ).append( 209 ).append(
181 Parser::State( 210 Parser::State(
@@ -185,9 +214,11 @@ int main( int argc, char *argv[] )
185 ) 214 )
186 ); 215 );
187 nt.addProduction( 216 nt.addProduction(
188 Parser::Production() 217 Parser::Production(
218 )
189 ); 219 );
190 p.addNonTerminal( "expr", nt ); 220 nt.setCanSkip();
221 p.setNonTerminal("expr''", nt );
191 } 222 }
192 { 223 {
193 Parser::NonTerminal nt; 224 Parser::NonTerminal nt;
@@ -197,14 +228,26 @@ int main( int argc, char *argv[] )
197 Parser::State::typeTerminalPush, 228 Parser::State::typeTerminalPush,
198 tokNumber 229 tokNumber
199 ) 230 )
231 )
232 );
233 p.setNonTerminal("expr'", nt );
234 }
235 {
236 Parser::NonTerminal nt;
237 nt.addProduction(
238 Parser::Production(
239 Parser::State(
240 Parser::State::typeNonTerminal,
241 p.getNonTerminalId("expr'")
242 )
200 ).append( 243 ).append(
201 Parser::State( 244 Parser::State(
202 Parser::State::typeNonTerminal, 245 Parser::State::typeNonTerminal,
203 p.getNonTerminalId("expr") 246 p.getNonTerminalId("expr''")
204 ) 247 )
205 ) 248 )
206 ); 249 );
207 p.addNonTerminal( "expr'", nt ); 250 p.setNonTerminal("expr", nt );
208 } 251 }
209 { 252 {
210 Parser::NonTerminal nt; 253 Parser::NonTerminal nt;
@@ -212,7 +255,7 @@ int main( int argc, char *argv[] )
212 Parser::Production( 255 Parser::Production(
213 Parser::State( 256 Parser::State(
214 Parser::State::typeNonTerminal, 257 Parser::State::typeNonTerminal,
215 p.getNonTerminalId("expr'") 258 p.getNonTerminalId("expr")
216 ) 259 )
217 ).append( 260 ).append(
218 Parser::State( 261 Parser::State(