diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lexer.h | 4 | ||||
| -rw-r--r-- | src/parser.cpp | 10 | ||||
| -rw-r--r-- | src/parser.h | 2 | ||||
| -rw-r--r-- | src/tools/bnfcompile.cpp | 410 |
4 files changed, 426 insertions, 0 deletions
diff --git a/src/lexer.h b/src/lexer.h index 2cb5f51..9840afe 100644 --- a/src/lexer.h +++ b/src/lexer.h | |||
| @@ -36,6 +36,10 @@ namespace Bu | |||
| 36 | } | 36 | } |
| 37 | TokenType iToken; | 37 | TokenType iToken; |
| 38 | Bu::Variant vExtra; | 38 | Bu::Variant vExtra; |
| 39 | int iStartCol; | ||
| 40 | int iStartRow; | ||
| 41 | int iEndCol; | ||
| 42 | int iEndRow; | ||
| 39 | }; | 43 | }; |
| 40 | 44 | ||
| 41 | virtual Token *nextToken()=0; | 45 | virtual Token *nextToken()=0; |
diff --git a/src/parser.cpp b/src/parser.cpp index 3ce73b6..4ad4ff9 100644 --- a/src/parser.cpp +++ b/src/parser.cpp | |||
| @@ -205,6 +205,11 @@ int Bu::Parser::getNonTerminalId( const Bu::FString &sName ) | |||
| 205 | return hNonTerminalName.get( sName ); | 205 | return hNonTerminalName.get( sName ); |
| 206 | } | 206 | } |
| 207 | 207 | ||
| 208 | bool Bu::Parser::hasNonTerminal( const Bu::FString &sName ) | ||
| 209 | { | ||
| 210 | return hNonTerminalName.has( sName ); | ||
| 211 | } | ||
| 212 | |||
| 208 | int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) | 213 | int Bu::Parser::addReduction( const Bu::FString &sName, const Reduction &r ) |
| 209 | { | 214 | { |
| 210 | int iId = aReduction.getSize(); | 215 | int iId = aReduction.getSize(); |
| @@ -231,6 +236,11 @@ int Bu::Parser::getReductionId( const Bu::FString &sName ) | |||
| 231 | return hReductionName.get( sName ); | 236 | return hReductionName.get( sName ); |
| 232 | } | 237 | } |
| 233 | 238 | ||
| 239 | bool Bu::Parser::hasReduction( const Bu::FString &sName ) | ||
| 240 | { | ||
| 241 | return hReductionName.has( sName ); | ||
| 242 | } | ||
| 243 | |||
| 234 | // | 244 | // |
| 235 | // Bu::Parser::State | 245 | // Bu::Parser::State |
| 236 | // | 246 | // |
diff --git a/src/parser.h b/src/parser.h index 1679c7f..a925188 100644 --- a/src/parser.h +++ b/src/parser.h | |||
| @@ -91,11 +91,13 @@ namespace Bu | |||
| 91 | int addNonTerminal( const Bu::FString &sName ); | 91 | int addNonTerminal( const Bu::FString &sName ); |
| 92 | void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); | 92 | void setNonTerminal( const Bu::FString &sName, NonTerminal &nt ); |
| 93 | int getNonTerminalId( const Bu::FString &sName ); | 93 | int getNonTerminalId( const Bu::FString &sName ); |
| 94 | bool hasNonTerminal( const Bu::FString &sName ); | ||
| 94 | 95 | ||
| 95 | int addReduction( const Bu::FString &sName, const Reduction &r ); | 96 | int addReduction( const Bu::FString &sName, const Reduction &r ); |
| 96 | int addReduction( const Bu::FString &sName ); | 97 | int addReduction( const Bu::FString &sName ); |
| 97 | void setReduction( const Bu::FString &sName, const Reduction &r ); | 98 | void setReduction( const Bu::FString &sName, const Reduction &r ); |
| 98 | int getReductionId( const Bu::FString &sName ); | 99 | int getReductionId( const Bu::FString &sName ); |
| 100 | bool hasReduction( const Bu::FString &sName ); | ||
| 99 | 101 | ||
| 100 | private: | 102 | private: |
| 101 | bool selectProduction( int iNt, Lexer::Token *ptCur ); | 103 | bool selectProduction( int iNt, Lexer::Token *ptCur ); |
diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp new file mode 100644 index 0000000..16e75a5 --- /dev/null +++ b/src/tools/bnfcompile.cpp | |||
| @@ -0,0 +1,410 @@ | |||
| 1 | #include <bu/sio.h> | ||
| 2 | #include <bu/lexer.h> | ||
| 3 | #include <bu/parser.h> | ||
| 4 | #include <bu/file.h> | ||
| 5 | #include <bu/queuebuf.h> | ||
| 6 | |||
| 7 | using namespace Bu; | ||
| 8 | |||
| 9 | enum TokenType | ||
| 10 | { | ||
| 11 | tokIdentifier, | ||
| 12 | tokColon, | ||
| 13 | tokOr, | ||
| 14 | tokSemiColon, | ||
| 15 | tokTokens, | ||
| 16 | tokEquals, | ||
| 17 | tokOpenCurly, | ||
| 18 | tokCloseCurly, | ||
| 19 | tokOpenSquare, | ||
| 20 | tokCloseSquare, | ||
| 21 | |||
| 22 | tokEos=-1 | ||
| 23 | }; | ||
| 24 | |||
| 25 | class BnfLexer : public Lexer | ||
| 26 | { | ||
| 27 | public: | ||
| 28 | BnfLexer( Stream &rSrc ) : | ||
| 29 | rSrc( rSrc ) | ||
| 30 | { | ||
| 31 | } | ||
| 32 | |||
| 33 | virtual ~BnfLexer() | ||
| 34 | { | ||
| 35 | } | ||
| 36 | |||
| 37 | virtual Token *nextToken() | ||
| 38 | { | ||
| 39 | char cBuf; | ||
| 40 | |||
| 41 | for(;;) | ||
| 42 | { | ||
| 43 | if( qbIn.getSize() == 0 ) | ||
| 44 | { | ||
| 45 | char buf[4096]; | ||
| 46 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
| 47 | |||
| 48 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
| 49 | return new Token( tokEos ); | ||
| 50 | } | ||
| 51 | qbIn.peek( &cBuf, 1 ); | ||
| 52 | if( (cBuf >= 'a' && cBuf <= 'z') || | ||
| 53 | (cBuf >= 'A' && cBuf <= 'Z') || | ||
| 54 | (cBuf >= '0' && cBuf <= '9') || | ||
| 55 | cBuf == '_' ) | ||
| 56 | { | ||
| 57 | sBuf.append( cBuf ); | ||
| 58 | qbIn.seek( 1 ); | ||
| 59 | } | ||
| 60 | else if( sBuf.isSet() ) | ||
| 61 | { | ||
| 62 | if( sBuf == "tokens" ) | ||
| 63 | { | ||
| 64 | sBuf.clear(); | ||
| 65 | return new Token( tokTokens ); | ||
| 66 | } | ||
| 67 | else | ||
| 68 | { | ||
| 69 | Token *pRet = new Token( tokIdentifier, sBuf ); | ||
| 70 | sBuf.clear(); | ||
| 71 | return pRet; | ||
| 72 | } | ||
| 73 | } | ||
| 74 | else | ||
| 75 | { | ||
| 76 | switch( cBuf ) | ||
| 77 | { | ||
| 78 | case ' ': | ||
| 79 | case '\t': | ||
| 80 | case '\n': | ||
| 81 | case '\r': | ||
| 82 | qbIn.seek( 1 ); | ||
| 83 | continue; | ||
| 84 | |||
| 85 | case ':': | ||
| 86 | qbIn.seek( 1 ); | ||
| 87 | return new Token( tokColon ); | ||
| 88 | |||
| 89 | case ';': | ||
| 90 | qbIn.seek( 1 ); | ||
| 91 | return new Token( tokSemiColon ); | ||
| 92 | |||
| 93 | case '|': | ||
| 94 | qbIn.seek( 1 ); | ||
| 95 | return new Token( tokOr ); | ||
| 96 | |||
| 97 | case '=': | ||
| 98 | qbIn.seek( 1 ); | ||
| 99 | return new Token( tokEquals ); | ||
| 100 | |||
| 101 | case '[': | ||
| 102 | qbIn.seek( 1 ); | ||
| 103 | return new Token( tokOpenSquare ); | ||
| 104 | |||
| 105 | case ']': | ||
| 106 | qbIn.seek( 1 ); | ||
| 107 | return new Token( tokCloseSquare ); | ||
| 108 | |||
| 109 | case '{': | ||
| 110 | qbIn.seek( 1 ); | ||
| 111 | return new Token( tokOpenCurly ); | ||
| 112 | |||
| 113 | case '}': | ||
| 114 | qbIn.seek( 1 ); | ||
| 115 | return new Token( tokCloseCurly ); | ||
| 116 | |||
| 117 | default: | ||
| 118 | throw ExceptionBase("Unexpected character '%c'.", | ||
| 119 | cBuf ); | ||
| 120 | break; | ||
| 121 | } | ||
| 122 | } | ||
| 123 | } | ||
| 124 | } | ||
| 125 | |||
| 126 | virtual FString tokenToString( const Token &t ) | ||
| 127 | { | ||
| 128 | switch( (TokenType)t.iToken ) | ||
| 129 | { | ||
| 130 | case tokIdentifier: return "tokIdentifier"; | ||
| 131 | case tokColon: return "tokColon"; | ||
| 132 | case tokOr: return "tokOr"; | ||
| 133 | case tokSemiColon: return "tokSemiColon"; | ||
| 134 | case tokTokens: return "tokTokens"; | ||
| 135 | case tokEquals: return "tokEquals"; | ||
| 136 | case tokOpenCurly: return "tokOpenCurly"; | ||
| 137 | case tokCloseCurly: return "tokCloseCurly"; | ||
| 138 | case tokOpenSquare: return "tokOpenSquare"; | ||
| 139 | case tokCloseSquare: return "tokCloseSquare"; | ||
| 140 | case tokEos: return "tokEos"; | ||
| 141 | } | ||
| 142 | |||
| 143 | return "???"; | ||
| 144 | } | ||
| 145 | |||
| 146 | private: | ||
| 147 | Stream &rSrc; | ||
| 148 | QueueBuf qbIn; | ||
| 149 | FString sBuf; | ||
| 150 | }; | ||
| 151 | |||
| 152 | class BnfParser | ||
| 153 | { | ||
| 154 | public: | ||
| 155 | BnfParser( BnfLexer &l ) : | ||
| 156 | l( l ), | ||
| 157 | pCur( NULL ), | ||
| 158 | iLastToken( 0 ) | ||
| 159 | { | ||
| 160 | } | ||
| 161 | |||
| 162 | virtual ~BnfParser() | ||
| 163 | { | ||
| 164 | delete pCur; | ||
| 165 | pCur = NULL; | ||
| 166 | } | ||
| 167 | |||
| 168 | void parse() | ||
| 169 | { | ||
| 170 | for(;;) | ||
| 171 | { | ||
| 172 | next(); | ||
| 173 | switch( pCur->iToken ) | ||
| 174 | { | ||
| 175 | case tokTokens: | ||
| 176 | tokens(); | ||
| 177 | break; | ||
| 178 | |||
| 179 | case tokIdentifier: | ||
| 180 | nonTerminal(); | ||
| 181 | break; | ||
| 182 | |||
| 183 | case tokEos: | ||
| 184 | return; | ||
| 185 | break; | ||
| 186 | |||
| 187 | default: | ||
| 188 | tokenError("tokTokens, tokIdentifier, or tokEos"); | ||
| 189 | } | ||
| 190 | } | ||
| 191 | } | ||
| 192 | |||
| 193 | private: | ||
| 194 | void tokens() | ||
| 195 | { | ||
| 196 | next(); | ||
| 197 | if( pCur->iToken != tokEquals ) | ||
| 198 | tokenError("tokEquals"); | ||
| 199 | for(;;) | ||
| 200 | { | ||
| 201 | next(); | ||
| 202 | if( pCur->iToken == tokIdentifier ) | ||
| 203 | { | ||
| 204 | hTokens.insert( pCur->vExtra.get<Bu::FString>(), ++iLastToken ); | ||
| 205 | sio << "Added token[" << iLastToken << "]: " | ||
| 206 | << pCur->vExtra.get<Bu::FString>() << sio.nl; | ||
| 207 | } | ||
| 208 | else if( pCur->iToken == tokSemiColon ) | ||
| 209 | break; | ||
| 210 | else | ||
| 211 | tokenError("tokIdentifier or tokSemiColon"); | ||
| 212 | } | ||
| 213 | } | ||
| 214 | |||
| 215 | void nonTerminal() | ||
| 216 | { | ||
| 217 | Bu::FString sNtName = pCur->vExtra.get<Bu::FString>(); | ||
| 218 | Parser::NonTerminal nt; | ||
| 219 | p.addNonTerminal( sNtName ); | ||
| 220 | sio.incIndent(); | ||
| 221 | sio << "Created non-terminal: " << sNtName << sio.nl; | ||
| 222 | |||
| 223 | next(); | ||
| 224 | if( pCur->iToken != tokColon ) | ||
| 225 | tokenError("tokColon"); | ||
| 226 | production( nt ); | ||
| 227 | for(;;) | ||
| 228 | { | ||
| 229 | switch( pCur->iToken ) | ||
| 230 | { | ||
| 231 | case tokOr: | ||
| 232 | production( nt ); | ||
| 233 | break; | ||
| 234 | |||
| 235 | case tokSemiColon: | ||
| 236 | p.setNonTerminal( sNtName, nt ); | ||
| 237 | sio.decIndent(); | ||
| 238 | sio << "Closing non-terminal." << sio.nl; | ||
| 239 | return; | ||
| 240 | |||
| 241 | default: | ||
| 242 | tokenError("tkOr or tokSemiColon"); | ||
| 243 | break; | ||
| 244 | } | ||
| 245 | } | ||
| 246 | } | ||
| 247 | |||
| 248 | void production( Parser::NonTerminal &nt ) | ||
| 249 | { | ||
| 250 | sio.incIndent(); | ||
| 251 | sio << "Adding new production:" << sio.nl; | ||
| 252 | Parser::Production pr; | ||
| 253 | bool bAnything = false; | ||
| 254 | for(;;) | ||
| 255 | { | ||
| 256 | next(); | ||
| 257 | switch( pCur->iToken ) | ||
| 258 | { | ||
| 259 | case tokIdentifier: | ||
| 260 | { | ||
| 261 | const Bu::FString &sName = | ||
| 262 | pCur->vExtra.get<Bu::FString>(); | ||
| 263 | if( hTokens.has( sName ) ) | ||
| 264 | { | ||
| 265 | pr.append( | ||
| 266 | Parser::State( | ||
| 267 | Parser::State::typeTerminal, | ||
| 268 | hTokens.get( sName ) | ||
| 269 | ) | ||
| 270 | ); | ||
| 271 | sio << "Added terminal " << sName << sio.nl; | ||
| 272 | } | ||
| 273 | else | ||
| 274 | { | ||
| 275 | if( !p.hasNonTerminal( sName ) ) | ||
| 276 | { | ||
| 277 | p.addNonTerminal( sName ); | ||
| 278 | } | ||
| 279 | pr.append( | ||
| 280 | Parser::State( | ||
| 281 | Parser::State::typeNonTerminal, | ||
| 282 | p.getNonTerminalId( sName ) | ||
| 283 | ) | ||
| 284 | ); | ||
| 285 | sio << "Added non-terminal " << sName << sio.nl; | ||
| 286 | } | ||
| 287 | } | ||
| 288 | break; | ||
| 289 | |||
| 290 | case tokOpenSquare: | ||
| 291 | { | ||
| 292 | next(); | ||
| 293 | if( pCur->iToken != tokIdentifier ) | ||
| 294 | tokenError("tokIdentifier"); | ||
| 295 | Bu::FString sName = | ||
| 296 | pCur->vExtra.get<Bu::FString>(); | ||
| 297 | next(); | ||
| 298 | if( pCur->iToken != tokCloseSquare ) | ||
| 299 | tokenError("tokCloseSquare"); | ||
| 300 | |||
| 301 | if( !hTokens.has( sName ) ) | ||
| 302 | throw ExceptionBase("Only token names may be " | ||
| 303 | "enclosed in square brackets."); | ||
| 304 | |||
| 305 | pr.append( | ||
| 306 | Parser::State( | ||
| 307 | Parser::State::typeTerminalPush, | ||
| 308 | hTokens.get( sName ) | ||
| 309 | ) | ||
| 310 | ); | ||
| 311 | sio << "Added terminal-push " << sName << sio.nl; | ||
| 312 | } | ||
| 313 | break; | ||
| 314 | |||
| 315 | case tokOpenCurly: | ||
| 316 | { | ||
| 317 | next(); | ||
| 318 | if( pCur->iToken != tokIdentifier ) | ||
| 319 | tokenError("tokIdentifier"); | ||
| 320 | Bu::FString sName = | ||
| 321 | pCur->vExtra.get<Bu::FString>(); | ||
| 322 | next(); | ||
| 323 | if( pCur->iToken != tokCloseCurly ) | ||
| 324 | tokenError("tokCloseCurly"); | ||
| 325 | |||
| 326 | if( !p.hasReduction( sName ) ) | ||
| 327 | p.addReduction( sName ); | ||
| 328 | |||
| 329 | pr.append( | ||
| 330 | Parser::State( | ||
| 331 | Parser::State::typeReduction, | ||
| 332 | p.getReductionId( sName ) | ||
| 333 | ) | ||
| 334 | ); | ||
| 335 | sio << "Added reduction " << sName << sio.nl; | ||
| 336 | } | ||
| 337 | break; | ||
| 338 | |||
| 339 | case tokOr: | ||
| 340 | case tokSemiColon: | ||
| 341 | if( bAnything ) | ||
| 342 | { | ||
| 343 | nt.addProduction( pr ); | ||
| 344 | sio.decIndent(); | ||
| 345 | sio << "Closing production." << sio.nl; | ||
| 346 | } | ||
| 347 | else | ||
| 348 | { | ||
| 349 | nt.setCanSkip(); | ||
| 350 | sio.decIndent(); | ||
| 351 | sio << "Closing empty production." << sio.nl; | ||
| 352 | } | ||
| 353 | return; | ||
| 354 | |||
| 355 | default: | ||
| 356 | tokenError("tokIdentifier, tokOpenSquare, tokOr, " | ||
| 357 | "tokOpenCurly, or tokSemiColon"); | ||
| 358 | } | ||
| 359 | } | ||
| 360 | } | ||
| 361 | |||
| 362 | private: | ||
| 363 | void next() | ||
| 364 | { | ||
| 365 | delete pCur; | ||
| 366 | pCur = l.nextToken(); | ||
| 367 | } | ||
| 368 | |||
| 369 | void tokenError( const FString &s ) | ||
| 370 | { | ||
| 371 | throw ExceptionBase( ("Expected " + s + " but found " | ||
| 372 | + l.tokenToString( *pCur ) + ".").getStr() ); | ||
| 373 | } | ||
| 374 | |||
| 375 | private: | ||
| 376 | typedef Bu::Hash<Bu::FString, int> TokenHash; | ||
| 377 | TokenHash hTokens; | ||
| 378 | BnfLexer &l; | ||
| 379 | BnfLexer::Token *pCur; | ||
| 380 | int iLastToken; | ||
| 381 | Parser p; | ||
| 382 | }; | ||
| 383 | |||
| 384 | int main( int argc, char *argv[] ) | ||
| 385 | { | ||
| 386 | File fIn( argv[1], File::Read ); | ||
| 387 | |||
| 388 | BnfLexer bl( fIn ); | ||
| 389 | BnfParser parser( bl ); | ||
| 390 | |||
| 391 | parser.parse(); | ||
| 392 | |||
| 393 | /* | ||
| 394 | for(;;) | ||
| 395 | { | ||
| 396 | Lexer::Token *pTok = bl.nextToken(); | ||
| 397 | sio << bl.tokenToString(*pTok); | ||
| 398 | if( pTok->vExtra.isSet() ) | ||
| 399 | { | ||
| 400 | sio << " - " << pTok->vExtra; | ||
| 401 | } | ||
| 402 | sio << sio.nl; | ||
| 403 | if( pTok->iToken == tokEos ) | ||
| 404 | break; | ||
| 405 | } | ||
| 406 | */ | ||
| 407 | |||
| 408 | return 0; | ||
| 409 | } | ||
| 410 | |||
