From df6a899ad6c9a1c792582ef1a3b838c7eb6d330c Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Thu, 3 May 2012 06:30:36 +0000 Subject: Rearranging some stuff. --- src/tools/bnfcompile.cpp | 422 ----------------------------------------------- src/tools/parser.cpp | 369 ----------------------------------------- 2 files changed, 791 deletions(-) delete mode 100644 src/tools/bnfcompile.cpp delete mode 100644 src/tools/parser.cpp (limited to 'src/tools') diff --git a/src/tools/bnfcompile.cpp b/src/tools/bnfcompile.cpp deleted file mode 100644 index af7a0eb..0000000 --- a/src/tools/bnfcompile.cpp +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (C) 2007-2012 Xagasoft, All rights reserved. - * - * This file is part of the libbu++ library and is released under the - * terms of the license contained in the file LICENSE. - */ - -#include -#include -#include -#include -#include - -using namespace Bu; - -enum TokenType -{ - tokIdentifier, - tokColon, - tokOr, - tokSemiColon, - tokTokens, - tokEquals, - tokOpenCurly, - tokCloseCurly, - tokOpenSquare, - tokCloseSquare, - - tokEos=-1 -}; - -class BnfLexer : public Lexer -{ -public: - BnfLexer( Stream &rSrc ) : - rSrc( rSrc ) - { - } - - virtual ~BnfLexer() - { - } - - virtual Token *nextToken() - { - char cBuf; - - for(;;) - { - if( qbIn.getSize() == 0 ) - { - char buf[4096]; - qbIn.write( buf, rSrc.read( buf, 4096 ) ); - - if( rSrc.isEos() && qbIn.getSize() == 0 ) - return new Token( tokEos ); - } - qbIn.peek( &cBuf, 1 ); - if( (cBuf >= 'a' && cBuf <= 'z') || - (cBuf >= 'A' && cBuf <= 'Z') || - (cBuf >= '0' && cBuf <= '9') || - cBuf == '_' ) - { - sBuf.append( cBuf ); - qbIn.seek( 1 ); - } - else if( sBuf.isSet() ) - { - if( sBuf == "tokens" ) - { - sBuf.clear(); - return new Token( tokTokens ); - } - else - { - Token *pRet = new Token( tokIdentifier, sBuf ); - sBuf.clear(); - return pRet; - } - } - else - { - switch( cBuf ) - { - case ' ': - case '\t': - case '\n': - case '\r': - qbIn.seek( 1 ); - continue; - - case ':': - qbIn.seek( 1 ); - return new Token( tokColon ); - - case ';': - qbIn.seek( 1 ); - return new Token( tokSemiColon ); - - case '|': - qbIn.seek( 1 ); - return new Token( tokOr ); - - case '=': - qbIn.seek( 1 ); - return new Token( tokEquals ); - - case '[': - qbIn.seek( 1 ); - return new Token( tokOpenSquare ); - - case ']': - qbIn.seek( 1 ); - return new Token( tokCloseSquare ); - - case '{': - qbIn.seek( 1 ); - return new Token( tokOpenCurly ); - - case '}': - qbIn.seek( 1 ); - return new Token( tokCloseCurly ); - - default: - throw ExceptionBase("Unexpected character '%c'.", - cBuf ); - break; - } - } - } - } - - virtual String tokenToString( const Token &t ) - { - switch( (TokenType)t.iToken ) - { - case tokIdentifier: return "tokIdentifier"; - case tokColon: return "tokColon"; - case tokOr: return "tokOr"; - case tokSemiColon: return "tokSemiColon"; - case tokTokens: return "tokTokens"; - case tokEquals: return "tokEquals"; - case tokOpenCurly: return "tokOpenCurly"; - case tokCloseCurly: return "tokCloseCurly"; - case tokOpenSquare: return "tokOpenSquare"; - case tokCloseSquare: return "tokCloseSquare"; - case tokEos: return "tokEos"; - } - - return "???"; - } - -private: - Stream &rSrc; - QueueBuf qbIn; - String sBuf; -}; - -class BnfParser -{ -public: - BnfParser( BnfLexer &l ) : - l( l ), - pCur( NULL ), - iLastToken( 0 ) - { - } - - virtual ~BnfParser() - { - delete pCur; - pCur = NULL; - } - - void parse() - { - for(;;) - { - next(); - switch( pCur->iToken ) - { - case tokTokens: - tokens(); - break; - - case tokIdentifier: - nonTerminal(); - break; - - case tokEos: - return; - break; - - default: - tokenError("tokTokens, tokIdentifier, or tokEos"); - } - } - } - -private: - void tokens() - { - next(); - if( pCur->iToken != tokEquals ) - tokenError("tokEquals"); - for(;;) - { - next(); - if( pCur->iToken == tokIdentifier ) - { - hTokens.insert( pCur->vExtra.get(), ++iLastToken ); - sio << "Added token[" << iLastToken << "]: " - << pCur->vExtra.get() << sio.nl; - } - else if( pCur->iToken == tokSemiColon ) - break; - else - tokenError("tokIdentifier or tokSemiColon"); - } - } - - void nonTerminal() - { - Bu::String sNtName = pCur->vExtra.get(); - Parser::NonTerminal nt; - p.addNonTerminal( sNtName ); - sio.incIndent(); - sio << "Created non-terminal: " << sNtName << sio.nl; - - next(); - if( pCur->iToken != tokColon ) - tokenError("tokColon"); - production( nt ); - for(;;) - { - switch( pCur->iToken ) - { - case tokOr: - production( nt ); - break; - - case tokSemiColon: - p.setNonTerminal( sNtName, nt ); - sio.decIndent(); - sio << "Closing non-terminal." << sio.nl; - return; - - default: - tokenError("tkOr or tokSemiColon"); - break; - } - } - } - - void production( Parser::NonTerminal &nt ) - { - sio.incIndent(); - sio << "Adding new production:" << sio.nl; - Parser::Production pr; - bool bAnything = false; - for(;;) - { - next(); - switch( pCur->iToken ) - { - case tokIdentifier: - { - const Bu::String &sName = - pCur->vExtra.get(); - if( hTokens.has( sName ) ) - { - pr.append( - Parser::State( - Parser::State::typeTerminal, - hTokens.get( sName ) - ) - ); - sio << "Added terminal " << sName << sio.nl; - } - else - { - if( !p.hasNonTerminal( sName ) ) - { - p.addNonTerminal( sName ); - } - pr.append( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId( sName ) - ) - ); - sio << "Added non-terminal " << sName << sio.nl; - } - } - break; - - case tokOpenSquare: - { - next(); - if( pCur->iToken != tokIdentifier ) - tokenError("tokIdentifier"); - Bu::String sName = - pCur->vExtra.get(); - next(); - if( pCur->iToken != tokCloseSquare ) - tokenError("tokCloseSquare"); - - if( !hTokens.has( sName ) ) - throw ExceptionBase("Only token names may be " - "enclosed in square brackets."); - - pr.append( - Parser::State( - Parser::State::typeTerminalPush, - hTokens.get( sName ) - ) - ); - sio << "Added terminal-push " << sName << sio.nl; - } - break; - - case tokOpenCurly: - { - next(); - if( pCur->iToken != tokIdentifier ) - tokenError("tokIdentifier"); - Bu::String sName = - pCur->vExtra.get(); - next(); - if( pCur->iToken != tokCloseCurly ) - tokenError("tokCloseCurly"); - - if( !p.hasReduction( sName ) ) - p.addReduction( sName ); - - pr.append( - Parser::State( - Parser::State::typeReduction, - p.getReductionId( sName ) - ) - ); - sio << "Added reduction " << sName << sio.nl; - } - break; - - case tokOr: - case tokSemiColon: - if( bAnything ) - { - nt.addProduction( pr ); - sio.decIndent(); - sio << "Closing production." << sio.nl; - } - else - { - nt.setCanSkip(); - sio.decIndent(); - sio << "Closing empty production." << sio.nl; - } - return; - - default: - tokenError("tokIdentifier, tokOpenSquare, tokOr, " - "tokOpenCurly, or tokSemiColon"); - } - } - } - -private: - void next() - { - delete pCur; - pCur = l.nextToken(); - } - - void tokenError( const String &s ) - { - throw ExceptionBase( ("Expected " + s + " but found " - + l.tokenToString( *pCur ) + ".").getStr() ); - } - -private: - typedef Bu::Hash TokenHash; - TokenHash hTokens; - BnfLexer &l; - BnfLexer::Token *pCur; - int iLastToken; - Parser p; -}; - -int main( int argc, char *argv[] ) -{ - if( argc < 2 ) - { - println("Provide an input filename as the first parameter."); - return 0; - } - File fIn( argv[1], File::Read ); - - BnfLexer bl( fIn ); - BnfParser parser( bl ); - - parser.parse(); - -/* - for(;;) - { - Lexer::Token *pTok = bl.nextToken(); - sio << bl.tokenToString(*pTok); - if( pTok->vExtra.isSet() ) - { - sio << " - " << pTok->vExtra; - } - sio << sio.nl; - if( pTok->iToken == tokEos ) - break; - } -*/ - - return 0; -} - diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp deleted file mode 100644 index af53bc8..0000000 --- a/src/tools/parser.cpp +++ /dev/null @@ -1,369 +0,0 @@ -/* - * Copyright (C) 2007-2012 Xagasoft, All rights reserved. - * - * This file is part of the libbu++ library and is released under the - * terms of the license contained in the file LICENSE. - */ - -#include -#include -#include -#include -#include -#include - -using namespace Bu; - -enum Tok -{ - tokNumber, - tokPlus, - tokMinus, - tokDivide, - tokMultiply, - tokOpenParen, - tokCloseParen, - tokCompute, - tokEndOfInput=-1 -}; - -Bu::Formatter &operator<<( Bu::Formatter &f, Tok e ) -{ - switch( e ) - { - case tokNumber: return f << "tokNumber"; - case tokPlus: return f << "tokPlus"; - case tokMinus: return f << "tokMinus"; - case tokDivide: return f << "tokDivide"; - case tokMultiply: return f << "tokMultiply"; - case tokOpenParen: return f << "tokOpenParen"; - case tokCloseParen: return f << "tokCloseParen"; - case tokCompute: return f << "tokCompute"; - case tokEndOfInput: return f << "tokEndOfInput"; - } - - return f << "***error***"; -} - -class MathLexer : public Lexer -{ -public: - MathLexer( Bu::Stream &rSrc ) : - rSrc( rSrc ) - { - } - - virtual ~MathLexer() - { - } - - enum TokenTypes - { - tokStuff - }; - - virtual Token *nextToken() - { - for(;;) - { - if( qbIn.getSize() == 0 ) - { - char buf[4096]; - qbIn.write( buf, rSrc.read( buf, 4096 ) ); - - if( rSrc.isEos() && qbIn.getSize() == 0 ) - return new Token( tokEndOfInput ); - } - - char b; - qbIn.peek( &b, 1 ); - switch( b ) - { - case '+': - qbIn.seek( 1 ); - return new Token( tokPlus ); - - case '-': - qbIn.seek( 1 ); - return new Token( tokMinus ); - - case '/': - qbIn.seek( 1 ); - return new Token( tokDivide ); - - case '*': - qbIn.seek( 1 ); - return new Token( tokMultiply ); - - case ' ': - case '\t': - case '\n': - qbIn.seek( 1 ); - break; - - case '=': - qbIn.seek( 1 ); - return new Token( tokCompute ); - - case '(': - qbIn.seek( 1 ); - return new Token( tokOpenParen ); - - case ')': - qbIn.seek( 1 ); - return new Token( tokCloseParen ); - - case '.': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - Bu::String sTmp; - sTmp += b; - qbIn.seek( 1 ); - for(;;) - { - qbIn.peek( &b, 1 ); - if( b != '.' && (b < '0' || b > '9') ) - { - sio << "!! Convert '" << sTmp << "' to " - << strtod( sTmp.getStr(), NULL ) << sio.nl; - return new Token( - tokNumber, strtod( sTmp.getStr(), NULL ) - ); - } - qbIn.seek( 1 ); - sTmp += b; - } - } - break; - - default: - throw Bu::ExceptionBase("Unexpected character '%c'.", b ); - } - } - } - -private: - Bu::Stream &rSrc; - QueueBuf qbIn; -}; - -void redAdd( Bu::Parser &p ) -{ - Lexer::Token *a = p.popToken(); - Lexer::Token *b = p.popToken(); - - sio << "Add! " << b->vExtra.get() << " + " - << a->vExtra.get() << sio.nl; - - Lexer::Token *c = new Lexer::Token( tokNumber, - b->vExtra.get() + a->vExtra.get() - ); - p.pushToken( c ); - - delete a; - delete b; -} - -void redSubtract( Bu::Parser &p ) -{ - Lexer::Token *a = p.popToken(); - Lexer::Token *b = p.popToken(); - - sio << "Subtract! " << b->vExtra.get() << " - " - << a->vExtra.get() << sio.nl; - - Lexer::Token *c = new Lexer::Token( tokNumber, - b->vExtra.get() - a->vExtra.get() - ); - p.pushToken( c ); - - delete a; - delete b; -} - -void redPrint( Bu::Parser &p ) -{ - Lexer::Token *a = p.popToken(); - sio << "Print! = " << a->vExtra.get() << sio.nl; - delete a; -} - -/* Basic grammer example: - * - * input: expr '=' - * ; - * - * expr: expr '+' expr - * | '(' expr ')' - * | NUMBER - * ; - * - * The problem is, that we can't actually make something left hand recursive, - * so we break it into two exprs: - * - * expr-sub1: '(' expr ')' - * | NUMBER - * ; - * - * expr: expr-sub1 expr-sub2 - * ; - * - * expr-sub2: '+' expr - * | '-' expr - * | - * ; - * - * 5 + 5 + 5 = - */ - -int main( int argc, char *argv[] ) -{ - if( argc < 2 ) - { - println("Provide an input filename as the first parameter."); - return 0; - } - File fIn( argv[1], File::Read ); - - Parser p; - - p.addNonTerminal("expr"); - p.addNonTerminal("expr-sub1"); - p.addNonTerminal("expr-sub2"); - { - Parser::NonTerminal nt; - nt.addProduction( - Parser::Production( - Parser::State( - Parser::State::typeTerminal, - tokPlus - ) - ).append( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId("expr") - ) - ).append( - Parser::State( - Parser::State::typeReduction, - p.addReduction("add") - ) - ) - ); - nt.addProduction( - Parser::Production( - Parser::State( - Parser::State::typeTerminal, - tokMinus - ) - ).append( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId("expr") - ) - ).append( - Parser::State( - Parser::State::typeReduction, - p.addReduction("subtract") - ) - ) - ); - nt.addProduction( - Parser::Production( - ) - ); - nt.setCanSkip(); - p.setNonTerminal("expr-sub2", nt ); - } - { - Parser::NonTerminal nt; - nt.addProduction( - Parser::Production( - Parser::State( - Parser::State::typeTerminalPush, - tokNumber - ) - ) - ); - nt.addProduction( - Parser::Production( - Parser::State( - Parser::State::typeTerminal, - tokOpenParen - ) - ).append( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId("expr") - ) - ).append( - Parser::State( - Parser::State::typeTerminal, - tokCloseParen - ) - ) - ); - p.setNonTerminal("expr-sub1", nt ); - } - { - Parser::NonTerminal nt; - nt.addProduction( - Parser::Production( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId("expr-sub1") - ) - ).append( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId("expr-sub2") - ) - ) - ); - p.setNonTerminal("expr", nt ); - } - { - Parser::NonTerminal nt; - nt.addProduction( - Parser::Production( - Parser::State( - Parser::State::typeNonTerminal, - p.getNonTerminalId("expr") - ) - ).append( - Parser::State( - Parser::State::typeTerminal, - tokCompute - ) - ).append( - Parser::State( - Parser::State::typeReduction, - p.addReduction("print") - ) - ) - ); - p.addNonTerminal("input", nt ); - } - - p.setRootNonTerminal("input"); - - p.setReduction("add", Bu::slot( &redAdd ) ); - p.setReduction("subtract", Bu::slot( &redSubtract ) ); - p.setReduction("print", Bu::slot( &redPrint ) ); - - p.pushLexer( new MathLexer( fIn ) ); - - p.parse(); - - return 0; -} - -- cgit v1.2.3