From df6a899ad6c9a1c792582ef1a3b838c7eb6d330c Mon Sep 17 00:00:00 2001
From: Mike Buland
Date: Thu, 3 May 2012 06:30:36 +0000
Subject: Rearranging some stuff.

---
Reviewer's revision of the original patch.  The text is laid out one source
line per line again; template arguments and header names that the reviewed
copy had lost are filled in from context (see NOTE(review) in each file); the
index lines below still carry the blob ids of the original patch.

Code changes:
 - BnfLexer: a word that runs to the end of the input is no longer dropped,
   and an empty buffer is never peeked.
 - BnfParser: productions are really added (bAnything was never set), a
   non-terminal referenced before its definition is not registered twice,
   next() cannot leave pCur dangling, the error text is passed through "%s",
   "tkOr" is spelled "tokOr", and the class is no longer copyable.
 - MathLexer: the number scan refills or stops instead of peeking an empty
   buffer, strtod is called once, and '\r' is skipped.
 - Both programs return 1 when the filename argument is missing.

 src/tests/bnfcompile.cpp | 493 +++++++++++++++++++++++++++++++++++++++++++++++
 src/tests/parser.cpp     | 389 +++++++++++++++++++++++++++++++++++++
 2 files changed, 882 insertions(+)
 create mode 100644 src/tests/bnfcompile.cpp
 create mode 100644 src/tests/parser.cpp

(limited to 'src/tests')

diff --git a/src/tests/bnfcompile.cpp b/src/tests/bnfcompile.cpp
new file mode 100644
index 0000000..af7a0eb
--- /dev/null
+++ b/src/tests/bnfcompile.cpp
@@ -0,0 +1,493 @@
+/*
+ * Copyright (C) 2007-2012 Xagasoft, All rights reserved.
+ *
+ * This file is part of the libbu++ library and is released under the
+ * terms of the license contained in the file LICENSE.
+ */
+
+// NOTE(review): the header names were missing from the copy of this patch
+// under review.  They are inferred from the names used below (sio, Lexer,
+// Parser, File, QueueBuf) and must be confirmed against the source tree.
+#include <bu/sio.h>
+#include <bu/lexer.h>
+#include <bu/parser.h>
+#include <bu/file.h>
+#include <bu/queuebuf.h>
+
+using namespace Bu;
+
+/**
+ * Token ids produced by BnfLexer.
+ */
+enum TokenType
+{
+    tokIdentifier,
+    tokColon,
+    tokOr,
+    tokSemiColon,
+    tokTokens,
+    tokEquals,
+    tokOpenCurly,
+    tokCloseCurly,
+    tokOpenSquare,
+    tokCloseSquare,
+
+    tokEos=-1
+};
+
+/**
+ * Lexer for the grammar description language read by BnfParser.  Words made
+ * of letters, digits and underscores become tokIdentifier (tokTokens for the
+ * word "tokens"), each of : ; | = [ ] { } is a token of its own, and spaces,
+ * tabs and line breaks are skipped.
+ */
+class BnfLexer : public Lexer
+{
+public:
+    BnfLexer( Stream &rSrc ) :
+        rSrc( rSrc )
+    {
+    }
+
+    virtual ~BnfLexer()
+    {
+    }
+
+    /**
+     * Read the next token from the source stream.
+     *@returns A token allocated with new; tokEos once the input is used up.
+     *@throws ExceptionBase for a character that cannot start any token.
+     */
+    virtual Token *nextToken()
+    {
+        for(;;)
+        {
+            if( qbIn.getSize() == 0 )
+            {
+                char buf[4096];
+                qbIn.write( buf, rSrc.read( buf, 4096 ) );
+
+                if( qbIn.getSize() == 0 )
+                {
+                    // Peeking at an empty buffer would leave cBuf unset.
+                    if( !rSrc.isEos() )
+                        continue;
+
+                    // A word that runs up to the very end of the input
+                    // must still be reported before tokEos.
+                    if( sBuf.isSet() )
+                        return wordToken();
+                    return new Token( tokEos );
+                }
+            }
+
+            char cBuf;
+            qbIn.peek( &cBuf, 1 );
+            if( isWordChar( cBuf ) )
+            {
+                sBuf.append( cBuf );
+                qbIn.seek( 1 );
+                continue;
+            }
+
+            // Anything else ends the word collected so far.  The character
+            // stays in qbIn and is handled by the next call.
+            if( sBuf.isSet() )
+                return wordToken();
+
+            int iTok;
+            switch( cBuf )
+            {
+                case ' ':
+                case '\t':
+                case '\n':
+                case '\r':
+                    qbIn.seek( 1 );
+                    continue;
+
+                case ':': iTok = tokColon; break;
+                case ';': iTok = tokSemiColon; break;
+                case '|': iTok = tokOr; break;
+                case '=': iTok = tokEquals; break;
+                case '[': iTok = tokOpenSquare; break;
+                case ']': iTok = tokCloseSquare; break;
+                case '{': iTok = tokOpenCurly; break;
+                case '}': iTok = tokCloseCurly; break;
+
+                default:
+                    throw ExceptionBase("Unexpected character '%c'.",
+                        cBuf );
+            }
+            qbIn.seek( 1 );
+            return new Token( iTok );
+        }
+    }
+
+    /**
+     * Get the enumerator name of a token's id, for diagnostics.
+     */
+    virtual String tokenToString( const Token &t )
+    {
+        switch( (TokenType)t.iToken )
+        {
+            case tokIdentifier: return "tokIdentifier";
+            case tokColon: return "tokColon";
+            case tokOr: return "tokOr";
+            case tokSemiColon: return "tokSemiColon";
+            case tokTokens: return "tokTokens";
+            case tokEquals: return "tokEquals";
+            case tokOpenCurly: return "tokOpenCurly";
+            case tokCloseCurly: return "tokCloseCurly";
+            case tokOpenSquare: return "tokOpenSquare";
+            case tokCloseSquare: return "tokCloseSquare";
+            case tokEos: return "tokEos";
+        }
+
+        return "???";
+    }
+
+private:
+    /**
+     * Check whether a character can be part of a word.
+     */
+    static bool isWordChar( char c )
+    {
+        return (c >= 'a' && c <= 'z') ||
+            (c >= 'A' && c <= 'Z') ||
+            (c >= '0' && c <= '9') ||
+            c == '_';
+    }
+
+    /**
+     * Turn the word collected in sBuf into a token and empty sBuf.
+     */
+    Token *wordToken()
+    {
+        Token *pRet;
+        if( sBuf == "tokens" )
+            pRet = new Token( tokTokens );
+        else
+            pRet = new Token( tokIdentifier, sBuf );
+        sBuf.clear();
+        return pRet;
+    }
+
+private:
+    Stream &rSrc;   // Source of the grammar text
+    QueueBuf qbIn;  // Data read from rSrc that has not been lexed yet
+    String sBuf;    // The word currently being collected
+};
+
+/**
+ * Reads a grammar description from a BnfLexer and fills a Bu::Parser with
+ * the matching non-terminals, productions and reductions, logging each step
+ * to sio.
+ *
+ * The input is a sequence of "tokens = name name ... ;" lists declaring the
+ * terminals and "name: production | production ... ;" rules.  Inside a
+ * production a bare name is a terminal or a non-terminal, [name] is a
+ * terminal added as typeTerminalPush and {name} is a reduction.
+ */
+class BnfParser
+{
+public:
+    BnfParser( BnfLexer &l ) :
+        l( l ),
+        pCur( NULL ),
+        iLastToken( 0 )
+    {
+    }
+
+    virtual ~BnfParser()
+    {
+        delete pCur;
+        pCur = NULL;
+    }
+
+    /**
+     * Process the input up to tokEos.
+     *@throws ExceptionBase at the first token that does not fit.
+     */
+    void parse()
+    {
+        for(;;)
+        {
+            next();
+            switch( pCur->iToken )
+            {
+                case tokTokens:
+                    tokens();
+                    break;
+
+                case tokIdentifier:
+                    nonTerminal();
+                    break;
+
+                case tokEos:
+                    return;
+
+                default:
+                    tokenError("tokTokens, tokIdentifier, or tokEos");
+            }
+        }
+    }
+
+private:
+    /**
+     * Handle "tokens = a b c ;" after the tokTokens has been read.  Each
+     * name is stored in hTokens with the next unused id; ids start at 1.
+     */
+    void tokens()
+    {
+        next();
+        if( pCur->iToken != tokEquals )
+            tokenError("tokEquals");
+        for(;;)
+        {
+            next();
+            if( pCur->iToken == tokIdentifier )
+            {
+                hTokens.insert( pCur->vExtra.get<Bu::String>(), ++iLastToken );
+                sio << "Added token[" << iLastToken << "]: "
+                    << pCur->vExtra.get<Bu::String>() << sio.nl;
+            }
+            else if( pCur->iToken == tokSemiColon )
+                break;
+            else
+                tokenError("tokIdentifier or tokSemiColon");
+        }
+    }
+
+    /**
+     * Handle "name: production | production ... ;" after the name has been
+     * read, and store the finished non-terminal in the parser.
+     */
+    void nonTerminal()
+    {
+        Bu::String sNtName = pCur->vExtra.get<Bu::String>();
+        Parser::NonTerminal nt;
+        // production() registers names it meets before their definition, so
+        // only add the name here if that has not happened already.
+        if( !p.hasNonTerminal( sNtName ) )
+            p.addNonTerminal( sNtName );
+        sio.incIndent();
+        sio << "Created non-terminal: " << sNtName << sio.nl;
+
+        next();
+        if( pCur->iToken != tokColon )
+            tokenError("tokColon");
+        production( nt );
+        for(;;)
+        {
+            switch( pCur->iToken )
+            {
+                case tokOr:
+                    production( nt );
+                    break;
+
+                case tokSemiColon:
+                    p.setNonTerminal( sNtName, nt );
+                    sio.decIndent();
+                    sio << "Closing non-terminal." << sio.nl;
+                    return;
+
+                default:
+                    tokenError("tokOr or tokSemiColon");
+                    break;
+            }
+        }
+    }
+
+    /**
+     * Read one production, up to the tokOr or tokSemiColon that ends it,
+     * which is left in pCur.  A production with at least one state is added
+     * to nt, an empty one marks nt as skippable instead.
+     */
+    void production( Parser::NonTerminal &nt )
+    {
+        sio.incIndent();
+        sio << "Adding new production:" << sio.nl;
+        Parser::Production pr;
+        bool bAnything = false;
+        for(;;)
+        {
+            next();
+            switch( pCur->iToken )
+            {
+                case tokIdentifier:
+                    {
+                        const Bu::String &sName =
+                            pCur->vExtra.get<Bu::String>();
+                        if( hTokens.has( sName ) )
+                        {
+                            pr.append(
+                                Parser::State(
+                                    Parser::State::typeTerminal,
+                                    hTokens.get( sName )
+                                    )
+                                );
+                            sio << "Added terminal " << sName << sio.nl;
+                        }
+                        else
+                        {
+                            if( !p.hasNonTerminal( sName ) )
+                            {
+                                p.addNonTerminal( sName );
+                            }
+                            pr.append(
+                                Parser::State(
+                                    Parser::State::typeNonTerminal,
+                                    p.getNonTerminalId( sName )
+                                    )
+                                );
+                            sio << "Added non-terminal " << sName << sio.nl;
+                        }
+                        bAnything = true;
+                    }
+                    break;
+
+                case tokOpenSquare:
+                    {
+                        next();
+                        if( pCur->iToken != tokIdentifier )
+                            tokenError("tokIdentifier");
+                        Bu::String sName =
+                            pCur->vExtra.get<Bu::String>();
+                        next();
+                        if( pCur->iToken != tokCloseSquare )
+                            tokenError("tokCloseSquare");
+
+                        if( !hTokens.has( sName ) )
+                            throw ExceptionBase("Only token names may be "
+                                "enclosed in square brackets.");
+
+                        pr.append(
+                            Parser::State(
+                                Parser::State::typeTerminalPush,
+                                hTokens.get( sName )
+                                )
+                            );
+                        sio << "Added terminal-push " << sName << sio.nl;
+                        bAnything = true;
+                    }
+                    break;
+
+                case tokOpenCurly:
+                    {
+                        next();
+                        if( pCur->iToken != tokIdentifier )
+                            tokenError("tokIdentifier");
+                        Bu::String sName =
+                            pCur->vExtra.get<Bu::String>();
+                        next();
+                        if( pCur->iToken != tokCloseCurly )
+                            tokenError("tokCloseCurly");
+
+                        if( !p.hasReduction( sName ) )
+                            p.addReduction( sName );
+
+                        pr.append(
+                            Parser::State(
+                                Parser::State::typeReduction,
+                                p.getReductionId( sName )
+                                )
+                            );
+                        sio << "Added reduction " << sName << sio.nl;
+                        bAnything = true;
+                    }
+                    break;
+
+                case tokOr:
+                case tokSemiColon:
+                    if( bAnything )
+                    {
+                        nt.addProduction( pr );
+                        sio.decIndent();
+                        sio << "Closing production." << sio.nl;
+                    }
+                    else
+                    {
+                        nt.setCanSkip();
+                        sio.decIndent();
+                        sio << "Closing empty production." << sio.nl;
+                    }
+                    return;
+
+                default:
+                    tokenError("tokIdentifier, tokOpenSquare, tokOr, "
+                        "tokOpenCurly, or tokSemiColon");
+            }
+        }
+    }
+
+private:
+    /**
+     * Replace pCur with the next token from the lexer.
+     */
+    void next()
+    {
+        delete pCur;
+        // Forget the deleted token first, in case nextToken() throws.
+        pCur = NULL;
+        pCur = l.nextToken();
+    }
+
+    /**
+     * Throw an ExceptionBase saying what was expected and naming the token
+     * currently in pCur.
+     */
+    void tokenError( const String &s )
+    {
+        // The text goes through "%s" so it is never used as a format string.
+        throw ExceptionBase( "%s", ("Expected " + s + " but found " +
+            l.tokenToString( *pCur ) + ".").getStr() );
+    }
+
+private:
+    // pCur is owned, so a copy would end up deleting it a second time.
+    BnfParser( const BnfParser & );
+    BnfParser &operator=( const BnfParser & );
+
+    typedef Bu::Hash<Bu::String, int> TokenHash;
+    TokenHash hTokens;      // Terminal name to id, filled by tokens()
+    BnfLexer &l;            // Source of tokens
+    BnfLexer::Token *pCur;  // Current token, owned; NULL before next()
+    int iLastToken;         // Most recent terminal id handed out
+    Parser p;               // The parser being filled in
+};
+
+/**
+ * Compile the grammar file named by the first argument, logging to sio.
+ */
+int main( int argc, char *argv[] )
+{
+    if( argc < 2 )
+    {
+        println("Provide an input filename as the first parameter.");
+        return 1;
+    }
+    File fIn( argv[1], File::Read );
+
+    BnfLexer bl( fIn );
+    BnfParser parser( bl );
+
+    parser.parse();
+
+/*
+    for(;;)
+    {
+        Lexer::Token *pTok = bl.nextToken();
+        sio << bl.tokenToString(*pTok);
+        if( pTok->vExtra.isSet() )
+        {
+            sio << " - " << pTok->vExtra;
+        }
+        sio << sio.nl;
+        if( pTok->iToken == tokEos )
+            break;
+    }
+*/
+
+    return 0;
+}
+
diff --git a/src/tests/parser.cpp b/src/tests/parser.cpp
new file mode 100644
index 0000000..af53bc8
--- /dev/null
+++ b/src/tests/parser.cpp
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2007-2012 Xagasoft, All rights reserved.
+ *
+ * This file is part of the libbu++ library and is released under the
+ * terms of the license contained in the file LICENSE.
+ */
+
+// NOTE(review): the header names were missing from the copy of this patch
+// under review.  They are inferred from the names used below (Parser, Lexer,
+// File, sio, QueueBuf, strtod) and must be confirmed against the source tree.
+#include <bu/parser.h>
+#include <bu/lexer.h>
+#include <bu/file.h>
+#include <bu/sio.h>
+#include <bu/queuebuf.h>
+#include <stdlib.h>
+
+using namespace Bu;
+
+/**
+ * Token ids produced by MathLexer.
+ */
+enum Tok
+{
+    tokNumber,
+    tokPlus,
+    tokMinus,
+    tokDivide,
+    tokMultiply,
+    tokOpenParen,
+    tokCloseParen,
+    tokCompute,
+    tokEndOfInput=-1
+};
+
+/**
+ * Write the enumerator name of a token id to a formatter.
+ */
+Bu::Formatter &operator<<( Bu::Formatter &f, Tok e )
+{
+    switch( e )
+    {
+        case tokNumber: return f << "tokNumber";
+        case tokPlus: return f << "tokPlus";
+        case tokMinus: return f << "tokMinus";
+        case tokDivide: return f << "tokDivide";
+        case tokMultiply: return f << "tokMultiply";
+        case tokOpenParen: return f << "tokOpenParen";
+        case tokCloseParen: return f << "tokCloseParen";
+        case tokCompute: return f << "tokCompute";
+        case tokEndOfInput: return f << "tokEndOfInput";
+    }
+
+    return f << "***error***";
+}
+
+/**
+ * Lexer for simple arithmetic: numbers made of digits and dots, the
+ * operators + - / *, parentheses, and = to request the result.  Spaces, tabs
+ * and line breaks are skipped.
+ */
+class MathLexer : public Lexer
+{
+public:
+    MathLexer( Bu::Stream &rSrc ) :
+        rSrc( rSrc )
+    {
+    }
+
+    virtual ~MathLexer()
+    {
+    }
+
+    enum TokenTypes
+    {
+        tokStuff
+    };
+
+    /**
+     * Read the next token from the source stream.
+     *@returns A token allocated with new; tokEndOfInput once the input is
+     * used up.  A tokNumber carries its value as a double.
+     *@throws Bu::ExceptionBase for a character that cannot start any token.
+     */
+    virtual Token *nextToken()
+    {
+        for(;;)
+        {
+            if( !fill() )
+                return new Token( tokEndOfInput );
+
+            char b;
+            qbIn.peek( &b, 1 );
+
+            int iTok;
+            switch( b )
+            {
+                case ' ':
+                case '\t':
+                case '\n':
+                case '\r':
+                    qbIn.seek( 1 );
+                    continue;
+
+                case '+': iTok = tokPlus; break;
+                case '-': iTok = tokMinus; break;
+                case '/': iTok = tokDivide; break;
+                case '*': iTok = tokMultiply; break;
+                case '=': iTok = tokCompute; break;
+                case '(': iTok = tokOpenParen; break;
+                case ')': iTok = tokCloseParen; break;
+
+                case '.':
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9':
+                    return numberToken( b );
+
+                default:
+                    throw Bu::ExceptionBase("Unexpected character '%c'.", b );
+            }
+            qbIn.seek( 1 );
+            return new Token( iTok );
+        }
+    }
+
+private:
+    /**
+     * Make sure qbIn holds at least one unread byte, reading from rSrc as
+     * needed.
+     *@returns false once rSrc is at its end and qbIn is empty.
+     */
+    bool fill()
+    {
+        while( qbIn.getSize() == 0 )
+        {
+            char buf[4096];
+            qbIn.write( buf, rSrc.read( buf, 4096 ) );
+
+            if( rSrc.isEos() && qbIn.getSize() == 0 )
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * Read a number whose first character b is still unread in qbIn.
+     *@returns A tokNumber token holding the value as converted by strtod.
+     */
+    Token *numberToken( char b )
+    {
+        Bu::String sTmp;
+        do
+        {
+            sTmp += b;
+            qbIn.seek( 1 );
+            // Refilling here keeps a number in one piece across reads and
+            // stops the scan cleanly at the end of the input.
+            if( !fill() )
+                break;
+            qbIn.peek( &b, 1 );
+        } while( b == '.' || (b >= '0' && b <= '9') );
+
+        double dValue = strtod( sTmp.getStr(), NULL );
+        sio << "!! Convert '" << sTmp << "' to " << dValue << sio.nl;
+        return new Token( tokNumber, dValue );
+    }
+
+private:
+    Bu::Stream &rSrc;   // Source of the expression text
+    QueueBuf qbIn;      // Data read from rSrc that has not been lexed yet
+};
+
+/**
+ * Reduction "add": pop two number tokens from the parser and push a new
+ * number token holding their sum.
+ */
+void redAdd( Bu::Parser &p )
+{
+    Lexer::Token *a = p.popToken();
+    Lexer::Token *b = p.popToken();
+
+    sio << "Add! " << b->vExtra.get<double>() << " + "
+        << a->vExtra.get<double>() << sio.nl;
+
+    Lexer::Token *c = new Lexer::Token( tokNumber,
+        b->vExtra.get<double>() + a->vExtra.get<double>()
+        );
+    p.pushToken( c );
+
+    delete a;
+    delete b;
+}
+
+/**
+ * Reduction "subtract": pop two number tokens from the parser and push a new
+ * number token holding the second one popped minus the first one popped.
+ */
+void redSubtract( Bu::Parser &p )
+{
+    Lexer::Token *a = p.popToken();
+    Lexer::Token *b = p.popToken();
+
+    sio << "Subtract! " << b->vExtra.get<double>() << " - "
+        << a->vExtra.get<double>() << sio.nl;
+
+    Lexer::Token *c = new Lexer::Token( tokNumber,
+        b->vExtra.get<double>() - a->vExtra.get<double>()
+        );
+    p.pushToken( c );
+
+    delete a;
+    delete b;
+}
+
+/**
+ * Reduction "print": pop one number token and print its value.
+ */
+void redPrint( Bu::Parser &p )
+{
+    Lexer::Token *a = p.popToken();
+    sio << "Print! = " << a->vExtra.get<double>() << sio.nl;
+    delete a;
+}
+
+/* Basic grammar example:
+ *
+ * input: expr '='
+ *      ;
+ *
+ * expr: expr '+' expr
+ *     | '(' expr ')'
+ *     | NUMBER
+ *     ;
+ *
+ * The problem is that we can't actually make something left-recursive,
+ * so we break it into two exprs:
+ *
+ * expr-sub1: '(' expr ')'
+ *          | NUMBER
+ *          ;
+ *
+ * expr: expr-sub1 expr-sub2
+ *     ;
+ *
+ * expr-sub2: '+' expr
+ *          | '-' expr
+ *          |
+ *          ;
+ *
+ * 5 + 5 + 5 =
+ */
+
+/**
+ * Build the expression grammar by hand and run it on the file named by
+ * the first argument.
+ */
+int main( int argc, char *argv[] )
+{
+    if( argc < 2 )
+    {
+        println("Provide an input filename as the first parameter.");
+        return 1;
+    }
+    File fIn( argv[1], File::Read );
+
+    Parser p;
+
+    p.addNonTerminal("expr");
+    p.addNonTerminal("expr-sub1");
+    p.addNonTerminal("expr-sub2");
+    // expr-sub2: '+' expr {add} | '-' expr {subtract} | (nothing)
+    {
+        Parser::NonTerminal nt;
+        nt.addProduction(
+            Parser::Production(
+                Parser::State( Parser::State::typeTerminal, tokPlus )
+            ).append(
+                Parser::State(
+                    Parser::State::typeNonTerminal,
+                    p.getNonTerminalId("expr")
+                )
+            ).append(
+                Parser::State(
+                    Parser::State::typeReduction,
+                    p.addReduction("add")
+                )
+            )
+        );
+        nt.addProduction(
+            Parser::Production(
+                Parser::State( Parser::State::typeTerminal, tokMinus )
+            ).append(
+                Parser::State(
+                    Parser::State::typeNonTerminal,
+                    p.getNonTerminalId("expr")
+                )
+            ).append(
+                Parser::State(
+                    Parser::State::typeReduction,
+                    p.addReduction("subtract")
+                )
+            )
+        );
+        nt.addProduction( Parser::Production() );
+        nt.setCanSkip();
+        p.setNonTerminal("expr-sub2", nt );
+    }
+    // expr-sub1: NUMBER (pushed) | '(' expr ')'
+    {
+        Parser::NonTerminal nt;
+        nt.addProduction(
+            Parser::Production(
+                Parser::State( Parser::State::typeTerminalPush, tokNumber )
+            )
+        );
+        nt.addProduction(
+            Parser::Production(
+                Parser::State( Parser::State::typeTerminal, tokOpenParen )
+            ).append(
+                Parser::State(
+                    Parser::State::typeNonTerminal,
+                    p.getNonTerminalId("expr")
+                )
+            ).append(
+                Parser::State( Parser::State::typeTerminal, tokCloseParen )
+            )
+        );
+        p.setNonTerminal("expr-sub1", nt );
+    }
+    // expr: expr-sub1 expr-sub2
+    {
+        Parser::NonTerminal nt;
+        nt.addProduction(
+            Parser::Production(
+                Parser::State(
+                    Parser::State::typeNonTerminal,
+                    p.getNonTerminalId("expr-sub1")
+                )
+            ).append(
+                Parser::State(
+                    Parser::State::typeNonTerminal,
+                    p.getNonTerminalId("expr-sub2")
+                )
+            )
+        );
+        p.setNonTerminal("expr", nt );
+    }
+    // input: expr '=' {print}
+    {
+        Parser::NonTerminal nt;
+        nt.addProduction(
+            Parser::Production(
+                Parser::State(
+                    Parser::State::typeNonTerminal,
+                    p.getNonTerminalId("expr")
+                )
+            ).append(
+                Parser::State( Parser::State::typeTerminal, tokCompute )
+            ).append(
+                Parser::State(
+                    Parser::State::typeReduction,
+                    p.addReduction("print")
+                )
+            )
+        );
+        p.addNonTerminal("input", nt );
+    }
+
+    p.setRootNonTerminal("input");
+
+    p.setReduction("add", Bu::slot( &redAdd ) );
+    p.setReduction("subtract", Bu::slot( &redSubtract ) );
+    p.setReduction("print", Bu::slot( &redPrint ) );
+
+    p.pushLexer( new MathLexer( fIn ) );
+
+    p.parse();
+
+    return 0;
+}
+
-- 
cgit v1.2.3