From 1ee5f374ed986333d5cdbbf41390f1c4c755a8e3 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Tue, 12 Oct 2010 06:09:48 +0000 Subject: This commit has a minor tweak to the variant class to make it easier to use, and introduces the parser and lexer classes. I also made a test for parser and put it in the tools directory. That is silly, it shouldn't be. However, it's necessary right now, because I don't want to do a full build to compile all the parser tests. However, this commit doesn't actually build yet. It will soon, I just wanted to get it all committed. --- src/lexer.cpp | 31 ++++++++++ src/lexer.h | 44 ++++++++++++++ src/parser.cpp | 43 ++++++++++++++ src/parser.h | 48 +++++++++++++++ src/tools/parser.cpp | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/variant.cpp | 2 +- src/variant.h | 8 ++- 7 files changed, 338 insertions(+), 2 deletions(-) create mode 100644 src/lexer.cpp create mode 100644 src/lexer.h create mode 100644 src/parser.cpp create mode 100644 src/parser.h create mode 100644 src/tools/parser.cpp (limited to 'src') diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 0000000..c7a6fcb --- /dev/null +++ b/src/lexer.cpp @@ -0,0 +1,31 @@ +#include "bu/lexer.h" + +Bu::Lexer::Lexer() +{ +} + +Bu::Lexer::~Lexer() +{ +} + +Bu::Lexer::Token::Token() : + iToken( -1 ) +{ +} + +Bu::Lexer::Token::Token( int iToken ) : + iToken( iToken ) +{ +} + +Bu::FString Bu::Lexer::tokenToString( const Bu::Lexer::Token &t ) +{ + Bu::MemBuf mb; + Bu::Formatter f( mb ); + f << "<" << t.iToken << ">"; + if( t.vExtra.isSet() ) + f << " (" << t.vExtra << ")"; + + return mb.getString(); +} + diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..37d268f --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,44 @@ +#ifndef BU_LEXER_H +#define BU_LEXER_H + +#include "bu/variant.h" + +namespace Bu +{ + class Stream; + + /** + * The base class for creating a lexical analyzer. 
This is designed to work + * in tandem with the Bu::Parser class, which uses this to tokenize textual + * input. It can be used by just about anything that cares about tokens + * more than raw input, though. + */ + class Lexer + { + public: + Lexer(); + virtual ~Lexer(); + + class Token + { + public: + Token(); + Token( int iToken ); + + template + Token( int iToken, const t &v ) : + iToken( iToken ), + vExtra( v ) + { + } + int iToken; + Bu::Variant vExtra; + }; + + virtual Token *nextToken()=0; + + virtual Bu::FString tokenToString( const Token &t ); + }; +}; + +#endif diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..7015070 --- /dev/null +++ b/src/parser.cpp @@ -0,0 +1,43 @@ +#include "bu/parser.h" +#include "bu/lexer.h" + +#include "bu/sio.h" +using namespace Bu; + +Bu::Parser::Parser() +{ +} + +Bu::Parser::~Parser() +{ +} + +void Bu::Parser::pushLexer( Lexer *pLex ) +{ + sLexer.push( pLex ); +} + +void Bu::Parser::popLexer() +{ + delete sLexer.peekPop(); +} + +void Bu::Parser::parse() +{ + for(;;) + { + Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); + sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; + if( pToken->iToken < 0 ) + { + delete sLexer.peekPop(); + if( sLexer.isEmpty() ) + { + delete pToken; + return; + } + } + delete pToken; + } +} + diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..26b15a6 --- /dev/null +++ b/src/parser.h @@ -0,0 +1,48 @@ +#ifndef BU_PARSER_H +#define BU_PARSER_H + +#include "bu/list.h" +#include "bu/fstring.h" +#include "bu/lexer.h" + +namespace Bu +{ + /** + * The base framework for an LR(1) grammar parser. Provided a proper set of + * ParserStates this will parse any input the lexer can provide. + */ + class Parser + { + public: + Parser(); + virtual ~Parser(); + + /** + * When a Lexer is pushed onto the stack it becomes the source for + * future tokens read by the parser until it is popped off the stack. 
+ * The Parser takes ownership of every Lexer pushed onto the stack, + * and will delete it when it is popped off the stack. + */ + void pushLexer( Lexer *pLex ); + + /** + * Pop a lexer off the stack, and delete it. + */ + void popLexer(); + + /** + * Execute a parse. + */ + void parse(); + + private: + typedef Bu::List LexerStack; + typedef Bu::List TokenStack; + typedef Bu::List StateStack; + LexerStack sLexer; + TokenStack sToken; + StateStack sState; + }; +}; + +#endif diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp new file mode 100644 index 0000000..a70dfa4 --- /dev/null +++ b/src/tools/parser.cpp @@ -0,0 +1,164 @@ +#include +#include +#include +#include +#include +#include + +using namespace Bu; + +enum Tok +{ + tokNumber, + tokPlus, + tokMinus, + tokDivide, + tokMultiply, + tokOpenParen, + tokCloseParen, + tokCompute, + tokEndOfInput=-1 +}; + +Bu::Formatter &operator<<( Bu::Formatter &f, Tok e ) +{ + switch( e ) + { + case tokNumber: return f << "tokNumber"; + case tokPlus: return f << "tokPlus"; + case tokMinus: return f << "tokMinus"; + case tokDivide: return f << "tokDivide"; + case tokMultiply: return f << "tokMultiply"; + case tokOpenParen: return f << "tokOpenParen"; + case tokCloseParen: return f << "tokCloseParen"; + case tokCompute: return f << "tokCompute"; + case tokEndOfInput: return f << "tokEndOfInput"; + } + + return f << "***error***"; +} + +class MathLexer : public Lexer +{ +public: + MathLexer( Bu::Stream &rSrc ) : + rSrc( rSrc ) + { + } + + virtual ~MathLexer() + { + } + + enum TokenTypes + { + tokStuff + }; + + virtual Token *nextToken() + { + for(;;) + { + if( qbIn.getSize() == 0 ) + { + char buf[4096]; + qbIn.write( buf, rSrc.read( buf, 4096 ) ); + + if( rSrc.isEos() && qbIn.getSize() == 0 ) + return new Token( tokEndOfInput ); + } + + char b; + qbIn.peek( &b, 1 ); + switch( b ) + { + case '+': + qbIn.seek( 1 ); + return new Token( tokPlus ); + + case '-': + qbIn.seek( 1 ); + return new Token( tokMinus ); + + case '/': + 
qbIn.seek( 1 ); + return new Token( tokDivide ); + + case '*': + qbIn.seek( 1 ); + return new Token( tokMultiply ); + + case ' ': + case '\t': + case '\n': + qbIn.seek( 1 ); + break; + + case '=': + qbIn.seek( 1 ); + return new Token( tokCompute ); + + case '(': + qbIn.seek( 1 ); + return new Token( tokOpenParen ); + + case ')': + qbIn.seek( 1 ); + return new Token( tokCloseParen ); + + case '.': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + { + Bu::FString sTmp; + sTmp += b; + qbIn.seek( 1 ); + for(;;) + { + qbIn.peek( &b, 1 ); + if( b != '.' && (b < '0' || b > '9') ) + { + sio << "!! Convert '" << sTmp << "' to " + << strtod( sTmp.getStr(), NULL ) << sio.nl; + return new Token( + tokNumber, strtod( sTmp.getStr(), NULL ) + ); + } + qbIn.seek( 1 ); + sTmp += b; + } + } + break; + + default: + throw Bu::ExceptionBase("Unexpected character '%c'.", b ); + } + } + } + +private: + Bu::Stream &rSrc; + QueueBuf qbIn; +}; + +int main( int argc, char *argv[] ) +{ + File fIn( argv[1], File::Read ); + + Parser p; + + p.pushLexer( new MathLexer( fIn ) ); + + p.parse(); + + return 0; +} + diff --git a/src/variant.cpp b/src/variant.cpp index 6b304ba..a66ec39 100644 --- a/src/variant.cpp +++ b/src/variant.cpp @@ -43,7 +43,7 @@ Bu::Variant::~Variant() } } -bool Bu::Variant::isSet() +bool Bu::Variant::isSet() const { return pCore != NULL; } diff --git a/src/variant.h b/src/variant.h index 809aab9..5482ee3 100644 --- a/src/variant.h +++ b/src/variant.h @@ -98,9 +98,15 @@ namespace Bu public: Variant(); Variant( const Variant &v ); + template + Variant( const t &v ) : + pCore( new VariantType() ) + { + (*dynamic_cast *>(pCore)) = v; + } virtual ~Variant(); - bool isSet(); + bool isSet() const; Bu::FString toString() const; const std::type_info &getType() const; -- cgit v1.2.3