From 2909f50d008920568f0e50da760b266388ccc124 Mon Sep 17 00:00:00 2001
From: Mike Buland
Date: Mon, 22 Apr 2013 13:05:22 -0600
Subject: There is now a parser & calculator interface.

---
 src/lexer.cpp  | 201 ++++++++++++++++++++++++++++++++++++++++
 src/lexer.h    |  43 ++++++++
 src/main.cpp   |  16 +++-
 src/parser.cpp | 290 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parser.h   |  43 ++++++++
 src/token.cpp  | 105 +++++++++++++++++++++
 src/token.h    |  56 +++++++++++
 7 files changed, 750 insertions(+), 4 deletions(-)
 create mode 100644 src/lexer.cpp
 create mode 100644 src/lexer.h
 create mode 100644 src/parser.cpp
 create mode 100644 src/parser.h
 create mode 100644 src/token.cpp
 create mode 100644 src/token.h

diff --git a/src/lexer.cpp b/src/lexer.cpp
new file mode 100644
index 0000000..834d3bc
--- /dev/null
+++ b/src/lexer.cpp
@@ -0,0 +1,201 @@
+#include "lexer.h"
+#include "token.h"
+#include "number.h"
+
+// NOTE(review): the header name was stripped from this copy of the patch;
+// <bu/sio.h> is assumed because Bu::println is used below -- confirm.
+#include <bu/sio.h>
+
+/**
+ * Tells whether c may appear in a name after its first character.
+ */
+static bool isNameChar( char c )
+{
+    return (c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') ||
+        (c >= '0' && c <= '9') ||
+        c == '_';
+}
+
+/**
+ * Creates a lexer that reads from rIn, starting at scale 0 and radix 10.
+ */
+Lexer::Lexer( Bu::Stream &rIn ) :
+    rIn( rIn ),
+    iBufPos( 0 ),
+    iScale( 0 ),
+    iRadix( 10 ),
+    numRangeTop('9'),
+    ascRangeTop(0)
+{
+}
+
+Lexer::~Lexer()
+{
+}
+
+/**
+ * Returns the next token, reading another line from rIn whenever the
+ * current one is used up.
+ *
+ * Spaces and tabs are skipped.  A backslash starts a command whose name runs
+ * to the next space or tab.  Command, number and name tokens carry a
+ * heap-allocated value that the returned Token takes ownership of.  When a
+ * character fits no rule, a message is printed and the rest of the line is
+ * dropped.
+ *
+ * @returns The next token: tEndOfLine at the end of each line, tEndOfInput
+ *      once rIn.isEos() is true.
+ */
+Token Lexer::nextToken()
+{
+    for(;;)
+    {
+        // Past the end of the buffer: report the line as finished and flag
+        // the buffer so that the next pass reads a new line.
+        if( iBufPos >= sBuf.getSize() )
+        {
+            iBufPos = -1;
+            return Token( Token::tEndOfLine );
+        }
+
+        if( iBufPos < 0 )
+        {
+            if( rIn.isEos() )
+                return Token( Token::tEndOfInput );
+
+            sBuf = rIn.readLine();
+            if( sBuf.getSize() == 0 )
+            {
+                // Nothing on this line, try the next one.
+                iBufPos = -1;
+                continue;
+            }
+            iBufPos = 0;
+        }
+
+        switch( sBuf[iBufPos] )
+        {
+            case ' ':
+            case '\t':
+                iBufPos++;
+                break;
+
+            case '\\':
+                {
+                    Bu::String *sTmp = new Bu::String();
+                    for( iBufPos++; iBufPos < sBuf.getSize() &&
+                        sBuf[iBufPos] != ' ' && sBuf[iBufPos] != '\t';
+                        iBufPos++ )
+                    {
+                        sTmp->append( sBuf[iBufPos] );
+                    }
+                    return Token( Token::tCommand, sTmp );
+                }
+
+            case '+':
+                iBufPos++;
+                return Token( Token::tPlus );
+
+            case '-':
+                iBufPos++;
+                return Token( Token::tMinus );
+
+            case '*':
+                iBufPos++;
+                return Token( Token::tMultiply );
+
+            case '/':
+                iBufPos++;
+                return Token( Token::tDivide );
+
+            case '(':
+                iBufPos++;
+                return Token( Token::tOpenParen );
+
+            case ')':
+                iBufPos++;
+                return Token( Token::tCloseParen );
+
+            default:
+                if( isNumberChar( sBuf[iBufPos] ) )
+                {
+                    // The text is only needed until the Number is built,
+                    // so it lives on the stack.
+                    Bu::String sTmp;
+                    for( ; iBufPos < sBuf.getSize() &&
+                        isNumberChar( sBuf[iBufPos] ); iBufPos++ )
+                    {
+                        sTmp.append( sBuf[iBufPos] );
+                    }
+                    return Token(
+                        Token::tNumber, new Number( sTmp, iScale, iRadix )
+                        );
+                }
+                else if( isNameStart( sBuf[iBufPos] ) )
+                {
+                    // Allocated only here, so the error path below has
+                    // nothing to leak.
+                    Bu::String *sTmp = new Bu::String();
+                    for( ; iBufPos < sBuf.getSize() &&
+                        isNameChar( sBuf[iBufPos] ); iBufPos++ )
+                    {
+                        sTmp->append( sBuf[iBufPos] );
+                    }
+                    return Token( Token::tString, sTmp );
+                }
+                else
+                {
+                    sBuf.clear();
+                    Bu::println("Invalid character discovered!");
+                }
+                break;
+        }
+    }
+}
+
+/**
+ * Tells whether c can be part of a number in the current radix: a digit up
+ * to numRangeTop, a lowercase letter up to ascRangeTop, or a '.'.
+ */
+bool Lexer::isNumberChar( char c ) const
+{
+    return (c >= '0' && c <= numRangeTop) ||
+        (c >= 'a' && c <= ascRangeTop) ||
+        c == '.';
+}
+
+/**
+ * Tells whether c can begin a name: an underscore, an uppercase letter, or
+ * a lowercase letter that is not a digit in the current radix.
+ *
+ * The lowercase range is pinned to 'a'..'z'.  Using ascRangeTop+1 alone as
+ * the lower bound accepts almost any character while ascRangeTop is 0, and
+ * such a character yields an empty name without ever being consumed.
+ */
+bool Lexer::isNameStart( char c ) const
+{
+    return (c >= 'a' && c <= 'z' && c > ascRangeTop) ||
+        (c >= 'A' && c <= 'Z') ||
+        c == '_';
+}
+
+/**
+ * Changes the radix used for numbers and works out which characters count
+ * as digits in it.  The value is not range checked here.
+ */
+void Lexer::setRadix( int i )
+{
+    iRadix = i;
+    if( iRadix <= 10 )
+    {
+        numRangeTop = '0'+iRadix-1;
+        ascRangeTop = 0;
+    }
+    else
+    {
+        numRangeTop = '9';
+        ascRangeTop = 'a'+iRadix-11;
+    }
+}
+
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..4e6d73d
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,43 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+// NOTE(review): the header name was stripped from this copy of the patch;
+// these two are assumed from the use of Bu::Stream and Bu::String -- confirm.
+#include <bu/stream.h>
+#include <bu/string.h>
+
+class Token;
+
+/**
+ * Splits lines read from a stream into Tokens.  Numbers are built with the
+ * scale and radix that are current when they are read.
+ */
+class Lexer
+{
+public:
+    Lexer( Bu::Stream &rIn );
+    virtual ~Lexer();
+
+    Token nextToken();
+
+    int getScale() const { return iScale; }
+    void setScale( int i ) { iScale = i; }
+
+    int getRadix() const { return iRadix; }
+    void setRadix( int i );
+
+private:
+    bool isNumberChar( char c ) const;
+    bool isNameStart( char c ) const;
+
+private:
+    Bu::Stream &rIn;
+    Bu::String sBuf;    ///< Line currently being tokenized
+    int iBufPos;        ///< Next position in sBuf, negative to read a line
+    int iScale;
+    int iRadix;
+    char numRangeTop;   ///< Highest decimal digit valid in iRadix
+    char ascRangeTop;   ///< Highest letter digit valid in iRadix, 0 if none
+};
+
+#endif
diff --git a/src/main.cpp b/src/main.cpp
index d1b9d1c..dec7867 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,9 +1,13 @@
 #include "number.h"
 #include "packedintarray.h"
+#include "lexer.h"
+#include "token.h"
+#include "parser.h"
 
 #include 
 #include 
 #include 
+#include 
 using namespace Bu;
 
 void packedtest1()
@@ -361,12 +365,16 @@ int main( int , char *[] )
     println("CliC");
 
 //  packedtest1();
-    numbertest1();
-    numbertestcomp();
-    radixtest();
-    fractest();
+//  numbertest1();
+//  numbertestcomp();
+//  radixtest();
+//  fractest();
 //  ordertest();
 
+    Lexer lex( sioRaw );
+    Parser parser( lex, sioRaw );
+    parser.parse();
+
     return 0;
 }
 
diff --git a/src/parser.cpp b/src/parser.cpp
new file mode 100644
index 0000000..21ccc11
--- /dev/null
+++ b/src/parser.cpp
@@ -0,0 +1,290 @@
+#include "parser.h"
+#include "lexer.h"
+#include "number.h"
+
+// NOTE(review): the header name was stripped from this copy of the patch;
+// <bu/sio.h> is assumed because Bu::println is used below -- confirm.
+#include <bu/sio.h>
+
+/**
+ * Creates a parser that pulls tokens from lex and prints to rOut.
+ */
+Parser::Parser( Lexer &lex, Bu::Stream &rOut ) :
+    lex( lex ),
+    rOut( rOut )
+{
+}
+
+Parser::~Parser()
+{
+}
+
+/**
+ * Reads tokens until the input ends or an exit/quit command shows up.
+ *
+ * Numbers go on the terminal stack and everything else on the non-terminal
+ * stack.  At the end of each line whatever can be evaluated is evaluated
+ * and the number on top of the terminal stack, if any, is printed.
+ */
+void Parser::parse()
+{
+    for(;;)
+    {
+        Token t = lex.nextToken();
+        switch( t.eType )
+        {
+            case Token::tEndOfInput:
+                return;
+
+            case Token::tEndOfLine:
+                unwind( getPriority( Token::tEndOfLine ) );
+                if( !tsTerminal.isEmpty() )
+                {
+                    Bu::println( rOut, "%1").arg( *tsTerminal.peek().nVal );
+                }
+                // Start the next line clean.  Operators that could not be
+                // applied must not leak into the next expression.
+                tsTerminal.clear();
+                tsNonTerminal.clear();
+                break;
+
+            case Token::tCommand:
+                if( *t.sVal == "exit" || *t.sVal == "quit" )
+                    return;
+                else if( *t.sVal == "scale" )
+                    cmdScale();
+                else if( *t.sVal == "radix" )
+                    cmdRadix();
+                else
+                {
+                    Bu::println( rOut, "ERROR: Unknown command '%1'").
+                        arg( *t.sVal );
+                }
+                break;
+
+            case Token::tNumber:
+                tsTerminal.push( t );
+                break;
+
+            default:
+                // Anything already stacked that binds at least as tightly
+                // as t is applied first.  Including equal priority keeps
+                // the operators left-associative (2-3-4 is (2-3)-4), and
+                // stopping at lower priority keeps 2+3*4*5 from folding
+                // the + in too early.
+                if( !tsNonTerminal.isEmpty() &&
+                    getPriority( tsNonTerminal.peek().eType ) >=
+                    getPriority( t.eType ) )
+                {
+                    unwind( getPriority( t.eType ) );
+                }
+                tsNonTerminal.push( t );
+                break;
+        }
+    }
+}
+
+/**
+ * Handles the scale command: without a parameter the current scale is
+ * printed, with a number the scale is changed.
+ */
+void Parser::cmdScale()
+{
+    Token t2 = lex.nextToken();
+    if( t2.eType == Token::tEndOfLine )
+    {
+        Bu::println( rOut, "Current scale: %1").
+            arg( lex.getScale() );
+    }
+    else if( t2.eType == Token::tNumber )
+    {
+        int32_t i = t2.nVal->toInt32();
+        if( i < 0 )
+        {
+            // Rejected before it gets anywhere near the lexer.
+            Bu::println( rOut, "ERROR: You must provide a "
+                    "positive integer or zero as the parameter "
+                    "to scale.");
+        }
+        else
+        {
+            lex.setScale( i );
+            Bu::println( rOut, "Scale changed to: %1").
+                arg( lex.getScale() );
+        }
+    }
+    else
+    {
+        Bu::println( rOut, "ERROR: You must provide a number "
+                "as the parameter to scale.");
+    }
+}
+
+/**
+ * Handles the radix command: without a parameter the current radix is
+ * printed, with a number from 2 to 36 the radix is changed.
+ */
+void Parser::cmdRadix()
+{
+    Token t2 = lex.nextToken();
+    if( t2.eType == Token::tEndOfLine )
+    {
+        Bu::println( rOut, "Current radix: %1").
+            arg( lex.getRadix() );
+    }
+    else if( t2.eType == Token::tNumber )
+    {
+        int32_t i = t2.nVal->toInt32();
+        if( i < 2 || i > 36 )
+        {
+            Bu::println( rOut, "ERROR: Radix must be between "
+                    "2 and 36 inclusive");
+        }
+        else
+        {
+            lex.setRadix( i );
+            Bu::println( rOut, "Radix changed to: %1").
+                arg( lex.getRadix() );
+        }
+    }
+    else
+    {
+        Bu::println( rOut, "You must provide a number as "
+                "the parameter to radix.");
+    }
+}
+
+/**
+ * Applies stacked operators, most recent first, until the stack is empty,
+ * an open paren is on top, the operator on top has a priority below
+ * iMinPriority, or there are not enough numbers for it.
+ *
+ * @param iMinPriority Lowest priority (as given by getPriority) that may
+ *      still be applied.
+ * @throws Bu::ExceptionBase when a close paren has no matching open paren.
+ */
+void Parser::unwind( int iMinPriority )
+{
+    for(;;)
+    {
+        if( tsNonTerminal.isEmpty() )
+            return;
+
+        const Token::Type eTop = tsNonTerminal.peek().eType;
+
+        // An open paren stays put until its close paren deals with it.
+        if( eTop == Token::tOpenParen )
+            return;
+
+        if( getPriority( eTop ) < iMinPriority )
+            return;
+
+        if( tsTerminal.getSize() < reqTokens( eTop ) )
+            return;
+
+        tsNonTerminal.pop();
+        switch( eTop )
+        {
+            case Token::tPlus:
+            case Token::tMinus:
+            case Token::tMultiply:
+            case Token::tDivide:
+                {
+                    // The right-hand operand was pushed last.
+                    Token b = tsTerminal.peekPop();
+                    Token a = tsTerminal.peekPop();
+                    Number *pRes = 0;
+                    if( eTop == Token::tPlus )
+                        pRes = new Number( *a.nVal + *b.nVal );
+                    else if( eTop == Token::tMinus )
+                        pRes = new Number( *a.nVal - *b.nVal );
+                    else if( eTop == Token::tMultiply )
+                        pRes = new Number( *a.nVal * *b.nVal );
+                    else
+                        pRes = new Number( *a.nVal / *b.nVal );
+                    tsTerminal.push( Token( Token::tNumber, pRes ) );
+                }
+                break;
+
+            case Token::tCloseParen:
+                // Evaluate everything back to the matching open paren,
+                // then drop the paren itself.
+                unwind( getPriority( Token::tEndOfLine ) );
+                if( !tsNonTerminal.isEmpty() &&
+                    tsNonTerminal.peek().eType == Token::tOpenParen )
+                {
+                    tsNonTerminal.pop();
+                }
+                else
+                {
+                    throw Bu::ExceptionBase("Close paren found without open paren.");
+                }
+                break;
+
+            default:
+                // Nothing else is evaluated; it is simply dropped.
+                break;
+        }
+    }
+}
+
+/**
+ * Tells how many numbers have to be on the terminal stack before eType can
+ * be applied.
+ */
+int Parser::reqTokens( Token::Type eType )
+{
+    switch( eType )
+    {
+        case Token::tPlus:
+        case Token::tMinus:
+        case Token::tDivide:
+        case Token::tMultiply:
+            return 2;
+
+        case Token::tOpenParen:
+            return 0;
+
+        case Token::tCloseParen:
+            return 1;
+
+        default:
+            return 0;
+    }
+}
+
+/**
+ * Gives the priority of eType; a higher number binds tighter.
+ *
+ * @throws Bu::ExceptionBase for a type that is not listed.
+ */
+int Parser::getPriority( Token::Type eType )
+{
+    switch( eType )
+    {
+        case Token::tNumber:
+        case Token::tString:
+        case Token::tCommand:
+            return 0;
+
+        case Token::tPlus:
+        case Token::tMinus:
+            return 1;
+
+        case Token::tDivide:
+        case Token::tMultiply:
+            return 2;
+
+        case Token::tOpenParen:
+        case Token::tCloseParen:
+            return 3;
+
+        case Token::tEndOfLine:
+        case Token::tEndOfInput:
+            return -1;
+
+        default:
+            throw Bu::ExceptionBase("Invalid type in getPriority");
+    }
+}
+
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..5563613
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,43 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+// NOTE(review): the header name was stripped from this copy of the patch;
+// <bu/list.h> is assumed from the use of Bu::List below -- confirm.
+#include <bu/list.h>
+#include "token.h"
+
+namespace Bu
+{
+    class Stream;
+};
+
+class Lexer;
+
+/**
+ * Evaluates the tokens produced by a Lexer, one line at a time, and prints
+ * the results to a stream.
+ */
+class Parser
+{
+public:
+    Parser( Lexer &lex, Bu::Stream &rOut );
+    virtual ~Parser();
+
+    void parse();
+
+private:
+    void cmdScale();
+    void cmdRadix();
+    void unwind( int iMinPriority );
+    int reqTokens( Token::Type eType );
+    int getPriority( Token::Type eType );
+
+private:
+    Lexer &lex;
+    Bu::Stream &rOut;
+    typedef Bu::List<Token> TokenStack;
+    TokenStack tsTerminal;      ///< Numbers waiting for an operator
+    TokenStack tsNonTerminal;   ///< Operators, parens and other tokens
+};
+
+#endif
diff --git a/src/token.cpp b/src/token.cpp
new file mode 100644
index 0000000..d7fbe88
--- /dev/null
+++ b/src/token.cpp
@@ -0,0 +1,105 @@
+#include "token.h"
+
+#include "number.h"
+// NOTE(review): the header names were stripped from this copy of the patch;
+// these two are assumed from the use of Bu::Formatter and Bu::String --
+// confirm.
+#include <bu/formatter.h>
+#include <bu/string.h>
+
+Token::Token( Type eType ) :
+    eType( eType ),
+    sVal( 0 )
+{
+}
+
+Token::Token( Type eType, Bu::String *s ) :
+    eType( eType ),
+    sVal( s )
+{
+}
+
+Token::Token( Type eType, Number *n ) :
+    eType( eType ),
+    nVal( n )
+{
+}
+
+/**
+ * Copying a Token moves its value: the new Token takes the pointer and
+ * rSrc is left holding none, so the value is deleted exactly once.
+ */
+Token::Token( const Token &rSrc ) :
+    eType( rSrc.eType ),
+    sVal( rSrc.sVal )
+{
+    Token &rMod = const_cast<Token &>(rSrc);
+    rMod.sVal = 0;
+}
+
+/**
+ * Assignment moves the value the same way the copy constructor does.  The
+ * compiler-generated version would leave two Tokens deleting one pointer.
+ */
+Token &Token::operator=( const Token &rSrc )
+{
+    if( this != &rSrc )
+    {
+        release();
+        eType = rSrc.eType;
+        sVal = rSrc.sVal;
+        const_cast<Token &>(rSrc).sVal = 0;
+    }
+    return *this;
+}
+
+Token::~Token()
+{
+    release();
+}
+
+/**
+ * Deletes the value this Token holds, if its type has one.
+ */
+void Token::release()
+{
+    switch( eType )
+    {
+        case tNumber:
+            delete nVal;
+            break;
+
+        case tString:
+        case tCommand:
+            delete sVal;
+            break;
+
+        default:
+            break;
+    }
+    sVal = 0;
+}
+
+/**
+ * Writes a short text form of a token type to f.
+ */
+Bu::Formatter &operator<<( Bu::Formatter &f, Token::Type eType )
+{
+    switch( eType )
+    {
+        case Token::tNumber: return f << "num";
+        case Token::tString: return f << "str";
+        case Token::tCommand: return f << "cmd";
+        case Token::tPlus: return f << "+";
+        case Token::tMinus: return f << "-";
+        case Token::tDivide: return f << "/";
+        case Token::tMultiply: return f << "*";
+        case Token::tOpenParen: return f << "(";
+        case Token::tCloseParen: return f << ")";
+        case Token::tEndOfLine: return f << "eol";
+        case Token::tEndOfInput: return f << "eoi";
+
+        default: return f << "???";
+    }
+}
+
diff --git a/src/token.h b/src/token.h
new file mode 100644
index 0000000..3b5caff
--- /dev/null
+++ b/src/token.h
@@ -0,0 +1,56 @@
+#ifndef TOKEN_H
+#define TOKEN_H
+
+class Number;
+namespace Bu
+{
+    class String;
+    class Formatter;
+};
+
+/**
+ * One lexed item.  Number tokens own a Number and string and command
+ * tokens own a Bu::String; the value is deleted along with the Token.
+ * Copying or assigning a Token hands the value over to the destination.
+ */
+class Token
+{
+public:
+    enum Type
+    {
+        tNumber,
+        tString,
+        tCommand,
+        tPlus,
+        tMinus,
+        tDivide,
+        tMultiply,
+        tOpenParen,
+        tCloseParen,
+
+        tEndOfLine,
+
+        tEndOfInput
+    };
+
+    Token( Type eType );
+    Token( Type eType, Bu::String *s );
+    Token( Type eType, Number *n );
+    Token( const Token &rSrc );
+    Token &operator=( const Token &rSrc );
+    ~Token();
+
+    Type eType;
+    union
+    {
+        Bu::String *sVal;
+        class Number *nVal;
+    };
+
+private:
+    void release();
+};
+
+Bu::Formatter &operator<<( Bu::Formatter &f, Token::Type eType );
+
+#endif
-- 
cgit v1.2.3