diff options
| author | Mike Buland <eichlan@xagasoft.com> | 2010-10-12 06:09:48 +0000 |
|---|---|---|
| committer | Mike Buland <eichlan@xagasoft.com> | 2010-10-12 06:09:48 +0000 |
| commit | 1ee5f374ed986333d5cdbbf41390f1c4c755a8e3 (patch) | |
| tree | 67b02598d3dca87a82263629a1290bd7b7a79006 /src | |
| parent | 313e28df2a8776c82f5493aef6fe44ad40f1935a (diff) | |
| download | libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.gz libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.bz2 libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.xz libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.zip | |
This commit has a minor tweak to the variant class to make it easier to use,
and introduces the parser and lexer classes. I also made a test for parser and
put it in the tools directory. That is silly, it shouldn't be. However, it's
necessary right now, because I don't want to do a full build to compile all
the parser tests.
However, this commit doesn't actually build yet. It will soon, I just wanted
to get it all committed.
Diffstat (limited to 'src')
| -rw-r--r-- | src/lexer.cpp | 31 | ||||
| -rw-r--r-- | src/lexer.h | 44 | ||||
| -rw-r--r-- | src/parser.cpp | 43 | ||||
| -rw-r--r-- | src/parser.h | 48 | ||||
| -rw-r--r-- | src/tools/parser.cpp | 164 | ||||
| -rw-r--r-- | src/variant.cpp | 2 | ||||
| -rw-r--r-- | src/variant.h | 8 |
7 files changed, 338 insertions, 2 deletions
diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 0000000..c7a6fcb --- /dev/null +++ b/src/lexer.cpp | |||
| @@ -0,0 +1,31 @@ | |||
| 1 | #include "bu/lexer.h" | ||
| 2 | |||
| 3 | Bu::Lexer::Lexer() | ||
| 4 | { | ||
| 5 | } | ||
| 6 | |||
| 7 | Bu::Lexer::~Lexer() | ||
| 8 | { | ||
| 9 | } | ||
| 10 | |||
| 11 | Bu::Lexer::Token::Token() : | ||
| 12 | iToken( -1 ) | ||
| 13 | { | ||
| 14 | } | ||
| 15 | |||
| 16 | Bu::Lexer::Token::Token( int iToken ) : | ||
| 17 | iToken( iToken ) | ||
| 18 | { | ||
| 19 | } | ||
| 20 | |||
| 21 | Bu::FString Bu::Lexer::tokenToString( const Bu::Lexer::Token &t ) | ||
| 22 | { | ||
| 23 | Bu::MemBuf mb; | ||
| 24 | Bu::Formatter f( mb ); | ||
| 25 | f << "<" << t.iToken << ">"; | ||
| 26 | if( t.vExtra.isSet() ) | ||
| 27 | f << " (" << t.vExtra << ")"; | ||
| 28 | |||
| 29 | return mb.getString(); | ||
| 30 | } | ||
| 31 | |||
diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..37d268f --- /dev/null +++ b/src/lexer.h | |||
| @@ -0,0 +1,44 @@ | |||
| 1 | #ifndef BU_LEXER_H | ||
| 2 | #define BU_LEXER_H | ||
| 3 | |||
| 4 | #include "bu/variant.h" | ||
| 5 | |||
| 6 | namespace Bu | ||
| 7 | { | ||
| 8 | class Stream; | ||
| 9 | |||
| 10 | /** | ||
| 11 | * The base class for creating a lexical analyzer. This is designed to work | ||
| 12 | * in tandem with the Bu::Parser class, which uses this to tokenize textual | ||
| 13 | * input. It can be used by just about anything that cares about tokens | ||
| 14 | * more than raw input, though. | ||
| 15 | */ | ||
| 16 | class Lexer | ||
| 17 | { | ||
| 18 | public: | ||
| 19 | Lexer(); | ||
| 20 | virtual ~Lexer(); | ||
| 21 | |||
| 22 | class Token | ||
| 23 | { | ||
| 24 | public: | ||
| 25 | Token(); | ||
| 26 | Token( int iToken ); | ||
| 27 | |||
| 28 | template<class t> | ||
| 29 | Token( int iToken, const t &v ) : | ||
| 30 | iToken( iToken ), | ||
| 31 | vExtra( v ) | ||
| 32 | { | ||
| 33 | } | ||
| 34 | int iToken; | ||
| 35 | Bu::Variant vExtra; | ||
| 36 | }; | ||
| 37 | |||
| 38 | virtual Token *nextToken()=0; | ||
| 39 | |||
| 40 | virtual Bu::FString tokenToString( const Token &t ); | ||
| 41 | }; | ||
| 42 | }; | ||
| 43 | |||
| 44 | #endif | ||
diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..7015070 --- /dev/null +++ b/src/parser.cpp | |||
| @@ -0,0 +1,43 @@ | |||
| 1 | #include "bu/parser.h" | ||
| 2 | #include "bu/lexer.h" | ||
| 3 | |||
| 4 | #include "bu/sio.h" | ||
| 5 | using namespace Bu; | ||
| 6 | |||
| 7 | Bu::Parser::Parser() | ||
| 8 | { | ||
| 9 | } | ||
| 10 | |||
| 11 | Bu::Parser::~Parser() | ||
| 12 | { | ||
| 13 | } | ||
| 14 | |||
| 15 | void Bu::Parser::pushLexer( Lexer *pLex ) | ||
| 16 | { | ||
| 17 | sLexer.push( pLex ); | ||
| 18 | } | ||
| 19 | |||
| 20 | void Bu::Parser::popLexer() | ||
| 21 | { | ||
| 22 | delete sLexer.peekPop(); | ||
| 23 | } | ||
| 24 | |||
| 25 | void Bu::Parser::parse() | ||
| 26 | { | ||
| 27 | for(;;) | ||
| 28 | { | ||
| 29 | Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); | ||
| 30 | sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; | ||
| 31 | if( pToken->iToken < 0 ) | ||
| 32 | { | ||
| 33 | delete sLexer.peekPop(); | ||
| 34 | if( sLexer.isEmpty() ) | ||
| 35 | { | ||
| 36 | delete pToken; | ||
| 37 | return; | ||
| 38 | } | ||
| 39 | } | ||
| 40 | delete pToken; | ||
| 41 | } | ||
| 42 | } | ||
| 43 | |||
diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..26b15a6 --- /dev/null +++ b/src/parser.h | |||
| @@ -0,0 +1,48 @@ | |||
| 1 | #ifndef BU_PARSER_H | ||
| 2 | #define BU_PARSER_H | ||
| 3 | |||
| 4 | #include "bu/list.h" | ||
| 5 | #include "bu/fstring.h" | ||
| 6 | #include "bu/lexer.h" | ||
| 7 | |||
| 8 | namespace Bu | ||
| 9 | { | ||
| 10 | /** | ||
| 11 | * The base framework for an LR(1) grammar parser. Provided a proper set of | ||
| 12 | * ParserStates this will parse any input the lexer can provide. | ||
| 13 | */ | ||
| 14 | class Parser | ||
| 15 | { | ||
| 16 | public: | ||
| 17 | Parser(); | ||
| 18 | virtual ~Parser(); | ||
| 19 | |||
| 20 | /** | ||
| 21 | * When a Lexer is pushed onto the stack it becomes the source for | ||
| 22 | * future tokens read by the parser until it is popped off the stack. | ||
| 23 | * The Parser takes ownership of every Lexer pushed onto the stack, | ||
| 24 | * and will delete it when it is popped off the stack. | ||
| 25 | */ | ||
| 26 | void pushLexer( Lexer *pLex ); | ||
| 27 | |||
| 28 | /** | ||
| 29 | * Pop a lexer off the stack, and delete it. | ||
| 30 | */ | ||
| 31 | void popLexer(); | ||
| 32 | |||
| 33 | /** | ||
| 34 | * Execute a parse. | ||
| 35 | */ | ||
| 36 | void parse(); | ||
| 37 | |||
| 38 | private: | ||
| 39 | typedef Bu::List<Lexer *> LexerStack; | ||
| 40 | typedef Bu::List<Lexer::Token *> TokenStack; | ||
| 41 | typedef Bu::List<State *> StateStack; | ||
| 42 | LexerStack sLexer; | ||
| 43 | TokenStack sToken; | ||
| 44 | StateStack sState; | ||
| 45 | }; | ||
| 46 | }; | ||
| 47 | |||
| 48 | #endif | ||
diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp new file mode 100644 index 0000000..a70dfa4 --- /dev/null +++ b/src/tools/parser.cpp | |||
| @@ -0,0 +1,164 @@ | |||
| 1 | #include <bu/parser.h> | ||
| 2 | #include <bu/lexer.h> | ||
| 3 | #include <bu/file.h> | ||
| 4 | #include <bu/sio.h> | ||
| 5 | #include <bu/queuebuf.h> | ||
| 6 | #include <stdlib.h> | ||
| 7 | |||
| 8 | using namespace Bu; | ||
| 9 | |||
| 10 | enum Tok | ||
| 11 | { | ||
| 12 | tokNumber, | ||
| 13 | tokPlus, | ||
| 14 | tokMinus, | ||
| 15 | tokDivide, | ||
| 16 | tokMultiply, | ||
| 17 | tokOpenParen, | ||
| 18 | tokCloseParen, | ||
| 19 | tokCompute, | ||
| 20 | tokEndOfInput=-1 | ||
| 21 | }; | ||
| 22 | |||
| 23 | Bu::Formatter &operator<<( Bu::Formatter &f, Tok e ) | ||
| 24 | { | ||
| 25 | switch( e ) | ||
| 26 | { | ||
| 27 | case tokNumber: return f << "tokNumber"; | ||
| 28 | case tokPlus: return f << "tokPlus"; | ||
| 29 | case tokMinus: return f << "tokMinus"; | ||
| 30 | case tokDivide: return f << "tokDivide"; | ||
| 31 | case tokMultiply: return f << "tokMultiply"; | ||
| 32 | case tokOpenParen: return f << "tokOpenParen"; | ||
| 33 | case tokCloseParen: return f << "tokCloseParen"; | ||
| 34 | case tokCompute: return f << "tokCompute"; | ||
| 35 | case tokEndOfInput: return f << "tokEndOfInput"; | ||
| 36 | } | ||
| 37 | |||
| 38 | return f << "***error***"; | ||
| 39 | } | ||
| 40 | |||
| 41 | class MathLexer : public Lexer | ||
| 42 | { | ||
| 43 | public: | ||
| 44 | MathLexer( Bu::Stream &rSrc ) : | ||
| 45 | rSrc( rSrc ) | ||
| 46 | { | ||
| 47 | } | ||
| 48 | |||
| 49 | virtual ~MathLexer() | ||
| 50 | { | ||
| 51 | } | ||
| 52 | |||
| 53 | enum TokenTypes | ||
| 54 | { | ||
| 55 | tokStuff | ||
| 56 | }; | ||
| 57 | |||
| 58 | virtual Token *nextToken() | ||
| 59 | { | ||
| 60 | for(;;) | ||
| 61 | { | ||
| 62 | if( qbIn.getSize() == 0 ) | ||
| 63 | { | ||
| 64 | char buf[4096]; | ||
| 65 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
| 66 | |||
| 67 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
| 68 | return new Token( tokEndOfInput ); | ||
| 69 | } | ||
| 70 | |||
| 71 | char b; | ||
| 72 | qbIn.peek( &b, 1 ); | ||
| 73 | switch( b ) | ||
| 74 | { | ||
| 75 | case '+': | ||
| 76 | qbIn.seek( 1 ); | ||
| 77 | return new Token( tokPlus ); | ||
| 78 | |||
| 79 | case '-': | ||
| 80 | qbIn.seek( 1 ); | ||
| 81 | return new Token( tokMinus ); | ||
| 82 | |||
| 83 | case '/': | ||
| 84 | qbIn.seek( 1 ); | ||
| 85 | return new Token( tokDivide ); | ||
| 86 | |||
| 87 | case '*': | ||
| 88 | qbIn.seek( 1 ); | ||
| 89 | return new Token( tokMultiply ); | ||
| 90 | |||
| 91 | case ' ': | ||
| 92 | case '\t': | ||
| 93 | case '\n': | ||
| 94 | qbIn.seek( 1 ); | ||
| 95 | break; | ||
| 96 | |||
| 97 | case '=': | ||
| 98 | qbIn.seek( 1 ); | ||
| 99 | return new Token( tokCompute ); | ||
| 100 | |||
| 101 | case '(': | ||
| 102 | qbIn.seek( 1 ); | ||
| 103 | return new Token( tokOpenParen ); | ||
| 104 | |||
| 105 | case ')': | ||
| 106 | qbIn.seek( 1 ); | ||
| 107 | return new Token( tokCloseParen ); | ||
| 108 | |||
| 109 | case '.': | ||
| 110 | case '0': | ||
| 111 | case '1': | ||
| 112 | case '2': | ||
| 113 | case '3': | ||
| 114 | case '4': | ||
| 115 | case '5': | ||
| 116 | case '6': | ||
| 117 | case '7': | ||
| 118 | case '8': | ||
| 119 | case '9': | ||
| 120 | { | ||
| 121 | Bu::FString sTmp; | ||
| 122 | sTmp += b; | ||
| 123 | qbIn.seek( 1 ); | ||
| 124 | for(;;) | ||
| 125 | { | ||
| 126 | qbIn.peek( &b, 1 ); | ||
| 127 | if( b != '.' && (b < '0' || b > '9') ) | ||
| 128 | { | ||
| 129 | sio << "!! Convert '" << sTmp << "' to " | ||
| 130 | << strtod( sTmp.getStr(), NULL ) << sio.nl; | ||
| 131 | return new Token( | ||
| 132 | tokNumber, strtod( sTmp.getStr(), NULL ) | ||
| 133 | ); | ||
| 134 | } | ||
| 135 | qbIn.seek( 1 ); | ||
| 136 | sTmp += b; | ||
| 137 | } | ||
| 138 | } | ||
| 139 | break; | ||
| 140 | |||
| 141 | default: | ||
| 142 | throw Bu::ExceptionBase("Unexpected character '%c'.", b ); | ||
| 143 | } | ||
| 144 | } | ||
| 145 | } | ||
| 146 | |||
| 147 | private: | ||
| 148 | Bu::Stream &rSrc; | ||
| 149 | QueueBuf qbIn; | ||
| 150 | }; | ||
| 151 | |||
| 152 | int main( int argc, char *argv[] ) | ||
| 153 | { | ||
| 154 | File fIn( argv[1], File::Read ); | ||
| 155 | |||
| 156 | Parser p; | ||
| 157 | |||
| 158 | p.pushLexer( new MathLexer( fIn ) ); | ||
| 159 | |||
| 160 | p.parse(); | ||
| 161 | |||
| 162 | return 0; | ||
| 163 | } | ||
| 164 | |||
diff --git a/src/variant.cpp b/src/variant.cpp index 6b304ba..a66ec39 100644 --- a/src/variant.cpp +++ b/src/variant.cpp | |||
| @@ -43,7 +43,7 @@ Bu::Variant::~Variant() | |||
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | bool Bu::Variant::isSet() | 46 | bool Bu::Variant::isSet() const |
| 47 | { | 47 | { |
| 48 | return pCore != NULL; | 48 | return pCore != NULL; |
| 49 | } | 49 | } |
diff --git a/src/variant.h b/src/variant.h index 809aab9..5482ee3 100644 --- a/src/variant.h +++ b/src/variant.h | |||
| @@ -98,9 +98,15 @@ namespace Bu | |||
| 98 | public: | 98 | public: |
| 99 | Variant(); | 99 | Variant(); |
| 100 | Variant( const Variant &v ); | 100 | Variant( const Variant &v ); |
| 101 | template<class t> | ||
| 102 | Variant( const t &v ) : | ||
| 103 | pCore( new VariantType<t>() ) | ||
| 104 | { | ||
| 105 | (*dynamic_cast<VariantType<t> *>(pCore)) = v; | ||
| 106 | } | ||
| 101 | virtual ~Variant(); | 107 | virtual ~Variant(); |
| 102 | 108 | ||
| 103 | bool isSet(); | 109 | bool isSet() const; |
| 104 | Bu::FString toString() const; | 110 | Bu::FString toString() const; |
| 105 | const std::type_info &getType() const; | 111 | const std::type_info &getType() const; |
| 106 | 112 | ||
