diff options
author | Mike Buland <eichlan@xagasoft.com> | 2010-10-12 06:09:48 +0000 |
---|---|---|
committer | Mike Buland <eichlan@xagasoft.com> | 2010-10-12 06:09:48 +0000 |
commit | 1ee5f374ed986333d5cdbbf41390f1c4c755a8e3 (patch) | |
tree | 67b02598d3dca87a82263629a1290bd7b7a79006 | |
parent | 313e28df2a8776c82f5493aef6fe44ad40f1935a (diff) | |
download | libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.gz libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.bz2 libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.xz libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.zip |
This commit has a minor tweak to the variant class to make it easier to use,
and introduces the parser and lexer classes. I also made a test for parser and
put it in the tools directory. That is silly, it shouldn't be. However, it's
necessary right now, because I don't want to do a full build to compile all
the parser tests.
However, this commit doesn't actually build yet. It will soon, I just wanted
to get it all committed.
-rw-r--r-- | src/lexer.cpp | 31 | ||||
-rw-r--r-- | src/lexer.h | 44 | ||||
-rw-r--r-- | src/parser.cpp | 43 | ||||
-rw-r--r-- | src/parser.h | 48 | ||||
-rw-r--r-- | src/tools/parser.cpp | 164 | ||||
-rw-r--r-- | src/variant.cpp | 2 | ||||
-rw-r--r-- | src/variant.h | 8 |
7 files changed, 338 insertions, 2 deletions
diff --git a/src/lexer.cpp b/src/lexer.cpp new file mode 100644 index 0000000..c7a6fcb --- /dev/null +++ b/src/lexer.cpp | |||
@@ -0,0 +1,31 @@ | |||
1 | #include "bu/lexer.h" | ||
2 | |||
3 | Bu::Lexer::Lexer() | ||
4 | { | ||
5 | } | ||
6 | |||
7 | Bu::Lexer::~Lexer() | ||
8 | { | ||
9 | } | ||
10 | |||
11 | Bu::Lexer::Token::Token() : | ||
12 | iToken( -1 ) | ||
13 | { | ||
14 | } | ||
15 | |||
16 | Bu::Lexer::Token::Token( int iToken ) : | ||
17 | iToken( iToken ) | ||
18 | { | ||
19 | } | ||
20 | |||
21 | Bu::FString Bu::Lexer::tokenToString( const Bu::Lexer::Token &t ) | ||
22 | { | ||
23 | Bu::MemBuf mb; | ||
24 | Bu::Formatter f( mb ); | ||
25 | f << "<" << t.iToken << ">"; | ||
26 | if( t.vExtra.isSet() ) | ||
27 | f << " (" << t.vExtra << ")"; | ||
28 | |||
29 | return mb.getString(); | ||
30 | } | ||
31 | |||
diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..37d268f --- /dev/null +++ b/src/lexer.h | |||
@@ -0,0 +1,44 @@ | |||
1 | #ifndef BU_LEXER_H | ||
2 | #define BU_LEXER_H | ||
3 | |||
4 | #include "bu/variant.h" | ||
5 | |||
6 | namespace Bu | ||
7 | { | ||
8 | class Stream; | ||
9 | |||
10 | /** | ||
11 | * The base class for creating a lexical analyzer. This is designed to work | ||
12 | * in tandem with the Bu::Parser class, which uses this to tokenize textual | ||
13 | * input. It can be used by just about anything that cares about tokens | ||
14 | * more than raw input, though. | ||
15 | */ | ||
16 | class Lexer | ||
17 | { | ||
18 | public: | ||
19 | Lexer(); | ||
20 | virtual ~Lexer(); | ||
21 | |||
22 | class Token | ||
23 | { | ||
24 | public: | ||
25 | Token(); | ||
26 | Token( int iToken ); | ||
27 | |||
28 | template<class t> | ||
29 | Token( int iToken, const t &v ) : | ||
30 | iToken( iToken ), | ||
31 | vExtra( v ) | ||
32 | { | ||
33 | } | ||
34 | int iToken; | ||
35 | Bu::Variant vExtra; | ||
36 | }; | ||
37 | |||
38 | virtual Token *nextToken()=0; | ||
39 | |||
40 | virtual Bu::FString tokenToString( const Token &t ); | ||
41 | }; | ||
42 | }; | ||
43 | |||
44 | #endif | ||
diff --git a/src/parser.cpp b/src/parser.cpp new file mode 100644 index 0000000..7015070 --- /dev/null +++ b/src/parser.cpp | |||
@@ -0,0 +1,43 @@ | |||
1 | #include "bu/parser.h" | ||
2 | #include "bu/lexer.h" | ||
3 | |||
4 | #include "bu/sio.h" | ||
5 | using namespace Bu; | ||
6 | |||
7 | Bu::Parser::Parser() | ||
8 | { | ||
9 | } | ||
10 | |||
11 | Bu::Parser::~Parser() | ||
12 | { | ||
13 | } | ||
14 | |||
15 | void Bu::Parser::pushLexer( Lexer *pLex ) | ||
16 | { | ||
17 | sLexer.push( pLex ); | ||
18 | } | ||
19 | |||
20 | void Bu::Parser::popLexer() | ||
21 | { | ||
22 | delete sLexer.peekPop(); | ||
23 | } | ||
24 | |||
25 | void Bu::Parser::parse() | ||
26 | { | ||
27 | for(;;) | ||
28 | { | ||
29 | Bu::Lexer::Token *pToken = sLexer.peek()->nextToken(); | ||
30 | sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl; | ||
31 | if( pToken->iToken < 0 ) | ||
32 | { | ||
33 | delete sLexer.peekPop(); | ||
34 | if( sLexer.isEmpty() ) | ||
35 | { | ||
36 | delete pToken; | ||
37 | return; | ||
38 | } | ||
39 | } | ||
40 | delete pToken; | ||
41 | } | ||
42 | } | ||
43 | |||
diff --git a/src/parser.h b/src/parser.h new file mode 100644 index 0000000..26b15a6 --- /dev/null +++ b/src/parser.h | |||
@@ -0,0 +1,48 @@ | |||
1 | #ifndef BU_PARSER_H | ||
2 | #define BU_PARSER_H | ||
3 | |||
4 | #include "bu/list.h" | ||
5 | #include "bu/fstring.h" | ||
6 | #include "bu/lexer.h" | ||
7 | |||
8 | namespace Bu | ||
9 | { | ||
10 | /** | ||
11 | * The base framework for an LR(1) grammar parser. Provided a proper set of | ||
12 | * ParserStates this will parse any input the lexer can provide. | ||
13 | */ | ||
14 | class Parser | ||
15 | { | ||
16 | public: | ||
17 | Parser(); | ||
18 | virtual ~Parser(); | ||
19 | |||
20 | /** | ||
21 | * When a Lexer is pushed onto the stack it becomes the source for | ||
22 | * future tokens read by the parser until it is popped off the stack. | ||
23 | * The Parser takes ownership of every Lexer pushed onto the stack, | ||
24 | * and will delete it when it is popped off the stack. | ||
25 | */ | ||
26 | void pushLexer( Lexer *pLex ); | ||
27 | |||
28 | /** | ||
29 | * Pop a lexer off the stack, and delete it. | ||
30 | */ | ||
31 | void popLexer(); | ||
32 | |||
33 | /** | ||
34 | * Execute a parse. | ||
35 | */ | ||
36 | void parse(); | ||
37 | |||
38 | private: | ||
39 | typedef Bu::List<Lexer *> LexerStack; | ||
40 | typedef Bu::List<Lexer::Token *> TokenStack; | ||
41 | typedef Bu::List<State *> StateStack; | ||
42 | LexerStack sLexer; | ||
43 | TokenStack sToken; | ||
44 | StateStack sState; | ||
45 | }; | ||
46 | }; | ||
47 | |||
48 | #endif | ||
diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp new file mode 100644 index 0000000..a70dfa4 --- /dev/null +++ b/src/tools/parser.cpp | |||
@@ -0,0 +1,164 @@ | |||
1 | #include <bu/parser.h> | ||
2 | #include <bu/lexer.h> | ||
3 | #include <bu/file.h> | ||
4 | #include <bu/sio.h> | ||
5 | #include <bu/queuebuf.h> | ||
6 | #include <stdlib.h> | ||
7 | |||
8 | using namespace Bu; | ||
9 | |||
10 | enum Tok | ||
11 | { | ||
12 | tokNumber, | ||
13 | tokPlus, | ||
14 | tokMinus, | ||
15 | tokDivide, | ||
16 | tokMultiply, | ||
17 | tokOpenParen, | ||
18 | tokCloseParen, | ||
19 | tokCompute, | ||
20 | tokEndOfInput=-1 | ||
21 | }; | ||
22 | |||
23 | Bu::Formatter &operator<<( Bu::Formatter &f, Tok e ) | ||
24 | { | ||
25 | switch( e ) | ||
26 | { | ||
27 | case tokNumber: return f << "tokNumber"; | ||
28 | case tokPlus: return f << "tokPlus"; | ||
29 | case tokMinus: return f << "tokMinus"; | ||
30 | case tokDivide: return f << "tokDivide"; | ||
31 | case tokMultiply: return f << "tokMultiply"; | ||
32 | case tokOpenParen: return f << "tokOpenParen"; | ||
33 | case tokCloseParen: return f << "tokCloseParen"; | ||
34 | case tokCompute: return f << "tokCompute"; | ||
35 | case tokEndOfInput: return f << "tokEndOfInput"; | ||
36 | } | ||
37 | |||
38 | return f << "***error***"; | ||
39 | } | ||
40 | |||
41 | class MathLexer : public Lexer | ||
42 | { | ||
43 | public: | ||
44 | MathLexer( Bu::Stream &rSrc ) : | ||
45 | rSrc( rSrc ) | ||
46 | { | ||
47 | } | ||
48 | |||
49 | virtual ~MathLexer() | ||
50 | { | ||
51 | } | ||
52 | |||
53 | enum TokenTypes | ||
54 | { | ||
55 | tokStuff | ||
56 | }; | ||
57 | |||
58 | virtual Token *nextToken() | ||
59 | { | ||
60 | for(;;) | ||
61 | { | ||
62 | if( qbIn.getSize() == 0 ) | ||
63 | { | ||
64 | char buf[4096]; | ||
65 | qbIn.write( buf, rSrc.read( buf, 4096 ) ); | ||
66 | |||
67 | if( rSrc.isEos() && qbIn.getSize() == 0 ) | ||
68 | return new Token( tokEndOfInput ); | ||
69 | } | ||
70 | |||
71 | char b; | ||
72 | qbIn.peek( &b, 1 ); | ||
73 | switch( b ) | ||
74 | { | ||
75 | case '+': | ||
76 | qbIn.seek( 1 ); | ||
77 | return new Token( tokPlus ); | ||
78 | |||
79 | case '-': | ||
80 | qbIn.seek( 1 ); | ||
81 | return new Token( tokMinus ); | ||
82 | |||
83 | case '/': | ||
84 | qbIn.seek( 1 ); | ||
85 | return new Token( tokDivide ); | ||
86 | |||
87 | case '*': | ||
88 | qbIn.seek( 1 ); | ||
89 | return new Token( tokMultiply ); | ||
90 | |||
91 | case ' ': | ||
92 | case '\t': | ||
93 | case '\n': | ||
94 | qbIn.seek( 1 ); | ||
95 | break; | ||
96 | |||
97 | case '=': | ||
98 | qbIn.seek( 1 ); | ||
99 | return new Token( tokCompute ); | ||
100 | |||
101 | case '(': | ||
102 | qbIn.seek( 1 ); | ||
103 | return new Token( tokOpenParen ); | ||
104 | |||
105 | case ')': | ||
106 | qbIn.seek( 1 ); | ||
107 | return new Token( tokCloseParen ); | ||
108 | |||
109 | case '.': | ||
110 | case '0': | ||
111 | case '1': | ||
112 | case '2': | ||
113 | case '3': | ||
114 | case '4': | ||
115 | case '5': | ||
116 | case '6': | ||
117 | case '7': | ||
118 | case '8': | ||
119 | case '9': | ||
120 | { | ||
121 | Bu::FString sTmp; | ||
122 | sTmp += b; | ||
123 | qbIn.seek( 1 ); | ||
124 | for(;;) | ||
125 | { | ||
126 | qbIn.peek( &b, 1 ); | ||
127 | if( b != '.' && (b < '0' || b > '9') ) | ||
128 | { | ||
129 | sio << "!! Convert '" << sTmp << "' to " | ||
130 | << strtod( sTmp.getStr(), NULL ) << sio.nl; | ||
131 | return new Token( | ||
132 | tokNumber, strtod( sTmp.getStr(), NULL ) | ||
133 | ); | ||
134 | } | ||
135 | qbIn.seek( 1 ); | ||
136 | sTmp += b; | ||
137 | } | ||
138 | } | ||
139 | break; | ||
140 | |||
141 | default: | ||
142 | throw Bu::ExceptionBase("Unexpected character '%c'.", b ); | ||
143 | } | ||
144 | } | ||
145 | } | ||
146 | |||
147 | private: | ||
148 | Bu::Stream &rSrc; | ||
149 | QueueBuf qbIn; | ||
150 | }; | ||
151 | |||
152 | int main( int argc, char *argv[] ) | ||
153 | { | ||
154 | File fIn( argv[1], File::Read ); | ||
155 | |||
156 | Parser p; | ||
157 | |||
158 | p.pushLexer( new MathLexer( fIn ) ); | ||
159 | |||
160 | p.parse(); | ||
161 | |||
162 | return 0; | ||
163 | } | ||
164 | |||
diff --git a/src/variant.cpp b/src/variant.cpp index 6b304ba..a66ec39 100644 --- a/src/variant.cpp +++ b/src/variant.cpp | |||
@@ -43,7 +43,7 @@ Bu::Variant::~Variant() | |||
43 | } | 43 | } |
44 | } | 44 | } |
45 | 45 | ||
46 | bool Bu::Variant::isSet() | 46 | bool Bu::Variant::isSet() const |
47 | { | 47 | { |
48 | return pCore != NULL; | 48 | return pCore != NULL; |
49 | } | 49 | } |
diff --git a/src/variant.h b/src/variant.h index 809aab9..5482ee3 100644 --- a/src/variant.h +++ b/src/variant.h | |||
@@ -98,9 +98,15 @@ namespace Bu | |||
98 | public: | 98 | public: |
99 | Variant(); | 99 | Variant(); |
100 | Variant( const Variant &v ); | 100 | Variant( const Variant &v ); |
101 | template<class t> | ||
102 | Variant( const t &v ) : | ||
103 | pCore( new VariantType<t>() ) | ||
104 | { | ||
105 | (*dynamic_cast<VariantType<t> *>(pCore)) = v; | ||
106 | } | ||
101 | virtual ~Variant(); | 107 | virtual ~Variant(); |
102 | 108 | ||
103 | bool isSet(); | 109 | bool isSet() const; |
104 | Bu::FString toString() const; | 110 | Bu::FString toString() const; |
105 | const std::type_info &getType() const; | 111 | const std::type_info &getType() const; |
106 | 112 | ||