summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMike Buland <eichlan@xagasoft.com>2010-10-12 06:09:48 +0000
committerMike Buland <eichlan@xagasoft.com>2010-10-12 06:09:48 +0000
commit1ee5f374ed986333d5cdbbf41390f1c4c755a8e3 (patch)
tree67b02598d3dca87a82263629a1290bd7b7a79006
parent313e28df2a8776c82f5493aef6fe44ad40f1935a (diff)
downloadlibbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.gz
libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.bz2
libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.tar.xz
libbu++-1ee5f374ed986333d5cdbbf41390f1c4c755a8e3.zip
This commit has a minor tweak to the variant class to make it easier to use,
and introduces the parser and lexer classes. I also made a test for the parser and put it in the tools directory. That is silly; it shouldn't be there. However, it's necessary right now, because I don't want to do a full build to compile all the parser tests. Note that this commit doesn't actually build yet. It will soon, I just wanted to get it all committed.
-rw-r--r--src/lexer.cpp31
-rw-r--r--src/lexer.h44
-rw-r--r--src/parser.cpp43
-rw-r--r--src/parser.h48
-rw-r--r--src/tools/parser.cpp164
-rw-r--r--src/variant.cpp2
-rw-r--r--src/variant.h8
7 files changed, 338 insertions, 2 deletions
diff --git a/src/lexer.cpp b/src/lexer.cpp
new file mode 100644
index 0000000..c7a6fcb
--- /dev/null
+++ b/src/lexer.cpp
@@ -0,0 +1,31 @@
1#include "bu/lexer.h"
2
3Bu::Lexer::Lexer()
4{
5}
6
7Bu::Lexer::~Lexer()
8{
9}
10
11Bu::Lexer::Token::Token() :
12 iToken( -1 )
13{
14}
15
16Bu::Lexer::Token::Token( int iToken ) :
17 iToken( iToken )
18{
19}
20
21Bu::FString Bu::Lexer::tokenToString( const Bu::Lexer::Token &t )
22{
23 Bu::MemBuf mb;
24 Bu::Formatter f( mb );
25 f << "<" << t.iToken << ">";
26 if( t.vExtra.isSet() )
27 f << " (" << t.vExtra << ")";
28
29 return mb.getString();
30}
31
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..37d268f
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,44 @@
1#ifndef BU_LEXER_H
2#define BU_LEXER_H
3
4#include "bu/variant.h"
5
6namespace Bu
7{
8 class Stream;
9
10 /**
11 * The base class for creating a lexical analyzer. This is designed to work
12 * in tandem with the Bu::Parser class, which uses this to tokenize textual
13 * input. It can be used by just about anything that cares about tokens
14 * more than raw input, though.
15 */
16 class Lexer
17 {
18 public:
19 Lexer();
20 virtual ~Lexer();
21
22 class Token
23 {
24 public:
25 Token();
26 Token( int iToken );
27
28 template<class t>
29 Token( int iToken, const t &v ) :
30 iToken( iToken ),
31 vExtra( v )
32 {
33 }
34 int iToken;
35 Bu::Variant vExtra;
36 };
37
38 virtual Token *nextToken()=0;
39
40 virtual Bu::FString tokenToString( const Token &t );
41 };
42};
43
44#endif
diff --git a/src/parser.cpp b/src/parser.cpp
new file mode 100644
index 0000000..7015070
--- /dev/null
+++ b/src/parser.cpp
@@ -0,0 +1,43 @@
1#include "bu/parser.h"
2#include "bu/lexer.h"
3
4#include "bu/sio.h"
5using namespace Bu;
6
7Bu::Parser::Parser()
8{
9}
10
11Bu::Parser::~Parser()
12{
13}
14
15void Bu::Parser::pushLexer( Lexer *pLex )
16{
17 sLexer.push( pLex );
18}
19
20void Bu::Parser::popLexer()
21{
22 delete sLexer.peekPop();
23}
24
25void Bu::Parser::parse()
26{
27 for(;;)
28 {
29 Bu::Lexer::Token *pToken = sLexer.peek()->nextToken();
30 sio << sLexer.peek()->tokenToString( *pToken ) << sio.nl;
31 if( pToken->iToken < 0 )
32 {
33 delete sLexer.peekPop();
34 if( sLexer.isEmpty() )
35 {
36 delete pToken;
37 return;
38 }
39 }
40 delete pToken;
41 }
42}
43
diff --git a/src/parser.h b/src/parser.h
new file mode 100644
index 0000000..26b15a6
--- /dev/null
+++ b/src/parser.h
@@ -0,0 +1,48 @@
1#ifndef BU_PARSER_H
2#define BU_PARSER_H
3
4#include "bu/list.h"
5#include "bu/fstring.h"
6#include "bu/lexer.h"
7
8namespace Bu
9{
10 /**
11 * The base framework for a LR(1) grammar parser. Provided a proper set of
12 * ParserStates this will prase any input the lexer can provide.
13 */
14 class Parser
15 {
16 public:
17 Parser();
18 virtual ~Parser();
19
20 /**
21 * When a Lexer is pushed onto the stack it becomes the source for
22 * future tokens read by the parser until it is popped off the stack.
23 * The Parser takes ownership of every Lexer pushed onto the stack,
24 * and will delete it when it is popped off the stack.
25 */
26 void pushLexer( Lexer *pLex );
27
28 /**
29 * Pop a lexer off the stack, and delete it.
30 */
31 void popLexer();
32
33 /**
34 * Execute a parse.
35 */
36 void parse();
37
38 private:
39 typedef Bu::List<Lexer *> LexerStack;
40 typedef Bu::List<Lexer::Token *> TokenStack;
41 typedef Bu::List<State *> StateStack;
42 LexerStack sLexer;
43 TokenStack sToken;
44 StateStack sState;
45 };
46};
47
48#endif
diff --git a/src/tools/parser.cpp b/src/tools/parser.cpp
new file mode 100644
index 0000000..a70dfa4
--- /dev/null
+++ b/src/tools/parser.cpp
@@ -0,0 +1,164 @@
1#include <bu/parser.h>
2#include <bu/lexer.h>
3#include <bu/file.h>
4#include <bu/sio.h>
5#include <bu/queuebuf.h>
6#include <stdlib.h>
7
8using namespace Bu;
9
/**
 * Token ids produced by MathLexer.  The values are spelled out explicitly;
 * they are the same ones the compiler would assign implicitly.
 */
enum Tok
{
	tokNumber		= 0,	// numeric literal
	tokPlus			= 1,	// '+'
	tokMinus		= 2,	// '-'
	tokDivide		= 3,	// '/'
	tokMultiply		= 4,	// '*'
	tokOpenParen	= 5,	// '('
	tokCloseParen	= 6,	// ')'
	tokCompute		= 7,	// '='
	tokEndOfInput	= -1	// no more input
};
22
23Bu::Formatter &operator<<( Bu::Formatter &f, Tok e )
24{
25 switch( e )
26 {
27 case tokNumber: return f << "tokNumber";
28 case tokPlus: return f << "tokPlus";
29 case tokMinus: return f << "tokMinus";
30 case tokDivide: return f << "tokDivide";
31 case tokMultiply: return f << "tokMultiply";
32 case tokOpenParen: return f << "tokOpenParen";
33 case tokCloseParen: return f << "tokCloseParen";
34 case tokCompute: return f << "tokCompute";
35 case tokEndOfInput: return f << "tokEndOfInput";
36 }
37
38 return f << "***error***";
39}
40
41class MathLexer : public Lexer
42{
43public:
44 MathLexer( Bu::Stream &rSrc ) :
45 rSrc( rSrc )
46 {
47 }
48
49 virtual ~MathLexer()
50 {
51 }
52
53 enum TokenTypes
54 {
55 tokStuff
56 };
57
58 virtual Token *nextToken()
59 {
60 for(;;)
61 {
62 if( qbIn.getSize() == 0 )
63 {
64 char buf[4096];
65 qbIn.write( buf, rSrc.read( buf, 4096 ) );
66
67 if( rSrc.isEos() && qbIn.getSize() == 0 )
68 return new Token( tokEndOfInput );
69 }
70
71 char b;
72 qbIn.peek( &b, 1 );
73 switch( b )
74 {
75 case '+':
76 qbIn.seek( 1 );
77 return new Token( tokPlus );
78
79 case '-':
80 qbIn.seek( 1 );
81 return new Token( tokMinus );
82
83 case '/':
84 qbIn.seek( 1 );
85 return new Token( tokDivide );
86
87 case '*':
88 qbIn.seek( 1 );
89 return new Token( tokMultiply );
90
91 case ' ':
92 case '\t':
93 case '\n':
94 qbIn.seek( 1 );
95 break;
96
97 case '=':
98 qbIn.seek( 1 );
99 return new Token( tokCompute );
100
101 case '(':
102 qbIn.seek( 1 );
103 return new Token( tokOpenParen );
104
105 case ')':
106 qbIn.seek( 1 );
107 return new Token( tokCloseParen );
108
109 case '.':
110 case '0':
111 case '1':
112 case '2':
113 case '3':
114 case '4':
115 case '5':
116 case '6':
117 case '7':
118 case '8':
119 case '9':
120 {
121 Bu::FString sTmp;
122 sTmp += b;
123 qbIn.seek( 1 );
124 for(;;)
125 {
126 qbIn.peek( &b, 1 );
127 if( b != '.' && (b < '0' || b > '9') )
128 {
129 sio << "!! Convert '" << sTmp << "' to "
130 << strtod( sTmp.getStr(), NULL ) << sio.nl;
131 return new Token(
132 tokNumber, strtod( sTmp.getStr(), NULL )
133 );
134 }
135 qbIn.seek( 1 );
136 sTmp += b;
137 }
138 }
139 break;
140
141 default:
142 throw Bu::ExceptionBase("Unexpected character '%c'.", b );
143 }
144 }
145 }
146
147private:
148 Bu::Stream &rSrc;
149 QueueBuf qbIn;
150};
151
152int main( int argc, char *argv[] )
153{
154 File fIn( argv[1], File::Read );
155
156 Parser p;
157
158 p.pushLexer( new MathLexer( fIn ) );
159
160 p.parse();
161
162 return 0;
163}
164
diff --git a/src/variant.cpp b/src/variant.cpp
index 6b304ba..a66ec39 100644
--- a/src/variant.cpp
+++ b/src/variant.cpp
@@ -43,7 +43,7 @@ Bu::Variant::~Variant()
43 } 43 }
44} 44}
45 45
46bool Bu::Variant::isSet() 46bool Bu::Variant::isSet() const
47{ 47{
48 return pCore != NULL; 48 return pCore != NULL;
49} 49}
diff --git a/src/variant.h b/src/variant.h
index 809aab9..5482ee3 100644
--- a/src/variant.h
+++ b/src/variant.h
@@ -98,9 +98,15 @@ namespace Bu
98 public: 98 public:
99 Variant(); 99 Variant();
100 Variant( const Variant &v ); 100 Variant( const Variant &v );
101 template<class t>
102 Variant( const t &v ) :
103 pCore( new VariantType<t>() )
104 {
105 (*dynamic_cast<VariantType<t> *>(pCore)) = v;
106 }
101 virtual ~Variant(); 107 virtual ~Variant();
102 108
103 bool isSet(); 109 bool isSet() const;
104 Bu::FString toString() const; 110 Bu::FString toString() const;
105 const std::type_info &getType() const; 111 const std::type_info &getType() const;
106 112