summaryrefslogtreecommitdiff
path: root/src/parser.h
blob: a925188a6a479e1cd7cb9aa94533f42b14580f7c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#ifndef BU_PARSER_H
#define BU_PARSER_H

#include "bu/fstring.h"
#include "bu/list.h"
#include "bu/array.h"
#include "bu/hash.h"
#include "bu/signals.h"

#include "bu/lexer.h"

namespace Bu
{
	/**
	 * The base framework for a LR(1) grammar parser.  Provided a proper set of
	 * ParserStates this will prase any input the lexer can provide.
	 */
	class Parser
	{
	public:
		Parser();
		virtual ~Parser();

		/**
		 * When a Lexer is pushed onto the stack it becomes the source for
		 * future tokens read by the parser until it is popped off the stack.
		 * The Parser takes ownership of every Lexer pushed onto the stack,
		 * and will delete it when it is popped off the stack.
		 */
		void pushLexer( Lexer *pLex );

		/**
		 * Pop a lexer off the stack, and delete it.
		 */
		void popLexer();

		Lexer::Token *popToken();
		void pushToken( Lexer::Token *pTok );

		/**
		 * Execute a parse.
		 */
		void parse();

		void setRootNonTerminal( int iRoot );
		void setRootNonTerminal( const Bu::FString &sRoot );

		typedef Bu::Signal1<void, Parser &> Reduction;

		/**
		 * Represents a possible state, either a terminal or non-terminal symbol
		 * in a Production.
		 */
		class State
		{
		public:
			enum Type
			{
				typeTerminal,
				typeTerminalPush,
				typeNonTerminal,
				typeReduction
			};

			State( Type eType, int iIndex );
			virtual ~State();

		//private:
			Type eType;
			int iIndex;
		};

		typedef Bu::List<State> Production;

		class NonTerminal
		{
		public:
			NonTerminal();
			virtual ~NonTerminal();

			void addProduction( Production p );
			void setCanSkip();

//		private:
			typedef Bu::List<Production> ProductionList;
			ProductionList lProduction;
			bool bCanSkip;
		};

		int addNonTerminal( const Bu::FString &sName, NonTerminal &nt );
		int addNonTerminal( const Bu::FString &sName );
		void setNonTerminal( const Bu::FString &sName, NonTerminal &nt );
		int getNonTerminalId( const Bu::FString &sName );
		bool hasNonTerminal( const Bu::FString &sName );

		int addReduction( const Bu::FString &sName, const Reduction &r );
		int addReduction( const Bu::FString &sName );
		void setReduction( const Bu::FString &sName, const Reduction &r );
		int getReductionId( const Bu::FString &sName );
		bool hasReduction( const Bu::FString &sName );

	private:
		bool selectProduction( int iNt, Lexer::Token *ptCur );
		void advanceState();

	private:
		typedef Bu::List<Lexer *> LexerStack;
		typedef Bu::List<Lexer::Token *> TokenStack;
		typedef Bu::List<Production::const_iterator> StateStack;
		typedef Bu::Array<Reduction> ReductionArray;
		typedef Bu::Hash<Bu::FString,int> NameIndexHash;
		typedef Bu::Array<NonTerminal> NonTerminalArray;

		LexerStack sLexer;
		TokenStack sToken;
		StateStack sState;
		ReductionArray aReduction;
		NameIndexHash hReductionName;
		NonTerminalArray aNonTerminal;
		NameIndexHash hNonTerminalName;
		int iRootNonTerminal;
	};
Bu::Formatter &operator<<( Bu::Formatter &f, Bu::Parser::State::Type t );
Bu::Formatter &operator<<( Bu::Formatter &f, const Bu::Parser::State &s );
};


#endif