From f254b23578a9a53a56e6cea980ba588e5f830314 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Wed, 13 Apr 2011 23:25:17 +0000 Subject: Regular expression engine is started, it's...tricky, but I think I can get it. --- src/regexengine.cpp | 5 ++ src/regexengine.h | 142 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/tests/regex.cpp | 40 +++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 src/regexengine.cpp create mode 100644 src/regexengine.h create mode 100644 src/tests/regex.cpp diff --git a/src/regexengine.cpp b/src/regexengine.cpp new file mode 100644 index 0000000..72bc381 --- /dev/null +++ b/src/regexengine.cpp @@ -0,0 +1,5 @@ +#include "bu/regexengine.h" +#include "bu/utfstring.h" + /* NOTE(review): these two lines look like explicit instantiations of Bu::RegExEngine, but no template argument list is visible here or elsewhere in this patch (template headers, SharedCore and Bu::Array uses, and the test includes are likewise bare) - presumably the angle-bracket contents were lost when the text was extracted; TODO confirm against the original commit. */ +template class Bu::RegExEngine; +template class Bu::RegExEngine; diff --git a/src/regexengine.h b/src/regexengine.h new file mode 100644 index 0000000..ec181c1 --- /dev/null +++ b/src/regexengine.h @@ -0,0 +1,142 @@ +#ifndef BU_REG_EX_ENGINE_H +#define BU_REG_EX_ENGINE_H + +#include "bu/sharedcore.h" +#include "bu/array.h" +#include "bu/sio.h" + +namespace Bu +{ + template class RegExEngine; + /* RegExEngineCore: the state table behind RegExEngine. It stores aState, an array of State objects, each carrying a list of Range transitions. The constructor and destructor are private; RegExEngine and SharedCore are declared friends. */ + template + class RegExEngineCore + { + friend class RegExEngine; + friend class SharedCore, RegExEngineCore >; + private: + RegExEngineCore() + { + } + + virtual ~RegExEngineCore() + { + } + /* Range: one transition. match() treats cLower and cUpper as inclusive bounds and moves to iTrgState when the input character falls between them. */ + class Range + { + public: + Range( chr cLower, chr cUpper, int iTrgState ) : + cLower( cLower ), cUpper( cUpper ), iTrgState( iTrgState ) + { + } + + chr cLower; + chr cUpper; + int iTrgState; + }; + /* State: holds only aRange, the list of outgoing Range entries. */ + class State + { + public: + Bu::Array aRange; + }; + /* addState: appends a default-constructed State to aState and returns its index (getSize() minus one). */ + int addState() + { + aState.append( State() ); + return aState.getSize()-1; + } + /* addCompletion: appends Range( cLower, cUpper, iTrgState ) to the range list of aState[iState]. match() reports a negative iTrgState through iCompletion. */ + void addCompletion( int iState, chr cLower, chr cUpper, int iTrgState ) + { + aState[iState].aRange.append( Range( cLower, cUpper, iTrgState ) ); + } + /* match: walks sIn starting in state 0 and logs each step to Bu::sio. For every input character it tests each range of the current state; a hit stores the target in iState, sets bMatch and increments iSize. Returns true with iCompletion set to iState as soon as iState is negative, returns false when no range accepts the character, and returns true with iCompletion set to 0 when the input ends first. NOTE(review): there is no break after a hit, and j keeps iterating the range list it started on, so if two ranges of one state accept the same character iSize is incremented twice for that character and a later hit overwrites iState - confirm whether ranges are meant to be disjoint or a break is missing. */ + template + bool match( const str &sIn, int &iSize, int &iCompletion ) + { + bool bMatch; + int iState = 0; + iSize = 0; + for( typename str::const_iterator i = sIn.begin(); i; i++ ) + 
{ + Bu::sio << "Finding char " << *i << " in state " << iState + << ":" << Bu::sio.nl; + bMatch = false; + for( typename Bu::Array::iterator j = + aState[iState].aRange.begin(); j; j++ ) + { + Bu::sio << " Testing range " << (*j).cLower << " - " << (*j).cUpper << Bu::sio.nl; + if( *i >= (*j).cLower && *i <= (*j).cUpper ) + { + iState = (*j).iTrgState; + bMatch = true; + iSize++; + if( iState < 0 ) + { + iCompletion = iState; + return true; + } + } + } + if( bMatch == false ) + { + return false; + } + } + /* Reached only when the loop over sIn finishes without returning. */ + iCompletion = 0; + return true; + } + + typedef Bu::Array StateArray; + StateArray aState; + }; + /* RegExEngine: the public handle. It derives from SharedCore and forwards addState(), addCompletion() and match() to the core object. */ + template + class RegExEngine : public SharedCore, + RegExEngineCore > + { + private: + typedef class RegExEngine MyType; + typedef class RegExEngineCore Core; + typedef class Core::Range Range; + typedef class Core::State State; + + protected: + using SharedCore::core; + using SharedCore::_hardCopy; + using SharedCore::_resetCore; + using SharedCore::_allocateCore; + + public: + RegExEngine() + { + } + + virtual ~RegExEngine() + { + } + + int addState() + { + return core->addState(); + } + + void addCompletion( int iState, chr cLower, chr cUpper, int iTrgState ) + { + core->addCompletion( iState, cLower, cUpper, iTrgState ); + } + + template + bool match( const str &sIn, int &iSize, int &iCompletion ) + { + return core->match( sIn, iSize, iCompletion ); + } + + private: + }; +}; + +#endif diff --git a/src/tests/regex.cpp b/src/tests/regex.cpp new file mode 100644 index 0000000..82c3466 --- /dev/null +++ b/src/tests/regex.cpp @@ -0,0 +1,40 @@ +#include +#include +#include + +using namespace Bu; + /* compile: adds a root state, then one single-character range per character of s. Each range leads to a newly added state, except that the target stays -1 when i+1 tests false (presumably the last character - TODO confirm iterator semantics). */ +void compile( const Bu::String &s, Bu::RegExEngine &ree ) +{ + int iRoot = ree.addState(); + int iCur = iRoot; + for( Bu::String::const_iterator i = s.begin(); i; i++ ) + { + int iNext = -1; + if( i+1 ) + iNext = ree.addState(); + ree.addCompletion( iCur, *i, *i, iNext ); + iCur = iNext; + } +} + /* main: compiles the pattern abcd, matches it against abcdefg and prints the returned flag, iSize and iCompletion through sio. */ +int main() +{ + Bu::String sRegEx("abcd"); + Bu::String sMatch("abcdefg"); + 
+ Bu::RegExEngine ree; + + compile( sRegEx, ree ); + + bool bRet; + int iSize, iCompletion; + bRet = ree.match( sMatch, iSize, iCompletion ); + + sio << "Matched: " << bRet << sio.nl + << "Size: " << iSize << sio.nl + << "Completion: " << iCompletion << sio.nl; + + return 0; +} + -- cgit v1.2.3