diff options
| -rw-r--r-- | src/regexengine.cpp | 5 | ||||
| -rw-r--r-- | src/regexengine.h | 142 | ||||
| -rw-r--r-- | src/tests/regex.cpp | 40 |
3 files changed, 187 insertions, 0 deletions
diff --git a/src/regexengine.cpp b/src/regexengine.cpp new file mode 100644 index 0000000..72bc381 --- /dev/null +++ b/src/regexengine.cpp | |||
| @@ -0,0 +1,5 @@ | |||
| 1 | #include "bu/regexengine.h" | ||
| 2 | #include "bu/utfstring.h" | ||
| 3 | |||
// Explicit instantiations of Bu::RegExEngine for the char and Bu::UtfChar character types.
| 4 | template class Bu::RegExEngine<char>; | ||
| 5 | template class Bu::RegExEngine<Bu::UtfChar>; | ||
diff --git a/src/regexengine.h b/src/regexengine.h new file mode 100644 index 0000000..ec181c1 --- /dev/null +++ b/src/regexengine.h | |||
| @@ -0,0 +1,142 @@ | |||
| 1 | #ifndef BU_REG_EX_ENGINE_H | ||
| 2 | #define BU_REG_EX_ENGINE_H | ||
| 3 | |||
| 4 | #include "bu/sharedcore.h" | ||
| 5 | #include "bu/array.h" | ||
| 6 | #include "bu/sio.h" | ||
| 7 | |||
| 8 | namespace Bu | ||
| 9 | { | ||
| 10 | template<typename chr> class RegExEngine; | ||
| 11 | |||
| 12 | template<typename chr> | ||
| 13 | class RegExEngineCore | ||
| 14 | { | ||
| 15 | friend class RegExEngine<chr>; | ||
| 16 | friend class SharedCore<RegExEngine<chr>, RegExEngineCore<chr> >; | ||
| 17 | private: | ||
| 18 | RegExEngineCore() | ||
| 19 | { | ||
| 20 | } | ||
| 21 | |||
| 22 | virtual ~RegExEngineCore() | ||
| 23 | { | ||
| 24 | } | ||
| 25 | |||
| 26 | class Range | ||
| 27 | { | ||
| 28 | public: | ||
| 29 | Range( chr cLower, chr cUpper, int iTrgState ) : | ||
| 30 | cLower( cLower ), cUpper( cUpper ), iTrgState( iTrgState ) | ||
| 31 | { | ||
| 32 | } | ||
| 33 | |||
| 34 | chr cLower; | ||
| 35 | chr cUpper; | ||
| 36 | int iTrgState; | ||
| 37 | }; | ||
| 38 | |||
| 39 | class State | ||
| 40 | { | ||
| 41 | public: | ||
| 42 | Bu::Array<Range> aRange; | ||
| 43 | }; | ||
| 44 | |||
| 45 | int addState() | ||
| 46 | { | ||
| 47 | aState.append( State() ); | ||
| 48 | return aState.getSize()-1; | ||
| 49 | } | ||
| 50 | |||
| 51 | void addCompletion( int iState, chr cLower, chr cUpper, int iTrgState ) | ||
| 52 | { | ||
| 53 | aState[iState].aRange.append( Range( cLower, cUpper, iTrgState ) ); | ||
| 54 | } | ||
| 55 | |||
| 56 | template<typename str> | ||
| 57 | bool match( const str &sIn, int &iSize, int &iCompletion ) | ||
| 58 | { | ||
| 59 | bool bMatch; | ||
| 60 | int iState = 0; | ||
| 61 | iSize = 0; | ||
| 62 | for( typename str::const_iterator i = sIn.begin(); i; i++ ) | ||
| 63 | { | ||
| 64 | Bu::sio << "Finding char " << *i << " in state " << iState | ||
| 65 | << ":" << Bu::sio.nl; | ||
| 66 | bMatch = false; | ||
| 67 | for( typename Bu::Array<Range>::iterator j = | ||
| 68 | aState[iState].aRange.begin(); j; j++ ) | ||
| 69 | { | ||
| 70 | Bu::sio << " Testing range " << (*j).cLower << " - " << (*j).cUpper << Bu::sio.nl; | ||
| 71 | if( *i >= (*j).cLower && *i <= (*j).cUpper ) | ||
| 72 | { | ||
| 73 | iState = (*j).iTrgState; | ||
| 74 | bMatch = true; | ||
| 75 | iSize++; | ||
| 76 | if( iState < 0 ) | ||
| 77 | { | ||
| 78 | iCompletion = iState; | ||
| 79 | return true; | ||
| 80 | } | ||
| 81 | } | ||
| 82 | } | ||
| 83 | if( bMatch == false ) | ||
| 84 | { | ||
| 85 | return false; | ||
| 86 | } | ||
| 87 | } | ||
| 88 | |||
| 89 | iCompletion = 0; | ||
| 90 | return true; | ||
| 91 | } | ||
| 92 | |||
| 93 | typedef Bu::Array<State> StateArray; | ||
| 94 | StateArray aState; | ||
| 95 | }; | ||
| 96 | |||
| 97 | template<typename chr> | ||
| 98 | class RegExEngine : public SharedCore<RegExEngine<chr>, | ||
| 99 | RegExEngineCore<chr> > | ||
| 100 | { | ||
| 101 | private: | ||
| 102 | typedef class RegExEngine<chr> MyType; | ||
| 103 | typedef class RegExEngineCore<chr> Core; | ||
| 104 | typedef class Core::Range Range; | ||
| 105 | typedef class Core::State State; | ||
| 106 | |||
| 107 | protected: | ||
| 108 | using SharedCore<MyType, Core>::core; | ||
| 109 | using SharedCore<MyType, Core>::_hardCopy; | ||
| 110 | using SharedCore<MyType, Core>::_resetCore; | ||
| 111 | using SharedCore<MyType, Core>::_allocateCore; | ||
| 112 | |||
| 113 | public: | ||
| 114 | RegExEngine() | ||
| 115 | { | ||
| 116 | } | ||
| 117 | |||
| 118 | virtual ~RegExEngine() | ||
| 119 | { | ||
| 120 | } | ||
| 121 | |||
| 122 | int addState() | ||
| 123 | { | ||
| 124 | return core->addState(); | ||
| 125 | } | ||
| 126 | |||
| 127 | void addCompletion( int iState, chr cLower, chr cUpper, int iTrgState ) | ||
| 128 | { | ||
| 129 | core->addCompletion( iState, cLower, cUpper, iTrgState ); | ||
| 130 | } | ||
| 131 | |||
| 132 | template<typename str> | ||
| 133 | bool match( const str &sIn, int &iSize, int &iCompletion ) | ||
| 134 | { | ||
| 135 | return core->match( sIn, iSize, iCompletion ); | ||
| 136 | } | ||
| 137 | |||
| 138 | private: | ||
| 139 | }; | ||
| 140 | }; | ||
| 141 | |||
| 142 | #endif | ||
diff --git a/src/tests/regex.cpp b/src/tests/regex.cpp new file mode 100644 index 0000000..82c3466 --- /dev/null +++ b/src/tests/regex.cpp | |||
| @@ -0,0 +1,40 @@ | |||
| 1 | #include <bu/string.h> | ||
| 2 | #include <bu/regexengine.h> | ||
| 3 | #include <bu/sio.h> | ||
| 4 | |||
| 5 | using namespace Bu; | ||
| 6 | |||
| 7 | void compile( const Bu::String &s, Bu::RegExEngine<char> &ree ) | ||
| 8 | { | ||
| 9 | int iRoot = ree.addState(); | ||
| 10 | int iCur = iRoot; | ||
| 11 | for( Bu::String::const_iterator i = s.begin(); i; i++ ) | ||
| 12 | { | ||
| 13 | int iNext = -1; | ||
| 14 | if( i+1 ) | ||
| 15 | iNext = ree.addState(); | ||
| 16 | ree.addCompletion( iCur, *i, *i, iNext ); | ||
| 17 | iCur = iNext; | ||
| 18 | } | ||
| 19 | } | ||
| 20 | |||
| 21 | int main() | ||
| 22 | { | ||
| 23 | Bu::String sRegEx("abcd"); | ||
| 24 | Bu::String sMatch("abcdefg"); | ||
| 25 | |||
| 26 | Bu::RegExEngine<char> ree; | ||
| 27 | |||
| 28 | compile( sRegEx, ree ); | ||
| 29 | |||
| 30 | bool bRet; | ||
| 31 | int iSize, iCompletion; | ||
| 32 | bRet = ree.match( sMatch, iSize, iCompletion ); | ||
| 33 | |||
| 34 | sio << "Matched: " << bRet << sio.nl | ||
| 35 | << "Size: " << iSize << sio.nl | ||
| 36 | << "Completion: " << iCompletion << sio.nl; | ||
| 37 | |||
| 38 | return 0; | ||
| 39 | } | ||
| 40 | |||
