From 3025ed54309f793c6afbcbc9a564f71cc741f2ef Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Mon, 27 Nov 2006 10:13:44 +0000 Subject: Added the new OrdHash, check the test file for an example. --- src/hash.h | 46 ++++++---- src/ordhash.cpp | 1 + src/ordhash.h | 104 +++++++++++++++++++++++ src/tests/ordhash.cpp | 48 +++++++++++ src/tests/qsort.cpp | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/tqsort.h | 207 +++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 618 insertions(+), 16 deletions(-) create mode 100644 src/ordhash.cpp create mode 100644 src/ordhash.h create mode 100644 src/tests/ordhash.cpp create mode 100644 src/tests/qsort.cpp create mode 100644 src/tqsort.h diff --git a/src/hash.h b/src/hash.h index d53e2be..cd21a29 100644 --- a/src/hash.h +++ b/src/hash.h @@ -92,7 +92,10 @@ public: void erase() { if( bFilled ) + { hsh._erase( nPos ); + hsh.onDelete(); + } } _value operator=( _value nval ) @@ -101,10 +104,12 @@ public: { hsh.va.destroy( pValue ); hsh.va.construct( pValue, nval ); + hsh.onUpdate(); } else { hsh.fill( nPos, *pKey, nval, hash ); + hsh.onInsert(); } return nval; @@ -174,7 +179,7 @@ public: return nDeleted; } - HashProxy operator[]( key k ) + virtual HashProxy operator[]( key k ) { uint32_t hash = __calcHashCode( k ); bool bFill; @@ -190,7 +195,7 @@ public: } } - void insert( key k, value v ) + virtual void insert( key k, value v ) { uint32_t hash = __calcHashCode( k ); bool bFill; @@ -200,14 +205,16 @@ public: { va.destroy( &aValues[nPos] ); va.construct( &aValues[nPos], v ); + onUpdate(); } else { fill( nPos, k, v, hash ); + onInsert(); } } - void erase( key k ) + virtual void erase( key k ) { uint32_t hash = __calcHashCode( k ); bool bFill; @@ -216,10 +223,11 @@ public: if( bFill ) { _erase( nPos ); + onDelete(); } } - void clear() + virtual void clear() { for( uint32_t j = 0; j < nCapacity; j++ ) { @@ -228,13 +236,14 @@ public: { va.destroy( &aValues[j] ); ka.destroy( &aKeys[j] ); + onDelete(); } } clearBits(); } - value get( key k ) + virtual value get( key k ) { uint32_t hash = __calcHashCode( k ); bool bFill; @@ -253,7 +262,7 @@ public: } } - bool has( key k ) + virtual bool has( key k ) { bool bFill; probe( __calcHashCode( k ), k, bFill ); @@ -338,8 +347,13 @@ public: return iterator( *this, true ); } -private: - void clearBits() +protected: + virtual void onInsert() {} + virtual void onUpdate() {} + virtual void onDelete() {} + virtual void onReHash() {} + + virtual void clearBits() { for( uint32_t j = 0; j < nKeysSize; j++ ) { @@ -347,7 +361,7 @@ private: } } - void fill( uint32_t loc, key &k, value &v, uint32_t hash ) + virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash ) { bFilled[loc/32] |= (1<<(loc%32)); va.construct( &aValues[loc], v ); @@ -356,19 +370,19 @@ private: nFilled++; } - void _erase( uint32_t loc ) + virtual void _erase( uint32_t loc ) { bDeleted[loc/32] |= (1<<(loc%32)); va.destroy( &aValues[loc] ); ka.destroy( &aKeys[loc] ); } - std::pair getAtPos( uint32_t nPos ) + virtual std::pair getAtPos( uint32_t nPos ) { return std::pair(aKeys[nPos],aValues[nPos]); } - uint32_t getFirstPos( bool &bFinished ) + virtual uint32_t getFirstPos( bool &bFinished ) { for( uint32_t j = 0; j < nCapacity; j++ ) { @@ -381,7 +395,7 @@ private: return 0; } - uint32_t getNextPos( uint32_t nPos, bool &bFinished ) + virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) { for( uint32_t j = nPos+1; j < nCapacity; j++ ) { @@ -488,17 +502,17 @@ private: ca.deallocate( aOldHashCodes, nOldCapacity ); } - bool isFilled( uint32_t loc ) + virtual bool isFilled( uint32_t loc ) { return (bFilled[loc/32]&(1<<(loc%32)))!=0; } - bool isDeleted( uint32_t loc ) + virtual bool isDeleted( uint32_t loc ) { return (bDeleted[loc/32]&(1<<(loc%32)))!=0; } -private: +protected: uint32_t nCapacity; uint32_t nFilled; uint32_t nDeleted; diff --git a/src/ordhash.cpp b/src/ordhash.cpp new file mode 100644 index 0000000..77cbd61 --- /dev/null +++ b/src/ordhash.cpp @@ -0,0 +1 @@ +#include "ordhash.h" diff --git a/src/ordhash.h b/src/ordhash.h new file mode 100644 index 0000000..e946f95 --- /dev/null +++ b/src/ordhash.h @@ -0,0 +1,104 @@ +#ifndef ORD_HASH_H +#define ORD_HASH_H + +#include "hash.h" +#include "tqsort.h" + +template, typename valuealloc = std::allocator, typename challoc = std::allocator > +class OrdHash : public Hash +{ +public: + OrdHash() : + bSorted( false ), + aData( NULL ) + { + } + + virtual ~OrdHash() + { + } + +protected: + virtual void invalidate() + { + bSorted = false; + delete[] aData; + aData = NULL; + } + + virtual void onInsert() + { + invalidate(); + } + + virtual void onUpdate() + { + invalidate(); + } + + virtual void onDelete() + { + invalidate(); + } + + virtual void onReHash() + { + invalidate(); + } + + virtual std::pair getAtPos( uint32_t nPos ) + { + return Hash::getAtPos( aData[nPos].nIndex ); + } + + virtual void buildIndex() + { + aData = new struct ind[this->nFilled]; + uint32_t k = 0; + for( uint32_t j = 0; j < this->nCapacity; j++ ) + { + if( this->isFilled( j ) ) + { + if( !this->isDeleted( j ) ) + { + aData[k].pVal = &(this->aValues[j]); + aData[k].nIndex = j; + k++; + } + } + } + + tqsort::ind, cmpfnc, value **>( aData, this->nFilled ); + + bSorted = true; + } + + virtual uint32_t getFirstPos( bool &bFinished ) + { + if( bSorted == false ) + buildIndex(); + + return 0; + } + + virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) + { + if( nPos+1 >= this->nFilled ) + { + bFinished = true; + return 0; + } + return ++nPos; + } +public: + typedef struct ind + { + value *pVal; + uint32_t nIndex; + } ind; +private: + bool bSorted; + ind *aData; +}; + +#endif diff --git a/src/tests/ordhash.cpp b/src/tests/ordhash.cpp new file mode 100644 index 0000000..f1d96ec --- /dev/null +++ b/src/tests/ordhash.cpp @@ -0,0 +1,48 @@ +#include "ordhash.h" +#include + +typedef struct eldef +{ + eldef( int a, int b, const std::string &c ) : + id( a ), nSequence( b ), sName( c ) {} + int id; + int nSequence; + std::string sName; +} eldef; + +struct seqcmp +{ + bool operator()( eldef **a, eldef **b ) + { + return (*a)->nSequence < (*b)->nSequence; + } +}; + +struct namcmp +{ + bool operator()( eldef **a, eldef **b ) + { + return (*a)->sName < (*b)->sName; + } +}; + +typedef OrdHash AHash; +//typedef OrdHash AHash; + +int main() +{ + AHash hsh; + hsh[1] = eldef( 0, 43, "Bob"); + hsh[4] = eldef( 1, 443, "Abby"); + hsh[2] = eldef( 2, 1, "Name"); + hsh[5] = eldef( 3, 0, "Catagory"); + hsh[32] = eldef( 4, 12, "Epilogue"); + + for( AHash::iterator i = hsh.begin(); i != hsh.end(); i++ ) + { + eldef e = (*i).second; + printf("%d, %d, %s\n", e.id, e.nSequence, e.sName.c_str() ); + } + +} + diff --git a/src/tests/qsort.cpp b/src/tests/qsort.cpp new file mode 100644 index 0000000..28c6f03 --- /dev/null +++ b/src/tests/qsort.cpp @@ -0,0 +1,228 @@ +#define _QSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t))) + +/* Discontinue quicksort algorithm when partition gets below this size. + This particular magic number was chosen to work best on a Sun 4/260. */ +#define _QSORT_MAX_THRESH 4 + +/* Stack node declarations used to store unfulfilled partition obligations + * (inlined in QSORT). +typedef struct { + QSORT_TYPE *_lo, *_hi; +} qsort_stack_node; + */ + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +/* The stack needs log (total_elements) entries (we could even subtract + log(MAX_THRESH)). Since total_elements has type unsigned, we get as + upper bound for log (total_elements): + bits per byte (CHAR_BIT) * sizeof(unsigned). */ +#define _QSORT_STACK_SIZE (8 * sizeof(unsigned)) +#define _QSORT_PUSH(top, low, high) \ + (((top->_lo = (low)), (top->_hi = (high)), ++top)) +#define _QSORT_POP(low, high, top) \ + ((--top, (low = top->_lo), (high = top->_hi))) +#define _QSORT_STACK_NOT_EMPTY (_stack < _top) + + +/* Order size using quicksort. This implementation incorporates + four optimizations discussed in Sedgewick: + + 1. Non-recursive, using an explicit stack of pointer that store the + next array partition to sort. To save time, this maximum amount + of space required to store an array of SIZE_MAX is allocated on the + stack. Assuming a 32-bit (64 bit) integer for size_t, this needs + only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). + Pretty cheap, actually. + + 2. Chose the pivot element using a median-of-three decision tree. + This reduces the probability of selecting a bad pivot value and + eliminates certain extraneous comparisons. + + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving + insertion sort to order the MAX_THRESH items within each partition. + This is a big win, since insertion sort is faster for small, mostly + sorted array segments. + + 4. The larger of the two sub-partitions is always pushed onto the + stack first, with the algorithm then concentrating on the + smaller partition. This *guarantees* no more than log (total_elems) + stack size is needed (actually O(1) in this case)! */ + +/* The main code starts here... */ + +template +void qsrt( QSORT_TYPE *QSORT_BASE, int QSORT_NELT ) +{ + QSORT_LTT QSORT_LT; + QSORT_TYPE *const _base = (QSORT_BASE); + const unsigned _elems = (QSORT_NELT); + QSORT_TYPE _hold; + + /* Don't declare two variables of type QSORT_TYPE in a single + * statement: eg `TYPE a, b;', in case if TYPE is a pointer, + * expands to `type* a, b;' wich isn't what we want. + */ + + if (_elems > _QSORT_MAX_THRESH) { + QSORT_TYPE *_lo = _base; + QSORT_TYPE *_hi = _lo + _elems - 1; + struct { + QSORT_TYPE *_hi; QSORT_TYPE *_lo; + } _stack[_QSORT_STACK_SIZE], *_top = _stack + 1; + + while (_QSORT_STACK_NOT_EMPTY) { + QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr; + + /* Select median value from among LO, MID, and HI. Rearrange + LO and HI so the three values are sorted. This lowers the + probability of picking a pathological pivot value and + skips a comparison for both the LEFT_PTR and RIGHT_PTR in + the while loops. */ + + QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); + + if (QSORT_LT (_mid, _lo)) + _QSORT_SWAP (_mid, _lo, _hold); + if (QSORT_LT (_hi, _mid)) + _QSORT_SWAP (_mid, _hi, _hold); + else + goto _jump_over; + if (QSORT_LT (_mid, _lo)) + _QSORT_SWAP (_mid, _lo, _hold); + _jump_over:; + + _left_ptr = _lo + 1; + _right_ptr = _hi - 1; + + /* Here's the famous ``collapse the walls'' section of quicksort. + Gotta like those tight inner loops! They are the main reason + that this algorithm runs much faster than others. */ + do { + while (QSORT_LT (_left_ptr, _mid)) + ++_left_ptr; + + while (QSORT_LT (_mid, _right_ptr)) + --_right_ptr; + + if (_left_ptr < _right_ptr) { + _QSORT_SWAP (_left_ptr, _right_ptr, _hold); + if (_mid == _left_ptr) + _mid = _right_ptr; + else if (_mid == _right_ptr) + _mid = _left_ptr; + ++_left_ptr; + --_right_ptr; + } + else if (_left_ptr == _right_ptr) { + ++_left_ptr; + --_right_ptr; + break; + } + } while (_left_ptr <= _right_ptr); + + /* Set up pointers for next iteration. First determine whether + left and right partitions are below the threshold size. If so, + ignore one or both. Otherwise, push the larger partition's + bounds on the stack and continue sorting the smaller one. */ + + if (_right_ptr - _lo <= _QSORT_MAX_THRESH) { + if (_hi - _left_ptr <= _QSORT_MAX_THRESH) + /* Ignore both small partitions. */ + _QSORT_POP (_lo, _hi, _top); + else + /* Ignore small left partition. */ + _lo = _left_ptr; + } + else if (_hi - _left_ptr <= _QSORT_MAX_THRESH) + /* Ignore small right partition. */ + _hi = _right_ptr; + else if (_right_ptr - _lo > _hi - _left_ptr) { + /* Push larger left partition indices. */ + _QSORT_PUSH (_top, _lo, _right_ptr); + _lo = _left_ptr; + } + else { + /* Push larger right partition indices. */ + _QSORT_PUSH (_top, _left_ptr, _hi); + _hi = _right_ptr; + } + } + } + + /* Once the BASE array is partially sorted by quicksort the rest + is completely sorted using insertion sort, since this is efficient + for partitions below MAX_THRESH size. BASE points to the + beginning of the array to sort, and END_PTR points at the very + last element in the array (*not* one beyond it!). */ + + { + QSORT_TYPE *const _end_ptr = _base + _elems - 1; + QSORT_TYPE *_tmp_ptr = _base; + register QSORT_TYPE *_run_ptr; + QSORT_TYPE *_thresh; + + _thresh = _base + _QSORT_MAX_THRESH; + if (_thresh > _end_ptr) + _thresh = _end_ptr; + + /* Find smallest element in first threshold and place it at the + array's beginning. This is the smallest array element, + and the operation speeds up insertion sort's inner loop. */ + + for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) + if (QSORT_LT (_run_ptr, _tmp_ptr)) + _tmp_ptr = _run_ptr; + + if (_tmp_ptr != _base) + _QSORT_SWAP (_tmp_ptr, _base, _hold); + + /* Insertion sort, running from left-hand-side + * up to right-hand-side. */ + + _run_ptr = _base + 1; + while (++_run_ptr <= _end_ptr) { + _tmp_ptr = _run_ptr - 1; + while (QSORT_LT (_run_ptr, _tmp_ptr)) + --_tmp_ptr; + + ++_tmp_ptr; + if (_tmp_ptr != _run_ptr) { + QSORT_TYPE *_trav = _run_ptr + 1; + while (--_trav >= _run_ptr) { + QSORT_TYPE *_hi; QSORT_TYPE *_lo; + _hold = *_trav; + + for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) + *_hi = *_lo; + *_hi = _hold; + } + } + } + } + +} + + +struct cc +{ + bool operator()( int *a, int *b ) + { + return *a < *b; + } +}; + +#include + +int main() +{ + int lst[] = { 43, 1, 342, 12, 491, 32, 12321, 32, 3, -3 }; + + for( int j = 0; j < 10; j++ ) + printf("%s%d", (j>0)?", ":"", lst[j] ); + printf("\n"); + qsrt( lst, 10 ); + for( int j = 0; j < 10; j++ ) + printf("%s%d", (j>0)?", ":"", lst[j] ); + printf("\n"); +} + diff --git a/src/tqsort.h b/src/tqsort.h new file mode 100644 index 0000000..c836b4f --- /dev/null +++ b/src/tqsort.h @@ -0,0 +1,207 @@ +#ifndef T_QSORT_H +#define T_QSORT_H + +#define _QSORT_SWAP(a, b, t) ((void)((t = *a), (*a = *b), (*b = t))) + +/* Discontinue quicksort algorithm when partition gets below this size. + This particular magic number was chosen to work best on a Sun 4/260. */ +#define _QSORT_MAX_THRESH 4 + +/* Stack node declarations used to store unfulfilled partition obligations + * (inlined in QSORT). +typedef struct { + QSORT_TYPE *_lo, *_hi; +} qsort_stack_node; + */ + +/* The next 4 #defines implement a very fast in-line stack abstraction. */ +/* The stack needs log (total_elements) entries (we could even subtract + log(MAX_THRESH)). Since total_elements has type unsigned, we get as + upper bound for log (total_elements): + bits per byte (CHAR_BIT) * sizeof(unsigned). */ +#define _QSORT_STACK_SIZE (8 * sizeof(unsigned)) +#define _QSORT_PUSH(top, low, high) \ + (((top->_lo = (low)), (top->_hi = (high)), ++top)) +#define _QSORT_POP(low, high, top) \ + ((--top, (low = top->_lo), (high = top->_hi))) +#define _QSORT_STACK_NOT_EMPTY (_stack < _top) + + +/* Order size using quicksort. This implementation incorporates + four optimizations discussed in Sedgewick: + + 1. Non-recursive, using an explicit stack of pointer that store the + next array partition to sort. To save time, this maximum amount + of space required to store an array of SIZE_MAX is allocated on the + stack. Assuming a 32-bit (64 bit) integer for size_t, this needs + only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes). + Pretty cheap, actually. + + 2. Chose the pivot element using a median-of-three decision tree. + This reduces the probability of selecting a bad pivot value and + eliminates certain extraneous comparisons. + + 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving + insertion sort to order the MAX_THRESH items within each partition. + This is a big win, since insertion sort is faster for small, mostly + sorted array segments. + + 4. The larger of the two sub-partitions is always pushed onto the + stack first, with the algorithm then concentrating on the + smaller partition. This *guarantees* no more than log (total_elems) + stack size is needed (actually O(1) in this case)! */ + +/* The main code starts here... */ + +template +void tqsort( QSORT_TYPE *QSORT_BASE, int QSORT_NELT ) +{ + QSORT_LTT QSORT_LT; + QSORT_TYPE *const _base = (QSORT_BASE); + const unsigned _elems = (QSORT_NELT); + QSORT_TYPE _hold; + + /* Don't declare two variables of type QSORT_TYPE in a single + * statement: eg `TYPE a, b;', in case if TYPE is a pointer, + * expands to `type* a, b;' wich isn't what we want. + */ + + if (_elems > _QSORT_MAX_THRESH) { + QSORT_TYPE *_lo = _base; + QSORT_TYPE *_hi = _lo + _elems - 1; + struct { + QSORT_TYPE *_hi; QSORT_TYPE *_lo; + } _stack[_QSORT_STACK_SIZE], *_top = _stack + 1; + + while (_QSORT_STACK_NOT_EMPTY) { + QSORT_TYPE *_left_ptr; QSORT_TYPE *_right_ptr; + + /* Select median value from among LO, MID, and HI. Rearrange + LO and HI so the three values are sorted. This lowers the + probability of picking a pathological pivot value and + skips a comparison for both the LEFT_PTR and RIGHT_PTR in + the while loops. */ + + QSORT_TYPE *_mid = _lo + ((_hi - _lo) >> 1); + + if (QSORT_LT ((CST)(_mid), (CST)(_lo))) + _QSORT_SWAP (_mid, _lo, _hold); + if (QSORT_LT ((CST)(_hi), (CST)(_mid))) + _QSORT_SWAP (_mid, _hi, _hold); + else + goto _jump_over; + if (QSORT_LT ((CST)(_mid), (CST)(_lo))) + _QSORT_SWAP (_mid, _lo, _hold); + _jump_over:; + + _left_ptr = _lo + 1; + _right_ptr = _hi - 1; + + /* Here's the famous ``collapse the walls'' section of quicksort. + Gotta like those tight inner loops! They are the main reason + that this algorithm runs much faster than others. */ + do { + while (QSORT_LT ((CST)(_left_ptr), (CST)(_mid))) + ++_left_ptr; + + while (QSORT_LT ((CST)(_mid), (CST)(_right_ptr))) + --_right_ptr; + + if (_left_ptr < _right_ptr) { + _QSORT_SWAP (_left_ptr, _right_ptr, _hold); + if (_mid == _left_ptr) + _mid = _right_ptr; + else if (_mid == _right_ptr) + _mid = _left_ptr; + ++_left_ptr; + --_right_ptr; + } + else if (_left_ptr == _right_ptr) { + ++_left_ptr; + --_right_ptr; + break; + } + } while (_left_ptr <= _right_ptr); + + /* Set up pointers for next iteration. First determine whether + left and right partitions are below the threshold size. If so, + ignore one or both. Otherwise, push the larger partition's + bounds on the stack and continue sorting the smaller one. */ + + if (_right_ptr - _lo <= _QSORT_MAX_THRESH) { + if (_hi - _left_ptr <= _QSORT_MAX_THRESH) + /* Ignore both small partitions. */ + _QSORT_POP (_lo, _hi, _top); + else + /* Ignore small left partition. */ + _lo = _left_ptr; + } + else if (_hi - _left_ptr <= _QSORT_MAX_THRESH) + /* Ignore small right partition. */ + _hi = _right_ptr; + else if (_right_ptr - _lo > _hi - _left_ptr) { + /* Push larger left partition indices. */ + _QSORT_PUSH (_top, _lo, _right_ptr); + _lo = _left_ptr; + } + else { + /* Push larger right partition indices. */ + _QSORT_PUSH (_top, _left_ptr, _hi); + _hi = _right_ptr; + } + } + } + + /* Once the BASE array is partially sorted by quicksort the rest + is completely sorted using insertion sort, since this is efficient + for partitions below MAX_THRESH size. BASE points to the + beginning of the array to sort, and END_PTR points at the very + last element in the array (*not* one beyond it!). */ + + { + QSORT_TYPE *const _end_ptr = _base + _elems - 1; + QSORT_TYPE *_tmp_ptr = _base; + register QSORT_TYPE *_run_ptr; + QSORT_TYPE *_thresh; + + _thresh = _base + _QSORT_MAX_THRESH; + if (_thresh > _end_ptr) + _thresh = _end_ptr; + + /* Find smallest element in first threshold and place it at the + array's beginning. This is the smallest array element, + and the operation speeds up insertion sort's inner loop. */ + + for (_run_ptr = _tmp_ptr + 1; _run_ptr <= _thresh; ++_run_ptr) + if (QSORT_LT ((CST)(_run_ptr), (CST)(_tmp_ptr))) + _tmp_ptr = _run_ptr; + + if (_tmp_ptr != _base) + _QSORT_SWAP (_tmp_ptr, _base, _hold); + + /* Insertion sort, running from left-hand-side + * up to right-hand-side. */ + + _run_ptr = _base + 1; + while (++_run_ptr <= _end_ptr) { + _tmp_ptr = _run_ptr - 1; + while (QSORT_LT ((CST)(_run_ptr), (CST)(_tmp_ptr))) + --_tmp_ptr; + + ++_tmp_ptr; + if (_tmp_ptr != _run_ptr) { + QSORT_TYPE *_trav = _run_ptr + 1; + while (--_trav >= _run_ptr) { + QSORT_TYPE *_hi; QSORT_TYPE *_lo; + _hold = *_trav; + + for (_hi = _lo = _trav; --_lo >= _tmp_ptr; _hi = _lo) + *_hi = *_lo; + *_hi = _hold; + } + } + } + } +} + +#endif -- cgit v1.2.3