From da89e6d30e57bd6dbb10b4d36b093ce9bbf5c666 Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Tue, 3 Apr 2007 04:50:36 +0000 Subject: The first batch seem to have made it alright. Unfortunately the Archive class isn't done yet, I'm going to make it rely on streams, so those will be next, then we can make it work all sortsa' well. --- src/hash.h | 745 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 745 insertions(+) create mode 100644 src/hash.h (limited to 'src/hash.h') diff --git a/src/hash.h b/src/hash.h new file mode 100644 index 0000000..9e498f1 --- /dev/null +++ b/src/hash.h @@ -0,0 +1,745 @@ +#ifndef HASH_H +#define HASH_H + +#include +#include +#include +#include +#include +#include +#include "exceptionbase.h" +#include "archable.h" +#include "archive.h" + +#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0)) + +namespace Bu +{ + subExceptionDecl( HashException ) + + enum eHashException + { + excodeNotFilled + }; + + template + uint32_t __calcHashCode( const T &k ); + + template + bool __cmpHashKeys( const T &a, const T &b ); + + struct __calcNextTSize_fast + { + uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const + { + if( nDeleted >= nCapacity/2 ) + return nCapacity; + return nCapacity*2+1; + } + }; + + template, typename valuealloc = std::allocator, typename challoc = std::allocator > + class Hash; + + template< typename key, typename _value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator, typename valuealloc = std::allocator<_value>, typename challoc = std::allocator > + struct HashProxy + { + friend class Hash; + private: + HashProxy( Hash &h, key *k, uint32_t nPos, uint32_t hash ) : + hsh( h ), + pKey( k ), + nPos( nPos ), + hash( hash ), + bFilled( false ) + { + } + + HashProxy( Hash &h, uint32_t nPos, _value *pValue ) : + hsh( h ), + nPos( nPos ), + pValue( pValue ), + bFilled( true ) + { + } + + Hash &hsh; + key *pKey; + uint32_t nPos; + _value *pValue; + uint32_t hash; + bool bFilled; + + public: + operator _value &() + { + if( bFilled == false ) + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + return *pValue; + } + + _value &value() + { + if( bFilled == false ) + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + return *pValue; + } + + bool isFilled() + { + return bFilled; + } + + void erase() + { + if( bFilled ) + { + hsh._erase( nPos ); + hsh.onDelete(); + } + } + + _value operator=( _value nval ) + { + if( bFilled ) + { + hsh.va.destroy( pValue ); + hsh.va.construct( pValue, nval ); + hsh.onUpdate(); + } + else + { + hsh.fill( nPos, *pKey, nval, hash ); + hsh.onInsert(); + } + + return nval; + } + + _value *operator->() + { + if( bFilled == false ) + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + return pValue; + } + }; + + template + class Hash + { + friend struct HashProxy; + public: + Hash() : + nCapacity( 11 ), + nFilled( 0 ), + nDeleted( 0 ), + bFilled( NULL ), + bDeleted( NULL ), + aKeys( NULL ), + aValues( NULL ), + aHashCodes( NULL ) + { + nKeysSize = bitsToBytes( nCapacity ); + bFilled = ca.allocate( nKeysSize ); + bDeleted = ca.allocate( nKeysSize ); + clearBits(); + + aHashCodes = ca.allocate( nCapacity ); + aKeys = ka.allocate( nCapacity ); + aValues = va.allocate( nCapacity ); + } + + Hash( const Hash &src ) : + nCapacity( src.nCapacity ), + nFilled( 0 ), + nDeleted( 0 ), + bFilled( NULL ), + bDeleted( NULL ), + aKeys( NULL ), + aValues( NULL ), + aHashCodes( NULL ) + { + nKeysSize = bitsToBytes( nCapacity ); + bFilled = ca.allocate( nKeysSize ); + bDeleted = ca.allocate( nKeysSize ); + clearBits(); + + aHashCodes = ca.allocate( nCapacity ); + aKeys = ka.allocate( nCapacity ); + aValues = va.allocate( nCapacity ); + + for( uint32_t j = 0; j < src.nCapacity; j++ ) + { + if( src.isFilled( j ) ) + { + insert( src.aKeys[j], src.aValues[j] ); + } + } + } + + Hash &operator=( const Hash &src ) + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + { + va.destroy( &aValues[j] ); + ka.destroy( &aKeys[j] ); + } + } + va.deallocate( aValues, nCapacity ); + ka.deallocate( aKeys, nCapacity ); + ca.deallocate( bFilled, nKeysSize ); + ca.deallocate( bDeleted, nKeysSize ); + ca.deallocate( aHashCodes, nCapacity ); + + nFilled = 0; + nDeleted = 0; + nCapacity = src.nCapacity; + nKeysSize = bitsToBytes( nCapacity ); + bFilled = ca.allocate( nKeysSize ); + bDeleted = ca.allocate( nKeysSize ); + clearBits(); + + aHashCodes = ca.allocate( nCapacity ); + aKeys = ka.allocate( nCapacity ); + aValues = va.allocate( nCapacity ); + + for( uint32_t j = 0; j < src.nCapacity; j++ ) + { + if( src.isFilled( j ) ) + { + insert( src.aKeys[j], src.aValues[j] ); + } + } + + return *this; + } + + virtual ~Hash() + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + { + va.destroy( &aValues[j] ); + ka.destroy( &aKeys[j] ); + } + } + va.deallocate( aValues, nCapacity ); + ka.deallocate( aKeys, nCapacity ); + ca.deallocate( bFilled, nKeysSize ); + ca.deallocate( bDeleted, nKeysSize ); + ca.deallocate( aHashCodes, nCapacity ); + } + + uint32_t getCapacity() + { + return nCapacity; + } + + uint32_t getFill() + { + return nFilled; + } + + uint32_t size() + { + return nFilled-nDeleted; + } + + uint32_t getDeleted() + { + return nDeleted; + } + + virtual HashProxy operator[]( key k ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + return HashProxy( *this, nPos, &aValues[nPos] ); + } + else + { + return HashProxy( *this, &k, nPos, hash ); + } + } + + virtual void insert( key k, value v ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + va.destroy( &aValues[nPos] ); + va.construct( &aValues[nPos], v ); + onUpdate(); + } + else + { + fill( nPos, k, v, hash ); + onInsert(); + } + } + + virtual void erase( key k ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + _erase( nPos ); + onDelete(); + } + } + + struct iterator; + virtual void erase( struct iterator &i ) + { + if( this != &i.hsh ) + throw HashException("This iterator didn't come from this Hash."); + if( isFilled( i.nPos ) && !isDeleted( i.nPos ) ) + { + _erase( i.nPos ); + onDelete(); + } + } + + virtual void clear() + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + { + va.destroy( &aValues[j] ); + ka.destroy( &aKeys[j] ); + onDelete(); + } + } + + clearBits(); + } + + virtual value &get( key k ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + return aValues[nPos]; + } + else + { + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + } + } + + virtual bool has( key k ) + { + bool bFill; + probe( __calcHashCode( k ), k, bFill, false ); + + return bFill; + } + + typedef struct iterator + { + friend class Hash; + private: + iterator( Hash &hsh ) : + hsh( hsh ), + nPos( 0 ), + bFinished( false ) + { + nPos = hsh.getFirstPos( bFinished ); + } + + iterator( Hash &hsh, bool bDone ) : + hsh( hsh ), + nPos( 0 ), + bFinished( bDone ) + { + } + + Hash &hsh; + uint32_t nPos; + bool bFinished; + + public: + iterator operator++( int ) + { + if( bFinished == false ) + nPos = hsh.getNextPos( nPos, bFinished ); + + return *this; + } + + iterator operator++() + { + if( bFinished == false ) + nPos = hsh.getNextPos( nPos, bFinished ); + + return *this; + } + + bool operator==( const iterator &oth ) + { + if( bFinished != oth.bFinished ) + return false; + if( bFinished == true ) + { + return true; + } + else + { + if( oth.nPos == nPos ) + return true; + return false; + } + } + + bool operator!=( const iterator &oth ) + { + return !(*this == oth ); + } + + iterator operator=( const iterator &oth ) + { + if( &hsh != &oth.hsh ) + throw HashException( + "Cannot mix iterators from different hash objects."); + nPos = oth.nPos; + bFinished = oth.bFinished; + } + + std::pair operator *() + { + return hsh.getAtPos( nPos ); + } + + key &getKey() + { + return hsh.getKeyAtPos( nPos ); + } + + value &getValue() + { + return hsh.getValueAtPos( nPos ); + } + }; + + iterator begin() + { + return iterator( *this ); + } + + iterator end() + { + return iterator( *this, true ); + } + + std::list getKeys() + { + std::list lKeys; + + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + { + if( !isDeleted( j ) ) + { + lKeys.push_back( aKeys[j] ); + } + } + } + + return lKeys; + } + + protected: + virtual void onInsert() {} + virtual void onUpdate() {} + virtual void onDelete() {} + virtual void onReHash() {} + + virtual void clearBits() + { + for( uint32_t j = 0; j < nKeysSize; j++ ) + { + bFilled[j] = bDeleted[j] = 0; + } + } + + virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash ) + { + bFilled[loc/32] |= (1<<(loc%32)); + va.construct( &aValues[loc], v ); + ka.construct( &aKeys[loc], k ); + aHashCodes[loc] = hash; + nFilled++; + //printf("Filled: %d, Deleted: %d, Capacity: %d\n", + // nFilled, nDeleted, nCapacity ); + } + + virtual void _erase( uint32_t loc ) + { + bDeleted[loc/32] |= (1<<(loc%32)); + va.destroy( &aValues[loc] ); + ka.destroy( &aKeys[loc] ); + nDeleted++; + //printf("Filled: %d, Deleted: %d, Capacity: %d\n", + // nFilled, nDeleted, nCapacity ); + } + + virtual std::pair getAtPos( uint32_t nPos ) + { + return std::pair(aKeys[nPos],aValues[nPos]); + } + + virtual key &getKeyAtPos( uint32_t nPos ) + { + return aKeys[nPos]; + } + + virtual value &getValueAtPos( uint32_t nPos ) + { + return aValues[nPos]; + } + + virtual uint32_t getFirstPos( bool &bFinished ) + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + return j; + } + + bFinished = true; + return 0; + } + + virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) + { + for( uint32_t j = nPos+1; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + return j; + } + + bFinished = true; + return 0; + } + + uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) + { + uint32_t nCur = hash%nCapacity; + + // First we scan to see if the key is already there, abort if we + // run out of probing room, or we find a non-filled entry + for( int8_t j = 0; + isFilled( nCur ) && j < 32; + nCur = (nCur + (1< uint32_t __calcHashCode( const int &k ); + template<> bool __cmpHashKeys( const int &a, const int &b ); + + template<> uint32_t __calcHashCode( const unsigned int &k ); + template<> bool __cmpHashKeys( const unsigned int &a, const unsigned int &b ); + + template<> uint32_t __calcHashCode( const char * const &k ); + template<> bool __cmpHashKeys( const char * const &a, const char * const &b ); + + template<> uint32_t __calcHashCode( char * const &k ); + template<> bool __cmpHashKeys( char * const &a, char * const &b ); + + template<> uint32_t __calcHashCode( const std::string &k ); + template<> bool __cmpHashKeys( const std::string &a, const std::string &b ); + + template + Archive &operator<<( Archive &ar, Hash &h ) + { + ar << h.size(); + for( typename Hash::iterator i = h.begin(); i != h.end(); i++ ) + { + std::pair p = *i; + ar << p.first << p.second; + } + + return ar; + } + + template + Archive &operator>>( Archive &ar, Hash &h ) + { + h.clear(); + uint32_t nSize; + ar >> nSize; + + for( uint32_t j = 0; j < nSize; j++ ) + { + key k; value v; + ar >> k >> v; + h.insert( k, v ); + } + + return ar; + } + + /* + template + Serializer &operator&&( Serializer &ar, Hash &h ) + { + if( ar.isLoading() ) + { + return ar >> h; + } + else + { + return ar << h; + } + }*/ +} + +#endif -- cgit v1.2.3