#ifndef HASH_H #define HASH_H #include #include #include #include #include "exceptionbase.h" #include "hashable.h" #include "serializable.h" #include "serializer.h" #define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0)) subExceptionDecl( HashException ) enum eHashException { excodeNotFilled }; template uint32_t __calcHashCode( const T &k ); template bool __cmpHashKeys( const T &a, const T &b ); struct __calcNextTSize_fast { uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const { if( nDeleted >= nCapacity/2 ) return nCapacity; return nCapacity*2+1; } }; template, typename valuealloc = std::allocator, typename challoc = std::allocator > class Hash; template< typename key, typename _value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator, typename valuealloc = std::allocator<_value>, typename challoc = std::allocator > struct HashProxy { friend class Hash; private: HashProxy( Hash &h, key *k, uint32_t nPos, uint32_t hash ) : hsh( h ), pKey( k ), nPos( nPos ), hash( hash ), bFilled( false ) { } HashProxy( Hash &h, uint32_t nPos, _value *pValue ) : hsh( h ), nPos( nPos ), pValue( pValue ), bFilled( true ) { } Hash &hsh; key *pKey; uint32_t nPos; _value *pValue; uint32_t hash; bool bFilled; public: operator _value &() { if( bFilled == false ) throw HashException( excodeNotFilled, "No data assosiated with that key." ); return *pValue; } _value &value() { if( bFilled == false ) throw HashException( excodeNotFilled, "No data assosiated with that key." ); return *pValue; } bool isFilled() { return bFilled; } void erase() { if( bFilled ) { hsh._erase( nPos ); hsh.onDelete(); } } _value operator=( _value nval ) { if( bFilled ) { hsh.va.destroy( pValue ); hsh.va.construct( pValue, nval ); hsh.onUpdate(); } else { hsh.fill( nPos, *pKey, nval, hash ); hsh.onInsert(); } return nval; } }; template class Hash { friend struct HashProxy; public: Hash() : nCapacity( 11 ), nFilled( 0 ), nDeleted( 0 ), bFilled( NULL ), bDeleted( NULL ), aKeys( NULL ), aValues( NULL ), aHashCodes( NULL ) { nKeysSize = bitsToBytes( nCapacity ); bFilled = ca.allocate( nKeysSize ); bDeleted = ca.allocate( nKeysSize ); clearBits(); aHashCodes = ca.allocate( nCapacity ); aKeys = ka.allocate( nCapacity ); aValues = va.allocate( nCapacity ); } Hash( const Hash &src ) : nCapacity( src.nCapacity ), nFilled( 0 ), nDeleted( 0 ), bFilled( NULL ), bDeleted( NULL ), aKeys( NULL ), aValues( NULL ), aHashCodes( NULL ) { nKeysSize = bitsToBytes( nCapacity ); bFilled = ca.allocate( nKeysSize ); bDeleted = ca.allocate( nKeysSize ); clearBits(); aHashCodes = ca.allocate( nCapacity ); aKeys = ka.allocate( nCapacity ); aValues = va.allocate( nCapacity ); for( uint32_t j = 0; j < src.nCapacity; j++ ) { if( src.isFilled( j ) ) { insert( src.aKeys[j], src.aValues[j] ); } } } Hash &operator=( const Hash &src ) { for( uint32_t j = 0; j < nCapacity; j++ ) { if( isFilled( j ) ) if( !isDeleted( j ) ) { va.destroy( &aValues[j] ); ka.destroy( &aKeys[j] ); } } va.deallocate( aValues, nCapacity ); ka.deallocate( aKeys, nCapacity ); ca.deallocate( bFilled, nKeysSize ); ca.deallocate( bDeleted, nKeysSize ); ca.deallocate( aHashCodes, nCapacity ); nFilled = 0; nDeleted = 0; nCapacity = src.nCapacity; nKeysSize = bitsToBytes( nCapacity ); bFilled = ca.allocate( nKeysSize ); bDeleted = ca.allocate( nKeysSize ); clearBits(); aHashCodes = ca.allocate( nCapacity ); aKeys = ka.allocate( nCapacity ); aValues = va.allocate( nCapacity ); for( uint32_t j = 0; j < src.nCapacity; j++ ) { if( src.isFilled( j ) ) { insert( src.aKeys[j], src.aValues[j] ); } } return *this; } virtual ~Hash() { for( uint32_t j = 0; j < nCapacity; j++ ) { if( isFilled( j ) ) if( !isDeleted( j ) ) { va.destroy( &aValues[j] ); ka.destroy( &aKeys[j] ); } } va.deallocate( aValues, nCapacity ); ka.deallocate( aKeys, nCapacity ); ca.deallocate( bFilled, nKeysSize ); ca.deallocate( bDeleted, nKeysSize ); ca.deallocate( aHashCodes, nCapacity ); } uint32_t getCapacity() { return nCapacity; } uint32_t getFill() { return nFilled; } uint32_t size() { return nFilled-nDeleted; } uint32_t getDeleted() { return nDeleted; } virtual HashProxy operator[]( key k ) { uint32_t hash = __calcHashCode( k ); bool bFill; uint32_t nPos = probe( hash, k, bFill ); if( bFill ) { return HashProxy( *this, nPos, &aValues[nPos] ); } else { return HashProxy( *this, &k, nPos, hash ); } } virtual void insert( key k, value v ) { uint32_t hash = __calcHashCode( k ); bool bFill; uint32_t nPos = probe( hash, k, bFill ); if( bFill ) { va.destroy( &aValues[nPos] ); va.construct( &aValues[nPos], v ); onUpdate(); } else { fill( nPos, k, v, hash ); onInsert(); } } virtual void erase( key k ) { uint32_t hash = __calcHashCode( k ); bool bFill; uint32_t nPos = probe( hash, k, bFill ); if( bFill ) { _erase( nPos ); onDelete(); } } struct iterator; virtual void erase( struct iterator &i ) { if( this != &i.hsh ) throw HashException("This iterator didn't come from this Hash."); if( isFilled( i.nPos ) && !isDeleted( i.nPos ) ) { _erase( i.nPos ); onDelete(); } } virtual void clear() { for( uint32_t j = 0; j < nCapacity; j++ ) { if( isFilled( j ) ) if( !isDeleted( j ) ) { va.destroy( &aValues[j] ); ka.destroy( &aKeys[j] ); onDelete(); } } clearBits(); } virtual value &get( key k ) { uint32_t hash = __calcHashCode( k ); bool bFill; uint32_t nPos = probe( hash, k, bFill ); if( bFill ) { return aValues[nPos]; } else { throw HashException( excodeNotFilled, "No data assosiated with that key." ); } } virtual bool has( key k ) { bool bFill; probe( __calcHashCode( k ), k, bFill ); return bFill; } typedef struct iterator { friend class Hash; private: iterator( Hash &hsh ) : hsh( hsh ), nPos( 0 ), bFinished( false ) { nPos = hsh.getFirstPos( bFinished ); } iterator( Hash &hsh, bool bDone ) : hsh( hsh ), nPos( 0 ), bFinished( bDone ) { } Hash &hsh; uint32_t nPos; bool bFinished; public: iterator operator++( int ) { if( bFinished == false ) nPos = hsh.getNextPos( nPos, bFinished ); return *this; } iterator operator++() { if( bFinished == false ) nPos = hsh.getNextPos( nPos, bFinished ); return *this; } bool operator==( const iterator &oth ) { if( bFinished != oth.bFinished ) return false; if( bFinished == true ) { return true; } else { if( oth.nPos == nPos ) return true; return false; } } bool operator!=( const iterator &oth ) { return !(*this == oth ); } iterator operator=( const iterator &oth ) { if( &hsh != &oth.hsh ) throw HashException( "Cannot mix iterators from different hash objects."); nPos = oth.nPos; bFinished = oth.bFinished; } std::pair operator *() { return hsh.getAtPos( nPos ); } key &getKey() { return hsh.getKeyAtPos( nPos ); } value &getValue() { return hsh.getValueAtPos( nPos ); } }; iterator begin() { return iterator( *this ); } iterator end() { return iterator( *this, true ); } protected: virtual void onInsert() {} virtual void onUpdate() {} virtual void onDelete() {} virtual void onReHash() {} virtual void clearBits() { for( uint32_t j = 0; j < nKeysSize; j++ ) { bFilled[j] = bDeleted[j] = 0; } } virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash ) { bFilled[loc/32] |= (1<<(loc%32)); va.construct( &aValues[loc], v ); ka.construct( &aKeys[loc], k ); aHashCodes[loc] = hash; nFilled++; //printf("Filled: %d, Deleted: %d, Capacity: %d\n", // nFilled, nDeleted, nCapacity ); } virtual void _erase( uint32_t loc ) { bDeleted[loc/32] |= (1<<(loc%32)); va.destroy( &aValues[loc] ); ka.destroy( &aKeys[loc] ); nDeleted++; //printf("Filled: %d, Deleted: %d, Capacity: %d\n", // nFilled, nDeleted, nCapacity ); } virtual std::pair getAtPos( uint32_t nPos ) { return std::pair(aKeys[nPos],aValues[nPos]); } virtual key &getKeyAtPos( uint32_t nPos ) { return aKeys[nPos]; } virtual value &getValueAtPos( uint32_t nPos ) { return aValues[nPos]; } virtual uint32_t getFirstPos( bool &bFinished ) { for( uint32_t j = 0; j < nCapacity; j++ ) { if( isFilled( j ) ) if( !isDeleted( j ) ) return j; } bFinished = true; return 0; } virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) { for( uint32_t j = nPos+1; j < nCapacity; j++ ) { if( isFilled( j ) ) if( !isDeleted( j ) ) return j; } bFinished = true; return 0; } uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) { uint32_t nCur = hash%nCapacity; // First we scan to see if the key is already there, abort if we // run out of probing room, or we find a non-filled entry for( int8_t j = 0; isFilled( nCur ) && j < 32; nCur = (nCur + (1< uint32_t __calcHashCode( const int &k ); template<> bool __cmpHashKeys( const int &a, const int &b ); template<> uint32_t __calcHashCode( const unsigned int &k ); template<> bool __cmpHashKeys( const unsigned int &a, const unsigned int &b ); template<> uint32_t __calcHashCode( const char * const &k ); template<> bool __cmpHashKeys( const char * const &a, const char * const &b ); template<> uint32_t __calcHashCode( char * const &k ); template<> bool __cmpHashKeys( char * const &a, char * const &b ); template<> uint32_t __calcHashCode( const std::string &k ); template<> bool __cmpHashKeys( const std::string &a, const std::string &b ); template<> uint32_t __calcHashCode( const Hashable &k ); template<> bool __cmpHashKeys( const Hashable &a, const Hashable &b ); template Serializer &operator<<( Serializer &ar, Hash &h ) { ar << h.size(); for( typename Hash::iterator i = h.begin(); i != h.end(); i++ ) { std::pair p = *i; ar << p.first << p.second; } return ar; } template Serializer &operator>>( Serializer &ar, Hash &h ) { h.clear(); uint32_t nSize; ar >> nSize; for( uint32_t j = 0; j < nSize; j++ ) { key k; value v; ar >> k >> v; h.insert( k, v ); } return ar; } template Serializer &operator&&( Serializer &ar, Hash &h ) { if( ar.isLoading() ) { return ar >> h; } else { return ar << h; } } #endif