From 9cf4949ff56464ce4737fd3a0e6b757032354b0e Mon Sep 17 00:00:00 2001 From: Mike Buland Date: Sun, 5 Aug 2007 04:57:04 +0000 Subject: Set is just a copy of hash for now. It'd be cool if they could be linked, not really sure how that could happen easily. --- src/set.cpp | 101 ++++++ src/set.h | 1072 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1173 insertions(+) create mode 100644 src/set.cpp create mode 100644 src/set.h diff --git a/src/set.cpp b/src/set.cpp new file mode 100644 index 0000000..a207c29 --- /dev/null +++ b/src/set.cpp @@ -0,0 +1,101 @@ +#include "hash.h" + +namespace Bu { subExceptionDef( HashException ) } + +template<> uint32_t Bu::__calcHashCode( const int &k ) +{ + return k; +} + +template<> bool Bu::__cmpHashKeys( const int &a, const int &b ) +{ + return a == b; +} + +template<> uint32_t Bu::__calcHashCode( const unsigned int &k ) +{ + return k; +} + +template<> bool Bu::__cmpHashKeys( const unsigned int &a, const unsigned int &b ) +{ + return a == b; +} + +template<> +uint32_t Bu::__calcHashCode( const char * const &k ) +{ + if (k == NULL) + { + return 0; + } + + unsigned long int nPos = 0; + for( const char *s = k; *s; s++ ) + { + nPos = *s + (nPos << 6) + (nPos << 16) - nPos; + } + + return nPos; +} + +template<> bool Bu::__cmpHashKeys( const char * const &a, const char * const &b ) +{ + if( a == b ) + return true; + + for(int j=0; a[j] == b[j]; j++ ) + if( a[j] == '\0' ) + return true; + + return false; +} + +template<> +uint32_t Bu::__calcHashCode( char * const &k ) +{ + if (k == NULL) + { + return 0; + } + + unsigned long int nPos = 0; + for( const char *s = k; *s; s++ ) + { + nPos = *s + (nPos << 6) + (nPos << 16) - nPos; + } + + return nPos; +} + +template<> bool Bu::__cmpHashKeys( char * const &a, char * const &b ) +{ + if( a == b ) + return true; + + for(int j=0; a[j] == b[j]; j++ ) + if( a[j] == '\0' ) + return true; + + return false; +} + +template<> uint32_t Bu::__calcHashCode( const std::string &k ) +{ + std::string::size_type j, sz = k.size(); + const char *s = k.c_str(); + + unsigned long int nPos = 0; + for( j = 0; j < sz; j++, s++ ) + { + nPos = *s + (nPos << 6) + (nPos << 16) - nPos; + } + + return nPos; +} + +template<> bool Bu::__cmpHashKeys( const std::string &a, const std::string &b ) +{ + return a == b; +} + diff --git a/src/set.h b/src/set.h new file mode 100644 index 0000000..36665a6 --- /dev/null +++ b/src/set.h @@ -0,0 +1,1072 @@ +#ifndef BU_HASH_H +#define BU_HASH_H + +#include +#include +#include +#include +#include +#include +#include "bu/exceptionbase.h" +#include "bu/list.h" +///#include "archival.h" +///#include "archive.h" + +#define bitsToBytes( n ) (n/32+(n%32>0 ? 1 : 0)) + +namespace Bu +{ + subExceptionDecl( HashException ) + + enum eHashException + { + excodeNotFilled + }; + + template + uint32_t __calcHashCode( const T &k ); + + template + bool __cmpHashKeys( const T &a, const T &b ); + + struct __calcNextTSize_fast + { + uint32_t operator()( uint32_t nCapacity, uint32_t nFill, uint32_t nDeleted ) const + { + if( nDeleted >= nCapacity/2 ) + return nCapacity; + return nCapacity*2+1; + } + }; + + template, typename valuealloc = std::allocator, typename challoc = std::allocator > + class Hash; + + template< typename key, typename _value, typename sizecalc = __calcNextTSize_fast, typename keyalloc = std::allocator, typename valuealloc = std::allocator<_value>, typename challoc = std::allocator > + struct HashProxy + { + friend class Hash; + private: + HashProxy( Hash &h, key *k, uint32_t nPos, uint32_t hash ) : + hsh( h ), + pKey( k ), + nPos( nPos ), + hash( hash ), + bFilled( false ) + { + } + + HashProxy( Hash &h, uint32_t nPos, _value *pValue ) : + hsh( h ), + nPos( nPos ), + pValue( pValue ), + bFilled( true ) + { + } + + Hash &hsh; + key *pKey; + uint32_t nPos; + _value *pValue; + uint32_t hash; + bool bFilled; + + public: + /** + * Cast operator for HashProxy. + *@returns (value_type &) The value the HashProxy is pointing to. + */ + operator _value &() + { + if( bFilled == false ) + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + return *pValue; + } + + /** + * Direct function for retrieving a value out of the HashProxy. + *@returns (value_type &) The value pointed to by this HashProxy. + */ + _value &value() + { + if( bFilled == false ) + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + return *pValue; + } + + /** + * Whether this HashProxy points to something real or not. + */ + bool isFilled() + { + return bFilled; + } + + /** + * Erase the data pointed to by this HashProxy. + */ + void erase() + { + if( bFilled ) + { + hsh._erase( nPos ); + hsh.onDelete(); + } + } + + /** + * Assign data to this point in the hash table. + *@param nval (value_type) the data to assign. + */ + _value operator=( _value nval ) + { + if( bFilled ) + { + hsh.va.destroy( pValue ); + hsh.va.construct( pValue, nval ); + hsh.onUpdate(); + } + else + { + hsh.fill( nPos, *pKey, nval, hash ); + hsh.onInsert(); + } + + return nval; + } + + /** + * Pointer extraction operator. Access to members of data pointed to + * by HashProxy. + *@returns (value_type *) + */ + _value *operator->() + { + if( bFilled == false ) + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + return pValue; + } + }; + + /** + * Libbu Template Hash Table + *@param key (typename) The datatype of the hashtable keys + *@param value (typename) The datatype of the hashtable data + *@param sizecalc (typename) Functor to compute new table size on rehash + *@param keyalloc (typename) Memory allocator for hashtable keys + *@param valuealloc (typename) Memory allocator for hashtable values + *@param challoc (typename) Byte allocator for bitflags + */ + template + class Hash + { + friend struct HashProxy; + public: + Hash() : + nCapacity( 11 ), + nFilled( 0 ), + nDeleted( 0 ), + bFilled( NULL ), + bDeleted( NULL ), + aKeys( NULL ), + aValues( NULL ), + aHashCodes( NULL ) + { + nKeysSize = bitsToBytes( nCapacity ); + bFilled = ca.allocate( nKeysSize ); + bDeleted = ca.allocate( nKeysSize ); + clearBits(); + + aHashCodes = ca.allocate( nCapacity ); + aKeys = ka.allocate( nCapacity ); + aValues = va.allocate( nCapacity ); + } + + Hash( const Hash &src ) : + nCapacity( src.nCapacity ), + nFilled( 0 ), + nDeleted( 0 ), + bFilled( NULL ), + bDeleted( NULL ), + aKeys( NULL ), + aValues( NULL ), + aHashCodes( NULL ) + { + nKeysSize = bitsToBytes( nCapacity ); + bFilled = ca.allocate( nKeysSize ); + bDeleted = ca.allocate( nKeysSize ); + clearBits(); + + aHashCodes = ca.allocate( nCapacity ); + aKeys = ka.allocate( nCapacity ); + aValues = va.allocate( nCapacity ); + + for( uint32_t j = 0; j < src.nCapacity; j++ ) + { + if( src.isFilled( j ) ) + { + insert( src.aKeys[j], src.aValues[j] ); + } + } + } + + /** + * Hashtable assignment operator. Clears this hashtable and + * copies RH into it. + */ + Hash &operator=( const Hash &src ) + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + { + va.destroy( &aValues[j] ); + ka.destroy( &aKeys[j] ); + } + } + va.deallocate( aValues, nCapacity ); + ka.deallocate( aKeys, nCapacity ); + ca.deallocate( bFilled, nKeysSize ); + ca.deallocate( bDeleted, nKeysSize ); + ca.deallocate( aHashCodes, nCapacity ); + + nFilled = 0; + nDeleted = 0; + nCapacity = src.nCapacity; + nKeysSize = bitsToBytes( nCapacity ); + bFilled = ca.allocate( nKeysSize ); + bDeleted = ca.allocate( nKeysSize ); + clearBits(); + + aHashCodes = ca.allocate( nCapacity ); + aKeys = ka.allocate( nCapacity ); + aValues = va.allocate( nCapacity ); + + for( uint32_t j = 0; j < src.nCapacity; j++ ) + { + if( src.isFilled( j ) ) + { + insert( src.aKeys[j], src.aValues[j] ); + } + } + + return *this; + } + + virtual ~Hash() + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + { + va.destroy( &aValues[j] ); + ka.destroy( &aKeys[j] ); + } + } + va.deallocate( aValues, nCapacity ); + ka.deallocate( aKeys, nCapacity ); + ca.deallocate( bFilled, nKeysSize ); + ca.deallocate( bDeleted, nKeysSize ); + ca.deallocate( aHashCodes, nCapacity ); + } + + /** + * Get the current hash table capacity. (Changes at re-hash) + *@returns (uint32_t) The current capacity. + */ + uint32_t getCapacity() + { + return nCapacity; + } + + /** + * Get the number of hash locations spoken for. (Including + * not-yet-cleaned-up deleted items.) + *@returns (uint32_t) The current fill state. + */ + uint32_t getFill() + { + return nFilled; + } + + /** + * Get the number of items stored in the hash table. + *@returns (uint32_t) The number of items stored in the hash table. + */ + uint32_t size() + { + return nFilled-nDeleted; + } + + /** + * Get the number of items which have been deleted, but not yet + * cleaned up. + *@returns (uint32_t) The number of deleted items. + */ + uint32_t getDeleted() + { + return nDeleted; + } + + /** + * Hash table index operator + *@param k (key_type) Key of data to be retrieved. + *@returns (HashProxy) Proxy pointing to the data. + */ + virtual HashProxy operator[]( key k ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + return HashProxy( *this, nPos, &aValues[nPos] ); + } + else + { + return HashProxy( *this, &k, nPos, hash ); + } + } + + /** + * Insert a value (v) under key (k) into the hash table + *@param k (key_type) Key to list the value under. + *@param v (value_type) Value to store in the hash table. + */ + virtual void insert( key k, value v ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + va.destroy( &aValues[nPos] ); + va.construct( &aValues[nPos], v ); + onUpdate(); + } + else + { + fill( nPos, k, v, hash ); + onInsert(); + } + } + + /** + * Remove a value from the hash table. + *@param k (key_type) The data under this key will be erased. + */ + virtual void erase( key k ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + _erase( nPos ); + onDelete(); + } + } + + struct iterator; + + /** + * Remove a value from the hash pointed to from an iterator. + *@param i (iterator &) The data to be erased. + */ + virtual void erase( struct iterator &i ) + { + if( this != &i.hsh ) + throw HashException("This iterator didn't come from this Hash."); + if( isFilled( i.nPos ) && !isDeleted( i.nPos ) ) + { + _erase( i.nPos ); + onDelete(); + } + } + + /** + * Remove all data from the hash table. + */ + virtual void clear() + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + { + va.destroy( &aValues[j] ); + ka.destroy( &aKeys[j] ); + onDelete(); + } + } + + clearBits(); + } + + /** + * Get an item of data from the hash table. + *@param k (key_type) Key pointing to the data to be retrieved. + *@returns (value_type &) The data pointed to by (k). + */ + virtual value &get( key k ) + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + return aValues[nPos]; + } + else + { + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + } + } + + /** + * Get a const item of data from the hash table. + *@param k (key_type) Key pointing to the data to be retrieved. + *@returns (const value_type &) A const version of the data pointed + * to by (k). + */ + virtual const value &get( key k ) const + { + uint32_t hash = __calcHashCode( k ); + bool bFill; + uint32_t nPos = probe( hash, k, bFill ); + + if( bFill ) + { + return aValues[nPos]; + } + else + { + throw HashException( + excodeNotFilled, + "No data assosiated with that key." + ); + } + } + + /** + * Does the hash table contain an item under key (k). + *@param k (key_type) The key to check. + *@returns (bool) Whether there was an item in the hash under key (k). + */ + virtual bool has( key k ) + { + bool bFill; + probe( __calcHashCode( k ), k, bFill, false ); + + return bFill; + } + + /** + * Iteration structure for iterating through the hash. + */ + typedef struct iterator + { + friend class Hash; + private: + iterator( Hash &hsh ) : + hsh( hsh ), + nPos( 0 ), + bFinished( false ) + { + nPos = hsh.getFirstPos( bFinished ); + } + + iterator( Hash &hsh, bool bDone ) : + hsh( hsh ), + nPos( 0 ), + bFinished( bDone ) + { + } + + Hash &hsh; + uint32_t nPos; + bool bFinished; + + public: + /** + * Iterator incrementation operator. Move the iterator forward. + */ + iterator operator++( int ) + { + if( bFinished == false ) + nPos = hsh.getNextPos( nPos, bFinished ); + + return *this; + } + + /** + * Iterator incrementation operator. Move the iterator forward. + */ + iterator operator++() + { + if( bFinished == false ) + nPos = hsh.getNextPos( nPos, bFinished ); + + return *this; + } + + /** + * Iterator equality comparison operator. Iterators the same? + */ + bool operator==( const iterator &oth ) + { + if( bFinished != oth.bFinished ) + return false; + if( bFinished == true ) + { + return true; + } + else + { + if( oth.nPos == nPos ) + return true; + return false; + } + } + + /** + * Iterator not equality comparison operator. Not the same? + */ + bool operator!=( const iterator &oth ) + { + return !(*this == oth ); + } + + /** + * Iterator assignment operator. + */ + iterator operator=( const iterator &oth ) + { + if( &hsh != &oth.hsh ) + throw HashException( + "Cannot mix iterators from different hash objects."); + nPos = oth.nPos; + bFinished = oth.bFinished; + } + + /** + * Iterator dereference operator... err.. get the value + *@returns (value_type &) The value behind this iterator. + */ + value &operator *() + { + return hsh.getValueAtPos( nPos ); + } + + /** + * Get the key behind this iterator. + *@returns (key_type &) The key behind this iterator. + */ + key &getKey() + { + return hsh.getKeyAtPos( nPos ); + } + + /** + * Get the value behind this iterator. + *@returs (value_type &) The value behind this iterator. + */ + value &getValue() + { + return hsh.getValueAtPos( nPos ); + } + }; + + /** + * Iteration structure for iterating through the hash (const). + */ + typedef struct const_iterator + { + friend class Hash; + private: + const_iterator( const Hash &hsh ) : + hsh( hsh ), + nPos( 0 ), + bFinished( false ) + { + nPos = hsh.getFirstPos( bFinished ); + } + + const_iterator( const Hash &hsh, bool bDone ) : + hsh( hsh ), + nPos( 0 ), + bFinished( bDone ) + { + } + + const Hash &hsh; + uint32_t nPos; + bool bFinished; + + public: + /** + * Iterator incrementation operator. Move the iterator forward. + */ + const_iterator operator++( int ) + { + if( bFinished == false ) + nPos = hsh.getNextPos( nPos, bFinished ); + + return *this; + } + + /** + * Iterator incrementation operator. Move the iterator forward. + */ + const_iterator operator++() + { + if( bFinished == false ) + nPos = hsh.getNextPos( nPos, bFinished ); + + return *this; + } + + /** + * Iterator equality comparison operator. Iterators the same? + */ + bool operator==( const const_iterator &oth ) + { + if( bFinished != oth.bFinished ) + return false; + if( bFinished == true ) + { + return true; + } + else + { + if( oth.nPos == nPos ) + return true; + return false; + } + } + + /** + * Iterator not equality comparison operator. Not the same? + */ + bool operator!=( const const_iterator &oth ) + { + return !(*this == oth ); + } + + /** + * Iterator assignment operator. + */ + const_iterator operator=( const const_iterator &oth ) + { + if( &hsh != &oth.hsh ) + throw HashException( + "Cannot mix iterators from different hash objects."); + nPos = oth.nPos; + bFinished = oth.bFinished; + } + + /** + * Iterator dereference operator... err.. get the value + *@returns (value_type &) The value behind this iterator. + */ + const value &operator *() const + { + return hsh.getValueAtPos( nPos ); + } + + /** + * Get the key behind this iterator. + *@returns (key_type &) The key behind this iterator. + */ + const key &getKey() const + { + return hsh.getKeyAtPos( nPos ); + } + + /** + * Get the value behind this iterator. + *@returs (value_type &) The value behind this iterator. + */ + const value &getValue() const + { + return hsh.getValueAtPos( nPos ); + } + }; + + /** + * Get an iterator pointing to the first item in the hash table. + *@returns (iterator) An iterator pointing to the first item in the + * hash table. + */ + iterator begin() + { + return iterator( *this ); + } + + const_iterator begin() const + { + return const_iterator( *this ); + } + + /** + * Get an iterator pointing to a point just past the last item in the + * hash table. + *@returns (iterator) An iterator pointing to a point just past the + * last item in the hash table. + */ + iterator end() + { + return iterator( *this, true ); + } + + const_iterator end() const + { + return const_iterator( *this, true ); + } + + /** + * Get a list of all the keys in the hash table. + *@returns (std::list) The list of keys in the hash table. + */ + Bu::List getKeys() const + { + Bu::List lKeys; + + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + { + if( !isDeleted( j ) ) + { + lKeys.append( aKeys[j] ); + } + } + } + + return lKeys; + } + + protected: + virtual void onInsert() {} + virtual void onUpdate() {} + virtual void onDelete() {} + virtual void onReHash() {} + + virtual void clearBits() + { + for( uint32_t j = 0; j < nKeysSize; j++ ) + { + bFilled[j] = bDeleted[j] = 0; + } + } + + virtual void fill( uint32_t loc, key &k, value &v, uint32_t hash ) + { + bFilled[loc/32] |= (1<<(loc%32)); + va.construct( &aValues[loc], v ); + ka.construct( &aKeys[loc], k ); + aHashCodes[loc] = hash; + nFilled++; + //printf("Filled: %d, Deleted: %d, Capacity: %d\n", + // nFilled, nDeleted, nCapacity ); + } + + virtual void _erase( uint32_t loc ) + { + bDeleted[loc/32] |= (1<<(loc%32)); + va.destroy( &aValues[loc] ); + ka.destroy( &aKeys[loc] ); + nDeleted++; + //printf("Filled: %d, Deleted: %d, Capacity: %d\n", + // nFilled, nDeleted, nCapacity ); + } + + virtual std::pair getAtPos( uint32_t nPos ) + { + return std::pair(aKeys[nPos],aValues[nPos]); + } + + virtual key &getKeyAtPos( uint32_t nPos ) + { + return aKeys[nPos]; + } + + virtual const key &getKeyAtPos( uint32_t nPos ) const + { + return aKeys[nPos]; + } + + virtual value &getValueAtPos( uint32_t nPos ) + { + return aValues[nPos]; + } + + virtual const value &getValueAtPos( uint32_t nPos ) const + { + return aValues[nPos]; + } + + virtual uint32_t getFirstPos( bool &bFinished ) const + { + for( uint32_t j = 0; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + return j; + } + + bFinished = true; + return 0; + } + + virtual uint32_t getNextPos( uint32_t nPos, bool &bFinished ) const + { + for( uint32_t j = nPos+1; j < nCapacity; j++ ) + { + if( isFilled( j ) ) + if( !isDeleted( j ) ) + return j; + } + + bFinished = true; + return 0; + } + + uint32_t probe( uint32_t hash, key k, bool &bFill, bool rehash=true ) + { + uint32_t nCur = hash%nCapacity; + + // First we scan to see if the key is already there, abort if we + // run out of probing room, or we find a non-filled entry + int8_t j; + for( j = 0; + isFilled( nCur ) && j < 32; + nCur = (nCur + (1< uint32_t __calcHashCode( const int &k ); + template<> bool __cmpHashKeys( const int &a, const int &b ); + + template<> uint32_t __calcHashCode( const unsigned int &k ); + template<> bool __cmpHashKeys( const unsigned int &a, const unsigned int &b ); + + template<> uint32_t __calcHashCode( const char * const &k ); + template<> bool __cmpHashKeys( const char * const &a, const char * const &b ); + + template<> uint32_t __calcHashCode( char * const &k ); + template<> bool __cmpHashKeys( char * const &a, char * const &b ); + + template<> uint32_t __calcHashCode( const std::string &k ); + template<> bool __cmpHashKeys( const std::string &a, const std::string &b ); + + /* + template + Archive &operator<<( Archive &ar, Hash &h ) + { + ar << h.size(); + for( typename Hash::iterator i = h.begin(); i != h.end(); i++ ) + { + std::pair p = *i; + ar << p.first << p.second; + } + + return ar; + } + + template + Archive &operator>>( Archive &ar, Hash &h ) + { + h.clear(); + uint32_t nSize; + ar >> nSize; + + for( uint32_t j = 0; j < nSize; j++ ) + { + key k; value v; + ar >> k >> v; + h.insert( k, v ); + } + + return ar; + }*/ + + /* + template + Serializer &operator&&( Serializer &ar, Hash &h ) + { + if( ar.isLoading() ) + { + return ar >> h; + } + else + { + return ar << h; + } + }*/ +} + +#endif -- cgit v1.2.3