Generic Hash : Hash « Security « C# / C Sharp






Generic Hash

  
//******************************
// Written by Peter Golde
// Copyright (c) 2004-2007, Wintellect
//
// Use and restribution of this code is subject to the license agreement 
// contained in the file "License.txt" accompanying this file.
//******************************

using System;
using System.Diagnostics;
using System.Collections.Generic;
using System.Runtime.Serialization;


namespace Wintellect.PowerCollections
{
    /// <summary>
    /// The base implementation for various collections classes that use hash tables
    /// as part of their implementation. This class should not (and can not) be 
    /// used directly by end users; it's only for internal use by the collections package. The Hash
    /// does not handle duplicate values.
    /// </summary>
    /// <remarks>
    /// The Hash manages items of type T, and uses a IComparer&lt;ItemTYpe&gt; that
    /// hashes compares items to hash items into the table.  
    ///</remarks>
    internal class Hash<T> : IEnumerable<T>//, ISerializable, IDeserializationCallback
    {
        // NOTE: If you add new member variables, you very well may need to change the serialization
        // code to serialize that member.
        private IEqualityComparer<T> equalityComparer;      // interface for comparing elements

        private int count;            // The count of elements in the table.
        private int usedSlots;        // Includes real elements and deleted elements with the collision bit on. Used to determine
        // when we need to resize.
        private int totalSlots;               // Size of the table. Always a power of two.
        private float loadFactor;          // maximal load factor for the table.
        private int thresholdGrow;      // floor(totalSlots * loadFactor);
        private int thresholdShrink;     // thresholdGrow / 3.
        private int hashMask;              // Mask to convert hash values to the size of the table.
        private int secondaryShift;       // Shift to get the secondary skip value.

        private Slot[] table;                 // The hash table.

        private int changeStamp;        // An integer that is changed every time the table structurally changes.
        // Used so that enumerations throw an exception if the tree is changed
        // during enumeration.

        private const int MINSIZE = 16;       // minimum number of slots.

        //private SerializationInfo serializationInfo;       // Info used during deserialization.

        /// <summary>
        /// The structure that has each slot in the hash table. Each slot has three parts:
        /// 1. The collision bit. Indicates whether some item visited this slot but had to
        /// keep looking because the slot was full. 
        /// 2. 31-bit full hash value of the item. If zero, the slot is empty.
        /// 3. The item itself.
        /// </summary>
        struct Slot
        {
            private uint hash_collision;   // Lower 31 bits: the hash value. Top bit: the collision bit. 
            public T item;        // The item.

            /// <summary>
            /// The full hash value associated with the value in this slot, or zero
            /// if the slot is empty.
            /// </summary>
            public int HashValue
            {
                get
                {
                    return (int)(hash_collision & 0x7FFFFFFF);
                }
                set
                {
                    Debug.Assert((value & 0x80000000) == 0);  // make sure sign bit isn't set.
                    hash_collision = (uint)value | (hash_collision & 0x80000000);
                }
            }

            /// <summary>
            /// Is this slot empty?
            /// </summary>
            public bool Empty
            {
                get
                {
                    return HashValue == 0;
                }
            }

            /// <summary>
            /// Clear this slot, leaving the collision bit alone.
            /// </summary>
            public void Clear()
            {
                HashValue = 0;
                item = default(T);        // Done to avoid keeping things alive that shouldn't be.
            }

            /// <summary>
            /// The "Collision" bit indicates that some value hit this slot and
            /// collided, so had to try another slot.
            /// </summary>
            public bool Collision
            {
                get
                {
                    return (hash_collision & 0x80000000) != 0;
                }
                set
                {
                    if (value)
                        hash_collision |= 0x80000000;
                    else
                        hash_collision &= 0x7FFFFFFF;
                }
            }
        }

        /// <summary>
        /// Constructor. Create a new hash table.
        /// </summary>
        /// <param name="equalityComparer">The comparer to use to compare items. </param>
        public Hash(IEqualityComparer<T> equalityComparer)
        {
            this.equalityComparer = equalityComparer;
            this.loadFactor = 0.70F;           // default load factor.
        }

        /// <summary>
        /// Gets the current enumeration stamp. Call CheckEnumerationStamp later
        /// with this value to throw an exception if the hash table is changed.
        /// </summary>
        /// <returns>The current enumeration stamp.</returns>
        internal int GetEnumerationStamp()
        {
            return changeStamp;
        }

        /// <summary>
        /// Must be called whenever there is a structural change in the tree. Causes
        /// changeStamp to be changed, which causes any in-progress enumerations
        /// to throw exceptions.
        /// </summary>
        internal void StopEnumerations()
        {
            ++changeStamp;
        }

        /// <summary>
        /// Checks the given stamp against the current change stamp. If different, the
        /// collection has changed during enumeration and an InvalidOperationException
        /// must be thrown
        /// </summary>
        /// <param name="startStamp">changeStamp at the start of the enumeration.</param>
        internal void CheckEnumerationStamp(int startStamp)
        {
            if (startStamp != changeStamp)
            {
                throw new InvalidOperationException("Change During Enumeration");
            }
        }
        /// <summary>
        /// Gets the hash code for an object using a comparer. Correctly handles
        /// null.
        /// </summary>
        /// <param name="item">Item to get hash code for. Can be null.</param>
        /// <param name="equalityComparer">The comparer to use.</param>
        /// <returns>The hash code for the item.</returns>
        public static int GetHashCode<T>(T item, IEqualityComparer<T> equalityComparer)
        {
            if (item == null)
                return 0x1786E23C;
            else
                return equalityComparer.GetHashCode(item);
        }
        /// <summary>
        /// Gets the full hash code for an item.
        /// </summary>
        /// <param name="item">Item to get hash code for.</param>
        /// <returns>The full hash code. It is never zero.</returns>
        private int GetFullHash(T item)
        {
            uint hash;

            hash = (uint)GetHashCode(item, equalityComparer);

            // The .NET framework tends to produce pretty bad hash codes.
            // Scramble them up to be much more random!
            hash += ~(hash << 15);
            hash ^= (hash >> 10);
            hash += (hash << 3);
            hash ^= (hash >> 6);
            hash += ~(hash << 11);
            hash ^= (hash >> 16);
            hash &= 0x7FFFFFFF;
            if (hash == 0)
                hash = 0x7FFFFFFF;     // Make sure it isn't zero.
            return (int)hash;
        }

        /// <summary>
        /// Get the initial bucket number and skip amount from the full hash value.
        /// </summary>
        /// <param name="hash">The full hash value.</param>
        /// <param name="initialBucket">Returns the initial bucket. Always in the range 0..(totalSlots - 1).</param>
        /// <param name="skip">Returns the skip values. Always odd in the range 0..(totalSlots - 1).</param>
        private void GetHashValuesFromFullHash(int hash, out int initialBucket, out int skip)
        {
            initialBucket = hash & hashMask;

            // The skip value must be relatively prime to the table size. Since the table size is a 
            // power of two, any odd number is relatively prime, so oring in 1 will do it.
            skip = ((hash >> secondaryShift) & hashMask) | 1;
        }

        /// <summary>
        /// Gets the full hash value, initial bucket number, and skip amount for an item.
        /// </summary>
        /// <param name="item">Item to get hash value of.</param>
        /// <param name="initialBucket">Returns the initial bucket. Always in the range 0..(totalSlots - 1).</param>
        /// <param name="skip">Returns the skip values. Always odd in the range 0..(totalSlots - 1).</param>
        /// <returns>The full hash value. This is never zero.</returns>
        private int GetHashValues(T item, out int initialBucket, out int skip)
        {
            int hash = GetFullHash(item);
            GetHashValuesFromFullHash(hash, out initialBucket, out skip);
            return hash;
        }


        /// <summary>
        /// Make sure there are enough slots in the hash table that <paramref name="additionalItems"/>
        /// items can be inserted into the table.
        /// </summary>
        /// <param name="additionalItems">Number of additional items we are inserting.</param>
        private void EnsureEnoughSlots(int additionalItems)
        {
            StopEnumerations();

            if (usedSlots + additionalItems > thresholdGrow)
            {
                // We need to expand the table. Figure out to what size.
                int newSize;

                newSize = Math.Max(totalSlots, MINSIZE);
                while ((int)(newSize * loadFactor) < usedSlots + additionalItems)
                {
                    newSize *= 2;
                    if (newSize <= 0)
                    {
                        // Must have overflowed the size of an int. Hard to believe we didn't run out of memory first.
                        throw new InvalidOperationException("CollectionTooLarge");
                    }
                }

                ResizeTable(newSize);
            }
        }

        /// <summary>
        /// Check if the number of items in the table is small enough that
        /// we should shrink the table again.
        /// </summary>
        private void ShrinkIfNeeded()
        {
            if (count < thresholdShrink)
            {
                int newSize;

                if (count > 0)
                {
                    newSize = MINSIZE;
                    while ((int)(newSize * loadFactor) < count)
                        newSize *= 2;
                }
                else
                {
                    // We've removed all the elements. Shrink to zero.
                    newSize = 0;
                }

                ResizeTable(newSize);
            }
        }

        /// <summary>
        /// Given the size of a hash table, compute the "secondary shift" value -- the shift
        /// that is used to determine the skip amount for collision resolution.
        /// </summary>
        /// <param name="newSize">The new size of the table.</param>
        /// <returns>The secondary skip amount.</returns>
        private static int GetSecondaryShift(int newSize)
        {
            int x = newSize - 2;      // x is of the form 0000111110 -- a single string of 1's followed by a single zero.
            int secondaryShift = 0;

            // Keep shifting x until it is the set of bits we want to extract: it be the highest bits possible,
            // but can't overflow into the sign bit.
            while ((x & 0x40000000) == 0)
            {
                x <<= 1;
                ++secondaryShift;
            }

            return secondaryShift;
        }

        /// <summary>
        /// Resize the hash table to the given new size, moving all items into the
        /// new hash table.
        /// </summary>
        /// <param name="newSize">The new size of the hash table. Must be a power
        /// of two.</param>
        private void ResizeTable(int newSize)
        {
            Slot[] oldTable = table;        // Move all the items from this table to the new table.

            Debug.Assert((newSize & (newSize - 1)) == 0);            // Check newSize is a power of two.
            totalSlots = newSize;
            thresholdGrow = (int)(totalSlots * loadFactor);
            thresholdShrink = thresholdGrow / 3;
            if (thresholdShrink <= MINSIZE)
                thresholdShrink = 1;
            hashMask = newSize - 1;
            secondaryShift = GetSecondaryShift(newSize);
            if (totalSlots > 0)
                table = new Slot[totalSlots];
            else
                table = null;

            if (oldTable != null && table != null)
            {
                foreach (Slot oldSlot in oldTable)
                {
                    int hash, bucket, skip;

                    hash = oldSlot.HashValue;
                    GetHashValuesFromFullHash(hash, out bucket, out skip);

                    // Find an empty bucket.
                    while (!table[bucket].Empty)
                    {
                        // The slot is used, but isn't our item. Set the collision bit and keep looking.
                        table[bucket].Collision = true;
                        bucket = (bucket + skip) & hashMask;
                    }

                    // We found an empty bucket. 
                    table[bucket].HashValue = hash;
                    table[bucket].item = oldSlot.item;
                }
            }

            usedSlots = count;      // no deleted elements have the collision bit on now.
        }

        /// <summary>
        /// Get the number of items in the hash table.
        /// </summary>
        /// <value>The number of items stored in the hash table.</value>
        public int ElementCount
        {
            get
            {
                return count;
            }
        }

        /// <summary>
        /// Get the number of slots in the hash table. Exposed internally
        /// for testing purposes.
        /// </summary>
        /// <value>The number of slots in the hash table.</value>
        internal int SlotCount
        {
            get
            {
                return totalSlots;
            }
        }

        /// <summary>
        /// Get or change the load factor. Changing the load factor may cause
        /// the size of the table to grow or shrink accordingly.
        /// </summary>
        /// <value></value>
        public float LoadFactor
        {
            get
            {
                return loadFactor;
            }
            set
            {
                // Don't allow hopelessly inefficient load factors.
                if (value < 0.25 || value > 0.95)
                    throw new ArgumentOutOfRangeException("value", value.ToString());
                //throw new ArgumentOutOfRangeException("value", value, Strings.InvalidLoadFactor);

                StopEnumerations();

                bool maybeExpand = value < loadFactor;    // May need to expand or shrink the table -- which?

                // Update loadFactor and thresholds.
                loadFactor = value;
                thresholdGrow = (int)(totalSlots * loadFactor);
                thresholdShrink = thresholdGrow / 3;
                if (thresholdShrink <= MINSIZE)
                    thresholdShrink = 1;

                // Possibly expand or shrink the table.
                if (maybeExpand)
                    EnsureEnoughSlots(0);
                else
                    ShrinkIfNeeded();
            }
        }

        /// <summary>
        /// Insert a new item into the hash table. If a duplicate item exists, can replace or
        /// do nothing.
        /// </summary>
        /// <param name="item">The item to insert.</param>
        /// <param name="replaceOnDuplicate">If true, duplicate items are replaced. If false, nothing
        /// is done if a duplicate already exists.</param>
        /// <param name="previous">If a duplicate was found, returns it (whether replaced or not).</param>
        /// <returns>True if no duplicate existed, false if a duplicate was found.</returns>
        public bool Insert(T item, bool replaceOnDuplicate, out T previous)
        {
            int hash, bucket, skip;
            int emptyBucket = -1;                      // If >= 0, an empty bucket we can use for a true insert
            bool duplicateMightExist = true;      // If true, still the possibility that a duplicate exists.

            EnsureEnoughSlots(1);            // Ensure enough room to insert. Also stops enumerations.

            hash = GetHashValues(item, out bucket, out skip);

            for (; ; )
            {
                if (table[bucket].Empty)
                {
                    // Record the location of the first empty bucket seen. This is where the item will
                    // go if no duplicate exists.
                    if (emptyBucket == -1)
                        emptyBucket = bucket;

                    if (!duplicateMightExist || !table[bucket].Collision)
                    {
                        // There can't be a duplicate further on, because a bucket with the collision bit
                        // clear was found (here or earlier). We have the place to insert.
                        break;
                    }
                }
                else if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, item))
                {
                    // We found a duplicate item. Replace it if requested to.
                    previous = table[bucket].item;
                    if (replaceOnDuplicate)
                        table[bucket].item = item;
                    return false;
                }
                else
                {
                    // The slot is used, but isn't our item. 
                    if (!table[bucket].Collision)
                    {
                        // Since the collision bit is off, we can't have a duplicate. 
                        if (emptyBucket >= 0)
                        {
                            // We already have an empty bucket to use.
                            break;
                        }
                        else
                        {
                            // Keep searching for an empty bucket to place the item.
                            table[bucket].Collision = true;
                            duplicateMightExist = false;
                        }
                    }
                }

                bucket = (bucket + skip) & hashMask;
            }

            // We found an empty bucket. Insert the new item.
            table[emptyBucket].HashValue = hash;
            table[emptyBucket].item = item;

            ++count;
            if (!table[emptyBucket].Collision)
                ++usedSlots;
            previous = default(T);
            return true;
        }

        /// <summary>
        /// Deletes an item from the hash table. 
        /// </summary>
        /// <param name="item">Item to search for and delete.</param>
        /// <param name="itemDeleted">If true returned, the actual item stored in the hash table (must be 
        /// equal to <paramref name="item"/>, but may not be identical.</param>
        /// <returns>True if item was found and deleted, false if item wasn't found.</returns>
        public bool Delete(T item, out T itemDeleted)
        {
            int hash, bucket, skip;

            StopEnumerations();

            if (count == 0)
            {
                itemDeleted = default(T);
                return false;
            }

            hash = GetHashValues(item, out bucket, out skip);

            for (; ; )
            {
                if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, item))
                {
                    // Found the item. Remove it.
                    itemDeleted = table[bucket].item;
                    table[bucket].Clear();
                    --count;
                    if (!table[bucket].Collision)
                        --usedSlots;
                    ShrinkIfNeeded();
                    return true;
                }
                else if (!table[bucket].Collision)
                {
                    // No collision bit, so we can stop searching. No such element.
                    itemDeleted = default(T);
                    return false;
                }

                bucket = (bucket + skip) & hashMask;
            }
        }

        /// <summary>
        /// Find an item in the hash table. If found, optionally replace it with the
        /// finding item.
        /// </summary>
        /// <param name="find">Item to find.</param>
        /// <param name="replace">If true, replaces the equal item in the hash table
        /// with <paramref name="item"/>.</param>
        /// <param name="item">Returns the equal item found in the table, if true was returned.</param>
        /// <returns>True if the item was found, false otherwise.</returns>
        public bool Find(T find, bool replace, out T item)
        {
            int hash, bucket, skip;

            if (count == 0)
            {
                item = default(T);
                return false;
            }

            hash = GetHashValues(find, out bucket, out skip);

            for (; ; )
            {
                if (table[bucket].HashValue == hash && equalityComparer.Equals(table[bucket].item, find))
                {
                    // Found the item.  
                    item = table[bucket].item;
                    if (replace)
                        table[bucket].item = find;
                    return true;
                }
                else if (!table[bucket].Collision)
                {
                    // No collision bit, so we can stop searching. No such element.
                    item = default(T);
                    return false;
                }

                bucket = (bucket + skip) & hashMask;
            }
        }

        /// <summary>
        /// Enumerate all of the items in the hash table. The items
        /// are enumerated in a haphazard, unpredictable order.
        /// </summary>
        /// <returns>An IEnumerator&lt;T&gt; that enumerates the items
        /// in the hash table.</returns>
        public IEnumerator<T> GetEnumerator()
        {
            if (count > 0)
            {
                int startStamp = changeStamp;

                foreach (Slot slot in table)
                {
                    if (!slot.Empty)
                    {
                        yield return slot.item;
                        CheckEnumerationStamp(startStamp);
                    }
                }
            }
        }

        /// <summary>
        /// Enumerate all of the items in the hash table. The items
        /// are enumerated in a haphazard, unpredictable order.
        /// </summary>
        /// <returns>An IEnumerator that enumerates the items
        /// in the hash table.</returns>
        System.Collections.IEnumerator System.Collections.IEnumerable.GetEnumerator()
        {
            return GetEnumerator();
        }

        /// <summary>
        /// Creates a clone of this hash table.
        /// </summary>
        /// <param name="cloneItem">If non-null, this function is applied to each item when cloning. It must be the 
        /// case that this function does not modify the hash code or equality function.</param>
        /// <returns>A shallow clone that contains the same items.</returns>
        public Hash<T> Clone(Converter<T, T> cloneItem)
        {
            Hash<T> clone = new Hash<T>(equalityComparer);
            clone.count = this.count;
            clone.usedSlots = this.usedSlots;
            clone.totalSlots = this.totalSlots;
            clone.loadFactor = this.loadFactor;
            clone.thresholdGrow = this.thresholdGrow;
            clone.thresholdShrink = this.thresholdShrink;
            clone.hashMask = this.hashMask;
            clone.secondaryShift = this.secondaryShift;
            if (table != null)
            {
                clone.table = (Slot[])table.Clone();

                if (cloneItem != null)
                {
                    for (int i = 0; i < table.Length; ++i)
                    {
                        if (!table[i].Empty)
                            table[i].item = cloneItem(table[i].item);
                    }
                }
            }

            return clone;
        }

        #region Serialization
        /*

        /// <summary>
        /// Serialize the hash table. Called from the serialization infrastructure.
        /// </summary>
        void ISerializable.GetObjectData(SerializationInfo info, StreamingContext context)
        {
            if (info == null)
                throw new ArgumentNullException("info");

            info.AddValue("equalityComparer", equalityComparer, typeof(IEqualityComparer<T>));
            info.AddValue("loadFactor", loadFactor, typeof(float));
            T[] items = new T[count];
            int i = 0;
            foreach (Slot slot in table)
                if (! slot.Empty)
                    items[i++] = slot.item;
            info.AddValue("items", items, typeof(T[]));
        }

        /// <summary>
        /// Called on deserialization. We cannot deserialize now, because hash codes
        /// might not be correct now. We do real deserialization in the OnDeserialization call.
        /// </summary>
        protected Hash(SerializationInfo serInfo, StreamingContext context)
        {
            // Save away the serialization info for use later. We can't be sure of hash codes
            // being stable until the entire object graph is deserialized, so we wait until then
            // to deserialize.
            serializationInfo = serInfo;
        }

        /// <summary>
        /// Deserialize the hash table. Called from the serialization infrastructure when 
        /// the object graph has finished deserializing.
        /// </summary>
        void IDeserializationCallback.OnDeserialization(object sender)
        {
            if (serializationInfo == null)
                return;

            loadFactor = serializationInfo.GetSingle("loadFactor");
            equalityComparer = (IEqualityComparer<T>) serializationInfo.GetValue("equalityComparer", typeof(IEqualityComparer<T>));

            T[] items = (T[])serializationInfo.GetValue("items", typeof(T[]));
            T dummy;

            EnsureEnoughSlots(items.Length);
            foreach (T item in items)
                Insert(item, true, out dummy);

            serializationInfo = null;
        }
         */
        #endregion Serialization

#if DEBUG
        /// <summary>
        /// Print out basic stats about the hash table.
        /// </summary>
        internal void PrintStats()
        {
            Console.WriteLine("count={0}  usedSlots={1}  totalSlots={2}", count, usedSlots, totalSlots);
            Console.WriteLine("loadFactor={0}  thresholdGrow={1}  thresholdShrink={2}", loadFactor, thresholdGrow, thresholdShrink);
            Console.WriteLine("hashMask={0:X}  secondaryShift={1}", hashMask, secondaryShift);
            Console.WriteLine();
        }

        /// <summary>
        /// Print out the state of the hash table and each of the slots. Each slot looks like:
        ///     Slot    4: C 4513e41e hello
        /// where the "C" indicates the collision bit is on
        /// the next hex number is the hash value
        /// followed by ToString() on the item.
        /// </summary>
        internal void Print()
        {
            PrintStats();
            for (int i = 0; i < totalSlots; ++i)
                Console.WriteLine("Slot {0,4:X}: {1} {2,8:X} {3}", i, table[i].Collision ? "C" : " ",
                    table[i].HashValue, table[i].Empty ? "<empty>" : table[i].item.ToString());
            Console.WriteLine();
        }

        /// <summary>
        /// Check that everything appears to be OK in the hash table.
        /// </summary>
        internal void Validate()
        {
            Debug.Assert(count <= usedSlots);
            Debug.Assert(count <= totalSlots);
            Debug.Assert(usedSlots <= totalSlots);
            Debug.Assert(usedSlots <= thresholdGrow);
            Debug.Assert((int)(totalSlots * loadFactor) == thresholdGrow);
            if (thresholdShrink > 1)
                Debug.Assert(thresholdGrow / 3 == thresholdShrink);
            else
                Debug.Assert(thresholdGrow / 3 <= MINSIZE);
            if (totalSlots > 0)
            {
                Debug.Assert((totalSlots & (totalSlots - 1)) == 0);  // totalSlots is a power of two.
                Debug.Assert(totalSlots - 1 == hashMask);
                Debug.Assert(GetSecondaryShift(totalSlots) == secondaryShift);
                Debug.Assert(totalSlots == table.Length);
            }

            // Traverse the table. Make sure that count and usedSlots are right, and that
            // each slot looks reasonable.
            int expectedCount = 0, expectedUsed = 0, initialBucket, skip;
            if (table != null)
            {
                for (int i = 0; i < totalSlots; ++i)
                {
                    Slot slot = table[i];
                    if (slot.Empty)
                    {
                        // Empty slot
                        if (slot.Collision)
                            ++expectedUsed;
                        Debug.Assert(object.Equals(default(T), slot.item));
                    }
                    else
                    {
                        // not empty.
                        ++expectedCount;
                        ++expectedUsed;
                        Debug.Assert(slot.HashValue != 0);
                        Debug.Assert(GetHashValues(slot.item, out initialBucket, out skip) == slot.HashValue);
                        if (initialBucket != i)
                            Debug.Assert(table[initialBucket].Collision);
                    }
                }
            }

            Debug.Assert(expectedCount == count);
            Debug.Assert(expectedUsed == usedSlots);
        }

#endif //DEBUG

    }
}

   
    
  








Related examples in the same category

1.Verify Hex Hash, Base64 Hash, Byte Hash
2.Get Public Key Hash with HttpClientCertificate
3.Get Public Key Hash with SHA256
4.Get Public Key Hash for a string
5.Hash As Password
6.Generate Hash
7.Get Hash for password
8.Generates a hash code of the input string
9.Hash Data
10.Hash Password
11.Get SHA256 Hash
12.Create Password Hash