Compact HashSet : HashSet « Collections Data Structure « Java






Compact HashSet

    
//package net.ontopia.utils;

// WARNING: If you do any changes to this class, make sure that you
// update CompactIdentityHashSet.java, UniqueSet.java and
// SoftHashMapIndex.java accordingly.

import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * INTERNAL: Implements the Set interface more compactly than java.util.HashSet
 * by using a closed hashtable.
 */
public class CompactHashSet<E> extends java.util.AbstractSet<E> {

  protected final static int INITIAL_SIZE = 3;
  protected final static double LOAD_FACTOR = 0.75;

  /**
   * This object is used to represent null, should clients add that to the
   * set.
   */
  protected final static Object nullObject = new Object();
  /**
   * When an object is deleted this object is put into the hashtable in its
   * place, so that other objects with the same key (collisions) further down
   * the hashtable are not lost after we delete an object in the collision
   * chain.
   */
  protected final static Object deletedObject = new Object();
  protected int elements;
  /**
   * This is the number of empty (null) cells. It's not necessarily the same
   * as objects.length - elements, because some cells may contain
   * deletedObject.
   */
  protected int freecells;
  protected E[] objects;
  protected int modCount;

  /**
   * Constructs a new, empty set.
   */
  public CompactHashSet() {
    this(INITIAL_SIZE);
  }

  /**
   * Constructs a new, empty set.
   */
  @SuppressWarnings("unchecked")
  public CompactHashSet(int size) {
    // NOTE: If array size is 0, we get a
    // "java.lang.ArithmeticException: / by zero" in add(Object).
    objects = (E[]) new Object[(size == 0 ? 1 : size)];
    elements = 0;
    freecells = objects.length;
    modCount = 0;
  }

  /**
   * Constructs a new set containing the elements in the specified collection.
   * 
   * @param c
   *            the collection whose elements are to be placed into this set.
   */
  public CompactHashSet(Collection<E> c) {
    this(c.size());
    addAll(c);
  }

  // SET IMPLEMENTATION 

  /**
   * Returns an iterator over the elements in this set. The elements are
   * returned in no particular order.
   * 
   * @return an Iterator over the elements in this set.
   * @see ConcurrentModificationException
   */
  public Iterator<E> iterator() {
    return new CompactHashIterator<E>();
  }

  /**
   * Returns the number of elements in this set (its cardinality).
   */
  public int size() {
    return elements;
  }

  /**
   * Returns <tt>true</tt> if this set contains no elements.
   */
  public boolean isEmpty() {
    return elements == 0;
  }

  /**
   * Returns <tt>true</tt> if this set contains the specified element.
   * 
   * @param o
   *            element whose presence in this set is to be tested.
   * @return <tt>true</tt> if this set contains the specified element.
   */
  public boolean contains(Object o) {
    if (o == null)
      o = nullObject;

    int hash = o.hashCode();
    int index = (hash & 0x7FFFFFFF) % objects.length;
    int offset = 1;

    // search for the object (continue while !null and !this object)
    while (objects[index] != null
        && !(objects[index].hashCode() == hash && objects[index]
            .equals(o))) {
      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
      offset = offset * 2 + 1;

      if (offset == -1)
        offset = 2;
    }

    return objects[index] != null;
  }

  /**
   * Adds the specified element to this set if it is not already present.
   * 
   * @param o
   *            element to be added to this set.
   * @return <tt>true</tt> if the set did not already contain the specified
   *         element.
   */
  @SuppressWarnings("unchecked")
  public boolean add(Object o) {
    if (o == null)
      o = nullObject;

    int hash = o.hashCode();
    int index = (hash & 0x7FFFFFFF) % objects.length;
    int offset = 1;
    int deletedix = -1;

    // search for the object (continue while !null and !this object)
    while (objects[index] != null
        && !(objects[index].hashCode() == hash && objects[index]
            .equals(o))) {

      // if there's a deleted object here we can put this object here,
      // provided it's not in here somewhere else already
      if (objects[index] == deletedObject)
        deletedix = index;

      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
      offset = offset * 2 + 1;

      if (offset == -1)
        offset = 2;
    }

    if (objects[index] == null) { // wasn't present already
      if (deletedix != -1) // reusing a deleted cell
        index = deletedix;
      else
        freecells--;

      modCount++;
      elements++;

      // here we face a problem regarding generics:
      // add(E o) is not possible because of the null Object. We cant do
      // 'new E()' or '(E) new Object()'
      // so adding an empty object is a problem here
      // If (! o instanceof E) : This will cause a class cast exception
      // If (o instanceof E) : This will work fine

      objects[index] = (E) o;

      // do we need to rehash?
      if (1 - (freecells / (double) objects.length) > LOAD_FACTOR)
        rehash();
      return true;
    } else
      // was there already
      return false;
  }

  /**
   * Removes the specified element from the set.
   */
  @SuppressWarnings("unchecked")
  public boolean remove(Object o) {
    if (o == null)
      o = nullObject;

    int hash = o.hashCode();
    int index = (hash & 0x7FFFFFFF) % objects.length;
    int offset = 1;

    // search for the object (continue while !null and !this object)
    while (objects[index] != null
        && !(objects[index].hashCode() == hash && objects[index]
            .equals(o))) {
      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
      offset = offset * 2 + 1;

      if (offset == -1)
        offset = 2;
    }

    // we found the right position, now do the removal
    if (objects[index] != null) {
      // we found the object

      // same problem here as with add
      objects[index] = (E) deletedObject;
      modCount++;
      elements--;
      return true;
    } else
      // we did not find the object
      return false;
  }

  /**
   * Removes all of the elements from this set.
   */
  public void clear() {
    elements = 0;
    for (int ix = 0; ix < objects.length; ix++)
      objects[ix] = null;
    freecells = objects.length;
    modCount++;
  }

  public Object[] toArray() {
    Object[] result = new Object[elements];
    Object[] objects = this.objects;
    int pos = 0;
    for (int i = 0; i < objects.length; i++)
      if (objects[i] != null && objects[i] != deletedObject) {
        if (objects[i] == nullObject)
          result[pos++] = null;
        else
          result[pos++] = objects[i];
      }
    // unchecked because it should only contain E
    return result;
  }

  // not sure if this needs to have generics
  @SuppressWarnings("unchecked")
  public <T> T[] toArray(T[] a) {
    int size = elements;
    if (a.length < size)
      a = (T[]) java.lang.reflect.Array.newInstance(a.getClass()
          .getComponentType(), size);
    E[] objects = this.objects;
    int pos = 0;
    for (int i = 0; i < objects.length; i++)
      if (objects[i] != null && objects[i] != deletedObject) {
        if (objects[i] == nullObject)
          a[pos++] = null;
        else
          a[pos++] = (T) objects[i];
      }
    return a;
  }

  //  INTERNAL METHODS 

  /**
   * INTERNAL: Used for debugging only.
   */
  public void dump() {
    System.out.println("Size: " + objects.length);
    System.out.println("Elements: " + elements);
    System.out.println("Free cells: " + freecells);
    System.out.println();
    for (int ix = 0; ix < objects.length; ix++)
      System.out.println("[" + ix + "]: " + objects[ix]);
  }

  /**
   * INTERNAL: Figures out correct size for rehashed set, then does the
   * rehash.
   */
  protected void rehash() {
    // do we need to increase capacity, or are there so many
    // deleted objects hanging around that rehashing to the same
    // size is sufficient? if 5% (arbitrarily chosen number) of
    // cells can be freed up by a rehash, we do it.

    int gargagecells = objects.length - (elements + freecells);
    if (gargagecells / (double) objects.length > 0.05)
      // rehash with same size
      rehash(objects.length);
    else
      // rehash with increased capacity
      rehash(objects.length * 2 + 1);
  }

  /**
   * INTERNAL: Rehashes the hashset to a bigger size.
   */
  @SuppressWarnings("unchecked")
  protected void rehash(int newCapacity) {
    int oldCapacity = objects.length;
    @SuppressWarnings("unchecked")
    E[] newObjects = (E[]) new Object[newCapacity];

    for (int ix = 0; ix < oldCapacity; ix++) {
      Object o = objects[ix];
      if (o == null || o == deletedObject)
        continue;

      int hash = o.hashCode();
      int index = (hash & 0x7FFFFFFF) % newCapacity;
      int offset = 1;

      // search for the object
      while (newObjects[index] != null) { // no need to test for
                        // duplicates
        index = ((index + offset) & 0x7FFFFFFF) % newCapacity;
        offset = offset * 2 + 1;

        if (offset == -1)
          offset = 2;
      }

      newObjects[index] = (E) o;
    }

    objects = newObjects;
    freecells = objects.length - elements;
  }

  // ITERATOR IMPLEMENTATON 

  private class CompactHashIterator<T> implements Iterator<T> {
    private int index;
    private int lastReturned = -1;

    /**
     * The modCount value that the iterator believes that the backing
     * CompactHashSet should have. If this expectation is violated, the
     * iterator has detected concurrent modification.
     */
    private int expectedModCount;

    @SuppressWarnings("empty-statement")
    public CompactHashIterator() {
      for (index = 0; index < objects.length
          && (objects[index] == null || objects[index] == deletedObject); index++)
        ;
      expectedModCount = modCount;
    }

    public boolean hasNext() {
      return index < objects.length;
    }

    @SuppressWarnings({ "empty-statement", "unchecked" })
    public T next() {
      if (modCount != expectedModCount)
        throw new ConcurrentModificationException();
      int length = objects.length;
      if (index >= length) {
        lastReturned = -2;
        throw new NoSuchElementException();
      }

      lastReturned = index;
      for (index += 1; index < length
          && (objects[index] == null || objects[index] == deletedObject); index++)
        ;
      if (objects[lastReturned] == nullObject)
        return null;
      else
        return (T) objects[lastReturned];
    }

    @SuppressWarnings("unchecked")
    public void remove() {
      if (modCount != expectedModCount)
        throw new ConcurrentModificationException();
      if (lastReturned == -1 || lastReturned == -2)
        throw new IllegalStateException();
      // delete object
      if (objects[lastReturned] != null
          && objects[lastReturned] != deletedObject) {
        objects[lastReturned] = (E) deletedObject;
        elements--;
        modCount++;
        expectedModCount = modCount; // this is expected; we made the
                        // change
      }
    }
  }

}

   
    
    
    
  








Related examples in the same category

1.Add values to HashSet
2.HashSet implementation of setHashSet implementation of set
3.Generic collection conversion: HashSet and ArrayList
4.Remove one set from another set
5.Remove element from HashSet
6.Find maximum element of Java HashSet
7.Find Minimum element of Java HashSet
8.Get Enumeration over Java HashSet
9.Get Synchronized Set from Java HashSet
10.Check if a particular element exists in Java HashSet
11.Copy all elements of Java HashSet to an Object Array
12.Get Size of Java HashSet
13.Iterate through elements of Java HashSet
14.Integer value set
15.Listing the Elements of a Collection(iterate over the elements of set or list)
16.Remove specified element from Java HashSet
17.Remove all elements from Java HashSet
18.Convert array to Set
19.Implements a HashSet where the objects given are stored in weak references
20.Convert an ArrayList to HashSet
21.Create an array containing the elements in a set
22.Duplicate elements are discarded
23.Convert Set into array
24.A memory-efficient hash set.
25.Coarse-grained hash set.
26.Concurrent hash set that allows the lock array to be resized.
27.A CompactHashSet implements the set interface more tightly in memory and more efficiently than Java's java.util.HashSet.
28.Concurrent HashSet