Implement the Set interface more compactly than java.util.HashSet in Java


The following code shows how to implement the Set interface more compactly than java.util.HashSet.


// from www.java2s.com
//package net.ontopia.utils;

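// WARNING: If you make any changes to this class, make sure that you
// update the classes that were derived or copied from it accordingly
// (the original list of class names is missing from this listing).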

import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/*
 * INTERNAL: Implements the Set interface more compactly than java.util.HashSet
 * by using a closed hashtable.
 */
/**
 * INTERNAL: Implements the Set interface more compactly than
 * {@code java.util.HashSet} by using a closed hashtable: every element lives
 * directly in one cell of a single backing array, and collisions are resolved
 * by probing further cells of that same array.
 *
 * <p>{@code null} elements are supported; they are stored internally as the
 * {@link #nullObject} sentinel. Removed elements are replaced by the
 * {@link #deletedObject} sentinel so that probe chains running through the
 * removed cell stay intact.
 *
 * <p>This class performs no synchronization of its own.
 *
 * @param <E> the type of elements maintained by this set
 */
public class CompactHashSet<E> extends java.util.AbstractSet<E> {

  /** Capacity of the backing array used by the no-argument constructor. */
  protected final static int INITIAL_SIZE = 3;

  /**
   * Maximum fraction of non-empty cells (live elements plus deleted markers)
   * tolerated after an insertion before the table is rehashed.
   */
  protected final static double LOAD_FACTOR = 0.75;

  /**
   * This object is used to represent null, should clients add that to the
   * set.
   */
  protected final static Object nullObject = new Object();

  /**
   * When an object is deleted this object is put into the hashtable in its
   * place, so that other objects with the same key (collisions) further down
   * the hashtable are not lost after we delete an object in the collision
   * chain.
   */
  protected final static Object deletedObject = new Object();

  /** Number of live elements currently stored in the set. */
  protected int elements;

  /**
   * This is the number of empty (null) cells. It's not necessarily the same
   * as objects.length - elements, because some cells may contain
   * deletedObject.
   */
  protected int freecells;

  /**
   * The hashtable itself. A cell holds {@code null} (never used), a live
   * element, {@link #nullObject} or {@link #deletedObject}.
   */
  protected E[] objects;

  /** Structural modification counter, used by iterators to fail fast. */
  protected int modCount;

  /**
   * Constructs a new, empty set with the default initial capacity.
   */
  public CompactHashSet() {
    this(INITIAL_SIZE);
  }

  /**
   * Constructs a new, empty set.
   *
   * @param size
   *            the initial capacity of the hashtable; a value of 0 is
   *            replaced by 1.
   */
  @SuppressWarnings("unchecked")
  public CompactHashSet(int size) {
    // NOTE: If array size is 0, we get a
    // "java.lang.ArithmeticException: / by zero" in add(Object).
    objects = (E[]) new Object[(size == 0 ? 1 : size)];
    elements = 0;
    freecells = objects.length;
    modCount = 0;
  }

  /**
   * Constructs a new set containing the elements in the specified collection.
   *
   * @param c
   *            the collection whose elements are to be placed into this set.
   */
  public CompactHashSet(Collection<E> c) {
    this(c.size());
    addAll(c);
  }

  /**
   * Returns an iterator over the elements in this set. The elements are
   * returned in no particular order.
   *
   * @return an Iterator over the elements in this set.
   * @see ConcurrentModificationException
   */
  @Override
  public Iterator<E> iterator() {
    return new CompactHashIterator<E>();
  }

  /**
   * Returns the number of elements in this set (its cardinality).
   *
   * @return the number of live elements.
   */
  @Override
  public int size() {
    return elements;
  }

  /**
   * Returns {@code true} if this set contains no elements.
   *
   * @return {@code true} if this set contains no elements.
   */
  @Override
  public boolean isEmpty() {
    return elements == 0;
  }

  /**
   * Returns {@code true} if this set contains the specified element.
   *
   * @param o
   *            element whose presence in this set is to be tested.
   * @return {@code true} if this set contains the specified element.
   */
  @Override
  public boolean contains(Object o) {
    if (o == null) {
      o = nullObject;
    }

    int hash = o.hashCode();
    int index = (hash & 0x7FFFFFFF) % objects.length;
    int offset = 1;

    // search for the object (continue while !null and !this object)
    while (objects[index] != null
        && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
      offset = offset * 2 + 1;

      // offset * 2 + 1 overflowed to -1; restart the step sequence
      if (offset == -1) {
        offset = 2;
      }
    }

    // the probe stopped either on an empty cell (absent) or on the element
    return objects[index] != null;
  }

  /**
   * Adds the specified element to this set if it is not already present.
   *
   * <p>The parameter is declared as {@code Object} rather than {@code E}
   * because the internal null sentinel has to be stored in the same array.
   *
   * @param o
   *            element to be added to this set.
   * @return {@code true} if the set did not already contain the specified
   *         element.
   */
  @SuppressWarnings("unchecked")
  public boolean add(Object o) {
    if (o == null) {
      o = nullObject;
    }

    int hash = o.hashCode();
    int index = (hash & 0x7FFFFFFF) % objects.length;
    int offset = 1;
    int deletedix = -1;

    // search for the object (continue while !null and !this object)
    while (objects[index] != null
        && !(objects[index].hashCode() == hash && objects[index].equals(o))) {

      // if there's a deleted object here we can put this object here,
      // provided it's not in here somewhere else already
      if (objects[index] == deletedObject) {
        deletedix = index;
      }

      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
      offset = offset * 2 + 1;

      if (offset == -1) {
        offset = 2;
      }
    }

    if (objects[index] != null) {
      // was there already
      return false;
    }

    // wasn't present already
    if (deletedix != -1) {
      // reusing a deleted cell: the number of empty cells is unchanged
      index = deletedix;
    } else {
      freecells--;
    }

    modCount++;
    elements++;

    // The cast is unchecked; at runtime the backing array is an Object[],
    // which is what allows the sentinels to be stored alongside elements.
    objects[index] = (E) o;

    // do we need to rehash?
    if (1 - (freecells / (double) objects.length) > LOAD_FACTOR) {
      rehash();
    }
    return true;
  }

  /**
   * Removes the specified element from the set.
   *
   * @param o
   *            element to be removed from this set, if present.
   * @return {@code true} if the set contained the specified element.
   */
  @Override
  @SuppressWarnings("unchecked")
  public boolean remove(Object o) {
    if (o == null) {
      o = nullObject;
    }

    int hash = o.hashCode();
    int index = (hash & 0x7FFFFFFF) % objects.length;
    int offset = 1;

    // search for the object (continue while !null and !this object)
    while (objects[index] != null
        && !(objects[index].hashCode() == hash && objects[index].equals(o))) {
      index = ((index + offset) & 0x7FFFFFFF) % objects.length;
      offset = offset * 2 + 1;

      if (offset == -1) {
        offset = 2;
      }
    }

    // we found the right position, now do the removal
    if (objects[index] == null) {
      // we did not find the object
      return false;
    }

    // we found the object; leave a marker so probe chains stay connected
    objects[index] = (E) deletedObject;
    modCount++;
    elements--;
    return true;
  }

  /**
   * Removes all of the elements from this set. The capacity of the
   * hashtable is left unchanged.
   */
  @Override
  public void clear() {
    elements = 0;
    for (int ix = 0; ix < objects.length; ix++) {
      objects[ix] = null;
    }
    freecells = objects.length;
    modCount++;
  }

  /**
   * Returns a newly allocated array containing all elements of this set, in
   * no particular order. A stored null is returned as {@code null}.
   *
   * @return an array of length {@link #size()} holding the elements.
   */
  @Override
  public Object[] toArray() {
    Object[] result = new Object[elements];
    Object[] cells = this.objects;
    int pos = 0;
    for (int i = 0; i < cells.length; i++) {
      if (cells[i] != null && cells[i] != deletedObject) {
        if (cells[i] == nullObject) {
          result[pos++] = null;
        } else {
          result[pos++] = cells[i];
        }
      }
    }
    return result;
  }

  /**
   * Copies all elements of this set into the given array if it is large
   * enough, otherwise into a new array of the same component type. If the
   * array has room to spare, the cell immediately after the last element is
   * set to {@code null}.
   *
   * @param a
   *            the array to fill, or whose runtime type is used to allocate
   *            a bigger one.
   * @param <T>
   *            the component type of the array.
   * @return the array containing the elements of this set.
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> T[] toArray(T[] a) {
    int size = elements;
    if (a.length < size) {
      a = (T[]) java.lang.reflect.Array.newInstance(
          a.getClass().getComponentType(), size);
    }
    Object[] cells = this.objects;
    int pos = 0;
    for (int i = 0; i < cells.length; i++) {
      if (cells[i] != null && cells[i] != deletedObject) {
        if (cells[i] == nullObject) {
          a[pos++] = null;
        } else {
          a[pos++] = (T) cells[i];
        }
      }
    }
    // mark the end of the data when the caller's array is larger than the set
    if (a.length > size) {
      a[size] = null;
    }
    return a;
  }

  /**
   * INTERNAL: Used for debugging only. Prints the capacity, the counters and
   * the raw content of every cell to standard output.
   */
  public void dump() {
    System.out.println("Size: " + objects.length);
    System.out.println("Elements: " + elements);
    System.out.println("Free cells: " + freecells);
    for (int ix = 0; ix < objects.length; ix++) {
      System.out.println("[" + ix + "]: " + objects[ix]);
    }
  }

  /**
   * INTERNAL: Figures out correct size for rehashed set, then does the
   * rehash.
   */
  protected void rehash() {
    // do we need to increase capacity, or are there so many
    // deleted objects hanging around that rehashing to the same
    // size is sufficient? if 5% (arbitrarily chosen number) of
    // cells can be freed up by a rehash, we do it.

    int garbageCells = objects.length - (elements + freecells);
    if (garbageCells / (double) objects.length > 0.05) {
      // rehash with same size
      rehash(objects.length);
    } else {
      // rehash with increased capacity
      rehash(objects.length * 2 + 1);
    }
  }

  /**
   * INTERNAL: Rehashes the hashset into a table of the given capacity,
   * dropping all deleted markers on the way.
   *
   * @param newCapacity
   *            the length of the new backing array.
   */
  @SuppressWarnings("unchecked")
  protected void rehash(int newCapacity) {
    int oldCapacity = objects.length;
    E[] newObjects = (E[]) new Object[newCapacity];

    for (int ix = 0; ix < oldCapacity; ix++) {
      Object o = objects[ix];
      if (o == null || o == deletedObject) {
        continue;
      }

      int hash = o.hashCode();
      int index = (hash & 0x7FFFFFFF) % newCapacity;
      int offset = 1;

      // search for a free cell; no need to test for duplicates
      while (newObjects[index] != null) {
        index = ((index + offset) & 0x7FFFFFFF) % newCapacity;
        offset = offset * 2 + 1;

        if (offset == -1) {
          offset = 2;
        }
      }

      newObjects[index] = (E) o;
    }

    objects = newObjects;
    // the new table contains no deleted markers
    freecells = objects.length - elements;
  }

  /**
   * Iterator over the live cells of the enclosing set's backing array.
   */
  private class CompactHashIterator<T> implements Iterator<T> {
    /** Index of the next live cell, or objects.length when exhausted. */
    private int index;

    /**
     * Index of the cell returned by the last call to next(); -1 before the
     * first call and -2 after next() has run off the end.
     */
    private int lastReturned = -1;

    /**
     * The modCount value that the iterator believes that the backing
     * CompactHashSet should have. If this expectation is violated, the
     * iterator has detected concurrent modification.
     */
    private int expectedModCount;

    public CompactHashIterator() {
      // position on the first live cell
      for (index = 0; index < objects.length
          && (objects[index] == null || objects[index] == deletedObject); index++) {
        // skipping empty and deleted cells
      }
      expectedModCount = modCount;
    }

    @Override
    public boolean hasNext() {
      return index < objects.length;
    }

    @Override
    @SuppressWarnings({ "empty-statement", "unchecked" })
    public T next() {
      if (modCount != expectedModCount) {
        throw new ConcurrentModificationException();
      }
      int length = objects.length;
      if (index >= length) {
        lastReturned = -2;
        throw new NoSuchElementException();
      }

      lastReturned = index;
      // advance to the next live cell
      for (index += 1; index < length
          && (objects[index] == null || objects[index] == deletedObject); index++) {
        // skipping empty and deleted cells
      }
      if (objects[lastReturned] == nullObject) {
        return null;
      } else {
        return (T) objects[lastReturned];
      }
    }

    @Override
    @SuppressWarnings("unchecked")
    public void remove() {
      if (modCount != expectedModCount) {
        throw new ConcurrentModificationException();
      }
      if (lastReturned == -1 || lastReturned == -2) {
        throw new IllegalStateException();
      }
      // delete object
      if (objects[lastReturned] != null
          && objects[lastReturned] != deletedObject) {
        objects[lastReturned] = (E) deletedObject;
        elements--;
        modCount++;
        expectedModCount = modCount; // this is expected; we made the
                        // change
      }
    }
  }
}


Home »
  Java Tutorial »
    Java Collection »

Java ArrayList
Java Collection
Java Comparable
Java Comparator
Java HashMap
Java HashSet
Java Iterator
Java LinkedHashMap
Java LinkedHashSet
Java LinkedList
Java List
Java ListIterator
Java Map
Java Set
Java TreeMap