javaewah.EWAHCompressedBitmap.java Source code

Java tutorial

Introduction

Here is the source code for javaewah.EWAHCompressedBitmap.java

Source

package javaewah;

/*
 * Copyright 2009-2012, Daniel Lemire
 * Licensed under APL 2.0.
 */
import java.io.DataInput;
import java.io.DataOutput;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.*;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;

/**
 * <p>This implements the patent-free(1) EWAH scheme. Roughly speaking, it is a
 * 64-bit variant of the BBC compression scheme used by Oracle for its bitmap
 * indexes.</p>
 * 
 * <p>The objective of this compression type is to provide some compression, while
 * reducing as much as possible the CPU cycle usage.</p>
 * 
 * 
 * <p>This implementation being 64-bit, it assumes a 64-bit CPU together with a
 * 64-bit Java Virtual Machine. This same code on a 32-bit machine may not be as
 * fast.</p>
 * 
 * <p>For more details, see the following paper:</p>
 * 
 * <ul><li>Daniel Lemire, Owen Kaser, Kamel Aouiche, Sorting improves word-aligned
 * bitmap indexes. Data &amp; Knowledge Engineering 69 (1), pages 3-28, 2010.
 * http://arxiv.org/abs/0901.3751</li>
 * </ul>
 * 
 * <p>It was first described by Wu et al. and named WBC:</p>
 * 
 * <ul><li>K. Wu, E. J. Otoo, A. Shoshani, H. Nordberg, Notes on design and
 * implementation of compressed bit vectors, Tech. Rep. LBNL/PUB-3161, Lawrence
 * Berkeley National Laboratory, available from
 * http://crd.lbl.gov/~kewu/ps/PUB-3161.html (2001).</li>
 * </ul>
 * 
 * <p>1- The author (D. Lemire) does not know of any patent infringed by the
 * following implementation. However, similar schemes, like WAH are covered by
 * patents.</p>
 */
public final class EWAHCompressedBitmap extends DocIdSet
        implements Bits, Cloneable, Externalizable /* conflict with DocIdSet, Iterable<Integer> */ {

    /**
     * Creates an empty bitmap (no bit set to true).
     */
    public EWAHCompressedBitmap() {
        this.buffer = new long[defaultbuffersize];
        this.rlw = new RunningLengthWord(this.buffer, 0);
    }

    /**
     * Sets explicitly the buffer size (in 64-bit words). The initial memory usage
     * will be "buffersize * 64". For large poorly compressible bitmaps, using
     * large values may improve performance.
     *
     * @param buffersize number of 64-bit words reserved when the object is created)
     */
    public EWAHCompressedBitmap(final int buffersize) {
        this.buffer = new long[buffersize];
        this.rlw = new RunningLengthWord(this.buffer, 0);
    }

    /**
     * Creates a specialised iterator that walks this bitmap one run length
     * word at a time, limited to the words currently in use. For experts only.
     *
     * @return a new EWAHIterator over this bitmap's buffer
     */
    private EWAHIterator getEWAHIterator() {
        final EWAHIterator wordIterator = new EWAHIterator(this.buffer, this.actualsizeinwords);
        return wordIterator;
    }

    /**
     * Returns a new compressed bitmap containing the bitwise XOR of the
     * current bitmap with some other bitmap.
     * 
     * The running time is proportional to the sum of the compressed sizes (as
     * reported by sizeInBytes()).
     *
     * The two compressed streams are merged one run length word at a time. In
     * each round the word with the smaller size() is called the "prey" and
     * the other one the "predator"; the prey is consumed entirely and then
     * replaced by the next word of its bitmap.
     *
     * @param a the other bitmap
     * @return the EWAH compressed bitmap; on the normal path its size in bits
     *         is the larger of the two operands' sizes
     */
    public EWAHCompressedBitmap xor(final EWAHCompressedBitmap a) {
        final EWAHCompressedBitmap container = new EWAHCompressedBitmap();
        container.reserve(this.actualsizeinwords + a.actualsizeinwords);
        // i walks the argument, j walks this bitmap
        final EWAHIterator i = a.getEWAHIterator();
        final EWAHIterator j = getEWAHIterator();
        if (!(i.hasNext() && j.hasNext())) {// this never happens...
            // defensive: return an empty container carrying this bitmap's size
            container.sizeinbits = sizeInBits();
            return container;
        }
        // at this point, this is safe:
        BufferedRunningLengthWord rlwi = new BufferedRunningLengthWord(i.next());
        BufferedRunningLengthWord rlwj = new BufferedRunningLengthWord(j.next());
        while (true) {
            // the smaller word is the prey; on a tie, j's word is the prey
            final boolean i_is_prey = rlwi.size() < rlwj.size();
            final BufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj;
            final BufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi;
            // Step 1: consume the prey's run
            if (prey.getRunningBit() == false) {
                // prey run is all zeros: x XOR 0 == x, so the predator is copied
                final long predatorrl = predator.getRunningLength();
                final long preyrl = prey.getRunningLength();
                // overlap between the two runs: min(predatorrl, preyrl)
                final long tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
                container.addStreamOfEmptyWords(predator.getRunningBit(), tobediscarded);
                // the rest of the prey's run faces the predator's literal words
                final long dw_predator = predator.dirtywordoffset + (i_is_prey ? j.dirtyWords() : i.dirtyWords());
                container.addStreamOfDirtyWords(i_is_prey ? j.buffer() : i.buffer(), dw_predator,
                        preyrl - tobediscarded);
                predator.discardFirstWords(preyrl);
                prey.discardFirstWords(preyrl);
            } else {
                // prey run is all ones: x XOR 1 == NOT x, so the predator is
                // copied with every bit flipped
                final long predatorrl = predator.getRunningLength();
                final long preyrl = prey.getRunningLength();
                // overlap between the two runs: min(predatorrl, preyrl)
                final long tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
                container.addStreamOfEmptyWords(!predator.getRunningBit(), tobediscarded);
                final int dw_predator = predator.dirtywordoffset + (i_is_prey ? j.dirtyWords() : i.dirtyWords());
                final long[] buf = i_is_prey ? j.buffer() : i.buffer();
                // negate the predator's literal words one by one
                for (int k = 0; k < preyrl - tobediscarded; ++k) {
                    container.add(~buf[k + dw_predator]);
                }
                predator.discardFirstWords(preyrl);
                prey.discardFirstWords(preyrl);
            }
            // Step 2: whatever is left of the predator's run is matched
            // against the prey's literal words
            final long predatorrl = predator.getRunningLength();
            if (predatorrl > 0) {
                if (predator.getRunningBit() == false) {
                    // run of zeros: the prey's literal words pass through unchanged
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    // offset is computed before the words are discarded
                    final long dw_prey = prey.dirtywordoffset + (i_is_prey ? i.dirtyWords() : j.dirtyWords());
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                    container.addStreamOfDirtyWords(i_is_prey ? i.buffer() : j.buffer(), dw_prey, tobediscarded);
                } else {
                    // run of ones: the prey's literal words are negated
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    // offset is computed before the words are discarded
                    final int dw_prey = prey.dirtywordoffset + (i_is_prey ? i.dirtyWords() : j.dirtyWords());
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                    final long[] buf = i_is_prey ? i.buffer() : j.buffer();
                    for (int k = 0; k < tobediscarded; ++k) {
                        container.add(~buf[k + dw_prey]);
                    }
                }
            }
            // Step 3: all that is left to do now is to XOR the remaining
            // literal words of the prey with the predator's literal words
            final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
            if (nbre_dirty_prey > 0) {
                for (int k = 0; k < nbre_dirty_prey; ++k) {
                    if (i_is_prey)
                        container.add(i.buffer()[prey.dirtywordoffset + i.dirtyWords() + k]
                                ^ j.buffer()[predator.dirtywordoffset + j.dirtyWords() + k]);
                    else
                        container.add(i.buffer()[predator.dirtywordoffset + i.dirtyWords() + k]
                                ^ j.buffer()[prey.dirtywordoffset + j.dirtyWords() + k]);
                }
                predator.discardFirstWords(nbre_dirty_prey);
            }
            // Step 4: the prey is exhausted; fetch the next word from its
            // bitmap, or stop if that bitmap has no more words
            if (i_is_prey) {
                if (!i.hasNext()) {
                    rlwi = null;
                    break;
                }
                rlwi.reset(i.next());
            } else {
                if (!j.hasNext()) {
                    rlwj = null;
                    break;
                }
                rlwj.reset(j.next());
            }
        }
        // exactly one side was nulled above; the other side's remaining words
        // are copied as they are
        if (rlwi != null)
            discharge(rlwi, i, container);
        if (rlwj != null)
            discharge(rlwj, j, container);
        container.sizeinbits = Math.max(sizeInBits(), a.sizeInBits());
        return container;
    }

    /**
     * Returns a new compressed bitmap containing the bitwise AND of the
     * current bitmap with some other bitmap.
     * 
     * The running time is proportional to the sum of the compressed sizes (as
     * reported by sizeInBytes()).
     *
     * The two compressed streams are merged one run length word at a time. In
     * each round the word with the smaller size() is called the "prey" and
     * the other one the "predator"; the prey is consumed entirely and then
     * replaced by the next word of its bitmap.
     *
     * @param a the other bitmap
     * @return the EWAH compressed bitmap; on the normal path its size in bits
     *         is the larger of the two operands' sizes
     */
    public EWAHCompressedBitmap and(final EWAHCompressedBitmap a) {
        final EWAHCompressedBitmap container = new EWAHCompressedBitmap();
        // reserve as many words as the larger of the two operands uses
        container.reserve(
                this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords : a.actualsizeinwords);
        // i walks the argument, j walks this bitmap
        final EWAHIterator i = a.getEWAHIterator();
        final EWAHIterator j = getEWAHIterator();
        if (!(i.hasNext() && j.hasNext())) {// this never happens...
            // defensive: return an empty container carrying this bitmap's size
            container.sizeinbits = sizeInBits();
            return container;
        }
        // at this point, this is safe:
        BufferedRunningLengthWord rlwi = new BufferedRunningLengthWord(i.next());
        BufferedRunningLengthWord rlwj = new BufferedRunningLengthWord(j.next());
        while (true) {
            // the smaller word is the prey; on a tie, j's word is the prey
            final boolean i_is_prey = rlwi.size() < rlwj.size();
            final BufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj;
            final BufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi;
            // Step 1: consume the prey's run
            if (prey.getRunningBit() == false) {
                // prey run is all zeros: x AND 0 == 0, whatever the predator holds
                container.addStreamOfEmptyWords(false, prey.RunningLength);
                predator.discardFirstWords(prey.RunningLength);
                prey.RunningLength = 0;
            } else {
                // prey run is all ones: x AND 1 == x, so the predator is copied
                final long predatorrl = predator.getRunningLength();
                final long preyrl = prey.getRunningLength();
                // overlap between the two runs: min(predatorrl, preyrl)
                final long tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
                container.addStreamOfEmptyWords(predator.getRunningBit(), tobediscarded);
                // the rest of the prey's run faces the predator's literal words
                final int dw_predator = predator.dirtywordoffset + (i_is_prey ? j.dirtyWords() : i.dirtyWords());
                container.addStreamOfDirtyWords(i_is_prey ? j.buffer() : i.buffer(), dw_predator,
                        preyrl - tobediscarded);
                predator.discardFirstWords(preyrl);
                prey.RunningLength = 0;
            }
            // Step 2: whatever is left of the predator's run is matched
            // against the prey's literal words
            final long predatorrl = predator.getRunningLength();
            if (predatorrl > 0) {
                if (predator.getRunningBit() == false) {
                    // run of zeros: the prey's literal words are wiped out
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                    container.addStreamOfEmptyWords(false, tobediscarded);
                } else {
                    // run of ones: the prey's literal words pass through unchanged
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final int dw_prey = prey.dirtywordoffset + (i_is_prey ? i.dirtyWords() : j.dirtyWords());
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    container.addStreamOfDirtyWords(i_is_prey ? i.buffer() : j.buffer(), dw_prey, tobediscarded);
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                }
            }
            // Step 3: all that is left to do now is to AND the remaining
            // literal words of the prey with the predator's literal words
            final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
            if (nbre_dirty_prey > 0) {
                for (int k = 0; k < nbre_dirty_prey; ++k) {
                    if (i_is_prey)
                        container.add(i.buffer()[prey.dirtywordoffset + i.dirtyWords() + k]
                                & j.buffer()[predator.dirtywordoffset + j.dirtyWords() + k]);
                    else
                        container.add(i.buffer()[predator.dirtywordoffset + i.dirtyWords() + k]
                                & j.buffer()[prey.dirtywordoffset + j.dirtyWords() + k]);
                }
                predator.discardFirstWords(nbre_dirty_prey);
            }
            // Step 4: the prey is exhausted; fetch the next word from its
            // bitmap, or stop if that bitmap has no more words
            if (i_is_prey) {
                if (!i.hasNext()) {
                    rlwi = null;
                    break;
                }
                rlwi.reset(i.next());
            } else {
                if (!j.hasNext()) {
                    rlwj = null;
                    break;
                }
                rlwj.reset(j.next());
            }
        }
        // exactly one side was nulled above; the other side's remaining words
        // have nothing to be ANDed with, so they contribute only zeros
        if (rlwi != null)
            dischargeAsEmpty(rlwi, i, container);
        if (rlwj != null)
            dischargeAsEmpty(rlwj, j, container);
        container.sizeinbits = Math.max(sizeInBits(), a.sizeInBits());
        return container;
    }

    /**
     * Returns a new compressed bitmap containing the bitwise AND NOT of the
     * current bitmap with some other bitmap, i.e. (this AND (NOT a)).
     * 
     * The running time is proportional to the sum of the compressed sizes (as
     * reported by sizeInBytes()).
     *
     * The structure is that of and(), except that the argument is negated on
     * the fly: the running bit of each of its buffered words is flipped as
     * soon as the word is loaded, and its literal words are complemented
     * whenever they are read.
     *
     * @param a the other bitmap
     * @return the EWAH compressed bitmap; on the normal path its size in bits
     *         is the larger of the two operands' sizes
     */
    public EWAHCompressedBitmap andNot(final EWAHCompressedBitmap a) {
        final EWAHCompressedBitmap container = new EWAHCompressedBitmap();
        // reserve as many words as the larger of the two operands uses
        container.reserve(
                this.actualsizeinwords > a.actualsizeinwords ? this.actualsizeinwords : a.actualsizeinwords);
        // i walks the argument (the negated side), j walks this bitmap
        final EWAHIterator i = a.getEWAHIterator();
        final EWAHIterator j = getEWAHIterator();
        if (!(i.hasNext() && j.hasNext())) {// this never happens...
            // defensive: return an empty container carrying this bitmap's size
            container.sizeinbits = sizeInBits();
            return container;
        }
        // at this point, this is safe:
        BufferedRunningLengthWord rlwi = new BufferedRunningLengthWord(i.next());
        // negate the argument's run in the buffered word
        rlwi.setRunningBit(!rlwi.getRunningBit());
        BufferedRunningLengthWord rlwj = new BufferedRunningLengthWord(j.next());
        while (true) {
            // the smaller word is the prey; on a tie, j's word is the prey
            final boolean i_is_prey = rlwi.size() < rlwj.size();
            final BufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj;
            final BufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi;

            // Step 1: consume the prey's run
            if (prey.getRunningBit() == false) {
                // prey run is all zeros: the AND yields zeros
                container.addStreamOfEmptyWords(false, prey.RunningLength);
                predator.discardFirstWords(prey.RunningLength);
                prey.RunningLength = 0;
            } else {
                // prey run is all ones: the predator is copied
                final long predatorrl = predator.getRunningLength();
                final long preyrl = prey.getRunningLength();
                // overlap between the two runs: min(predatorrl, preyrl)
                final long tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
                container.addStreamOfEmptyWords(predator.getRunningBit(), tobediscarded);
                final int dw_predator = predator.dirtywordoffset + (i_is_prey ? j.dirtyWords() : i.dirtyWords());
                // literal words coming from the argument (i) must be negated,
                // those coming from this bitmap (j) are copied as they are
                if (i_is_prey)
                    container.addStreamOfDirtyWords(j.buffer(), dw_predator, preyrl - tobediscarded);
                else
                    container.addStreamOfNegatedDirtyWords(i.buffer(), dw_predator, preyrl - tobediscarded);
                predator.discardFirstWords(preyrl);
                prey.RunningLength = 0;
            }
            // Step 2: whatever is left of the predator's run is matched
            // against the prey's literal words
            final long predatorrl = predator.getRunningLength();
            if (predatorrl > 0) {
                if (predator.getRunningBit() == false) {
                    // run of zeros: the prey's literal words are wiped out
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                    container.addStreamOfEmptyWords(false, tobediscarded);
                } else {
                    // run of ones: the prey's literal words pass through,
                    // negated when they belong to the argument (i)
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final int dw_prey = prey.dirtywordoffset + (i_is_prey ? i.dirtyWords() : j.dirtyWords());
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    if (i_is_prey)
                        container.addStreamOfNegatedDirtyWords(i.buffer(), dw_prey, tobediscarded);
                    else
                        container.addStreamOfDirtyWords(j.buffer(), dw_prey, tobediscarded);
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                }
            }
            // Step 3: all that is left to do now is to combine the remaining
            // literal words as (NOT i) AND j
            final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
            if (nbre_dirty_prey > 0) {
                for (int k = 0; k < nbre_dirty_prey; ++k) {
                    if (i_is_prey)
                        container.add((~i.buffer()[prey.dirtywordoffset + i.dirtyWords() + k])
                                & j.buffer()[predator.dirtywordoffset + j.dirtyWords() + k]);
                    else
                        container.add((~i.buffer()[predator.dirtywordoffset + i.dirtyWords() + k])
                                & j.buffer()[prey.dirtywordoffset + j.dirtyWords() + k]);
                }
                predator.discardFirstWords(nbre_dirty_prey);
            }
            // Step 4: the prey is exhausted; fetch the next word from its
            // bitmap, or stop if that bitmap has no more words
            if (i_is_prey) {
                if (!i.hasNext()) {
                    rlwi = null;
                    break;
                }
                rlwi.reset(i.next());
                // every freshly loaded word of the argument is negated again
                rlwi.setRunningBit(!rlwi.getRunningBit());
            } else {
                if (!j.hasNext()) {
                    rlwj = null;
                    break;
                }
                rlwj.reset(j.next());
            }
        }
        // exactly one side was nulled above: leftover words of the argument
        // contribute zeros, leftover words of this bitmap are copied as they are
        if (rlwi != null)
            dischargeAsEmpty(rlwi, i, container);
        if (rlwj != null)
            discharge(rlwj, j, container);
        container.sizeinbits = Math.max(sizeInBits(), a.sizeInBits());
        return container;
    }

    /**
     * Flips every bit of this bitmap in place. To obtain a negated copy
     * instead, use ((EWAHCompressedBitmap) mybitmap.clone()).not();
     * 
     * The running time is proportional to the compressed size (as reported by
     * sizeInBytes()).
     * 
     */
    public void not() {
        final EWAHIterator words = new EWAHIterator(this.buffer, this.actualsizeinwords);
        if (!words.hasNext()) {
            return;
        }
        RunningLengthWord marker;
        do {
            marker = words.next();
            // invert the run described by the marker word
            marker.setRunningBit(!marker.getRunningBit());
            // invert each literal word that follows the marker
            for (int k = 0; k < marker.getNumberOfLiteralWords(); ++k) {
                final long[] data = words.buffer();
                final int index = words.dirtyWords() + k;
                data[index] = ~data[index];
            }
        } while (words.hasNext());
        // 'marker' is now the last word: its final literal word may carry
        // padding bits beyond sizeinbits, which must be cleared again
        if (marker.getNumberOfLiteralWords() == 0) {
            return;
        }
        final int usedBitsInLast = this.sizeinbits % wordinbits;
        if (usedBitsInLast == 0) {
            return;
        }
        final int lastLiteral = words.dirtyWords() + marker.getNumberOfLiteralWords() - 1;
        words.buffer()[lastLiteral] &= (~0L) >>> (wordinbits - usedBitsInLast);
    }

    /**
     * Returns a new compressed bitmap containing the bitwise OR of the
     * current bitmap with some other bitmap.
     * 
     * The running time is proportional to the sum of the compressed sizes (as
     * reported by sizeInBytes()).
     *
     * The two compressed streams are merged one run length word at a time. In
     * each round the word with the smaller size() is called the "prey" and
     * the other one the "predator"; the prey is consumed entirely and then
     * replaced by the next word of its bitmap.
     *
     * @param a the other bitmap
     * @return the EWAH compressed bitmap; on the normal path its size in bits
     *         is the larger of the two operands' sizes
     */
    public EWAHCompressedBitmap or(final EWAHCompressedBitmap a) {
        final EWAHCompressedBitmap container = new EWAHCompressedBitmap();
        container.reserve(this.actualsizeinwords + a.actualsizeinwords);
        // i walks the argument, j walks this bitmap
        final EWAHIterator i = a.getEWAHIterator();
        final EWAHIterator j = getEWAHIterator();
        if (!(i.hasNext() && j.hasNext())) {// this never happens...
            // defensive: return an empty container carrying this bitmap's size
            container.sizeinbits = sizeInBits();
            return container;
        }
        // at this point, this is safe:
        BufferedRunningLengthWord rlwi = new BufferedRunningLengthWord(i.next());
        BufferedRunningLengthWord rlwj = new BufferedRunningLengthWord(j.next());
        while (true) {
            // the smaller word is the prey; on a tie, j's word is the prey
            final boolean i_is_prey = rlwi.size() < rlwj.size();
            final BufferedRunningLengthWord prey = i_is_prey ? rlwi : rlwj;
            final BufferedRunningLengthWord predator = i_is_prey ? rlwj : rlwi;
            // Step 1: consume the prey's run
            if (prey.getRunningBit() == false) {
                // prey run is all zeros: x OR 0 == x, so the predator is copied
                final long predatorrl = predator.getRunningLength();
                final long preyrl = prey.getRunningLength();
                // overlap between the two runs: min(predatorrl, preyrl)
                final long tobediscarded = (predatorrl >= preyrl) ? preyrl : predatorrl;
                container.addStreamOfEmptyWords(predator.getRunningBit(), tobediscarded);
                // the rest of the prey's run faces the predator's literal words
                final long dw_predator = predator.dirtywordoffset + (i_is_prey ? j.dirtyWords() : i.dirtyWords());
                container.addStreamOfDirtyWords(i_is_prey ? j.buffer() : i.buffer(), dw_predator,
                        preyrl - tobediscarded);
                predator.discardFirstWords(preyrl);
                prey.discardFirstWords(preyrl);
                prey.RunningLength = 0;
            } else {
                // prey run is all ones: x OR 1 == 1, whatever the predator holds
                container.addStreamOfEmptyWords(true, prey.RunningLength);
                predator.discardFirstWords(prey.RunningLength);
                prey.RunningLength = 0;
            }
            // Step 2: whatever is left of the predator's run is matched
            // against the prey's literal words
            long predatorrl = predator.getRunningLength();
            if (predatorrl > 0) {
                if (predator.getRunningBit() == false) {
                    // run of zeros: the prey's literal words pass through unchanged
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    // offset is computed before the words are discarded
                    final long dw_prey = prey.dirtywordoffset + (i_is_prey ? i.dirtyWords() : j.dirtyWords());
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                    container.addStreamOfDirtyWords(i_is_prey ? i.buffer() : j.buffer(), dw_prey, tobediscarded);
                } else {
                    // run of ones: the prey's literal words are absorbed
                    final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
                    final long tobediscarded = (predatorrl >= nbre_dirty_prey) ? nbre_dirty_prey : predatorrl;
                    container.addStreamOfEmptyWords(true, tobediscarded);
                    predator.discardFirstWords(tobediscarded);
                    prey.discardFirstWords(tobediscarded);
                }
            }
            // Step 3: all that is left to do now is to OR the remaining
            // literal words of the prey with the predator's literal words
            final long nbre_dirty_prey = prey.getNumberOfLiteralWords();
            if (nbre_dirty_prey > 0) {
                for (int k = 0; k < nbre_dirty_prey; ++k) {
                    if (i_is_prey)
                        container.add(i.buffer()[prey.dirtywordoffset + i.dirtyWords() + k]
                                | j.buffer()[predator.dirtywordoffset + j.dirtyWords() + k]);
                    else
                        container.add(i.buffer()[predator.dirtywordoffset + i.dirtyWords() + k]
                                | j.buffer()[prey.dirtywordoffset + j.dirtyWords() + k]);
                }
                predator.discardFirstWords(nbre_dirty_prey);
            }
            // Step 4: the prey is exhausted; fetch the next word from its
            // bitmap, or stop if that bitmap has no more words
            if (i_is_prey) {
                if (!i.hasNext()) {
                    rlwi = null;
                    break;
                }
                // the buffered word object is reused rather than reallocated
                rlwi.reset(i.next());
            } else {
                if (!j.hasNext()) {
                    rlwj = null;
                    break;
                }
                // the buffered word object is reused rather than reallocated
                rlwj.reset(j.next());
            }
        }
        // exactly one side was nulled above; the other side's remaining words
        // are copied as they are
        if (rlwi != null)
            discharge(rlwi, i, container);
        if (rlwj != null)
            discharge(rlwj, j, container);
        container.sizeinbits = Math.max(sizeInBits(), a.sizeInBits());
        return container;
    }

    /**
     * For internal use. Appends to the container everything that is left in a
     * bitmap: first the (possibly partially consumed) initial word, then every
     * word still available from the iterator. Runs and literal words are
     * copied as they are.
     *
     * @param initialWord the word to start from
     * @param iterator the iterator supplying the following words
     * @param container the bitmap receiving the words
     */
    private static void discharge(final BufferedRunningLengthWord initialWord, final EWAHIterator iterator,
            final EWAHCompressedBitmap container) {
        BufferedRunningLengthWord current = initialWord;
        while (current != null) {
            // the run first...
            final long run = current.getRunningLength();
            container.addStreamOfEmptyWords(current.getRunningBit(), run);
            // ...then the literal words that have not been consumed yet
            final int firstLiteral = iterator.dirtyWords() + current.dirtywordoffset;
            container.addStreamOfDirtyWords(iterator.buffer(), firstLiteral, current.getNumberOfLiteralWords());
            current = iterator.hasNext() ? new BufferedRunningLengthWord(iterator.next()) : null;
        }
    }

    /**
     * For internal use. Walks everything that is left in a bitmap (the initial
     * word, then every word still available from the iterator) and appends to
     * the container the same number of words, all of them zero.
     *
     * @param initialWord the word to start from
     * @param iterator the iterator supplying the following words
     * @param container the bitmap receiving the zero words
     */
    private static void dischargeAsEmpty(final BufferedRunningLengthWord initialWord, final EWAHIterator iterator,
            final EWAHCompressedBitmap container) {
        BufferedRunningLengthWord current = initialWord;
        while (current != null) {
            // run words and literal words alike are replaced by zero words
            final long run = current.getRunningLength();
            final long wordCount = run + current.getNumberOfLiteralWords();
            container.addStreamOfEmptyWords(false, wordCount);
            current = iterator.hasNext() ? new BufferedRunningLengthWord(iterator.next()) : null;
        }
    }

    /**
     * Sets the bit at position i to true. Bits must be set in increasing
     * order: set(15) followed by set(7) fails, whereas set(7) followed by
     * set(15) works.
     *
     * @param i the index
     * @return true if the value was set (always true when i>= sizeInBits()),
     *         false if i lies before the current end of the bitmap
     */
    public boolean set(final int i) {
        // only positions at or past the current end can be set
        if (i < this.sizeinbits) {
            return false;
        }
        // if the bitmap currently ends in the middle of a word and the new bit
        // lies beyond that word, round the size up to the next word boundary
        final boolean endsInsideWord = (this.sizeinbits % 64) != 0;
        if (endsInsideWord) {
            final int roundedUp = (this.sizeinbits / 64) * 64 + 64;
            if (roundedUp < i + 1) {
                this.sizeinbits = roundedUp;
            }
        }
        // pad with zero words up to the word that holds bit i
        final int targetWord = i / 64;
        addStreamOfEmptyWords(false, targetWord - this.sizeinbits / 64);
        // sizeinbits is re-read here: the padding above may have changed it
        final int bitInWord = i - (this.sizeinbits / 64 * 64);
        final long mask = 1L << bitInWord;
        final boolean needsNewWord = (this.rlw.getNumberOfLiteralWords() == 0)
                || ((this.sizeinbits - 1) / 64 < targetWord);
        if (needsNewWord) {
            // start a fresh literal word containing just this bit
            addLiteralWord(mask);
        } else {
            // the bit belongs to the literal word at the end of the buffer
            final int last = this.actualsizeinwords - 1;
            this.buffer[last] |= mask;
            if (this.buffer[last] == ~0L) {
                // the word became all ones: drop it as a literal word...
                this.buffer[last] = 0;
                --this.actualsizeinwords;
                this.rlw.setNumberOfLiteralWords(this.rlw.getNumberOfLiteralWords() - 1);
                // ...and record it as one clean word of ones instead
                addEmptyWord(true);
            }
        }
        this.sizeinbits = i + 1;
        return true;
    }

    /**
     * Adding words directly to the bitmap (for expert use).
     * 
     * This is normally how you add data to the array: one whole word at a
     * time. The call delegates to add(newdata, wordinbits), i.e. every bit of
     * the word is treated as significant.
     *
     * @param newdata the word
     * @return the number of words added to the buffer
     */
    public int add(final long newdata) {
        return add(newdata, wordinbits);
    }

    /**
     * For experts: appends a run of identical words (all zeros or all ones).
     * The run is merged into the current marker word when that word has no
     * literal words, carries the same bit and still has room; otherwise a new
     * marker word is started. Whatever does not fit is added recursively.
     *
     * @param v the boolean value
     * @param number the number
     * @return the number of words added to the buffer
     */
    public int addStreamOfEmptyWords(final boolean v, final long number) {
        if (number == 0) {
            return 0;
        }
        final boolean noLiterals = this.rlw.getNumberOfLiteralWords() == 0;
        final long currentRun = this.rlw.getRunningLength();
        // a completely empty marker word can adopt whatever bit is requested
        if (noLiterals && currentRun == 0) {
            this.rlw.setRunningBit(v);
        }
        final boolean canExtend = noLiterals && (this.rlw.getRunningBit() == v)
                && (currentRun < RunningLengthWord.largestrunninglengthcount);
        int wordsAdded = 0;
        final long chunk;
        if (canExtend) {
            // lengthen the current run as far as its counter allows
            chunk = Math.min(number, RunningLengthWord.largestrunninglengthcount - currentRun);
            this.rlw.setRunningLength(currentRun + chunk);
        } else {
            // open a new marker word at the end of the buffer
            push_back(0);
            wordsAdded = 1;
            this.rlw.position = this.actualsizeinwords - 1;
            chunk = Math.min(number, RunningLengthWord.largestrunninglengthcount);
            this.rlw.setRunningBit(v);
            this.rlw.setRunningLength(chunk);
        }
        this.sizeinbits += chunk * wordinbits;
        final long remaining = number - chunk;
        if (remaining > 0) {
            wordsAdded += addStreamOfEmptyWords(v, remaining);
        }
        return wordsAdded;
    }

    /**
     * Same as addStreamOfDirtyWords, but the words are negated.
     *
     * As many words as the current marker word's literal counter can still
     * hold are appended (bitwise complemented); if some are left over, a new
     * marker word is started and the remainder is appended the same way.
     *
     * @param data the dirty words
     * @param start the starting point in the array
     * @param number the number of dirty words to add
     * @return how many (compressed) words were added to the bitmap
     */
    private long addStreamOfNegatedDirtyWords(final long[] data, final long start, final long number) {
        if (number == 0)
            return 0;
        final long NumberOfLiteralWords = this.rlw.getNumberOfLiteralWords();
        // room left in the current marker word's literal counter
        final long whatwecanadd = number < RunningLengthWord.largestliteralcount - NumberOfLiteralWords ? number
                : RunningLengthWord.largestliteralcount - NumberOfLiteralWords;
        this.rlw.setNumberOfLiteralWords(NumberOfLiteralWords + whatwecanadd);
        final long leftovernumber = number - whatwecanadd;
        negative_push_back(data, (int) start, (int) whatwecanadd);
        this.sizeinbits += whatwecanadd * wordinbits;
        long wordsadded = whatwecanadd;
        if (leftovernumber > 0) {
            // the literal counter is full: open a new marker word
            push_back(0);
            this.rlw.position = this.actualsizeinwords - 1;
            ++wordsadded;
            // the remainder must be negated as well, so recurse into this
            // method rather than into the non-negating addStreamOfDirtyWords
            wordsadded += addStreamOfNegatedDirtyWords(data, start + whatwecanadd, leftovernumber);
        }
        return wordsadded;
    }

    /**
     * if you have several dirty words to copy over, this might be faster.
     *
     *
     * @param data the dirty words
     * @param start the starting point in the array
     * @param number the number of dirty words to add
     * @return how many (compressed) words were added to the bitmap
     */
    private long addStreamOfDirtyWords(final long[] data, final long start, final long number) {
        // An empty request stores nothing.
        if (number == 0)
            return 0;

        // How many more literal words the current marker word can still count.
        final long alreadycounted = this.rlw.getNumberOfLiteralWords();
        final long room = RunningLengthWord.largestliteralcount - alreadycounted;
        final long batch;
        if (number < room) {
            batch = number;
        } else {
            batch = room;
        }

        // Register the batch in the marker, then copy the words verbatim.
        this.rlw.setNumberOfLiteralWords(alreadycounted + batch);
        final long remaining = number - batch;
        push_back(data, (int) start, (int) batch);
        this.sizeinbits += batch * wordinbits;

        if (remaining <= 0)
            return batch;

        // Marker exhausted: open a new marker word and continue with the rest.
        push_back(0);
        this.rlw.position = this.actualsizeinwords - 1;
        return batch + 1 + addStreamOfDirtyWords(data, start + batch, remaining);
    }

    /**
     * Adding words directly to the bitmap (for expert use).
     *
     * @param newdata the word
     * @param bitsthatmatter the number of significant bits (by default it should be 64)
     * @return the number of words added to the buffer
     */
    public int add(final long newdata, final int bitsthatmatter) {
        // The caller states how many bits of this word count towards the size.
        this.sizeinbits += bitsthatmatter;
        // All-zero and all-one words are handed to the run-length path;
        // anything else is stored as a literal word.
        if (newdata == 0L)
            return addEmptyWord(false);
        if (newdata == ~0L)
            return addEmptyWord(true);
        return addLiteralWord(newdata);
    }

    /**
     * Returns the size in bits of the *uncompressed* bitmap represented by this
     * compressed bitmap. Initially, the sizeInBits is zero. It is extended
     * automatically when you set bits to true.
     *
     * @return the size
     */
    public int sizeInBits() {
        // Plain accessor for the uncompressed bit count.
        final int bits = this.sizeinbits;
        return bits;
    }

    /**
     * Change the reported size in bits of the *uncompressed* bitmap represented
     * by this compressed bitmap. It is not possible to reduce the sizeInBits, but
     * it can be extended. The new bits are set to false or true depending on the
     * value of defaultvalue.
     *
     * @param size the size in bits
     * @param defaultvalue the default boolean value
     * @return true if the update was possible
     */
    public boolean setSizeInBits(final int size, final boolean defaultvalue) {
        // Shrinking is not supported.
        if (size < this.sizeinbits)
            return false;
        if (defaultvalue) {
            // Fill the rest of the current partial word one bit at a time. Like
            // the original loop, this relies on set(int) growing sizeinbits
            // past the bit it sets.
            while (((this.sizeinbits % wordinbits) != 0) && (this.sizeinbits < size)) {
                this.set(this.sizeinbits);
            }
            // sizeinbits is now word-aligned or already equal to size, so this
            // difference counts whole words only; add them as a run of ones.
            final int fullwords = (size / wordinbits) - (this.sizeinbits / wordinbits);
            if (fullwords > 0)
                this.addStreamOfEmptyWords(defaultvalue, fullwords);
            // Set the bits of the trailing partial word, if any. The previous
            // code always appended a literal word with leftover+1 bits set,
            // even when no partial word was needed.
            while (this.sizeinbits < size) {
                this.set(this.sizeinbits);
            }
        } else {
            // Words needed = ceil(size/64) - ceil(sizeinbits/64): a partial
            // word that already exists must not be counted a second time.
            final int currentleftover = this.sizeinbits % wordinbits;
            final int finalleftover = size % wordinbits;
            final int wordstoadd = (size / wordinbits) - (this.sizeinbits / wordinbits)
                    + (finalleftover != 0 ? 1 : 0) - (currentleftover != 0 ? 1 : 0);
            if (wordstoadd > 0)
                this.addStreamOfEmptyWords(defaultvalue, wordstoadd);
        }
        // The word-level helpers count in multiples of 64; pin the exact size.
        this.sizeinbits = size;
        return true;
    }

    /**
     * Report the *compressed* size of the bitmap (equivalent to memory usage,
     * after accounting for some overhead).
     *
     * @return the size in bytes
     */
    public int sizeInBytes() {
        // Each used buffer word is a long, i.e. 8 bytes.
        final int bytesperword = 8;
        return bytesperword * this.actualsizeinwords;
    }

    /**
     * For internal use (trading off memory for speed).
     *
     * @param size the number of words to allocate
     * @return True if the operation was a success.
     */
    private boolean reserve(final int size) {
        // The buffer is already at least that large: nothing to do.
        if (size <= this.buffer.length)
            return false;
        // Allocate the larger array, carry the old contents over, and keep the
        // running-length word pointing at the live array.
        final long[] previous = this.buffer;
        final long[] grown = new long[size];
        this.buffer = grown;
        System.arraycopy(previous, 0, grown, 0, previous.length);
        this.rlw.array = grown;
        return true;
    }

    /**
     * For internal use.
     *
     * @param data the word to be added
     */
    private void push_back(final long data) {
        final boolean full = (this.buffer.length == this.actualsizeinwords);
        if (full) {
            // Double the capacity and re-point the running-length word at the
            // new array.
            final long[] previous = this.buffer;
            final long[] grown = new long[previous.length * 2];
            this.buffer = grown;
            System.arraycopy(previous, 0, grown, 0, previous.length);
            this.rlw.array = grown;
        }
        // Append the word after the last used slot.
        this.buffer[this.actualsizeinwords++] = data;
    }

    /**
     * For internal use.
     *
     * @param data the array of words to be added
     * @param start the starting point
     * @param number the number of words to add
     */
    private void push_back(final long[] data, final int start, final int number) {
        // Keep doubling until the buffer is strictly larger than the space
        // needed for the words already stored plus the new ones.
        final int needed = this.actualsizeinwords + number;
        while (needed >= this.buffer.length) {
            final long[] previous = this.buffer;
            final long[] grown = new long[previous.length * 2];
            this.buffer = grown;
            System.arraycopy(previous, 0, grown, 0, previous.length);
            this.rlw.array = grown;
        }
        // Bulk-copy data[start .. start + number) behind the last used word.
        System.arraycopy(data, start, this.buffer, this.actualsizeinwords, number);
        this.actualsizeinwords = needed;
    }

    /**
     * For internal use.
     *
     * @param data the array of words to be added
     * @param start the starting point
     * @param number the number of words to add
     */
    private void negative_push_back(final long[] data, final int start, final int number) {
        // Keep doubling until the buffer is strictly larger than the space
        // needed for the words already stored plus the new ones.
        final int needed = this.actualsizeinwords + number;
        while (needed >= this.buffer.length) {
            final long[] previous = this.buffer;
            final long[] grown = new long[previous.length * 2];
            this.buffer = grown;
            System.arraycopy(previous, 0, grown, 0, previous.length);
            this.rlw.array = grown;
        }
        // Append the bitwise complement of each source word.
        final int base = this.actualsizeinwords;
        int offset = 0;
        while (offset < number) {
            this.buffer[base + offset] = ~data[start + offset];
            ++offset;
        }
        this.actualsizeinwords = base + number;
    }

    /**
     * For internal use.
     *
     * @param v the boolean value
     * @return the storage cost of the addition
     */
    private int addEmptyWord(final boolean v) {
        final boolean markerhasnoliterals = (this.rlw.getNumberOfLiteralWords() == 0);
        final long currentrun = this.rlw.getRunningLength();
        if (markerhasnoliterals) {
            // A marker with an empty run and no literals can take on either
            // running bit.
            if (currentrun == 0) {
                this.rlw.setRunningBit(v);
            }
            // Same running bit and room left in the counter: extend the run in
            // place at no storage cost.
            if ((this.rlw.getRunningBit() == v) && (currentrun < RunningLengthWord.largestrunninglengthcount)) {
                this.rlw.setRunningLength(currentrun + 1);
                return 0;
            }
        }
        // Otherwise start a new marker word holding a run of length one.
        push_back(0);
        this.rlw.position = this.actualsizeinwords - 1;
        this.rlw.setRunningBit(v);
        this.rlw.setRunningLength(1);
        return 1;
    }

    /**
     * For internal use.
     *
     * @param newdata the dirty word
     * @return the storage cost of the addition
     */
    private int addLiteralWord(final long newdata) {
        final long literalsinmarker = this.rlw.getNumberOfLiteralWords();
        if (literalsinmarker < RunningLengthWord.largestliteralcount) {
            // The current marker can still count one more literal word.
            this.rlw.setNumberOfLiteralWords(literalsinmarker + 1);
            push_back(newdata);
            return 1;
        }
        // Literal counter saturated: open a new marker word, make it current,
        // and let it account for this literal. Two words are stored.
        push_back(0);
        this.rlw.position = this.actualsizeinwords - 1;
        this.rlw.setNumberOfLiteralWords(1);
        push_back(newdata);
        return 2;
    }

    /**
     * reports the number of bits set to true. Running time is proportional to
     * compressed size (as reported by sizeInBytes).
     *
     * @return the number of bits set to true
     */
    public int cardinality() {
        int counter = 0;
        final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords);
        while (i.hasNext()) {
            final RunningLengthWord localrlw = i.next();
            // A run of ones contributes a full word of set bits per run unit.
            if (localrlw.getRunningBit()) {
                counter += wordinbits * localrlw.getRunningLength();
            }
            // Literal words: count their set bits with the JDK population
            // count instead of probing all 64 positions one by one.
            final long literals = localrlw.getNumberOfLiteralWords();
            for (int j = 0; j < literals; ++j) {
                counter += Long.bitCount(i.buffer()[i.dirtyWords() + j]);
            }
        }
        return counter;
    }

    /**
     * A string describing the bitmap.
     *
     * @return the string
     */
    @Override
    public String toString() {
        // Build the text in a StringBuilder: one line pair is emitted per
        // marker word, and repeated String concatenation would re-copy the
        // whole text on every iteration.
        final StringBuilder ans = new StringBuilder();
        ans.append(" EWAHCompressedBitmap, size in bits = ").append(this.sizeinbits)
                .append(" size in words = ").append(this.actualsizeinwords).append("\n");
        final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords);
        while (i.hasNext()) {
            final RunningLengthWord localrlw = i.next();
            // Run length followed by a tag for the running bit.
            if (localrlw.getRunningBit()) {
                ans.append(localrlw.getRunningLength()).append(" 1x11\n");
            } else {
                ans.append(localrlw.getRunningLength()).append(" 0x00\n");
            }
            // Number of literal words that follow this marker.
            ans.append(localrlw.getNumberOfLiteralWords()).append(" dirties\n");
        }
        return ans.toString();
    }

    /**
     * A more detailed string describing the bitmap (useful for debugging).
     *
     * @return the string
     */
    public String toDebugString() {
        // Build the text in a StringBuilder: output grows with the number of
        // stored words, and repeated String concatenation would re-copy the
        // whole text on every append.
        final StringBuilder ans = new StringBuilder();
        ans.append(" EWAHCompressedBitmap, size in bits = ").append(this.sizeinbits)
                .append(" size in words = ").append(this.actualsizeinwords).append("\n");
        final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords);
        while (i.hasNext()) {
            final RunningLengthWord localrlw = i.next();
            // Run length followed by a tag for the running bit.
            if (localrlw.getRunningBit()) {
                ans.append(localrlw.getRunningLength()).append(" 1x11\n");
            } else {
                ans.append(localrlw.getRunningLength()).append(" 0x00\n");
            }
            final long literals = localrlw.getNumberOfLiteralWords();
            ans.append(literals).append(" dirties\n");
            // Dump each literal word as a signed decimal long, one per line.
            for (int j = 0; j < literals; ++j) {
                final long data = i.buffer()[i.dirtyWords() + j];
                ans.append("\t").append(data).append("\n");
            }
        }
        return ans.toString();
    }

    /**
     * Iterator over the set bits (this is what most people will want to use to
     * browse the content). The location of the set bits is returned, in 
     * increasing order.
     *
     * @return the int iterator
     */
    public IntIterator intIterator() {
        final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords);
        return new IntIterator() {

            // Starting capacity of the decoded-position buffer.
            static final int initcapacity = 512;
            // Bit position reached so far in the uncompressed bitmap.
            int pos = 0;
            // Marker word most recently fetched from the word iterator.
            RunningLengthWord localrlw = null;
            // Positions decoded from the current marker, waiting to be returned.
            int[] localbuffer = new int[initcapacity];
            // Number of valid entries in localbuffer; 0 means it is drained.
            int localbuffersize = 0;
            // Index of the next entry of localbuffer to hand out.
            int bufferpos = 0;

            public boolean hasNext() {
                // Decode marker after marker until one yields a set bit or the
                // compressed stream is exhausted.
                for (;;) {
                    if (this.localbuffersize != 0)
                        return true;
                    if (!loadNextRLE())
                        return false;
                    loadBuffer();
                }
            }

            // Fetches the next marker word; false when none is left.
            private boolean loadNextRLE() {
                if (!i.hasNext())
                    return false;
                this.localrlw = i.next();
                return true;
            }

            // Appends one position to the buffer, doubling it when full.
            private void add(final int val) {
                ++this.localbuffersize;
                while (this.localbuffer.length < this.localbuffersize) {
                    final int[] previous = this.localbuffer;
                    this.localbuffer = new int[previous.length * 2];
                    System.arraycopy(previous, 0, this.localbuffer, 0, previous.length);
                }
                this.localbuffer[this.localbuffersize - 1] = val;
            }

            // Expands the current marker (its run, then its literal words)
            // into explicit bit positions.
            private void loadBuffer() {
                this.bufferpos = 0;
                this.localbuffersize = 0;
                if (!this.localrlw.getRunningBit()) {
                    // Run of zeros: just skip over it.
                    this.pos += wordinbits * this.localrlw.getRunningLength();
                } else {
                    // Run of ones: every bit of every word in the run is set.
                    for (int word = 0; word < this.localrlw.getRunningLength(); ++word) {
                        for (int bit = 0; bit < wordinbits; ++bit) {
                            add(this.pos++);
                        }
                    }
                }
                for (int word = 0; word < this.localrlw.getNumberOfLiteralWords(); ++word) {
                    final long data = i.buffer()[i.dirtyWords() + word];
                    for (int bit = 0; bit < wordinbits; ++bit) {
                        if (((data >>> bit) & 1L) != 0) {
                            add(this.pos);
                        }
                        ++this.pos;
                    }
                }
            }

            public int next() {
                final int answer = this.localbuffer[this.bufferpos++];
                // Mark the buffer as drained so hasNext() decodes the next marker.
                if (this.bufferpos == this.localbuffersize) {
                    this.localbuffersize = 0;
                }
                return answer;
            }
        };
    }

    /**
     * iterate over the positions of the true values.
     * This is similar to intIterator(), but it uses
     * Java generics.
     *
     * @return the iterator
     */
    public Iterator<Integer> integerIterator() {
        return new Iterator<Integer>() {

            // Primitive iterator doing the actual work.
            final private IntIterator under = intIterator();

            public Integer next() {
                // Honour the java.util.Iterator contract: fail loudly when
                // exhausted instead of returning stale buffer contents.
                if (!this.under.hasNext())
                    throw new NoSuchElementException("no more set bits");
                // valueOf replaces the deprecated Integer constructor and may
                // reuse cached instances.
                return Integer.valueOf(this.under.next());
            }

            public boolean hasNext() {
                return this.under.hasNext();
            }

            public void remove() {
                throw new UnsupportedOperationException("bitsets do not support remove");
            }
        };
    }

    /**
     * get the locations of the true values as one vector. (may use more memory
     * than iterator())
     *
     * @return the positions
     */
    public List<Integer> getPositions() {
        final List<Integer> v = new ArrayList<Integer>();
        final EWAHIterator i = new EWAHIterator(this.buffer, this.actualsizeinwords);
        // Bit position reached so far in the uncompressed bitmap.
        int pos = 0;
        while (i.hasNext()) {
            final RunningLengthWord localrlw = i.next();
            final long runlength = localrlw.getRunningLength();
            if (localrlw.getRunningBit()) {
                // Run of ones: every bit of every word in the run is set.
                for (int j = 0; j < runlength; ++j) {
                    for (int c = 0; c < wordinbits; ++c) {
                        // valueOf replaces the deprecated Integer constructor.
                        v.add(Integer.valueOf(pos++));
                    }
                }
            } else {
                // Run of zeros: skip over it.
                pos += wordinbits * runlength;
            }
            final long literals = localrlw.getNumberOfLiteralWords();
            for (int j = 0; j < literals; ++j) {
                final long data = i.buffer()[i.dirtyWords() + j];
                for (int c = 0; c < wordinbits; ++c) {
                    if (((1L << c) & data) != 0) {
                        v.add(Integer.valueOf(pos));
                    }
                    ++pos;
                }
            }
        }
        // Drop trailing positions at or beyond the logical size of the bitmap.
        while ((v.size() > 0) && (v.get(v.size() - 1).intValue() >= this.sizeinbits)) {
            v.remove(v.size() - 1);
        }
        return v;
    }

    /** 
     * Check to see whether the two compressed bitmaps contain the same data.
     * 
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof EWAHCompressedBitmap))
            return false;
        final EWAHCompressedBitmap that = (EWAHCompressedBitmap) o;
        // Cheap header checks first: logical size, used words, marker position.
        if (sizeinbits != that.sizeinbits || actualsizeinwords != that.actualsizeinwords
                || rlw.position != that.rlw.position)
            return false;
        // Then compare the used part of both buffers word by word.
        int w = 0;
        while (w < actualsizeinwords) {
            if (buffer[w] != that.buffer[w])
                return false;
            ++w;
        }
        return true;
    }

    /**
     * Returns a customized hash code (based on Karp-Rabin).
     * Naturally, if the bitmaps are equal, they will hash to the same value.
     * 
     */
    @Override
    public int hashCode() {
        final int multiplier = 31;
        final long lowmask = 0xFFFFFFFFL;
        int hash = 0;
        // Fold each used word into the hash as two 32-bit halves, low half
        // first; the sums are formed in long and truncated back to int.
        for (int w = 0; w < actualsizeinwords; ++w) {
            final long word = buffer[w];
            hash = (int) (hash + (multiplier * hash + (word & lowmask)));
            hash = (int) (hash + (multiplier * hash + (word >>> 32)));
        }
        // Mix in the logical size.
        return sizeinbits ^ hash;
    }

    /* 
     * @see java.lang.Object#clone()
     */
    @Override
    public Object clone() throws java.lang.CloneNotSupportedException {
        // super.clone() is a shallow copy: every reference field still points
        // at this object's data and has to be replaced below.
        final EWAHCompressedBitmap clone = (EWAHCompressedBitmap) super.clone();
        clone.buffer = this.buffer.clone();
        // Give the clone its own running-length word bound to its own buffer;
        // sharing the original's would make writes through the clone land in
        // (and move the marker of) the original bitmap.
        clone.rlw = new RunningLengthWord(clone.buffer, this.rlw.position);
        clone.actualsizeinwords = this.actualsizeinwords;
        clone.sizeinbits = this.sizeinbits;
        // Drop the iterator cached by get(int): it walks the original's buffer.
        // A negative lastIndex makes get(int) build a fresh one on first use.
        clone.lastIter = null;
        clone.lastIndex = -1;
        return clone;
    }

    /*
     * @see java.io.Externalizable#readExternal(java.io.ObjectInput)
     */
    public void readExternal(ObjectInput in) throws IOException {
        // Externalizable hook: the stream format is the one read by deserialize.
        this.deserialize(in);
    }

    /**
     * Deserialize.
     *
     * @param in the DataInput stream
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void deserialize(DataInput in) throws IOException {
        // Read into locals first so this object is left untouched when the
        // stream turns out to be truncated or corrupt.
        final int newsizeinbits = in.readInt();
        final int newactualsizeinwords = in.readInt();
        final int bufferlength = in.readInt();
        // Reject headers that would otherwise surface as
        // NegativeArraySizeException or ArrayIndexOutOfBoundsException.
        if (newactualsizeinwords < 0 || bufferlength < newactualsizeinwords)
            throw new IOException("corrupted bitmap: " + newactualsizeinwords
                    + " words used but buffer length is " + bufferlength);
        final long[] newbuffer = new long[bufferlength];
        for (int k = 0; k < newactualsizeinwords; ++k) {
            newbuffer[k] = in.readLong();
        }
        // Position of the current (last) running-length word in the buffer.
        final int position = in.readInt();
        if (position < 0 || position >= bufferlength)
            throw new IOException("corrupted bitmap: running length word position " + position
                    + " is outside the buffer of length " + bufferlength);
        this.sizeinbits = newsizeinbits;
        this.actualsizeinwords = newactualsizeinwords;
        this.buffer = newbuffer;
        this.rlw = new RunningLengthWord(this.buffer, position);
        // Drop the iterator cached by get(int): it refers to the previous
        // buffer. A negative lastIndex makes get(int) build a fresh one.
        this.lastIter = null;
        this.lastIndex = -1;
    }

    /*
     * @see java.io.Externalizable#writeExternal(java.io.ObjectOutput)
     */
    public void writeExternal(ObjectOutput out) throws IOException {
        // Externalizable hook: the stream format is the one written by serialize.
        this.serialize(out);
    }

    /**
     * Serialize.
     *
     * @param out the DataOutput stream
     * @throws IOException Signals that an I/O exception has occurred.
     */
    public void serialize(DataOutput out) throws IOException {
        // Header: logical size in bits, number of used words, buffer capacity.
        out.writeInt(this.sizeinbits);
        out.writeInt(this.actualsizeinwords);
        out.writeInt(this.buffer.length);
        // Payload: only the used words of the buffer.
        int k = 0;
        while (k < this.actualsizeinwords) {
            out.writeLong(this.buffer[k]);
            ++k;
        }
        // Trailer: position of the current running-length word.
        out.writeInt(this.rlw.position);
    }

    @Override
    public DocIdSetIterator iterator() {
        return new DocIdSetIterator() {

            // Primitive iterator over the positions of the set bits.
            final private IntIterator under = intIterator();
            // Lucene's DocIdSetIterator contract expects docID() to report -1
            // (or NO_MORE_DOCS) until nextDoc()/advance() has been called; the
            // implicit default of 0 would look like a real document id.
            int docID = -1;

            @Override
            public int docID() {
                return docID;
            }

            @Override
            public int nextDoc() throws IOException {
                // Move to the next set bit, or report exhaustion.
                if (under.hasNext())
                    return docID = under.next();
                else
                    return docID = DocIdSetIterator.NO_MORE_DOCS;
            }

            @Override
            public int advance(int target) throws IOException {
                // Linear scan to the first set bit at or after target; stops at
                // the latest once nextDoc() returns NO_MORE_DOCS.
                // we can do faster! -> wordinbits
                while ((docID = nextDoc()) < target) {
                }
                return docID;
            }
        };
    }

    /** The Constant defaultbuffersize: default memory allocation when the object is constructed. */
    static final int defaultbuffersize = 4;
    /** The buffer (array of 64-bit words) */
    long buffer[] = null;
    /** The actual size in words. */
    int actualsizeinwords = 1;
    /** sizeinbits: number of bits in the (uncompressed) bitmap. */
    int sizeinbits = 0;
    /** The current (last) running length word. */
    RunningLengthWord rlw = null;
    /** The Constant wordinbits represents the number of bits in a long. */
    public static final int wordinbits = 64;
    /** Iterator cached by get(int) so that a later query with an equal or larger index can resume from it. */
    IntIterator lastIter;
    /** Index passed to the most recent get(int) call; a negative value makes get(int) create a fresh iterator. */
    int lastIndex = -1;
    /** Most recent position pulled from lastIter by get(int); get(int) resets it to -1 whenever it creates a fresh iterator. */
    int lastInt;

    /**
     * WARN: only retrieval in ascending index order is optimal
     * TODO: not thread safe -> use lastIter per thread?
     */
    public boolean get(int index) {
        if (index < 0)
            throw new ArrayIndexOutOfBoundsException("negative indices are not allowed");

        // Restart the scan on first use or when the caller steps backwards;
        // otherwise resume from the cached iterator.
        final boolean restart = (lastIndex < 0) || (index < lastIndex);
        if (restart) {
            lastIter = intIterator();
            lastInt = -1;
        }
        lastIndex = index;

        // Advance until the last position read is at or beyond the index.
        // NOTE(review): nothing in this method checks whether the bitmap was
        // modified since the cached iterator was created - confirm that
        // callers do not interleave writes with ascending reads.
        for (; lastIter.hasNext() && lastInt < index;) {
            lastInt = lastIter.next();
        }
        return lastInt == index;
    }

    public int length() {
        // Reports the logical (uncompressed) size in bits.
        final int bits = this.sizeinbits;
        return bits;
    }
}