package it.unimi.dsi.mg4j.compression;
/*
* MG4J: Managing Gigabytes for Java
*
* Copyright (C) 2005-2007 Sebastiano Vigna
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by the Free
* Software Foundation; either version 2.1 of the License, or (at your option)
* any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
*/
import it.unimi.dsi.fastutil.booleans.AbstractBooleanIterator;
import it.unimi.dsi.fastutil.chars.Char2IntMap;
import java.util.NoSuchElementException;
import cern.colt.bitvector.BitVector;
/** A wrapper for character sequences that exposes them as a boolean iterator
 * returning the bits obtained by coding the sequence, using
 * a given map from characters to symbols in a prefix coder.
 *
 * <P>This class is very lightweight: it just scans the sequence, lazily
 * concatenating the bits of each codeword.
 *
 * <P>An empty character sequence yields an iterator that returns no bits at all.
 * @deprecated Moved to <code>dsiutils</code>.
 */
@Deprecated
public class CodedCharSequenceBooleanIterator extends AbstractBooleanIterator {
	/** The character sequence being coded. */
	private final CharSequence s;
	/** The length of {@link #s}, cached at construction time. */
	private final int length;
	/** The map from characters to indices into {@link #codeWord}. */
	private final Char2IntMap char2code;
	/** The codewords provided by the prefix coder, or <code>null</code> if no coder was given. */
	private final BitVector[] codeWord;
	/** The index in {@link #s} of the character whose codeword is currently being emitted. */
	private int pos;
	/** The index in {@link #currCodeWord} of the next bit to be returned. */
	private int index;
	/** The codeword of the character at position {@link #pos}; <code>null</code> if the sequence is empty. */
	private BitVector currCodeWord;

	/** Returns the codeword associated with a character.
	 *
	 * @param c a character.
	 * @return the element of {@link #codeWord} whose index is the value of <code>c</code> in {@link #char2code}.
	 */
	private BitVector char2codeWord( final char c ) {
		return codeWord[ char2code.get( c ) ];
	}

	/** Creates a new boolean iterator over a character sequence.
	 *
	 * @param s a character sequence.
	 * @param prefixCoder a prefix coder that is able to code the characters in <code>s</code>, or possibly <code>null</code> if
	 * <code>s</code> is empty.
	 * @param char2code the map from characters in <code>s</code> to symbols in <code>prefixCoder</code>.
	 * @throws IllegalArgumentException if <code>s</code> is not empty and <code>prefixCoder</code> is <code>null</code>.
	 */
	public CodedCharSequenceBooleanIterator( final CharSequence s, final PrefixCoder prefixCoder, final Char2IntMap char2code ) {
		this.s = s;
		this.char2code = char2code;
		this.length = s.length();
		if ( length > 0 && prefixCoder == null ) throw new IllegalArgumentException( "Empty PrefixCoder with non empty string: " + s );
		this.codeWord = prefixCoder != null ? prefixCoder.codeWords() : null;
		// The first codeword is loaded eagerly; for an empty sequence currCodeWord stays null.
		if ( length != 0 ) currCodeWord = char2codeWord( s.charAt( 0 ) );
	}

	/** Checks whether there are more bits to be returned.
	 *
	 * @return true if at least one more bit is available.
	 */
	public boolean hasNext() {
		/* The position test must come first: for an empty sequence no codeword is
		 * ever loaded, so currCodeWord is null and must not be dereferenced.
		 *
		 * If the current codeword has length 0, there's nothing to code. Note that
		 * in this case by definition the current codeword is also the *only* codeword. */
		return pos < length && currCodeWord.size() > 0;
	}

	/** Returns the next bit of the coded sequence.
	 *
	 * @return the next bit of the codeword of the current character.
	 * @throws NoSuchElementException if there are no more bits.
	 */
	public boolean nextBoolean() {
		if ( ! hasNext() ) throw new NoSuchElementException();
		final boolean bit = currCodeWord.get( index++ );
		// Once the current codeword is exhausted, move to the next character (if any).
		if ( index == currCodeWord.size() ) {
			pos++;
			if ( pos < length ) {
				currCodeWord = char2codeWord( s.charAt( pos ) );
				index = 0;
			}
		}
		return bit;
	}
}
|