org.apache.tez.runtime.library.common.ValuesIterator.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.tez.runtime.library.common.ValuesIterator.java

Source

/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tez.runtime.library.common;

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.tez.common.counters.TezCounter;
import org.apache.tez.runtime.library.common.sort.impl.TezRawKeyValueIterator;

import com.google.common.base.Preconditions;

/**
 * Groups a sorted raw key/value stream into (key, values) pairs.
 *
 * <p>Typical usage:
 * <pre>
 *   while (it.moveToNext()) {
 *       KEY k = it.getKey();
 *       for (VALUE v : it.getValues()) { ... }
 *   }
 * </pre>
 *
 * <p>The previously returned key/value objects are handed back to the
 * deserializers on every read, so (depending on the deserializer) the same
 * instances may be overwritten in place; callers should presumably copy any
 * key or value they need to retain.
 *
 * <p>This class is not thread safe. Accessing methods from multiple threads
 * will lead to corrupt data.
 */

@Private
public class ValuesIterator<KEY, VALUE> {
    private static final Log LOG = LogFactory.getLog(ValuesIterator.class.getName());

    /** Underlying sorted input of serialized key/value records. */
    protected TezRawKeyValueIterator in;

    private KEY key; // current key
    private KEY nextKey; // most recently deserialized key, not yet promoted to current
    private VALUE value; // current value
    private boolean more; // more records in the input
    private final RawComparator<KEY> comparator;
    private final Deserializer<KEY> keyDeserializer;
    private final Deserializer<VALUE> valDeserializer;
    private final DataInputBuffer keyIn = new DataInputBuffer();
    private final DataInputBuffer valueIn = new DataInputBuffer();
    private final TezCounter inputKeyCounter;
    private final TezCounter inputValueCounter;

    // Bumped every time nextKey() advances to a new key group; value iterators
    // capture it at creation time to detect use after moveToNext().
    private int keyCtr = 0;
    private boolean hasMoreValues; // For the current key.
    private boolean isFirstRecord = true;

    /**
     * Creates an iterator over {@code in} and opens the key/value deserializers.
     *
     * @param in sorted input to read serialized records from
     * @param comparator used to decide whether two consecutive keys belong to
     *        the same group
     * @param keyClass class used to look up the key deserializer
     * @param valClass class used to look up the value deserializer
     * @param conf configuration handed to the {@link SerializationFactory}
     * @param inputKeyCounter incremented once per key group
     * @param inputValueCounter incremented once per value returned
     * @throws IOException if a deserializer cannot be opened
     */
    public ValuesIterator(TezRawKeyValueIterator in, RawComparator<KEY> comparator, Class<KEY> keyClass,
            Class<VALUE> valClass, Configuration conf, TezCounter inputKeyCounter, TezCounter inputValueCounter)
            throws IOException {
        this.in = in;
        this.comparator = comparator;
        this.inputKeyCounter = inputKeyCounter;
        this.inputValueCounter = inputValueCounter;
        SerializationFactory serializationFactory = new SerializationFactory(conf);
        this.keyDeserializer = serializationFactory.getDeserializer(keyClass);
        this.keyDeserializer.open(keyIn);
        this.valDeserializer = serializationFactory.getDeserializer(valClass);
        this.valDeserializer.open(this.valueIn);
        LOG.info("keyDeserializer=" + keyDeserializer + "; valueDeserializer=" + valDeserializer);
    }

    /** Returns the underlying raw input iterator. */
    TezRawKeyValueIterator getRawIterator() {
        return in;
    }

    /**
     * Move to the next K-Vs pair. Any values of the current key that have not
     * been consumed are skipped.
     *
     * @return true if another pair exists, otherwise false.
     * @throws IOException if reading from the underlying input fails
     */
    public boolean moveToNext() throws IOException {
        if (isFirstRecord) {
            readNextKey();
            key = nextKey;
            nextKey = null;
            // readNextKey() compared against a null current key and therefore
            // cleared hasMoreValues; the first record always has a value.
            hasMoreValues = more;
            isFirstRecord = false;
            if (more) {
                // The first key group has to be counted here: nextKey() only
                // counts the groups that follow it, which previously left the
                // counter one short of the real number of groups.
                inputKeyCounter.increment(1);
            }
        } else {
            nextKey();
        }
        return more;
    }

    /** The current key. */
    public KEY getKey() {
        return key;
    }

    // TODO NEWTEZ Maybe add another method which returns an iterator instead of iterable

    /**
     * Returns the values belonging to the current key.
     *
     * <p>Every iterator obtained from the returned iterable reads from the
     * same shared input: each {@code next()} deserializes the current value
     * and then advances the input by one record. An iterator created for one
     * key must not be used after {@link #moveToNext()} has moved to another
     * key; doing so fails with an {@link IllegalStateException}.
     *
     * @return an iterable over the remaining values of the current key
     */
    public Iterable<VALUE> getValues() {
        return new Iterable<VALUE>() {

            @Override
            public Iterator<VALUE> iterator() {

                return new Iterator<VALUE>() {

                    // Key group this iterator was created for.
                    private final int keyNumber = keyCtr;

                    @Override
                    public boolean hasNext() {
                        return hasMoreValues;
                    }

                    @Override
                    public VALUE next() {
                        if (!hasMoreValues) {
                            throw new NoSuchElementException("iterate past last value");
                        }
                        Preconditions.checkState(keyNumber == keyCtr,
                                "Cannot use values iterator on the previous K-V pair after moveToNext has been invoked to move to the next K-V pair");

                        try {
                            // Deserialize the value of the record the input is
                            // positioned on, then look ahead to the next record
                            // to learn whether it still belongs to this key.
                            readNextValue();
                            readNextKey();
                        } catch (IOException ie) {
                            throw new RuntimeException("problem advancing post rec#" + keyCtr, ie);
                        }
                        inputValueCounter.increment(1);
                        return value;
                    }

                    @Override
                    public void remove() {
                        throw new UnsupportedOperationException("Cannot remove elements");
                    }
                };
            }
        };
    }

    /**
     * Start processing next unique key: skips the unread values of the current
     * key and promotes the look-ahead key to be the current one.
     *
     * @throws IOException if reading from the underlying input fails
     */
    private void nextKey() throws IOException {
        // read until we find a new key
        while (hasMoreValues) {
            readNextKey();
        }
        if (more) {
            inputKeyCounter.increment(1);
            ++keyCtr;
        }

        // move the next key to the current one; the old current key object is
        // kept as nextKey so it can be handed to the deserializer again
        KEY tmpKey = key;
        key = nextKey;
        nextKey = tmpKey;
        hasMoreValues = more;
    }

    /**
     * Read the next key - which may be the same as the current key. Updates
     * {@code more} and {@code hasMoreValues} accordingly.
     *
     * @throws IOException if reading from the underlying input fails
     */
    private void readNextKey() throws IOException {
        more = in.next();
        if (more) {
            DataInputBuffer nextKeyBytes = in.getKey();
            if (!in.isSameKey()) {
                keyIn.reset(nextKeyBytes.getData(), nextKeyBytes.getPosition(),
                        nextKeyBytes.getLength() - nextKeyBytes.getPosition());
                nextKey = keyDeserializer.deserialize(nextKey);
                // TODO Is a counter increment required here ?
                hasMoreValues = key != null && (comparator.compare(key, nextKey) == 0);
            } else {
                // The input already reports this record as having the same key
                // as the previous one, so no deserialization or comparison is
                // needed.
                hasMoreValues = in.isSameKey();
            }
        } else {
            hasMoreValues = false;
        }
    }

    /**
     * Read the next value: deserializes the value of the record the input is
     * currently positioned on into {@code value}.
     *
     * @throws IOException if deserialization fails
     */
    private void readNextValue() throws IOException {
        DataInputBuffer nextValueBytes = in.getValue();
        valueIn.reset(nextValueBytes.getData(), nextValueBytes.getPosition(),
                nextValueBytes.getLength() - nextValueBytes.getPosition());
        value = valDeserializer.deserialize(value);
    }
}