cascading.scheme.WritableSequenceFile.java Source code

Java tutorial

Introduction

Here is the source code for cascading.scheme.WritableSequenceFile.java

Source

/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.scheme;

import java.beans.ConstructorProperties;
import java.io.IOException;

import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;

/**
 * Class WritableSequenceFile is a sub-class of {@link SequenceFile} that reads and writes values of the given
 * {@code writableType} {@code Class}, instead of {@link Tuple} instances used by default in SequenceFile.
 * <p/>
 * This Class is a convenience for those who need to read/write specific types from existing sequence files without
 * them being wrapped in a Tuple instance.
 * <p/>
 * Note that, due to the nature of sequence files, only one type can be stored in each of the key and value positions,
 * though the two positions may hold different types (e.g. LongWritable, Text).
 * <p/>
 * If keyType is null, valueType must not be null, and vice versa, assuming you only wish to store a single value.
 * <p/>
 * {@link NullWritable} is used as the empty type for either a null keyType or valueType.
 */
public class WritableSequenceFile extends SequenceFile {
    /** Type stored in the key position, or {@code null} when only values are read/written. */
    protected Class<? extends Writable> keyType;
    /** Type stored in the value position, or {@code null} when only keys are read/written. */
    protected Class<? extends Writable> valueType;

    /**
     * Constructor WritableSequenceFile creates a new WritableSequenceFile instance that only reads/writes
     * the value position. Delegates to the three argument constructor with a {@code null} keyType.
     *
     * @param fields    of type Fields, must declare exactly one field
     * @param valueType of type Class<? extends Writable>, may not be null
     * @throws IllegalArgumentException if valueType is null or fields does not declare exactly one field
     */
    @ConstructorProperties({ "fields", "valueType" })
    public WritableSequenceFile(Fields fields, Class<? extends Writable> valueType) {
        this(fields, null, valueType);
    }

    /**
     * Constructor WritableSequenceFile creates a new WritableSequenceFile instance.
     * <p/>
     * At most one of keyType and valueType may be null. When one of them is null, fields must declare
     * exactly one field; when both are given, fields must declare exactly two fields.
     *
     * @param fields    of type Fields
     * @param keyType   of type Class<? extends Writable>, null when only values are read/written
     * @param valueType of type Class<? extends Writable>, null when only keys are read/written
     * @throws IllegalArgumentException if both types are null, or if the number of declared fields does
     *                                  not match the number of non-null types
     */
    @ConstructorProperties({ "fields", "keyType", "valueType" })
    public WritableSequenceFile(Fields fields, Class<? extends Writable> keyType,
            Class<? extends Writable> valueType) {
        super(fields);
        this.keyType = keyType;
        this.valueType = valueType;

        if (keyType == null && valueType == null) {
            throw new IllegalArgumentException("both keyType and valueType may not be null");
        }

        // A null keyType means the key position is unused, so only 'values' are handled
        // (and vice versa); the messages below name the position that is actually in use.
        if (keyType == null && fields.size() != 1) {
            throw new IllegalArgumentException(
                    "fields must declare exactly one field when only reading/writing 'values' from a sequence file");
        } else if (valueType == null && fields.size() != 1) {
            throw new IllegalArgumentException(
                    "fields must declare exactly one field when only reading/writing 'keys' from a sequence file");
        } else if (keyType != null && valueType != null && fields.size() != 2) {
            throw new IllegalArgumentException(
                    "fields must declare exactly two fields when only reading/writing 'keys' and 'values' from a sequence file");
        }
    }

    /**
     * Runs the parent sink initialization, then sets the output key and value classes on the given
     * JobConf to keyType and valueType, substituting {@link NullWritable} for whichever one is null.
     *
     * @param tap  of type Tap, passed through to the parent implementation
     * @param conf of type JobConf, receives the output key/value classes
     */
    @Override
    public void sinkInit(Tap tap, JobConf conf) {
        super.sinkInit(tap, conf);

        conf.setOutputKeyClass(keyType != null ? keyType : NullWritable.class);
        conf.setOutputValueClass(valueType != null ? valueType : NullWritable.class);
    }

    /**
     * Builds the Tuple for a single key/value pair: only the value when keyType is null, only the key
     * when valueType is null, otherwise the key followed by the value.
     *
     * @param key   of type Object
     * @param value of type Object
     * @return a new Tuple holding the key and/or value
     */
    @Override
    public Tuple source(Object key, Object value) {
        if (keyType == null) {
            return new Tuple(value);
        }

        if (valueType == null) {
            return new Tuple(key);
        }

        return new Tuple(key, value);
    }

    /**
     * Collects one key/value pair for the given TupleEntry. The position whose type is null is filled
     * with the {@link NullWritable} singleton; the other position (or both, when both types are set)
     * is looked up in the entry via the sink fields.
     *
     * @param tupleEntry      of type TupleEntry, the entry to write
     * @param outputCollector of type OutputCollector, receives the key/value pair
     * @throws IOException declared by the overridden method and by {@code OutputCollector#collect}
     */
    @Override
    public void sink(TupleEntry tupleEntry, OutputCollector outputCollector) throws IOException {
        // Both positions default to NullWritable; only the positions in use are overwritten.
        Object keyValue = NullWritable.get();
        Object valueValue = NullWritable.get();

        if (keyType == null) {
            valueValue = tupleEntry.getObject(getSinkFields());
        } else if (valueType == null) {
            keyValue = tupleEntry.getObject(getSinkFields());
        } else {
            // Two sink fields: the first is the key, the second is the value.
            keyValue = tupleEntry.getObject(getSinkFields().get(0));
            valueValue = tupleEntry.getObject(getSinkFields().get(1));
        }

        outputCollector.collect(keyValue, valueValue);
    }

    /**
     * Two WritableSequenceFile instances are equal when the parent considers them equal and both their
     * keyType and valueType match (null only matches null).
     */
    @Override
    public boolean equals(Object object) {
        if (this == object) {
            return true;
        }
        if (!(object instanceof WritableSequenceFile)) {
            return false;
        }
        if (!super.equals(object)) {
            return false;
        }

        WritableSequenceFile that = (WritableSequenceFile) object;

        return sameType(keyType, that.keyType) && sameType(valueType, that.valueType);
    }

    /**
     * Combines the parent hash code with the hash codes of keyType and valueType, using 0 for a null type.
     */
    @Override
    public int hashCode() {
        int result = super.hashCode();
        result = 31 * result + (keyType != null ? keyType.hashCode() : 0);
        result = 31 * result + (valueType != null ? valueType.hashCode() : 0);
        return result;
    }

    /** Null-safe equality of two type references: null only equals null. */
    private static boolean sameType(Class<?> left, Class<?> right) {
        return left == null ? right == null : left.equals(right);
    }
}