cascading.tap.MultiSinkTap.java Source code

Java tutorial

Introduction

Here is the source code for the class cascading.tap.MultiSinkTap (file MultiSinkTap.java).

Source

/*
 * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Cascading is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Cascading is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Cascading.  If not, see <http://www.gnu.org/licenses/>.
 */

package cascading.tap;

import java.beans.ConstructorProperties;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import cascading.scheme.Scheme;
import cascading.scheme.SequenceFile;
import cascading.tap.hadoop.MultiInputFormat;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import cascading.tuple.TupleEntryCollector;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.log4j.Logger;

/**
 * Class MultiSinkTap is both a {@link CompositeTap} and {@link SinkTap} that can write to multiple child {@link Tap} instances simultaneously.
 * <p/>
 * It is the counterpart to {@link MultiSourceTap}.
 */
public class MultiSinkTap extends SinkTap implements CompositeTap {
    /** Field LOG */
    private static final Logger LOG = Logger.getLogger(MultiSinkTap.class);

    /** Field taps */
    private Tap[] taps;
    /** Field tempPath */
    private String tempPath = "__multisink_placeholder"
            + Integer.toString((int) (System.currentTimeMillis() * Math.random()));
    /** Field childConfigs */
    private List<Map<String, String>> childConfigs;

    private class MultiSinkCollector extends TupleEntryCollector implements OutputCollector {
        OutputCollector[] collectors;

        public MultiSinkCollector(JobConf conf, Tap... taps) throws IOException {
            collectors = new OutputCollector[taps.length];

            conf = new JobConf(conf);

            JobConf[] jobConfs = MultiInputFormat.getJobConfs(conf, childConfigs);

            for (int i = 0; i < taps.length; i++) {
                Tap tap = taps[i];
                LOG.info("opening for write: " + tap.toString());

                collectors[i] = (OutputCollector) tap.openForWrite(jobConfs[i]);
            }
        }

        protected void collect(Tuple tuple) {
            throw new UnsupportedOperationException("collect should never be called on MultiSinkCollector");
        }

        public void collect(Object key, Object value) throws IOException {
            for (OutputCollector collector : collectors)
                collector.collect(key, value);
        }

        @Override
        public void close() {
            super.close();

            try {
                for (OutputCollector collector : collectors) {
                    try {
                        ((TupleEntryCollector) collector).close();
                    } catch (Exception exception) {
                        LOG.warn("exception closing TupleEntryCollector", exception);
                    }
                }
            } finally {
                collectors = null;
            }
        }
    }

    /**
     * Constructor MultiSinkTap creates a new MultiSinkTap instance.
     *
     * @param taps of type Tap...
     */
    @ConstructorProperties({ "taps" })
    public MultiSinkTap(Tap... taps) {
        this.taps = taps;
    }

    protected Tap[] getTaps() {
        return taps;
    }

    @Override
    public Tap[] getChildTaps() {
        return Arrays.copyOf(taps, taps.length);
    }

    @Override
    public boolean isWriteDirect() {
        return true;
    }

    @Override
    public Path getPath() {
        return new Path(tempPath);
    }

    @Override
    public TupleEntryCollector openForWrite(JobConf conf) throws IOException {
        return new MultiSinkCollector(conf, getTaps());
    }

    @Override
    public void sinkInit(JobConf conf) throws IOException {
        childConfigs = new ArrayList<Map<String, String>>();

        for (int i = 0; i < getTaps().length; i++) {
            Tap tap = getTaps()[i];
            JobConf jobConf = new JobConf(conf);

            tap.sinkInit(jobConf);

            childConfigs.add(MultiInputFormat.getConfig(conf, jobConf));
        }
    }

    @Override
    public boolean makeDirs(JobConf conf) throws IOException {
        for (Tap tap : getTaps()) {
            if (!tap.makeDirs(conf))
                return false;
        }

        return true;
    }

    @Override
    public boolean deletePath(JobConf conf) throws IOException {
        for (Tap tap : getTaps()) {
            if (!tap.deletePath(conf))
                return false;
        }

        return true;
    }

    @Override
    public boolean pathExists(JobConf conf) throws IOException {
        for (Tap tap : getTaps()) {
            if (!tap.pathExists(conf))
                return false;
        }

        return true;
    }

    @Override
    public long getPathModified(JobConf conf) throws IOException {
        long modified = getTaps()[0].getPathModified(conf);

        for (int i = 1; i < getTaps().length; i++)
            modified = Math.max(getTaps()[i].getPathModified(conf), modified);

        return modified;
    }

    @Override
    public void sink(TupleEntry tupleEntry, OutputCollector outputCollector) throws IOException {
        for (int i = 0; i < taps.length; i++)
            taps[i].sink(tupleEntry, ((MultiSinkCollector) outputCollector).collectors[i]);
    }

    @Override
    public Scheme getScheme() {
        if (super.getScheme() != null)
            return super.getScheme();

        Set<Comparable> fieldNames = new LinkedHashSet<Comparable>();

        for (int i = 0; i < getTaps().length; i++) {
            for (Object o : getTaps()[i].getSinkFields())
                fieldNames.add((Comparable) o);
        }

        Fields allFields = new Fields(fieldNames.toArray(new Comparable[fieldNames.size()]));

        setScheme(new SequenceFile(allFields));

        return super.getScheme();
    }

    @Override
    public String toString() {
        return "MultiSinkTap[" + (taps == null ? "none" : Arrays.asList(taps)) + ']';
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (!(o instanceof MultiSinkTap))
            return false;
        if (!super.equals(o))
            return false;

        MultiSinkTap that = (MultiSinkTap) o;

        if (!Arrays.equals(taps, that.taps))
            return false;

        return true;
    }

    @Override
    public int hashCode() {
        int result = super.hashCode();
        result = 31 * result + (taps != null ? Arrays.hashCode(taps) : 0);
        return result;
    }
}