// Java tutorial
/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tap; import java.beans.ConstructorProperties; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Set; import cascading.scheme.Scheme; import cascading.scheme.SequenceFile; import cascading.tap.hadoop.MultiInputFormat; import cascading.tuple.Fields; import cascading.tuple.Tuple; import cascading.tuple.TupleEntry; import cascading.tuple.TupleEntryCollector; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.OutputCollector; import org.apache.log4j.Logger; /** * Class MultiSinkTap is both a {@link CompositeTap} and {@link SinkTap} that can write to multiple child {@link Tap} instances simultaneously. * <p/> * It is the counterpart to {@link MultiSourceTap}. 
*/
public class MultiSinkTap extends SinkTap implements CompositeTap {
  /** Field LOG */
  private static final Logger LOG = Logger.getLogger(MultiSinkTap.class);

  /** Field taps, the child taps handed to the constructor. */
  private Tap[] taps;

  /**
   * Field tempPath, the placeholder path reported by {@link #getPath()}.
   * <p/>
   * The random suffix is computed as a {@code long}. The product of the current time in milliseconds and
   * {@code Math.random()} is almost always larger than {@code Integer.MAX_VALUE}, and narrowing such a value
   * to {@code int} saturates at {@code Integer.MAX_VALUE}, which would give nearly every instance the same suffix.
   */
  private String tempPath =
      "__multisink_placeholder" + Long.toString((long) (System.currentTimeMillis() * Math.random()));

  /** Field childConfigs, one entry per child tap, populated by {@link #sinkInit(JobConf)}. */
  private List<Map<String, String>> childConfigs;

  /**
   * Collector that opens one child collector per tap and forwards every key/value pair to each of them.
   */
  private class MultiSinkCollector extends TupleEntryCollector implements OutputCollector {
    /** One collector per tap, in tap order; {@code null} once the child collectors have been closed. */
    OutputCollector[] collectors;

    /**
     * Opens every given tap for writing, using the per-child configurations recorded in {@code childConfigs}.
     *
     * @param conf of type JobConf, copied before the child configurations are derived from it
     * @param taps of type Tap..., the taps to open, in order
     * @throws IOException when a child tap cannot be opened; children opened before the failure are closed
     */
    public MultiSinkCollector(JobConf conf, Tap... taps) throws IOException {
      collectors = new OutputCollector[taps.length];

      conf = new JobConf(conf);

      JobConf[] jobConfs = MultiInputFormat.getJobConfs(conf, childConfigs);

      boolean allOpened = false;

      try {
        for (int i = 0; i < taps.length; i++) {
          Tap tap = taps[i];

          LOG.info("opening for write: " + tap.toString());

          collectors[i] = (OutputCollector) tap.openForWrite(jobConfs[i]);
        }

        allOpened = true;
      } finally {
        // a failure part way through must not leak the collectors that were already opened
        if (!allOpened) {
          closeChildren();
        }
      }
    }

    /** Always throws; tuples reach the children through {@link #collect(Object, Object)} instead. */
    protected void collect(Tuple tuple) {
      throw new UnsupportedOperationException("collect should never be called on MultiSinkCollector");
    }

    /**
     * Forwards the given pair to every child collector, in tap order.
     *
     * @param key   of type Object
     * @param value of type Object
     * @throws IOException when a child collector fails; later children are not called
     */
    public void collect(Object key, Object value) throws IOException {
      for (OutputCollector collector : collectors) {
        collector.collect(key, value);
      }
    }

    /** Closes this collector and then every child collector; calling it again is a no-op for the children. */
    @Override
    public void close() {
      try {
        super.close();
      } finally {
        closeChildren();
      }
    }

    /**
     * Closes each opened child collector on a best-effort basis and drops the array.
     * A failure to close one child is logged and does not prevent the remaining children from being closed.
     */
    private void closeChildren() {
      if (collectors == null) {
        return; // already closed
      }

      try {
        for (OutputCollector collector : collectors) {
          if (collector == null) {
            continue; // never opened, only possible when the constructor failed part way
          }

          try {
            ((TupleEntryCollector) collector).close();
          } catch (Exception exception) {
            LOG.warn("exception closing TupleEntryCollector", exception);
          }
        }
      } finally {
        collectors = null;
      }
    }
  }

  /**
   * Constructor MultiSinkTap creates a new MultiSinkTap instance.
   *
   * @param taps of type Tap...
   */
  @ConstructorProperties({"taps"})
  public MultiSinkTap(Tap... taps) {
    this.taps = taps;
  }

  /**
   * Returns the internal array of child taps, not a copy.
   *
   * @return Tap[]
   */
  protected Tap[] getTaps() {
    return taps;
  }

  /**
   * Returns a copy of the child tap array.
   *
   * @return Tap[]
   */
  @Override
  public Tap[] getChildTaps() {
    return Arrays.copyOf(taps, taps.length);
  }

  /**
   * Always returns {@code true}.
   *
   * @return boolean
   */
  @Override
  public boolean isWriteDirect() {
    return true;
  }

  /**
   * Returns the placeholder path generated for this instance.
   *
   * @return Path
   */
  @Override
  public Path getPath() {
    return new Path(tempPath);
  }

  /**
   * Creates a collector that writes to every tap returned by {@link #getTaps()}.
   *
   * @param conf of type JobConf
   * @return TupleEntryCollector
   * @throws IOException when a child tap cannot be opened for writing
   */
  @Override
  public TupleEntryCollector openForWrite(JobConf conf) throws IOException {
    return new MultiSinkCollector(conf, getTaps());
  }

  /**
   * Initializes every child tap against its own copy of the given configuration and records, per child,
   * the result of {@code MultiInputFormat.getConfig(conf, childConf)}.
   *
   * @param conf of type JobConf
   * @throws IOException when a child tap fails to initialize; the recorded configurations are left untouched
   */
  @Override
  public void sinkInit(JobConf conf) throws IOException {
    // built locally and published only once complete, so a failing child never leaves a partial list behind
    List<Map<String, String>> configs = new ArrayList<Map<String, String>>();

    for (Tap tap : getTaps()) {
      JobConf jobConf = new JobConf(conf);

      tap.sinkInit(jobConf);

      configs.add(MultiInputFormat.getConfig(conf, jobConf));
    }

    childConfigs = configs;
  }

  /**
   * Calls {@code makeDirs} on each child tap in order, stopping at the first child that returns {@code false}.
   *
   * @param conf of type JobConf
   * @return boolean, {@code true} only when every child returned {@code true}
   * @throws IOException when a child tap fails
   */
  @Override
  public boolean makeDirs(JobConf conf) throws IOException {
    for (Tap tap : getTaps()) {
      if (!tap.makeDirs(conf)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Calls {@code deletePath} on each child tap in order, stopping at the first child that returns {@code false}.
   *
   * @param conf of type JobConf
   * @return boolean, {@code true} only when every child returned {@code true}
   * @throws IOException when a child tap fails
   */
  @Override
  public boolean deletePath(JobConf conf) throws IOException {
    for (Tap tap : getTaps()) {
      if (!tap.deletePath(conf)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Calls {@code pathExists} on each child tap in order, stopping at the first child that returns {@code false}.
   *
   * @param conf of type JobConf
   * @return boolean, {@code true} only when every child returned {@code true}
   * @throws IOException when a child tap fails
   */
  @Override
  public boolean pathExists(JobConf conf) throws IOException {
    for (Tap tap : getTaps()) {
      if (!tap.pathExists(conf)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Returns the largest {@code getPathModified} value reported by the child taps.
   * Requires at least one child tap, since the first child is read unconditionally.
   *
   * @param conf of type JobConf
   * @return long
   * @throws IOException when a child tap fails
   */
  @Override
  public long getPathModified(JobConf conf) throws IOException {
    Tap[] children = getTaps();
    long modified = children[0].getPathModified(conf);

    for (int i = 1; i < children.length; i++) {
      modified = Math.max(children[i].getPathModified(conf), modified);
    }

    return modified;
  }

  /**
   * Hands the tuple entry to every child tap, pairing the i-th tap with the i-th child collector.
   *
   * @param tupleEntry      of type TupleEntry
   * @param outputCollector of type OutputCollector, cast to the MultiSinkCollector of this class
   * @throws IOException when a child tap fails to sink the entry
   */
  @Override
  public void sink(TupleEntry tupleEntry, OutputCollector outputCollector) throws IOException {
    OutputCollector[] children = ((MultiSinkCollector) outputCollector).collectors;

    for (int i = 0; i < taps.length; i++) {
      taps[i].sink(tupleEntry, children[i]);
    }
  }

  /**
   * Returns the scheme already set on this tap when there is one. Otherwise a {@link SequenceFile} scheme is
   * created over the distinct sink fields of all child taps, in first-seen order, stored via {@code setScheme},
   * and then returned.
   *
   * @return Scheme
   */
  @Override
  public Scheme getScheme() {
    Scheme current = super.getScheme();

    if (current != null) {
      return current;
    }

    // LinkedHashSet drops duplicate field names while keeping the order in which they were first seen
    Set<Comparable> fieldNames = new LinkedHashSet<Comparable>();

    for (Tap tap : getTaps()) {
      for (Object field : tap.getSinkFields()) {
        fieldNames.add((Comparable) field);
      }
    }

    Fields allFields = new Fields(fieldNames.toArray(new Comparable[fieldNames.size()]));

    setScheme(new SequenceFile(allFields));

    return super.getScheme();
  }

  @Override
  public String toString() {
    return "MultiSinkTap[" + (taps == null ? "none" : Arrays.asList(taps)) + ']';
  }

  /**
   * Two instances are equal when the parent class considers them equal and their child tap arrays
   * are element-wise equal.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }

    if (!(o instanceof MultiSinkTap)) {
      return false;
    }

    if (!super.equals(o)) {
      return false;
    }

    return Arrays.equals(taps, ((MultiSinkTap) o).taps);
  }

  @Override
  public int hashCode() {
    // Arrays.hashCode already yields 0 for a null array
    return 31 * super.hashCode() + Arrays.hashCode(taps);
  }
}