Java tutorial
/* * Licensed to Think Big Analytics, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Think Big Analytics, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Copyright 2010 Think Big Analytics. All Rights Reserved. */ package tap.core; import java.io.IOException; import org.apache.avro.mapred.*; import org.apache.avro.Schema; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.*; import org.apache.hadoop.mapred.lib.MultipleOutputs; import org.apache.hadoop.util.ReflectionUtils; import tap.Phase; import tap.TapReducer; import tap.core.io.BinaryKey; import tap.core.mapreduce.io.ProtobufWritable; import tap.core.mapreduce.output.TapfileOutputFormat; import tap.util.ObjectFactory; import tap.util.ReflectUtils; /** * Bridge between a {@link org.apache.hadoop.mapred.Reducer} and an {@link AvroReducer}. */ public class ReducerBridge<V, OUT> extends BaseAvroReducer<V, OUT, AvroWrapper<OUT>, NullWritable> { private boolean isTextOutput = false; private boolean isProtoOutput = false; private boolean reduceOutKeyChanges = false; private Schema reduceOutSchema; private String multiOutputPrefix; private MultipleOutputs multiOutput; private Class<?> mapOutClass; private Class<?> reduceOutClass; ReflectionKeyExtractor extractor; @Override public void configure(JobConf conf) { super.configure(conf); isTextOutput = conf.getOutputFormat() instanceof TextOutputFormat; isProtoOutput = conf.getOutputFormat() instanceof TapfileOutputFormat; if (isProtoOutput) { try { mapOutClass = Class.forName(conf.get(Phase.MAP_OUT_CLASS)); reduceOutClass = Class.forName(conf.get(Phase.REDUCE_OUT_CLASS)); if (mapOutClass != reduceOutClass) { reduceOutKeyChanges = true; String groupBy = conf.get(Phase.GROUP_BY); String sortBy = conf.get(Phase.SORT_BY); reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass)); extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema, groupBy, sortBy); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX); if (multiOutputPrefix == null) multiOutputPrefix = "out"; MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, conf.getOutputFormat().getClass(), conf.getOutputKeyClass(), conf.getOutputValueClass()); this.multiOutput = new MultipleOutputs(conf); } @Override @SuppressWarnings("unchecked") protected TapReducerInterface<V, OUT> getReducer(JobConf conf) { Class<? extends TapReducerInterface> theClass = conf.getClass(Phase.REDUCER, TapReducer.class, TapReducerInterface.class); return ReflectionUtils.newInstance(theClass, conf); } private class ReduceCollector<AO, OUT> extends AvroMultiCollector<AO> implements BinaryKeyAwareCollector { private final AvroWrapper<OUT> wrapper = new AvroWrapper<OUT>(null); private Reporter reporter; private OutputCollector originalCollector; private ProtobufWritable protobufWritable = new ProtobufWritable(); private BinaryKey binaryKey; BinaryKey reduceOutKey = null; public ReduceCollector(OutputCollector<?, NullWritable> out, Reporter reporter) { this.originalCollector = out; this.reporter = reporter; } @SuppressWarnings("unchecked") private void _collect(Object datum, OutputCollector out) throws IOException { if (isTextOutput) { out.collect(datum, NullWritable.get()); } else if (isProtoOutput) { if (datum != null) protobufWritable.setConverter(datum.getClass()); protobufWritable.set(datum); if (reduceOutKeyChanges) { if (reduceOutKey == null) reduceOutKey = extractor.getProtypeKey(); extractor.setKey(datum, reduceOutKey); out.collect(reduceOutKey, protobufWritable); } else { out.collect(binaryKey, protobufWritable); // out.collect(NullWritable.get(), protobufWritable); } } else { wrapper.datum((OUT) datum); out.collect(wrapper, NullWritable.get()); } } @Override public void collect(Object datum) throws IOException { _collect(datum, originalCollector); } @Override public void collect(Object datum, String multiName) throws IOException { OutputCollector collector = multiOutput.getCollector(multiOutputPrefix, multiName, reporter); _collect(datum, collector); } @Override public void setCurrentKey(BinaryKey key) { binaryKey = key; } } @Override protected AvroMultiCollector<OUT> getCollector(OutputCollector<AvroWrapper<OUT>, NullWritable> collector, Reporter reporter) { return new ReduceCollector(collector, reporter); } @Override public void close() throws IOException { super.close(); multiOutput.close(); } }