tap.core.ReducerBridge.java Source code

Java tutorial

Introduction

Here is the source code for tap.core.ReducerBridge.java

Source

/*
 * Licensed to Think Big Analytics, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Think Big Analytics, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * Copyright 2010 Think Big Analytics. All Rights Reserved.
 */
package tap.core;

import java.io.IOException;

import org.apache.avro.mapred.*;
import org.apache.avro.Schema;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapred.lib.MultipleOutputs;
import org.apache.hadoop.util.ReflectionUtils;

import tap.Phase;
import tap.TapReducer;
import tap.core.io.BinaryKey;
import tap.core.mapreduce.io.ProtobufWritable;
import tap.core.mapreduce.output.TapfileOutputFormat;
import tap.util.ObjectFactory;
import tap.util.ReflectUtils;

/**
 * Bridge between a Hadoop {@link org.apache.hadoop.mapred.Reducer} and a Tap
 * {@link TapReducerInterface}.
 *
 * <p>
 * During {@link #configure(JobConf)} the bridge inspects the job's output format to decide how
 * reducer output is written: as plain text, as protobuf records (Tapfile output), or wrapped in an
 * {@link AvroWrapper}. It also registers a multi named output so that the reducer can direct
 * records to additional named outputs.
 *
 * @param <V>   input value type, passed through to {@code BaseAvroReducer} and the wrapped reducer
 * @param <OUT> datum type emitted by the wrapped reducer; wrapped in an {@link AvroWrapper} when
 *              the output is neither text nor Tapfile
 */
public class ReducerBridge<V, OUT> extends BaseAvroReducer<V, OUT, AvroWrapper<OUT>, NullWritable> {

    /** True when the job's output format is a {@link TextOutputFormat}. */
    private boolean isTextOutput = false;
    /** True when the job's output format is a {@link TapfileOutputFormat}. */
    private boolean isProtoOutput = false;
    /** True when the reduce output class differs from the map output class (Tapfile output only). */
    private boolean reduceOutKeyChanges = false;
    private Schema reduceOutSchema;

    private String multiOutputPrefix;
    private MultipleOutputs multiOutput;
    private Class<?> mapOutClass;
    private Class<?> reduceOutClass;
    /** Builds the output key from each datum; only set when {@link #reduceOutKeyChanges} is true. */
    ReflectionKeyExtractor extractor;

    /**
     * Reads the output configuration of the job and prepares the multi named output.
     *
     * @param conf the job configuration
     * @throws IllegalStateException if the output is a Tapfile and the map/reduce output classes
     *                               cannot be loaded or the key extractor cannot be created
     */
    @Override
    public void configure(JobConf conf) {
        super.configure(conf);

        // Resolve the output format once instead of asking the JobConf for it three times.
        OutputFormat<?, ?> outputFormat = conf.getOutputFormat();
        isTextOutput = outputFormat instanceof TextOutputFormat;
        isProtoOutput = outputFormat instanceof TapfileOutputFormat;

        if (isProtoOutput) {
            configureProtoOutputKey(conf);
        }

        multiOutputPrefix = conf.get(Phase.MULTIPLE_OUTPUT_PREFIX);
        if (multiOutputPrefix == null) {
            multiOutputPrefix = "out";
        }

        MultipleOutputs.addMultiNamedOutput(conf, multiOutputPrefix, outputFormat.getClass(),
                conf.getOutputKeyClass(), conf.getOutputValueClass());

        this.multiOutput = new MultipleOutputs(conf);
    }

    /**
     * Determines whether the reducer emits a different class than the mapper and, if so, prepares
     * the extractor that derives the output key from each emitted datum.
     *
     * <p>
     * When either class name is missing from the configuration the key is left unchanged, which
     * matches the previous behaviour for that case. Any other failure is reported immediately:
     * continuing would either write records under the wrong key or fail later with a
     * {@link NullPointerException} on the first collected record.
     */
    private void configureProtoOutputKey(JobConf conf) {
        String mapOutName = conf.get(Phase.MAP_OUT_CLASS);
        String reduceOutName = conf.get(Phase.REDUCE_OUT_CLASS);
        if (mapOutName == null || reduceOutName == null) {
            return;
        }

        try {
            mapOutClass = Class.forName(mapOutName);
            reduceOutClass = Class.forName(reduceOutName);
            if (mapOutClass != reduceOutClass) {
                String groupBy = conf.get(Phase.GROUP_BY);
                String sortBy = conf.get(Phase.SORT_BY);
                reduceOutSchema = ReflectUtils.getSchema(ObjectFactory.newInstance(reduceOutClass));
                extractor = ReflectionKeyExtractor.getReflectionKeyExtractorForReduceOutKey(reduceOutSchema,
                        groupBy, sortBy);
                // Only flip the flag once the extractor exists, so the collector never sees
                // reduceOutKeyChanges == true together with a null extractor.
                reduceOutKeyChanges = true;
            }
        } catch (Exception e) {
            throw new IllegalStateException("Unable to set up the reduce output key for map output class '"
                    + mapOutName + "' and reduce output class '" + reduceOutName + "'", e);
        }
    }

    /**
     * Instantiates the reducer class configured under {@link Phase#REDUCER}, falling back to
     * {@link TapReducer} when none is configured.
     */
    @Override
    @SuppressWarnings("unchecked")
    protected TapReducerInterface<V, OUT> getReducer(JobConf conf) {
        Class<? extends TapReducerInterface> theClass = conf.getClass(Phase.REDUCER, TapReducer.class,
                TapReducerInterface.class);
        return ReflectionUtils.newInstance(theClass, conf);
    }

    /**
     * Collector handed to the wrapped reducer. Each datum is written in the form selected by the
     * enclosing bridge's output flags (text, protobuf or Avro wrapper), either to the job's
     * primary collector or to a named output obtained from {@link MultipleOutputs}.
     */
    private class ReduceCollector<AO> extends AvroMultiCollector<AO> implements BinaryKeyAwareCollector {
        private final AvroWrapper<OUT> wrapper = new AvroWrapper<OUT>(null);
        private final Reporter reporter;
        // Raw on purpose: the key/value types written depend on the output mode.
        private final OutputCollector originalCollector;
        private final ProtobufWritable protobufWritable = new ProtobufWritable();
        /** Key of the group currently being reduced, supplied through {@link #setCurrentKey}. */
        private BinaryKey binaryKey;
        /** Reusable key for the changed-key case; created lazily on the first record. */
        private BinaryKey reduceOutKey = null;

        public ReduceCollector(OutputCollector<?, NullWritable> out, Reporter reporter) {
            this.originalCollector = out;
            this.reporter = reporter;
        }

        /**
         * Writes {@code datum} to {@code out} in the representation required by the output mode.
         */
        @SuppressWarnings({ "unchecked", "rawtypes" })
        private void collectTo(Object datum, OutputCollector out) throws IOException {
            if (isTextOutput) {
                out.collect(datum, NullWritable.get());
                return;
            }

            if (isProtoOutput) {
                if (datum != null) {
                    protobufWritable.setConverter(datum.getClass());
                }
                protobufWritable.set(datum);

                if (reduceOutKeyChanges) {
                    // The reducer emits a different class than the mapper, so the key is
                    // rebuilt from the emitted datum instead of reusing the incoming key.
                    if (reduceOutKey == null) {
                        reduceOutKey = extractor.getProtypeKey();
                    }
                    extractor.setKey(datum, reduceOutKey);
                    out.collect(reduceOutKey, protobufWritable);
                } else {
                    out.collect(binaryKey, protobufWritable);
                }
                return;
            }

            wrapper.datum((OUT) datum);
            out.collect(wrapper, NullWritable.get());
        }

        /** Writes {@code datum} to the job's primary output. */
        @Override
        public void collect(Object datum) throws IOException {
            collectTo(datum, originalCollector);
        }

        /**
         * Writes {@code datum} to the named output {@code multiName} under the configured
         * multi-output prefix.
         */
        @Override
        public void collect(Object datum, String multiName) throws IOException {
            OutputCollector collector = multiOutput.getCollector(multiOutputPrefix, multiName, reporter);
            collectTo(datum, collector);
        }

        /** Records the key to use for protobuf output when the key does not change. */
        @Override
        public void setCurrentKey(BinaryKey key) {
            binaryKey = key;
        }
    }

    /** Wraps the job's collector in a {@link ReduceCollector} for the wrapped reducer. */
    @Override
    protected AvroMultiCollector<OUT> getCollector(OutputCollector<AvroWrapper<OUT>, NullWritable> collector,
            Reporter reporter) {
        return new ReduceCollector<OUT>(collector, reporter);
    }

    /**
     * Closes the wrapped reducer and then the named outputs.
     *
     * <p>
     * The named outputs are closed even when closing the reducer throws, so their writers are not
     * leaked; they are skipped only if {@link #configure(JobConf)} never created them.
     */
    @Override
    public void close() throws IOException {
        try {
            super.close();
        } finally {
            if (multiOutput != null) {
                multiOutput.close();
            }
        }
    }

}