tap.core.CombinerBridge.java Source code

Java tutorial

Introduction

Here is the source code for tap.core.CombinerBridge.java

Source

/*
 * Licensed to Think Big Analytics, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Think Big Analytics, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * 
 * Copyright 2010 Think Big Analytics. All Rights Reserved.
 */
package tap.core;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.mapred.*;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

import tap.Phase;
import tap.TapReducer;
import tap.core.io.BinaryKey;
import tap.util.ReflectUtils;

/**
 * Bridge between a {@link org.apache.hadoop.mapred.Reducer} and an {@link AvroReducer} used when combining. When combining, map
 * output pairs must be split before they're collected.
 *
 * <p>Each datum emitted by the combiner is re-paired with a {@link BinaryKey} that is filled in
 * by a {@link ReflectionKeyExtractor} built from the output schema and the group-by / sort-by
 * settings read from the job configuration.
 *
 * @param <V> type of the values being combined (both input and output of the combiner)
 */
public class CombinerBridge<V> extends BaseAvroReducer<V, V, AvroKey<BinaryKey>, AvroValue<V>> {

    /** Schema obtained from {@code ReflectUtils.getSchema(out)} during {@link #configure(JobConf)}. */
    private Schema schema;
    /** Value of the {@code Phase.GROUP_BY} job setting; may be {@code null} if the setting is absent. */
    private String groupBy;
    /** Value of the {@code Phase.SORT_BY} job setting; may be {@code null} if the setting is absent. */
    private String sortBy;

    /**
     * Instantiates the combiner implementation named by the {@code Phase.COMBINER} job setting,
     * falling back to {@link TapReducer} when the setting is absent.
     *
     * @param conf job configuration to read the combiner class from and to pass to the new instance
     * @return a new combiner instance
     */
    @Override
    @SuppressWarnings("unchecked")
    protected TapReducerInterface<V, V> getReducer(JobConf conf) {
        // The configured class is only known as a raw TapReducerInterface, hence the unchecked
        // conversion to TapReducerInterface<V, V>.
        return ReflectionUtils
                .newInstance(conf.getClass(Phase.COMBINER, TapReducer.class, TapReducerInterface.class), conf);
    }

    /**
     * Runs the superclass configuration, then captures the schema and the group-by / sort-by
     * settings needed later to build the key extractor.
     *
     * @param conf job configuration
     */
    @Override
    public void configure(JobConf conf) {
        super.configure(conf);
        // NOTE(review): 'out' is inherited; presumably it is set up by super.configure(conf), so
        // the order of these two statements matters - confirm against BaseAvroReducer.
        this.schema = ReflectUtils.getSchema(out);
        this.groupBy = conf.get(Phase.GROUP_BY);
        this.sortBy = conf.get(Phase.SORT_BY);
    }

    /**
     * Collector that turns each emitted datum back into a (key, value) pair and forwards it to
     * the wrapped {@link OutputCollector}.
     *
     * <p>A single key object and a single pair of Avro wrappers are reused for every call to
     * {@link #collect(Object)}; the same instances are handed to the wrapped collector each time.
     *
     * <p>Declared {@code static} because it never touches the enclosing {@code CombinerBridge}
     * instance.
     *
     * @param <VC> type of the collected values
     */
    private static final class Collector<VC> extends AvroMultiCollector<VC> {
        private final AvroKey<BinaryKey> keyWrapper = new AvroKey<BinaryKey>(null);
        private final AvroValue<VC> valueWrapper = new AvroValue<VC>(null);
        private final KeyExtractor<BinaryKey, VC> extractor;
        private final BinaryKey key;
        private final OutputCollector<AvroKey<BinaryKey>, AvroValue<VC>> collector;

        /**
         * @param collector downstream collector that receives the wrapped key/value pairs
         * @param extractor supplies the reusable key instance and fills it in from each datum
         */
        public Collector(OutputCollector<AvroKey<BinaryKey>, AvroValue<VC>> collector,
                KeyExtractor<BinaryKey, VC> extractor) {
            this.collector = collector;
            this.extractor = extractor;
            // One key instance is obtained up front and wrapped once; collect() only mutates it.
            key = extractor.getProtypeKey();
            keyWrapper.datum(key);
        }

        /**
         * Populates the reusable key from {@code datum}, wraps the datum as the value and emits
         * the pair to the downstream collector.
         *
         * @param datum value to emit
         * @throws IOException if the downstream collector fails
         */
        public void collect(VC datum) throws IOException {
            extractor.setKey(datum, key);
            valueWrapper.datum(datum);
            collector.collect(keyWrapper, valueWrapper);
        }
    }

    /**
     * Builds the collector handed to the combiner: a {@link Collector} that derives keys with a
     * {@link ReflectionKeyExtractor} configured from the captured schema, group-by and sort-by
     * settings.
     *
     * @param collector downstream Hadoop collector
     * @param reporter  not used by this implementation
     * @return a collector that re-keys each emitted datum before forwarding it
     */
    @Override
    protected AvroMultiCollector<V> getCollector(OutputCollector<AvroKey<BinaryKey>, AvroValue<V>> collector,
            Reporter reporter) {
        KeyExtractor<BinaryKey, V> extractor = new ReflectionKeyExtractor<V>(schema, groupBy, sortBy);
        // Parameterized as Collector<V> instead of a raw type. Generics are erased at runtime, so
        // this only restores compile-time checking.
        // NOTE(review): an earlier comment here suggested datums may actually be
        // GenericData.Record at runtime - verify against the callers.
        return new Collector<V>(collector, extractor);
    }

}