edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java Source code

Introduction

Here is the source code for edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java. This operator descriptor wraps a Hadoop Reducer (either the old mapred API or the new mapreduce API, optionally the job's combiner) so that the reduce phase runs as a pre-clustered group-aggregation operator inside a Hyracks job.

Source

/*
 * Copyright 2009-2013 by The Regents of the University of California
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * you may obtain a copy of the License from
 * 
 *     http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.uci.ics.hyracks.dataflow.hadoop;

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.Counters.Counter;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RawKeyValueIterator;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.ReflectionUtils;

import edu.uci.ics.hyracks.api.context.IHyracksTaskContext;
import edu.uci.ics.hyracks.api.dataflow.IDataReader;
import edu.uci.ics.hyracks.api.dataflow.IDataWriter;
import edu.uci.ics.hyracks.api.dataflow.IOperatorNodePushable;
import edu.uci.ics.hyracks.api.dataflow.value.IComparator;
import edu.uci.ics.hyracks.api.dataflow.value.IComparatorFactory;
import edu.uci.ics.hyracks.api.dataflow.value.IRecordDescriptorProvider;
import edu.uci.ics.hyracks.api.dataflow.value.RecordDescriptor;
import edu.uci.ics.hyracks.api.exceptions.HyracksDataException;
import edu.uci.ics.hyracks.api.job.IOperatorDescriptorRegistry;
import edu.uci.ics.hyracks.dataflow.hadoop.data.KeyComparatorFactory;
import edu.uci.ics.hyracks.dataflow.hadoop.data.RawComparingComparatorFactory;
import edu.uci.ics.hyracks.dataflow.hadoop.util.DatatypeHelper;
import edu.uci.ics.hyracks.dataflow.hadoop.util.IHadoopClassFactory;
import edu.uci.ics.hyracks.dataflow.hadoop.util.MRContextUtil;
import edu.uci.ics.hyracks.dataflow.std.base.IOpenableDataWriterOperator;
import edu.uci.ics.hyracks.dataflow.std.group.DeserializedPreclusteredGroupOperator;
import edu.uci.ics.hyracks.dataflow.std.group.IGroupAggregator;
import edu.uci.ics.hyracks.dataflow.std.util.DeserializedOperatorNodePushable;
import edu.uci.ics.hyracks.hdfs.ContextFactory;

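/**
 * Hyracks operator descriptor that runs a Hadoop Reducer (old mapred API or new
 * mapreduce API, or the job's combiner when useAsCombiner is set) as a
 * deserialized, pre-clustered group-aggregation operator.
 */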
public class HadoopReducerOperatorDescriptor<K2, V2, K3, V3> extends AbstractHadoopOperatorDescriptor {
    private class ReducerAggregator implements IGroupAggregator {
        private Object reducer;
        private DataWritingOutputCollector<K3, V3> output;
        private Reporter reporter;
        private ReducerContext reducerContext;
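        // No-op RawKeyValueIterator stub: it only satisfies the new-API ReduceContext
        // constructor; the actual key and values are supplied through ValueIterator
        // via the overridden getValues()/nextKeyValue() methods below.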
        RawKeyValueIterator rawKeyValueIterator = new RawKeyValueIterator() {

            @Override
            public boolean next() throws IOException {
                return false;
            }

            @Override
            public DataInputBuffer getValue() throws IOException {
                return null;
            }

            @Override
            public Progress getProgress() {
                return null;
            }

            @Override
            public DataInputBuffer getKey() throws IOException {
                return null;
            }

            @Override
            public void close() throws IOException {

            }
        };

        class ReducerContext extends org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer.Context {
            private HadoopReducerOperatorDescriptor.ValueIterator iterator;

            @SuppressWarnings("unchecked")
            ReducerContext(org.apache.hadoop.mapreduce.Reducer reducer, JobConf conf)
                    throws IOException, InterruptedException, ClassNotFoundException {
                ((org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer) reducer).super(new MRContextUtil()
                        .createReduceContext(conf, new TaskAttemptID(), rawKeyValueIterator, null, null, null, null,
                                null, null, Class.forName("org.apache.hadoop.io.NullWritable"),
                                Class.forName("org.apache.hadoop.io.NullWritable")));
            }

            public void setIterator(HadoopReducerOperatorDescriptor.ValueIterator iter) {
                iterator = iter;
            }

            @Override
            public Iterable<V2> getValues() throws IOException, InterruptedException {
                return new Iterable<V2>() {
                    @Override
                    public Iterator<V2> iterator() {
                        return iterator;
                    }
                };
            }

            /** Start processing next unique key. */
            @Override
            public boolean nextKey() throws IOException, InterruptedException {
                boolean hasMore = iterator.hasNext();
                if (hasMore) {
                    nextKeyValue();
                }
                return hasMore;
            }

            /**
             * Advance to the next key/value pair.
             */
            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                iterator.next();
                return true;
            }

            @Override
            public Object getCurrentKey() {
                return iterator.getKey();
            }

            @Override
            public Object getCurrentValue() {
                return iterator.getValue();
            }

            /**
             * Generate an output key/value pair.
             */
            @Override
            public void write(Object key, Object value) throws IOException, InterruptedException {
                output.collect(key, value);
            }

        }

        public ReducerAggregator(Object reducer) throws HyracksDataException {
            this.reducer = reducer;
            initializeReducer();
            output = new DataWritingOutputCollector<K3, V3>();
            reporter = new Reporter() {
                @Override
                public void progress() {

                }

                @Override
                public void setStatus(String arg0) {

                }

                @Override
                public void incrCounter(String arg0, String arg1, long arg2) {

                }

                @Override
                public void incrCounter(Enum<?> arg0, long arg1) {

                }

                @Override
                public InputSplit getInputSplit() throws UnsupportedOperationException {
                    return null;
                }

                @Override
                public Counter getCounter(String arg0, String arg1) {
                    return null;
                }

                @Override
                public Counter getCounter(Enum<?> arg0) {
                    return null;
                }

                @Override
                public float getProgress() {
                    return 0;
                }
            };
        }

        @Override
        public void aggregate(IDataReader<Object[]> reader, IDataWriter<Object[]> writer)
                throws HyracksDataException {
            Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
            ValueIterator i = new ValueIterator();
            i.reset(reader);
            output.setWriter(writer);
            try {
                if (jobConf.getUseNewReducer()) {
                    try {
                        reducerContext.setIterator(i);
                        ((org.apache.hadoop.mapreduce.Reducer) reducer).run(reducerContext);
                    } catch (InterruptedException e) {
                        e.printStackTrace();
                        throw new HyracksDataException(e);
                    }
                } else {
                    ((org.apache.hadoop.mapred.Reducer) reducer).reduce(i.getKey(), i, output, reporter);
                }
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        @Override
        public void close() throws HyracksDataException {
            // Close the old-API reducer if one is in use; new-API reducers have no close() hook.
            try {
                if (!jobConf.getUseNewReducer()) {
                    ((org.apache.hadoop.mapred.Reducer) reducer).close();
                }
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }

        private void initializeReducer() throws HyracksDataException {
            jobConf.setClassLoader(this.getClass().getClassLoader());
            if (!jobConf.getUseNewReducer()) {
                ((org.apache.hadoop.mapred.Reducer) reducer).configure(getJobConf());
            } else {
                try {
                    reducerContext = new ReducerContext((org.apache.hadoop.mapreduce.Reducer) reducer, jobConf);
                } catch (IOException e) {
                    e.printStackTrace();
                    throw new HyracksDataException(e);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    throw new HyracksDataException(e);
                } catch (RuntimeException e) {
                    throw new HyracksDataException(e);
                } catch (ClassNotFoundException e) {
                    throw new HyracksDataException(e);
                }
            }
        }
    }

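    /**
     * Iterator over the values of the current group: reset() reads the first tuple
     * (capturing the group key), and subsequent tuples are pulled lazily from the
     * upstream IDataReader in hasNext().
     */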
    private class ValueIterator implements Iterator<V2> {
        private IDataReader<Object[]> reader;
        private K2 key;
        private V2 value;

        public K2 getKey() {
            return key;
        }

        public V2 getValue() {
            return value;
        }

        @Override
        public boolean hasNext() {
            if (value == null) {
                Object[] tuple;
                try {
                    tuple = reader.readData();
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
                if (tuple != null) {
                    value = (V2) tuple[1];
                }
            }
            return value != null;
        }

        @Override
        public V2 next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            V2 v = value;
            value = null;
            return v;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        void reset(IDataReader<Object[]> reader) {
            this.reader = reader;
            try {
                Object[] tuple = reader.readData();
                key = (K2) tuple[0];
                value = (V2) tuple[1];
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }

    private static final long serialVersionUID = 1L;
    private Class reducerClass;
    private IComparatorFactory comparatorFactory;
    private boolean useAsCombiner = false;

    public HadoopReducerOperatorDescriptor(IOperatorDescriptorRegistry spec, JobConf conf,
            IComparatorFactory comparatorFactory, IHadoopClassFactory classFactory, boolean useAsCombiner) {
        super(spec, 1, getRecordDescriptor(conf, classFactory), conf, classFactory);
        this.comparatorFactory = comparatorFactory;
        this.useAsCombiner = useAsCombiner;
    }

    private Object createReducer() throws Exception {
        if (reducerClass != null) {
            return ReflectionUtils.newInstance(reducerClass, getJobConf());
        } else {
            Object reducer;
            if (!useAsCombiner) {
                if (getJobConf().getUseNewReducer()) {
                    JobContext jobContext = new ContextFactory().createJobContext(getJobConf());
                    reducerClass = (Class<? extends org.apache.hadoop.mapreduce.Reducer<?, ?, ?, ?>>) jobContext
                            .getReducerClass();
                } else {
                    reducerClass = (Class<? extends Reducer>) getJobConf().getReducerClass();
                }
            } else {
                if (getJobConf().getUseNewReducer()) {
                    JobContext jobContext = new ContextFactory().createJobContext(getJobConf());
                    reducerClass = (Class<? extends org.apache.hadoop.mapreduce.Reducer<?, ?, ?, ?>>) jobContext
                            .getCombinerClass();
                } else {
                    reducerClass = (Class<? extends Reducer>) getJobConf().getCombinerClass();
                }
            }
            reducer = getHadoopClassFactory().createReducer(reducerClass.getName(), getJobConf());
            return reducer;
        }
    }

    @Override
    public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
            IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions) {
        try {
            if (this.comparatorFactory == null) {
                String comparatorClassName = getJobConf().getOutputValueGroupingComparator().getClass().getName();
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                RawComparator rawComparator = null;
                if (comparatorClassName != null) {
                    Class comparatorClazz = getHadoopClassFactory().loadClass(comparatorClassName);
                    this.comparatorFactory = new KeyComparatorFactory(comparatorClazz);

                } else {
                    String mapOutputKeyClass = getJobConf().getMapOutputKeyClass().getName();
                    if (getHadoopClassFactory() != null) {
                        rawComparator = WritableComparator
                                .get(getHadoopClassFactory().loadClass(mapOutputKeyClass));
                    } else {
                        rawComparator = WritableComparator
                                .get((Class<? extends WritableComparable>) Class.forName(mapOutputKeyClass));
                    }
                    this.comparatorFactory = new RawComparingComparatorFactory(rawComparator.getClass());
                }
            }
            IOpenableDataWriterOperator op = new DeserializedPreclusteredGroupOperator(new int[] { 0 },
                    new IComparator[] { comparatorFactory.createComparator() },
                    new ReducerAggregator(createReducer()));
            return new DeserializedOperatorNodePushable(ctx, op,
                    recordDescProvider.getInputRecordDescriptor(getActivityId(), 0));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
        String outputKeyClassName = null;
        String outputValueClassName = null;

        if (conf.getUseNewMapper()) {
            JobContext context = new ContextFactory().createJobContext(conf);
            outputKeyClassName = context.getOutputKeyClass().getName();
            outputValueClassName = context.getOutputValueClass().getName();
        } else {
            outputKeyClassName = conf.getOutputKeyClass().getName();
            outputValueClassName = conf.getOutputValueClass().getName();
        }

        RecordDescriptor recordDescriptor = null;
        try {
            if (classFactory == null) {
                recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) Class.forName(outputKeyClassName),
                        (Class<? extends Writable>) Class.forName(outputValueClassName));
            } else {
                recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                        (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return recordDescriptor;
    }
}
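
Usage sketch

The class above is registered like any other Hyracks operator descriptor. The sketch below is a minimal, hypothetical wiring example rather than part of the original source: it assumes JobSpecification (which implements IOperatorDescriptorRegistry), OneToOneConnectorDescriptor, and spec.connect(...) from the Hyracks API, and it leaves the comparator factory, class factory, upstream operator, and a fully configured JobConf to the caller. The names ReducerWiringSketch and addReducer are illustrative only.

import org.apache.hadoop.mapred.JobConf;

import edu.uci.ics.hyracks.api.dataflow.IOperatorDescriptor;
import edu.uci.ics.hyracks.api.dataflow.value.IComparatorFactory;
import edu.uci.ics.hyracks.api.job.JobSpecification;
import edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor;
import edu.uci.ics.hyracks.dataflow.hadoop.util.IHadoopClassFactory;
import edu.uci.ics.hyracks.dataflow.std.connectors.OneToOneConnectorDescriptor;

public class ReducerWiringSketch {
    /**
     * Registers a HadoopReducerOperatorDescriptor with the given job specification and
     * connects it one-to-one to an upstream operator whose output is already grouped
     * (pre-clustered) on the reduce key. The JobConf is expected to already name the
     * reducer class and the output key/value types.
     */
    public static IOperatorDescriptor addReducer(JobSpecification spec, JobConf conf,
            IOperatorDescriptor groupedInput, IComparatorFactory comparatorFactory,
            IHadoopClassFactory classFactory) {
        // useAsCombiner = false: run the job's reducer class rather than its combiner.
        HadoopReducerOperatorDescriptor<?, ?, ?, ?> reducer =
                new HadoopReducerOperatorDescriptor<Object, Object, Object, Object>(
                        spec, conf, comparatorFactory, classFactory, false);
        // The descriptor declares a single input (arity 1); feed it from the grouped
        // upstream operator on port 0.
        spec.connect(new OneToOneConnectorDescriptor(spec), groupedInput, 0, reducer, 0);
        return reducer;
    }
}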