/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.commons.lang.ArrayUtils;

import com.aliyun.odps.Column;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordComparator;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.io.LongWritable;
import com.aliyun.odps.io.Writable;
import com.aliyun.odps.mapred.Partitioner;
import com.aliyun.odps.mapred.Reducer;
import com.aliyun.odps.mapred.Reducer.TaskContext;
import com.aliyun.odps.mapred.bridge.type.ColumnBasedRecordComparator;
import com.aliyun.odps.mapred.bridge.utils.MapReduceUtils;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.annotation.NotReuseArgumentObject;
import com.aliyun.odps.udf.annotation.PreferWritable;
import com.aliyun.odps.utils.ReflectionUtils;

/**
 * Reducer implementation wrapped as a UDTF.
 */
@PreferWritable
@NotReuseArgumentObject
public class LotReducerUDTF extends LotTaskUDTF {

  private TaskContext ctx;

  class ReduceContextImpl extends UDTFTaskContextImpl implements TaskContext {

    // current key/value records of the group being reduced
    private Record key;
    private Record value;
    private Comparator<Object[]> keyGroupingComparator;
    private LotGroupingRecordIterator itr;
    Partitioner partitioner;
    private long nextRecordCntr = 1;
    private long inputValueCounter;
    private long inputKeyCounter;

    public ReduceContextImpl(BridgeJobConf conf) {
      super(conf);
    }

    @SuppressWarnings({"rawtypes", "unchecked"})
    @Override
    public void configure(ExecutionContext ctx) {
      super.configure(ctx);
      String[] keyGrpColumns;
      Column[] keyRS;
      Class<? extends RecordComparator> keyComparatorClass = null;
      Class<? extends RecordComparator> keyGroupingComparatorClass = null;
      if (pipeMode) {
        conf.setReducerClass(pipeNode.getTransformClass());
        key = new WritableRecord(pipeNode.getInputKeySchema());
        value = new WritableRecord(pipeNode.getInputValueSchema());
        keyGrpColumns = pipeNode.getInputGroupingColumns();
        keyRS = pipeNode.getInputKeySchema();
        // hotfix for sprint21, should remove in sprint23
        // keyComparatorClass = pipeNode.getInputKeyComparatorClass();
        // keyGroupingComparatorClass = pipeNode.getInputKeyGroupingComparatorClass();
        keyComparatorClass = conf.getPipelineOutputKeyComparatorClass(pipeIndex - 1);
        keyGroupingComparatorClass = conf.getPipelineOutputKeyGroupingComparatorClass(pipeIndex - 1);
        // only a reducer in pipeline mode has a partitioner
        Class<? extends Partitioner> partitionerClass = pipeNode.getPartitionerClass();
        if (partitionerClass != null) {
          partitioner = ReflectionUtils.newInstance(partitionerClass, getJobConf());
          partitioner.configure(conf);
        }
      } else {
        key = new WritableRecord(conf.getMapOutputKeySchema());
        value = new WritableRecord(conf.getMapOutputValueSchema());
        keyGrpColumns = conf.getOutputGroupingColumns();
        keyRS = conf.getMapOutputKeySchema();
        keyComparatorClass = conf.getOutputKeyComparatorClass();
        keyGroupingComparatorClass = conf.getOutputKeyGroupingComparatorClass();
      }
      if (keyGroupingComparatorClass != null) {
        keyGroupingComparator = ReflectionUtils.newInstance(keyGroupingComparatorClass, getJobConf());
      } else if (keyComparatorClass != null) {
        keyGroupingComparator = ReflectionUtils.newInstance(keyComparatorClass, getJobConf());
      } else {
        keyGroupingComparator = new ColumnBasedRecordComparator(keyGrpColumns, keyRS);
      }
      // for inner output
      if (innerOutput && pipeMode && pipeNode != null && pipeNode.getNextNode() != null) {
        Column[] keyCols = pipeNode.getOutputKeySchema();
        Column[] valCols = pipeNode.getOutputValueSchema();
        Column[] outputFields = new Column[keyCols.length + valCols.length + packagedOutputSchema.length];
        int len = 0;
        for (Column col : keyCols) {
          outputFields[len++] = col;
        }
        for (Column col : valCols) {
          outputFields[len++] = col;
        }
        innerOutputIndex = len;
        for (Column col : packagedOutputSchema) {
          outputFields[len++] = col;
        }
        packagedOutputSchema = outputFields;
      }
    }

    @Override
    public void write(Record r) throws IOException {
      write(r, TableInfo.DEFAULT_LABEL);
    }

    @Override
    public void write(Record r, String label) throws IOException {
      if (!hasLabel(label)) {
        throw new IOException(ErrorCode.NO_SUCH_LABEL.toString() + " " + label);
      }
      if (innerOutput) {
        write(createInnerOutputRow(((WritableRecord) r).toWritableArray(), true,
                                   TableInfo.INNER_OUTPUT_LABEL, label));
      } else {
        write(createOutputRow(r, label));
      }
    }

    protected void write(Object[] record) {
      collect(record);
    }

    @Override
    public boolean nextKeyValue() {
      inputKeyCounter++;
      if (itr == null) {
        // first key: fetch the first row and build the grouping iterator
        Object[] init = getData();
        if (init == null) {
          return false;
        }
        key.set(Arrays.copyOf(init, key.getColumnCount()));
        value.set(Arrays.copyOfRange(init, key.getColumnCount(), init.length));
        itr = new LotGroupingRecordIterator(this, keyGroupingComparator, init,
                                            (WritableRecord) key, (WritableRecord) value);
      } else {
        // drain any unread values of the current group, then advance to the next key
        while (itr.hasNext()) {
          itr.remove();
        }
        if (!itr.reset()) {
          return false;
        }
      }
      return true;
    }

    @Override
    public Record getCurrentKey() {
      return key;
    }

    @Override
    public Iterator<Record> getValues() {
      return itr;
    }

    @Override
    public void write(Record key, Record value) {
      if (!pipeMode || pipeNode.getNextNode() == null) {
        throw new UnsupportedOperationException(ErrorCode.INTERMEDIATE_OUTPUT_IN_REDUCER.toString());
      }
      // pipeline mode: emit key/value for the next pipeline stage
      Writable[] keyArray = ((WritableRecord) key).toWritableArray();
      Writable[] valueArray = ((WritableRecord) value).toWritableArray();
      Writable[] result;
      int idx = 0;
      if (partitioner != null) {
        int part = partitioner.getPartition(key, value, getNumReduceTasks());
        if (part < 0 || part >= getNumReduceTasks()) {
          throw new RuntimeException("partitioner return invalid partition value:" + part);
        }
        // prepend the partition id so the next stage can shuffle on it
        result = new Writable[1 + keyArray.length + valueArray.length];
        result[idx++] = new LongWritable(part);
      } else {
        result = new Writable[keyArray.length + valueArray.length];
      }
      for (Writable obj : keyArray) {
        result[idx++] = obj;
      }
      for (Writable obj : valueArray) {
        result[idx++] = obj;
      }
      if (innerOutput) {
        write(createInnerOutputRow(result, false, TableInfo.DEFAULT_LABEL, TableInfo.DEFAULT_LABEL));
      } else {
        write(result);
      }
    }

    public Object[] getData() {
      inputValueCounter++;
      if (inputValueCounter == nextRecordCntr) {
        StateInfo.updateMemStat(
            "after processed " + inputKeyCounter + " keys, " + inputValueCounter + " values");
        nextRecordCntr = getNextCntr(inputValueCounter, false);
      }
      return getNextRowWapper();
    }
  }

  @Override
  public void setup(ExecutionContext eCtx) {
    ctx = new ReduceContextImpl(conf);
    Column[] ks = conf.getMapOutputKeySchema();
    Column[] vs = conf.getMapOutputValueSchema();
    inputSchema = (Column[]) ArrayUtils.addAll(ks, vs);
    UDTFTaskContextImpl udtfCtx = (UDTFTaskContextImpl) ctx;
    udtfCtx.configure(eCtx);
  }

  public void run() throws IOException {
    StateInfo.init();
    MapReduceUtils.runReducer((Class<Reducer>) conf.getReducerClass(), ctx);
    StateInfo.updateMemStat("reducer end");
    StateInfo.printMaxMemo();
  }

  // wraps getNextRow(): clone the returned row so callers can hold on to it safely
  public Object[] getNextRowWapper() {
    Object[] data = getNextRow();
    if (data != null) {
      return data.clone();
    } else {
      return null;
    }
  }
}
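
For context on how this wrapper is driven: run() hands the configured Reducer class and the ReduceContextImpl above to MapReduceUtils.runReducer, which loops over nextKeyValue()/getCurrentKey()/getValues() and collects whatever the reducer writes back through write(Record). The sketch below shows the kind of user reducer this context serves. It is illustrative only, not part of this file: it assumes the public com.aliyun.odps.mapred.ReducerBase API, a BIGINT count in the first value column, and a two-column (word, count) output schema; the class name SumReducerSketch is hypothetical.

import java.io.IOException;
import java.util.Iterator;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.ReducerBase;

/**
 * Hypothetical reducer: sums per-key BIGINT counts.
 * Names and schema are illustrative only.
 */
public class SumReducerSketch extends ReducerBase {

  private Record result;

  @Override
  public void setup(TaskContext context) throws IOException {
    // Output record shaped by the job's output table schema (assumed: word, count).
    result = context.createOutputRecord();
  }

  @Override
  public void reduce(Record key, Iterator<Record> values, TaskContext context)
      throws IOException {
    long count = 0;
    // When run through LotReducerUDTF, this iterator is the
    // LotGroupingRecordIterator returned by ReduceContextImpl.getValues().
    while (values.hasNext()) {
      count += (Long) values.next().get(0);
    }
    result.set(0, key.get(0));
    result.set(1, count);
    // Ends up in ReduceContextImpl.write(Record), i.e. createOutputRow(...) + collect(...).
    context.write(result);
  }
}

The values iterator handed to reduce() is the grouping iterator built in nextKeyValue(), so values for one key are consumed lazily from the sorted input stream; any values the reducer leaves unread are drained the next time nextKeyValue() advances to a new key.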