/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.aliyun.odps.mapred.bridge;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;

import org.apache.commons.lang.ArrayUtils;

import com.aliyun.odps.Column;
import com.aliyun.odps.data.Record;
import com.aliyun.odps.data.RecordComparator;
import com.aliyun.odps.data.TableInfo;
import com.aliyun.odps.io.LongWritable;
import com.aliyun.odps.io.Writable;
import com.aliyun.odps.mapred.Partitioner;
import com.aliyun.odps.mapred.Reducer;
import com.aliyun.odps.mapred.Reducer.TaskContext;
import com.aliyun.odps.mapred.bridge.type.ColumnBasedRecordComparator;
import com.aliyun.odps.mapred.bridge.utils.MapReduceUtils;
import com.aliyun.odps.mapred.conf.BridgeJobConf;
import com.aliyun.odps.udf.ExecutionContext;
import com.aliyun.odps.udf.annotation.NotReuseArgumentObject;
import com.aliyun.odps.udf.annotation.PreferWritable;
import com.aliyun.odps.utils.ReflectionUtils;

/**
 * Reducer implementation wrapped as a UDTF.
 */
@PreferWritable
@NotReuseArgumentObject
public class LotReducerUDTF extends LotTaskUDTF {

  private TaskContext ctx;

  class ReduceContextImpl extends UDTFTaskContextImpl implements TaskContext {

    // current key/value records of the group being reduced
    private Record key;
    private Record value;
    private Comparator<Object[]> keyGroupingComparator;
    private LotGroupingRecordIterator itr;
    Partitioner partitioner;
    private long nextRecordCntr = 1;
    private long inputValueCounter;
    private long inputKeyCounter;

    public ReduceContextImpl(BridgeJobConf conf) {
      super(conf);
    }

    @SuppressWarnings({"rawtypes", "unchecked"})
    @Override
    public void configure(ExecutionContext ctx) {
      super.configure(ctx);
      String[] keyGrpColumns;
      Column[] keyRS;
      Class<? extends RecordComparator> keyComparatorClass = null;
      Class<? extends RecordComparator> keyGroupingComparatorClass = null;
      if (pipeMode) {
        conf.setReducerClass(pipeNode.getTransformClass());
        key = new WritableRecord(pipeNode.getInputKeySchema());
        value = new WritableRecord(pipeNode.getInputValueSchema());
        keyGrpColumns = pipeNode.getInputGroupingColumns();
        keyRS = pipeNode.getInputKeySchema();
        // hotfix for sprint21, should remove in sprint23
        // keyComparatorClass = pipeNode.getInputKeyComparatorClass();
        // keyGroupingComparatorClass = pipeNode.getInputKeyGroupingComparatorClass();
        keyComparatorClass = conf.getPipelineOutputKeyComparatorClass(pipeIndex - 1);
        keyGroupingComparatorClass = conf.getPipelineOutputKeyGroupingComparatorClass(pipeIndex - 1);
        // only a reducer in pipeline mode has a partitioner
        Class<? extends Partitioner> partitionerClass = pipeNode.getPartitionerClass();
        if (partitionerClass != null) {
          partitioner = ReflectionUtils.newInstance(partitionerClass, getJobConf());
          partitioner.configure(conf);
        }
      } else {
        key = new WritableRecord(conf.getMapOutputKeySchema());
        value = new WritableRecord(conf.getMapOutputValueSchema());
        keyGrpColumns = conf.getOutputGroupingColumns();
        keyRS = conf.getMapOutputKeySchema();
        keyComparatorClass = conf.getOutputKeyComparatorClass();
        keyGroupingComparatorClass = conf.getOutputKeyGroupingComparatorClass();
      }
      if (keyGroupingComparatorClass != null) {
        keyGroupingComparator = ReflectionUtils.newInstance(keyGroupingComparatorClass, getJobConf());
      } else if (keyComparatorClass != null) {
        keyGroupingComparator = ReflectionUtils.newInstance(keyComparatorClass, getJobConf());
      } else {
        keyGroupingComparator = new ColumnBasedRecordComparator(keyGrpColumns, keyRS);
      }
      // for inner output
      if (innerOutput && pipeMode && pipeNode != null && pipeNode.getNextNode() != null) {
        Column[] keyCols = pipeNode.getOutputKeySchema();
        Column[] valCols = pipeNode.getOutputValueSchema();
        Column[] outputFields = new Column[keyCols.length + valCols.length + packagedOutputSchema.length];
        int len = 0;
        for (Column col : keyCols) {
          outputFields[len++] = col;
        }
        for (Column col : valCols) {
          outputFields[len++] = col;
        }
        innerOutputIndex = len;
        for (Column col : packagedOutputSchema) {
          outputFields[len++] = col;
        }
        packagedOutputSchema = outputFields;
      }
    }

    @Override
    public void write(Record r) throws IOException {
      write(r, TableInfo.DEFAULT_LABEL);
    }

    @Override
    public void write(Record r, String label) throws IOException {
      if (!hasLabel(label)) {
        throw new IOException(ErrorCode.NO_SUCH_LABEL.toString() + " " + label);
      }
      if (innerOutput) {
        write(createInnerOutputRow(((WritableRecord) r).toWritableArray(), true,
                                   TableInfo.INNER_OUTPUT_LABEL, label));
      } else {
        write(createOutputRow(r, label));
      }
    }

    protected void write(Object[] record) {
      collect(record);
    }

    @Override
    public boolean nextKeyValue() {
      inputKeyCounter++;
      if (itr == null) {
        // first key: fetch the first row and build the grouping iterator
        Object[] init = getData();
        if (init == null) {
          return false;
        }
        key.set(Arrays.copyOf(init, key.getColumnCount()));
        value.set(Arrays.copyOfRange(init, key.getColumnCount(), init.length));
        itr = new LotGroupingRecordIterator(this, keyGroupingComparator, init,
                                            (WritableRecord) key, (WritableRecord) value);
      } else {
        // drain any unread values of the current group, then advance to the next key
        while (itr.hasNext()) {
          itr.remove();
        }
        if (!itr.reset()) {
          return false;
        }
      }
      return true;
    }

    @Override
    public Record getCurrentKey() {
      return key;
    }

    @Override
    public Iterator<Record> getValues() {
      return itr;
    }

    @Override
    public void write(Record key, Record value) {
      if (!pipeMode || pipeNode.getNextNode() == null) {
        throw new UnsupportedOperationException(ErrorCode.INTERMEDIATE_OUTPUT_IN_REDUCER.toString());
      }
      // pipeline mode: emit key/value for the next pipeline stage
      Writable[] keyArray = ((WritableRecord) key).toWritableArray();
      Writable[] valueArray = ((WritableRecord) value).toWritableArray();
      Writable[] result;
      int idx = 0;
      if (partitioner != null) {
        int part = partitioner.getPartition(key, value, getNumReduceTasks());
        if (part < 0 || part >= getNumReduceTasks()) {
          throw new RuntimeException("partitioner return invalid partition value:" + part);
        }
        // prepend the partition id so the next stage can shuffle on it
        result = new Writable[1 + keyArray.length + valueArray.length];
        result[idx++] = new LongWritable(part);
      } else {
        result = new Writable[keyArray.length + valueArray.length];
      }
      for (Writable obj : keyArray) {
        result[idx++] = obj;
      }
      for (Writable obj : valueArray) {
        result[idx++] = obj;
      }
      if (innerOutput) {
        write(createInnerOutputRow(result, false, TableInfo.DEFAULT_LABEL, TableInfo.DEFAULT_LABEL));
      } else {
        write(result);
      }
    }

    public Object[] getData() {
      inputValueCounter++;
      if (inputValueCounter == nextRecordCntr) {
        StateInfo.updateMemStat(
            "after processed " + inputKeyCounter + " keys, " + inputValueCounter + " values");
        nextRecordCntr = getNextCntr(inputValueCounter, false);
      }
      return getNextRowWapper();
    }
  }

  @Override
  public void setup(ExecutionContext eCtx) {
    ctx = new ReduceContextImpl(conf);
    Column[] ks = conf.getMapOutputKeySchema();
    Column[] vs = conf.getMapOutputValueSchema();
    inputSchema = (Column[]) ArrayUtils.addAll(ks, vs);
    UDTFTaskContextImpl udtfCtx = (UDTFTaskContextImpl) ctx;
    udtfCtx.configure(eCtx);
  }

  public void run() throws IOException {
    StateInfo.init();
    MapReduceUtils.runReducer((Class<Reducer>) conf.getReducerClass(), ctx);
    StateInfo.updateMemStat("reducer end");
    StateInfo.printMaxMemo();
  }

  // wraps getNextRow(): clone the returned row so callers can hold on to it safely
  public Object[] getNextRowWapper() {
    Object[] data = getNextRow();
    if (data != null) {
      return data.clone();
    } else {
      return null;
    }
  }
}
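
For context on how this wrapper is driven: run() hands the configured Reducer class and the ReduceContextImpl above to MapReduceUtils.runReducer, which loops over nextKeyValue()/getCurrentKey()/getValues() and collects whatever the reducer writes back through write(Record). The sketch below shows the kind of user reducer this context serves. It is illustrative only, not part of this file: it assumes the public com.aliyun.odps.mapred.ReducerBase API, a BIGINT count in the first value column, and a two-column (word, count) output schema; the class name SumReducerSketch is hypothetical.

import java.io.IOException;
import java.util.Iterator;

import com.aliyun.odps.data.Record;
import com.aliyun.odps.mapred.ReducerBase;

/**
 * Hypothetical reducer: sums per-key BIGINT counts.
 * Names and schema are illustrative only.
 */
public class SumReducerSketch extends ReducerBase {

  private Record result;

  @Override
  public void setup(TaskContext context) throws IOException {
    // Output record shaped by the job's output table schema (assumed: word, count).
    result = context.createOutputRecord();
  }

  @Override
  public void reduce(Record key, Iterator<Record> values, TaskContext context)
      throws IOException {
    long count = 0;
    // When run through LotReducerUDTF, this iterator is the
    // LotGroupingRecordIterator returned by ReduceContextImpl.getValues().
    while (values.hasNext()) {
      count += (Long) values.next().get(0);
    }
    result.set(0, key.get(0));
    result.set(1, count);
    // Ends up in ReduceContextImpl.write(Record), i.e. createOutputRow(...) + collect(...).
    context.write(result);
  }
}

The values iterator handed to reduce() is the grouping iterator built in nextKeyValue(), so values for one key are consumed lazily from the sorted input stream; any values the reducer leaves unread are drained the next time nextKeyValue() advances to a new key.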