org.apache.hyracks.dataflow.hadoop.mapreduce.ReduceWriter.java — source listing

Java tutorial

Introduction

Below is the full source code for org.apache.hyracks.dataflow.hadoop.mapreduce.ReduceWriter, a Hyracks frame writer that groups incoming sorted tuples by key and drives a Hadoop Reducer over each group.

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hyracks.dataflow.hadoop.mapreduce;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.counters.GenericCounter;

import org.apache.hyracks.api.comm.IFrame;
import org.apache.hyracks.api.comm.IFrameWriter;
import org.apache.hyracks.api.comm.VSizeFrame;
import org.apache.hyracks.api.context.IHyracksTaskContext;
import org.apache.hyracks.api.dataflow.value.IBinaryComparator;
import org.apache.hyracks.api.dataflow.value.IBinaryComparatorFactory;
import org.apache.hyracks.api.dataflow.value.RecordDescriptor;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAccessor;
import org.apache.hyracks.dataflow.common.comm.io.FrameTupleAppender;
import org.apache.hyracks.dataflow.common.comm.util.FrameUtils;
import org.apache.hyracks.dataflow.hadoop.util.MRContextUtil;

/**
 * An {@link IFrameWriter} that consumes frames of tuples sorted on the grouping
 * fields, buffers each run of same-key tuples into a list of frames, and invokes
 * a Hadoop {@link Reducer} once per group, emitting its output through the
 * supplied {@link RecordWriter}.
 * <p>
 * Grouping relies on the input being sorted: a new group starts whenever the
 * configured comparators report that the current tuple differs from the
 * previous one on any grouping field.
 *
 * @param <K2> map-output (reduce-input) key type
 * @param <V2> map-output (reduce-input) value type
 * @param <K3> reduce-output key type
 * @param <V3> reduce-output value type
 */
public class ReduceWriter<K2, V2, K3, V3> implements IFrameWriter {
    private final IHyracksTaskContext ctx;
    private final HadoopHelper helper;
    // Field indexes that constitute the grouping key; parallel to comparators.
    private final int[] groupFields;
    // accessor0 scans the incoming frame; accessor1 scans the retained copy of
    // the previous frame so the last tuple of frame N can be compared against
    // the first tuple of frame N+1.
    private final FrameTupleAccessor accessor0;
    private final FrameTupleAccessor accessor1;
    private final IFrame copyFrame;
    private final IBinaryComparator[] comparators;
    private final KVIterator kvi;
    private final Reducer<K2, V2, K3, V3> reducer;
    private final RecordWriter<K3, V3> recordWriter;
    private final TaskAttemptID taId;
    private final TaskAttemptContext taskAttemptContext;

    // True until the very first tuple has been seen.
    private boolean first;
    // True while a group is being accumulated and has not yet been reduced.
    private boolean groupStarted;
    // Frames holding the current group's tuples; grown on demand.
    private List<IFrame> group;
    // Index into 'group' of the frame currently being appended to.
    private int bPtr;
    private FrameTupleAppender fta;
    private Counter keyCounter;
    private Counter valueCounter;

    public ReduceWriter(IHyracksTaskContext ctx, HadoopHelper helper, int[] groupFields,
            IBinaryComparatorFactory[] comparatorFactories, RecordDescriptor recordDescriptor,
            Reducer<K2, V2, K3, V3> reducer, RecordWriter<K3, V3> recordWriter, TaskAttemptID taId,
            TaskAttemptContext taskAttemptContext) throws HyracksDataException {
        this.ctx = ctx;
        this.helper = helper;
        this.groupFields = groupFields;
        accessor0 = new FrameTupleAccessor(recordDescriptor);
        accessor1 = new FrameTupleAccessor(recordDescriptor);
        copyFrame = new VSizeFrame(ctx);
        accessor1.reset(copyFrame.getBuffer());
        comparators = new IBinaryComparator[comparatorFactories.length];
        for (int i = 0; i < comparatorFactories.length; ++i) {
            comparators[i] = comparatorFactories[i].createBinaryComparator();
        }
        this.reducer = reducer;
        this.recordWriter = recordWriter;
        this.taId = taId;
        this.taskAttemptContext = taskAttemptContext;

        kvi = new KVIterator(helper, recordDescriptor);
    }

    @Override
    public void open() throws HyracksDataException {
        first = true;
        groupStarted = false;
        group = new ArrayList<>();
        bPtr = 0;
        group.add(new VSizeFrame(ctx));
        fta = new FrameTupleAppender();
        keyCounter = new GenericCounter();
        valueCounter = new GenericCounter();
    }

    @Override
    public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
        accessor0.reset(buffer);
        int nTuples = accessor0.getTupleCount();
        for (int i = 0; i < nTuples; ++i) {
            if (first) {
                groupInit();
                first = false;
            } else if (i == 0) {
                // First tuple of a new frame: compare against the last tuple
                // of the previous frame, which was retained in copyFrame.
                accessor1.reset(copyFrame.getBuffer());
                switchGroupIfRequired(accessor1, accessor1.getTupleCount() - 1, accessor0, i);
            } else {
                switchGroupIfRequired(accessor0, i - 1, accessor0, i);
            }
            accumulate(accessor0, i);
        }
        // Retain this frame so its last tuple is available for the cross-frame
        // group-boundary check on the next call.
        copyFrame.ensureFrameSize(buffer.capacity());
        FrameUtils.copyAndFlip(buffer, copyFrame.getBuffer());
    }

    /**
     * Appends the given tuple to the current group, moving to (and allocating,
     * if necessary) the next frame in {@code group} when the current one is full.
     *
     * @throws HyracksDataException if a single record exceeds the frame capacity
     */
    private void accumulate(FrameTupleAccessor accessor, int tIndex) throws HyracksDataException {
        if (!fta.append(accessor, tIndex)) {
            ++bPtr;
            if (group.size() <= bPtr) {
                group.add(new VSizeFrame(ctx));
            }
            fta.reset(group.get(bPtr), true);
            if (!fta.append(accessor, tIndex)) {
                throw new HyracksDataException("Record size ("
                        + (accessor.getTupleEndOffset(tIndex) - accessor.getTupleStartOffset(tIndex))
                        + ") larger than frame size (" + group.get(bPtr).getBuffer().capacity() + ")");
            }
        }
    }

    /**
     * If the previous and current tuples disagree on any grouping field,
     * reduces the accumulated group and starts a new one.
     */
    private void switchGroupIfRequired(FrameTupleAccessor prevTupleAccessor, int prevTupleIndex,
            FrameTupleAccessor currTupleAccessor, int currTupleIndex) throws HyracksDataException {
        if (!sameGroup(prevTupleAccessor, prevTupleIndex, currTupleAccessor, currTupleIndex)) {
            reduce();
            groupInit();
        }
    }

    /** Resets the accumulation state to begin a fresh group in the first frame. */
    private void groupInit() throws HyracksDataException {
        groupStarted = true;
        bPtr = 0;
        fta.reset(group.get(0), true);
    }

    /**
     * Runs the user reducer over the currently accumulated group (frames
     * {@code group[0..bPtr]}) via a freshly created reduce context.
     */
    // The map-output key/value classes and grouping comparator come from the
    // job configuration as raw Class/RawComparator instances; the casts below
    // are unavoidable and safe by the job's own contract.
    @SuppressWarnings("unchecked")
    private void reduce() throws HyracksDataException {
        kvi.reset(group, bPtr + 1);
        try {
            Reducer<K2, V2, K3, V3>.Context rCtx = new MRContextUtil().createReduceContext(
                    helper.getConfiguration(), taId, kvi, keyCounter, valueCounter, recordWriter, null, null,
                    (RawComparator<K2>) helper.getRawGroupingComparator(),
                    (Class<K2>) helper.getJob().getMapOutputKeyClass(),
                    (Class<V2>) helper.getJob().getMapOutputValueClass());
            reducer.run(rCtx);
        } catch (Exception e) {
            throw new HyracksDataException(e);
        }
        groupStarted = false;
    }

    /**
     * Compares two tuples on every grouping field.
     *
     * @return {@code true} iff all grouping fields compare equal
     */
    private boolean sameGroup(FrameTupleAccessor a1, int t1Idx, FrameTupleAccessor a2, int t2Idx)
            throws HyracksDataException {
        for (int i = 0; i < comparators.length; ++i) {
            int fIdx = groupFields[i];
            int s1 = a1.getTupleStartOffset(t1Idx) + a1.getFieldSlotsLength() + a1.getFieldStartOffset(t1Idx, fIdx);
            int l1 = a1.getFieldLength(t1Idx, fIdx);
            int s2 = a2.getTupleStartOffset(t2Idx) + a2.getFieldSlotsLength() + a2.getFieldStartOffset(t2Idx, fIdx);
            int l2 = a2.getFieldLength(t2Idx, fIdx);
            if (comparators[i].compare(a1.getBuffer().array(), s1, l1, a2.getBuffer().array(), s2, l2) != 0) {
                return false;
            }
        }
        return true;
    }

    @Override
    public void close() throws HyracksDataException {
        // Ensure the record writer is closed even if reducing the final group
        // fails; otherwise the underlying output resources would leak.
        try {
            if (groupStarted) {
                reduce();
            }
        } finally {
            try {
                recordWriter.close(taskAttemptContext);
            } catch (Exception e) {
                throw new HyracksDataException(e);
            }
        }
    }

    @Override
    public void fail() throws HyracksDataException {
        // No partial output to discard here; downstream failure handling is the
        // responsibility of the surrounding operator.
    }
}