org.apache.crunch.impl.mr.run.CrunchRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.crunch.impl.mr.run.CrunchRecordReader.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.crunch.impl.mr.run;

import java.io.IOException;

import org.apache.crunch.hadoop.mapreduce.TaskAttemptContextFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.ReflectionUtils;

class CrunchRecordReader<K, V> extends RecordReader<K, V> {

    private RecordReader<K, V> curReader;
    private CrunchInputSplit crunchSplit;
    private CombineFileSplit combineFileSplit;
    private TaskAttemptContext context;
    private int idx;
    private long progress;

    public CrunchRecordReader(InputSplit inputSplit, final TaskAttemptContext context)
            throws IOException, InterruptedException {
        this.crunchSplit = (CrunchInputSplit) inputSplit;
        if (crunchSplit.getInputSplit() instanceof CombineFileSplit) {
            combineFileSplit = (CombineFileSplit) crunchSplit.getInputSplit();
        }
        this.context = context;
        Configuration conf = crunchSplit.getConf();
        if (conf == null) {
            conf = context.getConfiguration();
            crunchSplit.setConf(conf);
        }
        initNextRecordReader();
    }

    private boolean initNextRecordReader() throws IOException, InterruptedException {
        if (combineFileSplit != null) {
            if (curReader != null) {
                curReader.close();
                curReader = null;
                if (idx > 0) {
                    progress += combineFileSplit.getLength(idx - 1);
                }
            }
            // if all chunks have been processed, nothing more to do.
            if (idx == combineFileSplit.getNumPaths()) {
                return false;
            }
        } else if (idx > 0) {
            return false;
        }

        idx++;
        Configuration conf = crunchSplit.getConf();
        InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
                .newInstance(crunchSplit.getInputFormatClass(), conf);
        this.curReader = inputFormat.createRecordReader(getDelegateSplit(),
                TaskAttemptContextFactory.create(conf, context.getTaskAttemptID()));
        return true;
    }

    private InputSplit getDelegateSplit() throws IOException {
        if (combineFileSplit != null) {
            return new FileSplit(combineFileSplit.getPath(idx - 1), combineFileSplit.getOffset(idx - 1),
                    combineFileSplit.getLength(idx - 1), combineFileSplit.getLocations());
        } else {
            return crunchSplit.getInputSplit();
        }
    }

    @Override
    public void close() throws IOException {
        if (curReader != null) {
            curReader.close();
            curReader = null;
        }
    }

    @Override
    public K getCurrentKey() throws IOException, InterruptedException {
        return curReader.getCurrentKey();
    }

    @Override
    public V getCurrentValue() throws IOException, InterruptedException {
        return curReader.getCurrentValue();
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        float curProgress = 0; // bytes processed in current split
        if (null != curReader) {
            curProgress = (float) (curReader.getProgress() * getCurLength());
        }
        return Math.min(1.0f, (progress + curProgress) / getOverallLength());
    }

    private long getCurLength() {
        if (combineFileSplit == null) {
            return 1L;
        } else {
            return combineFileSplit.getLength(idx - 1);
        }
    }

    private float getOverallLength() {
        if (combineFileSplit == null) {
            return 1.0f;
        } else {
            return (float) combineFileSplit.getLength();
        }
    }

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {
        this.crunchSplit = (CrunchInputSplit) inputSplit;
        this.context = context;
        Configuration conf = crunchSplit.getConf();
        if (conf == null) {
            conf = context.getConfiguration();
            crunchSplit.setConf(conf);
        }
        if (crunchSplit.getInputSplit() instanceof CombineFileSplit) {
            combineFileSplit = (CombineFileSplit) crunchSplit.getInputSplit();
        }
        if (curReader != null) {
            curReader.initialize(getDelegateSplit(),
                    TaskAttemptContextFactory.create(conf, context.getTaskAttemptID()));
        }
    }

    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        while ((curReader == null) || !curReader.nextKeyValue()) {
            if (!initNextRecordReader()) {
                return false;
            }
            if (curReader != null) {
                curReader.initialize(getDelegateSplit(),
                        TaskAttemptContextFactory.create(crunchSplit.getConf(), context.getTaskAttemptID()));
            }
        }
        return true;
    }
}