nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java

Source

/*
 * Copyright (c) 2016 Wout van Helvoirt
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package nl.bioinf.wvanhelvoirt.HadoopPhredCalculator;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.util.LineReader;

import java.io.IOException;

/**
 * NReadRecordReader
 *
 * This is a custom class to create a RecordReader for each split.
 *
 * @author Wout van Helvoirt
 */
public class NReadRecordReader extends RecordReader<LongWritable, Text> {

    /** After which lines to stop. */
    private int NLINESTOPROCESS;
    /** The lineReader. */
    private LineReader in;
    /** The LongWritable key. */
    private LongWritable key;
    /** The Text containing lines. */
    private Text value;
    /** Start position. */
    private long start;
    /** End position. */
    private long end;
    /** Current position. */
    private long pos;
    /** Max line length. */
    private int maxLineLength;

    /**
     * Closes any connection.
     *
     * @throws IOException Returns default exception.
     */
    @Override
    public void close() throws IOException {

        if (this.in != null) {
            this.in.close();
        }
    }

    /**
     * Override method that returns a LongWritable as key.
     *
     * @return LongWritable key.
     * @throws IOException          Returns default exception.
     * @throws InterruptedException Returns default exception.
     */
    @Override
    public LongWritable getCurrentKey() throws IOException, InterruptedException {

        return this.key;
    }

    /**
     * Override method that returns the current value. If the file has been read with a call to NextKeyValue(),
     * this returns the contents of the file as a Text.
     *
     * @return Text containing the contents of the file.
     * @throws IOException          Returns default exception.
     * @throws InterruptedException Returns default exception.
     */
    @Override
    public Text getCurrentValue() throws IOException, InterruptedException {

        return this.value;
    }

    /**
     * Override method that returns whether the file has been processed or not.
     *
     * @return Float 0.0 if the file has not been processed. 1.0 if it has.
     * @throws IOException          Returns default exception.
     * @throws InterruptedException Returns default exception.
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {

        // Return progress state.
        if (this.start == this.end) {
            return 0.0f;
        } else {
            return Math.min(1.0f, (this.pos - this.start) / (float) (this.end - this.start));
        }
    }

    /**
     * Override method for instantiation.
     *
     * @param inputSplit The InputSplit to read.
     * @param context    The context for this task.
     * @throws IOException          Returns default exception.
     * @throws InterruptedException Returns default exception.
     */
    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext context)
            throws IOException, InterruptedException {

        // Initialize.
        Configuration conf = context.getConfiguration();
        FileSplit split = (FileSplit) inputSplit;
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream infile = fs.open(split.getPath());

        // Use number of lines given by user and set parameters.
        this.NLINESTOPROCESS = NLineInputFormat.getNumLinesPerSplit(context);
        this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
        this.start = split.getStart();
        this.end = this.start + split.getLength();
        boolean skipFirstLine = false;

        // Skip first line?
        if (this.start != 0) {
            skipFirstLine = true;
            this.start--;
            infile.seek(this.start);
        }
        this.in = new LineReader(infile, conf);
        if (skipFirstLine) {
            this.start += this.in.readLine(new Text(), 0,
                    (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));
        }
        this.pos = this.start;
    }

    /**
     * Override method that if the file has not already been read, reads it into memory, so that a call to
     * getCurrentValue() will return the lines this file as Text. Then, returns true. If it has already been read,
     * then returns false without updating any internal state.
     *
     * @return Boolean whether the file was read or not.
     * @throws IOException          If there is an error reading the file.
     * @throws InterruptedException If there is an error.
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {

        // Initialize key and value.
        if (this.key == null) {
            this.key = new LongWritable();
        }
        if (this.value == null) {
            this.value = new Text();
        }

        // Get the key and value.
        this.key.set(this.pos);
        this.value.clear();
        Text endline = new Text("\n");
        int newSize = 0;
        for (int i = 0; i < this.NLINESTOPROCESS; i++) {
            Text v = new Text();
            while (this.pos < this.end) {
                newSize = this.in.readLine(v, this.maxLineLength,
                        Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
                this.value.append(v.getBytes(), 0, v.getLength());
                this.value.append(endline.getBytes(), 0, endline.getLength());
                if (newSize == 0) {
                    break;
                }
                this.pos += newSize;
                if (newSize < this.maxLineLength) {
                    break;
                }
            }
        }

        // If newSize is still zero, return false, else true.
        if (newSize == 0) {
            this.key = null;
            this.value = null;
            return false;
        } else {
            return true;
        }
    }
}