FastqRecordReader.java Source code

Java tutorial

Introduction

Here is the source code for FastqRecordReader.java

Source

/**
 * Copyright 2016 José Manuel Abuín Mosquera <josemanuel.abuin@usc.es>
 * 
 * This file is part of SparkBWA.
 *
 * SparkBWA is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * SparkBWA is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with SparkBWA. If not, see <http://www.gnu.org/licenses/>.
 */

import java.io.IOException;
//import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

/**
 * This class defines a custom RecordReader for FASTQ files for the
 * Hadoop MapReduce framework.
 *
 * @author Mahmoud Parsian
 * @author José M. Abuín
 */
public class FastqRecordReader extends RecordReader<Long, String> {

    /** Number of consecutive non-empty lines that make up one FASTQ record. */
    private static final int LINES_PER_RECORD = 4;

    /** Underlying line reader; created in {@link #initialize} and {@code null} before that. */
    private LineRecordReader lrr = null;

    /** Key of the last record returned: the line reader's key for that record's first line. */
    private Long key = 0L;

    /** Last record returned: its four lines joined with {@code '\n'} (no trailing newline). */
    private String value = "";

    /** Sliding window over the most recent non-empty lines read from {@link #lrr}. */
    private final String[] lines = new String[LINES_PER_RECORD];

    /** Line-reader keys for the entries of {@link #lines}, index for index. */
    private final long[] keys = new long[LINES_PER_RECORD];

    /**
     * Closes the underlying line reader.
     *
     * <p>Safe to call when {@link #initialize} was never invoked; in that case
     * there is nothing to release and the call is a no-op.
     *
     * @throws IOException if the underlying reader fails to close
     */
    @Override
    public void close() throws IOException {
        if (this.lrr != null) {
            this.lrr.close();
        }
    }

    /**
     * @return the key stored by the last successful {@link #nextKeyValue()} call,
     *         or {@code 0} if no record has been read yet
     */
    @Override
    public Long getCurrentKey() throws IOException, InterruptedException {
        return key;
    }

    /**
     * @return the four-line record stored by the last successful
     *         {@link #nextKeyValue()} call, or the empty string if no record
     *         has been read yet
     */
    @Override
    public String getCurrentValue() throws IOException, InterruptedException {
        return value;
    }

    /**
     * Reports the progress of the underlying line reader.
     *
     * @return the line reader's progress, or {@code 0} if {@link #initialize}
     *         has not been called yet
     */
    @Override
    public float getProgress() throws IOException, InterruptedException {
        return this.lrr == null ? 0.0f : this.lrr.getProgress();
    }

    /**
     * Creates the underlying {@link LineRecordReader} and initializes it with
     * the given split and context.
     *
     * @param inputSplit         the split handed on to the line reader
     * @param taskAttemptContext the task context handed on to the line reader
     * @throws IOException          if the line reader cannot be initialized
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void initialize(final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext)
            throws IOException, InterruptedException {
        this.lrr = new LineRecordReader();
        this.lrr.initialize(inputSplit, taskAttemptContext);
    }

    /**
     * Advances to the next FASTQ record.
     *
     * <p>Lines are read from the underlying line reader, trimmed, and empty
     * lines are skipped. The last four non-empty lines are kept in a sliding
     * window; a window is accepted as a record when its first line starts with
     * {@code '@'} and its third line starts with {@code '+'}. Otherwise the
     * oldest line is dropped and reading continues, which re-synchronizes the
     * reader when it does not start on a record boundary.
     *
     * <p>Must only be called after {@link #initialize}.
     *
     * <p>NOTE(review): when the line reader runs out of lines while the window
     * is only partially filled, those lines are discarded. A record whose lines
     * are not all delivered by this reader's line reader is therefore lost
     * here; presumably this affects records that straddle an input-split
     * boundary — confirm against how splits are configured by the caller.
     *
     * @return {@code true} if a record was found and stored as the current
     *         key/value; {@code false} if the input is exhausted
     * @throws IOException if the underlying line reader fails
     */
    @Override
    public boolean nextKeyValue() throws IOException, InterruptedException {
        int filled = 0;

        while (this.lrr.nextKeyValue()) {
            final String line = this.lrr.getCurrentValue().toString().trim();

            // Empty lines never take part in a record.
            if (line.isEmpty()) {
                continue;
            }

            // A full window at this point was already rejected as a record:
            // drop its oldest line to make room for the new one.
            if (filled == LINES_PER_RECORD) {
                shiftWindow();
                filled--;
            }

            this.lines[filled] = line;
            this.keys[filled] = this.lrr.getCurrentKey().get();
            filled++;

            if (filled == LINES_PER_RECORD && windowHoldsRecord()) {
                this.key = this.keys[0];
                this.value = buildValue();
                // Drop the references so the next call starts from a clean window.
                clearRecords();
                return true;
            }
        }

        return false;
    }

    /**
     * Tells whether the (full) window looks like a FASTQ record: header line
     * starting with {@code '@'} and separator line starting with {@code '+'}.
     * Window entries are never empty, so {@code charAt(0)} is always valid.
     */
    private boolean windowHoldsRecord() {
        return this.lines[0].charAt(0) == '@' && this.lines[2].charAt(0) == '+';
    }

    /**
     * Moves window entries 1..3 down to 0..2, for lines and keys alike, so the
     * two arrays always stay aligned. The last slot keeps its old content until
     * the caller overwrites it.
     */
    private void shiftWindow() {
        final int kept = LINES_PER_RECORD - 1;
        System.arraycopy(this.lines, 1, this.lines, 0, kept);
        System.arraycopy(this.keys, 1, this.keys, 0, kept);
    }

    /** Resets every entry of the line window to {@code null}. */
    private void clearRecords() {
        for (int i = 0; i < LINES_PER_RECORD; i++) {
            this.lines[i] = null;
        }
    }

    /**
     * Joins the four window lines with {@code "\n"} separators.
     *
     * @return the record text, without a trailing newline
     */
    private String buildValue() {
        final StringBuilder builder = new StringBuilder();
        for (int i = 0; i < LINES_PER_RECORD; i++) {
            if (i > 0) {
                builder.append("\n");
            }
            builder.append(this.lines[i]);
        }
        return builder.toString();
    }
}