com.philiphubbard.sabe.MRAssemblerTest2.java Source code

Java tutorial

Introduction

Here is the source code for com.philiphubbard.sabe.MRAssemblerTest2.java

Source

// Copyright (c) 2014 Philip M. Hubbard
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
// 
// http://opensource.org/licenses/MIT

package com.philiphubbard.sabe;

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.IOException;
import java.util.ArrayList;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;

// A sample driver application for running the MRAssembler class with Hadoop.
// The test data in this case is a larger set of reads, containing errors and
// repetitive sections repeated once ("single repeats"), and a goal that is 
// a longer sequence.

public class MRAssemblerTest2 {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();

        setupTest(conf);

        MRAssembler assembler = new MRAssembler(MER_LENGTH, COVERAGE);
        assembler.run(new Path(testInput), new Path(testOutput));

        verifyTest(conf);
        cleanupTest(conf);

        System.exit(0);
    }

    private static void setupTest(Configuration conf) throws IOException {
        FileSystem fileSystem = FileSystem.get(conf);

        Path path = new Path(testInput);
        if (fileSystem.exists(path))
            fileSystem.delete(path, true);

        ArrayList<Text> reads = new ArrayList<Text>();

        // The expected result:
        // CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACCCGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC
        // Note that it has the following pattern:
        // segment 0: CCCTTTCTGT 
        // segment 1, which will be repeated: TGACCCATCA 
        // segment 2: TTGTTTAGTA 
        // segment 3, which will be repeated: ACCCGCGGGA 
        // segment 4: TGCCTGGCAG 
        // segment 3, again: ACCCGCGGGA 
        // segment 5: CGATCTCCTC
        // segment 1, again: TGACCCATCA 
        // segment 6: TCGAAATTCC

        reads.add(new Text("CCCTTTC\n"));
        // Error: initial T omitted.
        reads.add(new Text("GTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACC"));
        reads.add(new Text("CGCGGGACGAT\n"));
        // Error: final C omitted.
        reads.add(new Text("CTCCTCTGACCCATCATCGAAATTC\n"));

        reads.add(new Text("CCCTTTCTGTTGACCCAT\n"));
        // Error: final C replaced with G.
        reads.add(new Text("CATTGTTTAGTAACCCGCGGGATGCCTGGCAGACG\n"));
        reads.add(new Text("CGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC\n"));

        // Error: C at index 14 replaced with A.
        reads.add(new Text("CCCTTTCTGTTGACACATCATTGTTTAGTAAC"));
        reads.add(new Text("CCGCGGGATGCC\n"));
        // Error: C at index 25 omitted.
        reads.add(new Text("TGGCAGACCCGCGGGACGATCTCCTTGACCCATCATCGAAATTCC\n"));

        reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTG\n"));
        // Error: G at index 10 replaced with T.
        reads.add(new Text("GCAGACCCGCTGGACGA\n"));
        reads.add(new Text("TCTCCTCTGACCCATCATCGAAATTCC\n"));

        reads.add(new Text("CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGC"));
        // Error: final G omitted.
        reads.add(new Text("CTGGCAGACCCGC\n"));
        reads.add(new Text("GGACGATCTCCTCT\n"));
        // Error: CG at index 10 transposed to GC
        reads.add(new Text("GACCCATCATCGAAATTCC\n"));

        FSDataOutputStream out = fileSystem.create(path);
        for (Text read : reads) {
            byte[] bytes = read.copyBytes();
            for (byte b : bytes)
                out.write(b);
        }
        out.close();

        fileSystem.close();
    }

    private static void verifyTest(Configuration conf) throws IOException {
        FileSystem fileSystem = FileSystem.get(conf);
        FSDataInputStream output = fileSystem.open(new Path(testOutput));
        BufferedReader reader = new BufferedReader(new InputStreamReader(output));

        String actual = reader.readLine();
        final String expected = "CCCTTTCTGTTGACCCATCATTGTTTAGTAACCCGCGGGATGCCTGGCAGACCCGCGGGACGATCTCCTCTGACCCATCATCGAAATTCC";

        System.out.println(expected);

        if (!actual.equals(expected))
            throw new IOException("Test failed with incorrect result:\n" + actual);

        reader.close();

        System.out.println("Test succeeded.");
    }

    private static void cleanupTest(Configuration conf) throws IOException {
        FileSystem fileSystem = FileSystem.get(conf);

        fileSystem.delete(new Path(testInput), true);
        fileSystem.delete(new Path(testOutput), true);

        fileSystem.close();
    }

    private static final int MER_LENGTH = 6;
    private static final int COVERAGE = 5;

    private static String testInput = new String("MRAssemblerTest_in.txt");
    private static String testOutput = new String("MRAssemblerTest_out");
}