graphbuilding.GenomixMapper.java Source code

Java tutorial

Introduction

Here is the source code for graphbuilding.GenomixMapper.java

Source

/*
 * Copyright 2009-2012 by The Regents of the University of California
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * you may obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package graphbuilding;

import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

import type.*;

/**
 * Map-side operator of the graph-building MapReduce job.
 * <p>
 * Every input line that consists solely of the characters A, C, G and T is treated as a
 * read. The mapper slides a window of {@link #KMER_SIZE} bases over the read and, for each
 * window position, emits the current k-mer together with a {@code KmerCountValue} holding
 * an adjacency byte (predecessor/successor bits merged by {@code GeneCode.mergePreNextAdj})
 * and a count of 1. Lines containing any other character, and reads shorter than
 * {@link #KMER_SIZE}, produce no output.
 * <p>
 * The output key and value objects are created once in {@link #configure(JobConf)} and
 * reused for every emitted pair.
 */
@SuppressWarnings("deprecation")
public class GenomixMapper extends MapReduceBase implements Mapper<LongWritable, Text, Kmer, KmerCountValue> {

    /** Matches lines made up exclusively of A, C, G and T; compiled once instead of per record. */
    private static final Pattern GENE_PATTERN = Pattern.compile("[AGCT]+");

    /** Count attached to every emitted k-mer occurrence. */
    private static final byte SINGLE_OCCURRENCE = 1;

    /** Length of the k-mer window; read from the "sizeKmer" job property in {@link #configure(JobConf)}. */
    public static int KMER_SIZE;
    /** Reusable output value (adjacency byte + count). */
    public KmerCountValue outputAdjList;
    /** Reusable output key holding the k-mer at the current window position. */
    public Kmer outputKmer;

    /**
     * Reads the k-mer size from the job configuration and allocates the reusable output objects.
     *
     * @param job job configuration; must define the "sizeKmer" property as an integer
     * @throws NumberFormatException if "sizeKmer" is missing or not a valid integer
     */
    @Override
    public void configure(JobConf job) {
        KMER_SIZE = Integer.parseInt(job.get("sizeKmer"));
        outputAdjList = new KmerCountValue();
        outputKmer = new Kmer(KMER_SIZE);
    }

    /* Adjacency byte layout:
         successor node
           A 00000001 1
           C 00000010 2
           G 00000100 4
           T 00001000 8
         predecessor node
           A 00010000 16
           C 00100000 32
           G 01000000 64
           T 10000000 128
       Two-bit gene codes:
           A 00
           C 01
           G 10
           T 11 */

    /**
     * Emits one (k-mer, adjacency/count) pair for every window of {@link #KMER_SIZE} bases in the read.
     * <p>
     * The first k-mer has no predecessor bit and the last k-mer has no successor bit. A read of
     * exactly {@link #KMER_SIZE} bases therefore yields a single k-mer with neither bit set.
     *
     * @param key      input key supplied by the framework; not used
     * @param value    one input line, expected to be a read over the alphabet A, C, G, T
     * @param output   collector receiving the emitted pairs
     * @param reporter progress reporter; not used
     * @throws IOException if the collector fails to accept a pair
     */
    @Override
    public void map(LongWritable key, Text value, OutputCollector<Kmer, KmerCountValue> output, Reporter reporter)
            throws IOException {
        String geneLine = value.toString();
        if (!GENE_PATTERN.matcher(geneLine).matches()) {
            return; // not a pure ACGT line: nothing to emit
        }

        // The line was just validated to contain only A/C/G/T.
        byte[] array = geneLine.getBytes();
        if (array.length < KMER_SIZE) {
            return; // too short to contain even one k-mer
        }

        // First k-mer: no predecessor; a successor exists only if the read is longer than the window.
        outputKmer.setByRead(array, 0);
        emit((byte) 0, successorBit(array, KMER_SIZE), output);

        // Remaining k-mers: slide the window one base at a time. The value returned by
        // shiftKmerWithNextChar is converted into the predecessor bit (presumably it is the
        // gene code of the base pushed out of the window - confirm against Kmer).
        for (int i = KMER_SIZE; i < array.length; i++) {
            byte pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[i]));
            byte next = successorBit(array, i + 1);
            emit(pre, next, output);
        }
    }

    /** Returns the successor bit for the base at {@code index}, or 0 when {@code index} is past the end of the read. */
    private static byte successorBit(byte[] read, int index) {
        if (index < read.length) {
            return GeneCode.getAdjBit(read[index]);
        }
        return 0;
    }

    /** Stores the merged adjacency byte with a count of one and emits the current k-mer. */
    private void emit(byte pre, byte next, OutputCollector<Kmer, KmerCountValue> output) throws IOException {
        byte adj = GeneCode.mergePreNextAdj(pre, next);
        outputAdjList.set(adj, SINGLE_OCCURRENCE);
        output.collect(outputKmer, outputAdjList);
    }
}