// Java tutorial
/* * Copyright 2009-2012 by The Regents of the University of California * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * you may obtain a copy of the License from * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package graphbuilding; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import type.*; /** * This class implement mapper operator of mapreduce model */ @SuppressWarnings("deprecation") public class GenomixMapper extends MapReduceBase implements Mapper<LongWritable, Text, Kmer, KmerCountValue> { public static int KMER_SIZE; public KmerCountValue outputAdjList; public Kmer outputKmer; @Override public void configure(JobConf job) { KMER_SIZE = Integer.parseInt(job.get("sizeKmer")); outputAdjList = new KmerCountValue(); outputKmer = new Kmer(KMER_SIZE); } /*succeed node A 00000001 1 C 00000010 2 G 00000100 4 T 00001000 8 precursor node A 00010000 16 C 00100000 32 G 01000000 64 T 10000000 128*/ public void map(LongWritable key, Text value, OutputCollector<Kmer, KmerCountValue> output, Reporter reporter) throws IOException { /* A 00 C 01 G 10 T 11*/ String geneLine = value.toString(); // Read the Real Gene Line Pattern genePattern = Pattern.compile("[AGCT]+"); Matcher geneMatcher = genePattern.matcher(geneLine); boolean isValid = 
geneMatcher.matches(); if (isValid == true) { /** first kmer */ byte count = 1; byte[] array = geneLine.getBytes(); outputKmer.setByRead(array, 0); byte pre = 0; byte next = GeneCode.getAdjBit(array[KMER_SIZE]); byte adj = GeneCode.mergePreNextAdj(pre, next); outputAdjList.set(adj, count); output.collect(outputKmer, outputAdjList); /** middle kmer */ for (int i = KMER_SIZE; i < array.length - 1; i++) { pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[i])); next = GeneCode.getAdjBit(array[i + 1]); adj = GeneCode.mergePreNextAdj(pre, next); outputAdjList.set(adj, count); output.collect(outputKmer, outputAdjList); } /** last kmer */ pre = GeneCode.getBitMapFromGeneCode(outputKmer.shiftKmerWithNextChar(array[array.length - 1])); next = 0; adj = GeneCode.mergePreNextAdj(pre, next); outputAdjList.set(adj, count); output.collect(outputKmer, outputAdjList); } } }