List of usage examples for org.apache.hadoop.io Text charAt
public int charAt(int position)

Parameter: position - the byte offset of the character within the Text's UTF-8 encoded buffer.
Returns: the Unicode scalar value (32-bit code point) at that position, or -1 if the position is out of range or points to a trailing byte of a multi-byte character.
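Before the project examples below, here is a minimal, self-contained sketch of the call (the class and variable names are illustrative, not taken from any of the listed sources): it guards against an empty line, compares the returned code point against a char literal, and shows the -1 result for an out-of-range position. This leading-character test is the pattern most of the examples below rely on.

import org.apache.hadoop.io.Text;

public class TextCharAtDemo {
    public static void main(String[] args) {
        Text line = new Text("#comment line");
        // charAt returns the Unicode code point at the given byte offset,
        // so it can be compared directly against a char literal.
        if (line.getLength() > 0 && line.charAt(0) == '#') {
            System.out.println("leading '#' found, treat as comment");
        }
        // an out-of-range position does not throw; it returns -1
        System.out.println(line.charAt(1000)); // prints -1
    }
}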
From source file:it.crs4.seal.prq.PairReadsQSeqMapper.java
License:Open Source License
public void map(Text readId, SequencedFragment read, IMRContext<SequenceId, Text> context)
        throws IOException, InterruptedException {
    // build the key
    builder.delete(0, builder.length());

    // field up and including the index number goes in the location. The read is on its own.
    if (read.getRead() == null)
        throw new RuntimeException("Cannot get read number from read: " + readId);

    if (read.getLane() != null && read.getTile() != null && read.getXpos() != null && read.getYpos() != null) {
        appendIdToBuilder(builder, read); // appends the read id to the builder provided
        // finally the index field
        builder.append("#").append(read.getIndexSequence() == null ? '0' : read.getIndexSequence());
        sequenceKey.set(builder.toString(), read.getRead());
    } else {
        // maybe it's a fastq id with a trailing read number (/1 or /2)
        if (readId.getLength() > 2) {
            int last = readId.getLength() - 1;
            if (readId.charAt(last - 1) == '/') {
                // truncate the /[12] from the read id
                // last == length - 1. We want length - 2 bytes, which is equal to last - 1
                sequenceKey.set(Text.decode(readId.getBytes(), 0, last - 1), read.getRead());
            } else
                throw new RuntimeException(
                        "Didn't find /read_number at end of the read id. Please use qseq files or fastq with illumina-formatted name tags.");
        } else
            throw new RuntimeException("Read id " + readId
                    + " is too short. Please use qseq files or fastq with illumina-formatted name tags.");
    }

    // then the tab-delimited value
    sequenceValue.clear();
    sequenceValue.append(read.getSequence().getBytes(), 0, read.getSequence().getLength());
    sequenceValue.append(Delim, 0, Delim.length);
    sequenceValue.append(read.getQuality().getBytes(), 0, read.getQuality().getLength());
    sequenceValue.append(Delim, 0, Delim.length);
    // the filter flag is optional. If it's absent we assume the read passes filtering.
    sequenceValue.append(ZeroOne, (read.getFilterPassed() == null || read.getFilterPassed() ? 1 : 0), 1);

    context.write(sequenceKey, sequenceValue);
    context.progress();
}
From source file:it.uniroma1.hadoop.pagerank.job1.PageRankJob1Mapper.java
License:Open Source License
@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    /* Job#1 mapper will simply parse a line of the input graph creating a map with key-value(s) pairs.
     * Input format is the following (separator is TAB):
     *
     *     <nodeA>    <nodeB>
     *
     * which denotes an edge going from <nodeA> to <nodeB>.
     * We would need to skip comment lines (denoted by the # characters at the beginning of the line).
     * We will also collect all the distinct nodes in our graph: this is needed to compute the initial
     * pagerank value in Job #1 reducer and also in later jobs.
     */
    if (value.charAt(0) != '#') {
        int tabIndex = value.find("\t");
        String nodeA = Text.decode(value.getBytes(), 0, tabIndex);
        String nodeB = Text.decode(value.getBytes(), tabIndex + 1, value.getLength() - (tabIndex + 1));
        context.write(new Text(nodeA), new Text(nodeB));

        // add the current source node to the node list so we can
        // compute the total amount of nodes of our graph in Job#2
        PageRank.NODES.add(nodeA);
        // also add the target node to the same list: we may have a target node
        // with no outlinks (so it will never be parsed as source)
        PageRank.NODES.add(nodeB);
    }
}
From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();
    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java
License:Open Source License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // seek to new read start
    if (this.hasNextRead) {
        this.key = new LongWritable(this.pos);
        this.value = new FastaRawRead(this.filename);

        Text description = this.prevLine;
        this.pos += this.prevSize;

        long readStartOffset = this.key.get();
        long descriptionStartOffset = readStartOffset + 1;
        long sequenceStartOffset = this.pos;
        long descriptionLen = sequenceStartOffset - descriptionStartOffset;

        List<String> sequences = new ArrayList<String>();
        List<Long> sequenceStarts = new ArrayList<Long>();

        boolean foundNextRead = false;
        while (!foundNextRead) {
            Text newLine = new Text();
            long newSize = this.in.readLine(newLine, this.maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
            if (newSize == 0) {
                // EOF
                this.prevLine = null;
                this.prevSize = 0;
                this.pos = this.end;
                break;
            }

            if (newLine.getLength() > 0 && newLine.charAt(0) == READ_DELIMITER) {
                this.prevLine = newLine;
                this.prevSize = newSize;

                if (this.pos + newSize < this.end) {
                    foundNextRead = true;
                } else {
                    foundNextRead = false;
                }
                break;
            } else {
                sequences.add(newLine.toString());
                sequenceStarts.add(this.pos);
            }

            this.pos += newSize;
        }

        long newReadStartOffset = this.pos;
        long readLen = newReadStartOffset - readStartOffset;
        long sequenceLen = newReadStartOffset - sequenceStartOffset;

        this.value.setReadOffset(readStartOffset);
        this.value.setDescriptionOffset(descriptionStartOffset);
        this.value.setSequenceOffset(sequenceStartOffset);
        this.value.setReadLen(readLen);
        this.value.setDescriptionLen(descriptionLen);
        this.value.setSequenceLen(sequenceLen);
        this.value.setDescription(description.toString());

        if (this.firstRead) {
            this.value.setContinuousRead(false);
            this.firstRead = false;
        } else {
            this.value.setContinuousRead(true);
        }

        FastaRawReadLine[] readLines = new FastaRawReadLine[sequences.size()];
        for (int i = 0; i < sequences.size(); i++) {
            readLines[i] = new FastaRawReadLine(sequenceStarts.get(i), sequences.get(i));
        }
        this.value.setRawSequence(readLines);

        this.hasNextRead = foundNextRead;
        return true;
    } else {
        this.pos = this.end;
        this.prevLine = null;
        this.prevSize = 0;
        this.key = null;
        this.value = null;
        this.hasNextRead = false;
        return false;
    }
}
From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java
License:Open Source License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();
    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we meet new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}
From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java
License:Open Source License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // seek to new record start
    if (this.hasNextRecord) {
        this.key = new LongWritable(this.pos);
        this.value = new FastaRead(this.filename);

        Text description = this.prevLine;
        this.pos += this.prevSize;

        long readStartOffset = this.key.get();
        long descriptionStartOffset = readStartOffset + 1;
        long sequenceStartOffset = this.pos;
        long descriptionLen = sequenceStartOffset - descriptionStartOffset;

        boolean foundNextRead = false;
        while (!foundNextRead) {
            Text newLine = new Text();
            long newSize = this.in.readLine(newLine, this.maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
            if (newSize == 0) {
                // EOF
                this.prevLine = null;
                this.prevSize = 0;
                this.pos = this.end;
                break;
            }

            if (newLine.getLength() > 0 && newLine.charAt(0) == READ_DELIMITER) {
                this.prevLine = newLine;
                this.prevSize = newSize;

                if (this.pos + newSize < this.end) {
                    foundNextRead = true;
                } else {
                    foundNextRead = false;
                }
                break;
            } else {
                // skip
            }

            this.pos += newSize;
        }

        this.value.setReadOffset(readStartOffset);
        this.value.setDescription(description.toString());
        this.value.setSequence(null);

        if (this.firstRead) {
            this.value.setContinuousRead(false);
            this.firstRead = false;
        } else {
            this.value.setContinuousRead(true);
        }

        this.hasNextRecord = foundNextRead;
        return true;
    } else {
        this.pos = this.end;
        this.prevLine = null;
        this.prevSize = 0;
        this.key = null;
        this.value = null;
        this.hasNextRecord = false;
        return false;
    }
}
From source file:libra.common.hadoop.io.reader.fasta.FastaKmerReader.java
License:Apache License
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);
    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }
        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // leave k-1 seq in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}
From source file:libra.common.hadoop.io.reader.fasta.FastaRawReadReader.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // seek to new read start
    if (this.hasNextRead) {
        this.key = new LongWritable(this.pos);
        this.value = new FastaRawRead(this.filename);

        Text description = this.prevLine;
        this.pos += this.prevSize;

        long readStartOffset = this.key.get();
        long descriptionStartOffset = readStartOffset + 1;
        long sequenceStartOffset = this.pos;
        long descriptionLen = sequenceStartOffset - descriptionStartOffset;

        List<String> sequences = new ArrayList<String>();
        List<Long> sequenceStarts = new ArrayList<Long>();

        boolean foundNextRead = false;
        while (!foundNextRead) {
            Text newLine = new Text();
            long newSize = this.in.readLine(newLine, this.maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
            if (newSize == 0) {
                // EOF
                this.prevLine = null;
                this.prevSize = 0;
                this.pos = this.end;
                break;
            }

            if (newLine.getLength() > 0 && newLine.charAt(0) == READ_DELIMITER) {
                this.prevLine = newLine;
                this.prevSize = newSize;

                if (this.pos < this.end) {
                    foundNextRead = true;
                } else {
                    foundNextRead = false;
                }
                break;
            } else {
                sequences.add(newLine.toString());
                sequenceStarts.add(this.pos);
            }

            this.pos += newSize;
        }

        long newReadStartOffset = this.pos;
        long readLen = newReadStartOffset - readStartOffset;
        long sequenceLen = newReadStartOffset - sequenceStartOffset;

        this.value.setReadOffset(readStartOffset);
        this.value.setDescriptionOffset(descriptionStartOffset);
        this.value.setSequenceOffset(sequenceStartOffset);
        this.value.setReadLen(readLen);
        this.value.setDescriptionLen(descriptionLen);
        this.value.setSequenceLen(sequenceLen);
        this.value.setDescription(description.toString());

        if (this.firstRead) {
            this.value.setContinuousRead(false);
            this.firstRead = false;
        } else {
            this.value.setContinuousRead(true);
        }

        FastaRawReadLine[] readLines = new FastaRawReadLine[sequences.size()];
        for (int i = 0; i < sequences.size(); i++) {
            readLines[i] = new FastaRawReadLine(sequenceStarts.get(i), sequences.get(i));
        }
        this.value.setRawSequence(readLines);

        this.hasNextRead = foundNextRead;
        return true;
    } else {
        this.pos = this.end;
        this.prevLine = null;
        this.prevSize = 0;
        this.key = null;
        this.value = null;
        this.hasNextRead = false;
        return false;
    }
}
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchCombiner.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    MapWritable finalStripe = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {} Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Iterator<MapWritable> it = stripes.iterator();
        int seqlength = it.next().size();
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {} Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {} Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    context.write(key, sumOfStripes);
}
From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchReducer.java
License:Apache License
@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    MapWritable finalStripe = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;
    int stateID = -1;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
        stateID = Character.getNumericValue(key.charAt(5));
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
        stateID = Character.getNumericValue(key.charAt(8));
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {} Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Iterator<MapWritable> it = stripes.iterator();
        int seqlength = it.next().size();
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {} Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {} Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue()).get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {} Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    double sum = 0.0;
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        sum += ((DoubleWritable) sumEntry.getValue()).get();
    }

    //DoubleWritable normalizedSum = new DoubleWritable(0.0);
    //double[] innerValues = new double[sumOfStripes.size()];
    int index = 0;
    MapWritable distributionStripe = new MapWritable();
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        IntWritable state = (IntWritable) sumEntry.getKey();
        double innerValue = ((DoubleWritable) sumEntry.getValue()).get();
        double normalizedSum = innerValue / sum;
        //innerValues[index++] = normalizedSum;
        distributionStripe.put(state, new DoubleWritable(normalizedSum));
        //finalStripe.put(((IntWritable)sumEntry.getKey()), val);
    }

    log.info("Reducer Writing: Key = {} Value (Stripe) Size = {}", key.toString(), finalStripe.size());
    for (MapWritable.Entry<Writable, Writable> entry : finalStripe.entrySet()) {
        log.info("Distribution Stripe Detail Key = {}, Value ={}", ((IntWritable) entry.getKey()).get(),
                ((DoubleWritable) entry.getValue()).get());
    }

    context.write(key, distributionStripe);
}