Example usage for org.apache.hadoop.io Text charAt

List of usage examples for org.apache.hadoop.io Text charAt

Introduction

On this page you can find example usage for org.apache.hadoop.io Text charAt.

Prototype

public int charAt(int position) 

Document

Returns the Unicode Scalar Value (32-bit integer value) for the character at position.
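
Before the project examples, here is a minimal standalone sketch of the guard-then-compare pattern they rely on. The class name TextCharAtExample is invented for illustration; the comment about the -1 return value follows the method's documented behavior.

import org.apache.hadoop.io.Text;

public class TextCharAtExample {
    public static void main(String[] args) {
        Text line = new Text("# a comment line");

        // charAt takes a byte position into the UTF-8 encoded contents and returns
        // the Unicode scalar value found there, or -1 if the position is invalid or
        // points into the middle of a multi-byte character.
        if (line.getLength() > 0 && line.charAt(0) == '#') {
            System.out.println("line starts with '#'");
        }
    }
}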

Usage

From source file:it.crs4.seal.prq.PairReadsQSeqMapper.java

License:Open Source License

public void map(Text readId, SequencedFragment read, IMRContext<SequenceId, Text> context)
        throws IOException, InterruptedException {
    // build the key
    builder.delete(0, builder.length());

    // fields up to and including the index number go into the location.  The read number goes on its own.
    if (read.getRead() == null)
        throw new RuntimeException("Cannot get read number from read: " + readId);

    if (read.getLane() != null && read.getTile() != null && read.getXpos() != null && read.getYpos() != null) {
        appendIdToBuilder(builder, read); // appends the read id to the builder provided
        // finally the index field
        builder.append("#").append(read.getIndexSequence() == null ? '0' : read.getIndexSequence());
        sequenceKey.set(builder.toString(), read.getRead());
    } else {
        // maybe it's a fastq id with a trailing read number (/1 or /2)
        if (readId.getLength() > 2) {
            int last = readId.getLength() - 1;
            if (readId.charAt(last - 1) == '/') {
                // truncate the /[12] from the read id
                // last == length - 1.  We want length - 2 bytes, which is equal to last - 1
                sequenceKey.set(Text.decode(readId.getBytes(), 0, last - 1), read.getRead());
            } else
                throw new RuntimeException(
                        "Didn't find /read_number at end of the read id.  Please use qseq files or fastq with illumina-formatted name tags.");
        } else
            throw new RuntimeException("Read id " + readId
                    + " is too short.   Please use qseq files or fastq with illumina-formatted name tags.");
    }

    // then the tab-delimited value
    sequenceValue.clear();
    sequenceValue.append(read.getSequence().getBytes(), 0, read.getSequence().getLength());
    sequenceValue.append(Delim, 0, Delim.length);
    sequenceValue.append(read.getQuality().getBytes(), 0, read.getQuality().getLength());
    sequenceValue.append(Delim, 0, Delim.length);
    // the filter flag is optional.  If it's absent we assume the read passes filtering.
    sequenceValue.append(ZeroOne, (read.getFilterPassed() == null || read.getFilterPassed() ? 1 : 0), 1);

    context.write(sequenceKey, sequenceValue);
    context.progress();
}

From source file:it.uniroma1.hadoop.pagerank.job1.PageRankJob1Mapper.java

License:Open Source License

@Override
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

    /* Job#1 mapper simply parses a line of the input graph, emitting one key-value pair per edge.
     * Input format is the following (separator is TAB):
     *
     *     <nodeA>    <nodeB>
     * 
     * which denotes an edge going from <nodeA> to <nodeB>.
     * We need to skip comment lines (denoted by a # character at the beginning of the line).
     * We also collect all the distinct nodes in our graph: this is needed to compute the initial
     * pagerank value in the Job #1 reducer and also in later jobs.
     */

    if (value.charAt(0) != '#') {

        int tabIndex = value.find("\t");
        String nodeA = Text.decode(value.getBytes(), 0, tabIndex);
        String nodeB = Text.decode(value.getBytes(), tabIndex + 1, value.getLength() - (tabIndex + 1));
        context.write(new Text(nodeA), new Text(nodeB));

        // add the current source node to the node list so we can 
        // compute the total number of nodes in our graph in Job#2
        PageRank.NODES.add(nodeA);
        // also add the target node to the same list: we may have a target node 
        // with no outlinks (so it will never be parsed as a source)
        PageRank.NODES.add(nodeB);

    }

}

From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License:Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we reach a new read start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRead = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRead = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}

From source file:kogiri.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License:Open Source License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // seek to new read start
    if (this.hasNextRead) {
        this.key = new LongWritable(this.pos);
        this.value = new FastaRawRead(this.filename);

        Text description = this.prevLine;
        this.pos += this.prevSize;

        long readStartOffset = this.key.get();
        long descriptionStartOffset = readStartOffset + 1;

        long sequenceStartOffset = this.pos;
        long descriptionLen = sequenceStartOffset - descriptionStartOffset;
        List<String> sequences = new ArrayList<String>();
        List<Long> sequenceStarts = new ArrayList<Long>();

        boolean foundNextRead = false;
        while (!foundNextRead) {
            Text newLine = new Text();
            long newSize = this.in.readLine(newLine, this.maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
            if (newSize == 0) {
                // EOF
                this.prevLine = null;
                this.prevSize = 0;
                this.pos = this.end;
                break;
            }

            if (newLine.getLength() > 0 && newLine.charAt(0) == READ_DELIMITER) {
                this.prevLine = newLine;
                this.prevSize = newSize;

                if (this.pos + newSize < this.end) {
                    foundNextRead = true;
                } else {
                    foundNextRead = false;
                }
                break;
            } else {
                sequences.add(newLine.toString());
                sequenceStarts.add(this.pos);
            }

            this.pos += newSize;
        }

        long newReadStartOffset = this.pos;
        long readLen = newReadStartOffset - readStartOffset;
        long sequenceLen = newReadStartOffset - sequenceStartOffset;

        this.value.setReadOffset(readStartOffset);
        this.value.setDescriptionOffset(descriptionStartOffset);
        this.value.setSequenceOffset(sequenceStartOffset);
        this.value.setReadLen(readLen);
        this.value.setDescriptionLen(descriptionLen);
        this.value.setSequenceLen(sequenceLen);
        this.value.setDescription(description.toString());
        if (this.firstRead) {
            this.value.setContinuousRead(false);
            this.firstRead = false;
        } else {
            this.value.setContinuousRead(true);
        }

        FastaRawReadLine[] readLines = new FastaRawReadLine[sequences.size()];
        for (int i = 0; i < sequences.size(); i++) {
            readLines[i] = new FastaRawReadLine(sequenceStarts.get(i), sequences.get(i));
        }

        this.value.setRawSequence(readLines);

        this.hasNextRead = foundNextRead;
        return true;
    } else {
        this.pos = this.end;
        this.prevLine = null;
        this.prevSize = 0;
        this.key = null;
        this.value = null;
        this.hasNextRead = false;
        return false;
    }
}

From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java

License:Open Source License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    this.filename = file.getName();

    this.firstRead = true;

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);

    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), job);
    } else {
        if (this.start != 0) {
            fileIn.seek(this.start);
        }
        this.in = new LineReader(fileIn, job);
    }

    // skip lines until we reach a new record start
    while (this.start < this.end) {
        Text skipText = new Text();
        long newSize = this.in.readLine(skipText, this.maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.start), this.maxLineLength));
        if (newSize == 0) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }

        if (skipText.getLength() > 0 && skipText.charAt(0) == READ_DELIMITER) {
            this.prevLine = skipText;
            this.prevSize = newSize;
            this.hasNextRecord = true;
            this.pos = this.start;
            break;
        }

        this.start += newSize;

        if (this.start >= this.end) {
            // EOF
            this.hasNextRecord = false;
            this.pos = this.end;
            break;
        }
    }

    this.key = null;
    this.value = null;
}

From source file:kogiri.common.hadoop.io.reader.fasta.FastaReadDescriptionReader.java

License:Open Source License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // seek to new record start
    if (this.hasNextRecord) {
        this.key = new LongWritable(this.pos);
        this.value = new FastaRead(this.filename);

        Text description = this.prevLine;
        this.pos += this.prevSize;

        long readStartOffset = this.key.get();
        long descriptionStartOffset = readStartOffset + 1;

        long sequenceStartOffset = this.pos;
        long descriptionLen = sequenceStartOffset - descriptionStartOffset;

        boolean foundNextRead = false;
        while (!foundNextRead) {
            Text newLine = new Text();
            long newSize = this.in.readLine(newLine, this.maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
            if (newSize == 0) {
                // EOF
                this.prevLine = null;
                this.prevSize = 0;
                this.pos = this.end;
                break;
            }

            if (newLine.getLength() > 0 && newLine.charAt(0) == READ_DELIMITER) {
                this.prevLine = newLine;
                this.prevSize = newSize;

                if (this.pos + newSize < this.end) {
                    foundNextRead = true;
                } else {
                    foundNextRead = false;
                }
                break;
            } else {
                // skip
            }

            this.pos += newSize;
        }

        this.value.setReadOffset(readStartOffset);
        this.value.setDescription(description.toString());
        this.value.setSequence(null);
        if (this.firstRead) {
            this.value.setContinuousRead(false);
            this.firstRead = false;
        } else {
            this.value.setContinuousRead(true);
        }

        this.hasNextRecord = foundNextRead;
        return true;
    } else {
        this.pos = this.end;
        this.prevLine = null;
        this.prevSize = 0;
        this.key = null;
        this.value = null;
        this.hasNextRecord = false;
        return false;
    }
}

From source file:libra.common.hadoop.io.reader.fasta.FastaKmerReader.java

License:Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration conf = context.getConfiguration();
    this.kmersize = FastaKmerInputFormat.getKmerSize(conf);
    this.maxLineLength = conf.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    final Path file = split.getPath();
    this.compressionCodecs = new CompressionCodecFactory(conf);
    final CompressionCodec codec = this.compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(conf);

    // get uncompressed length
    if (codec instanceof GzipCodec) {
        this.isCompressed = true;

        FSDataInputStream fileInCheckSize = fs.open(file);
        byte[] len = new byte[4];
        try {
            LOG.info("compressed input : " + file.getName());
            LOG.info("compressed file size : " + this.end);
            fileInCheckSize.skip(this.end - 4);
            IOUtils.readFully(fileInCheckSize, len, 0, len.length);
            this.uncompressedSize = (len[3] << 24) | (len[2] << 16) | (len[1] << 8) | len[0];
            if (this.uncompressedSize < 0) {
                this.uncompressedSize = this.end;
            }
            LOG.info("uncompressed file size : " + this.uncompressedSize);
        } finally {
            fileInCheckSize.close();
        }

        this.end = Long.MAX_VALUE;
    } else if (codec != null) {
        this.isCompressed = true;
        this.end = Long.MAX_VALUE;
        this.uncompressedSize = Long.MAX_VALUE;
    } else {
        this.isCompressed = false;
    }

    // get inputstream
    FSDataInputStream fileIn = fs.open(file);
    boolean inTheMiddle = false;
    if (codec != null) {
        this.in = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        if (this.start != 0) {
            this.start--;
            fileIn.seek(this.start);

            inTheMiddle = true;
        }
        this.in = new LineReader(fileIn, conf);
    }

    this.buffer = new Text();

    if (inTheMiddle) {
        // find new start line
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));

        // back off
        FSDataInputStream fileIn2 = fs.open(file);
        fileIn2.seek(this.start - 1000);

        LineReader in2 = new LineReader(fileIn2, conf);
        Text tempLine = new Text();
        long curpos = this.start - 1000;
        while (curpos < this.start) {
            curpos += in2.readLine(tempLine, 0, (int) (this.start - curpos));
        }

        if (tempLine.charAt(0) == READ_DELIMITER) {
            // clean start
            this.buffer.clear();
        } else {
            // keep the last k-1 bases of the previous line in the buffer
            String seq = tempLine.toString().trim();
            String left = seq.substring(seq.length() - this.kmersize + 1);
            this.buffer.set(left);
        }

        in2.close();
    }

    this.pos = this.start;

    this.key = null;
    this.value = null;
}

From source file:libra.common.hadoop.io.reader.fasta.FastaRawReadReader.java

License:Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    // seek to new read start
    if (this.hasNextRead) {
        this.key = new LongWritable(this.pos);
        this.value = new FastaRawRead(this.filename);

        Text description = this.prevLine;
        this.pos += this.prevSize;

        long readStartOffset = this.key.get();
        long descriptionStartOffset = readStartOffset + 1;

        long sequenceStartOffset = this.pos;
        long descriptionLen = sequenceStartOffset - descriptionStartOffset;
        List<String> sequences = new ArrayList<String>();
        List<Long> sequenceStarts = new ArrayList<Long>();

        boolean foundNextRead = false;
        while (!foundNextRead) {
            Text newLine = new Text();
            long newSize = this.in.readLine(newLine, this.maxLineLength,
                    Math.max((int) Math.min(Integer.MAX_VALUE, this.end - this.pos), this.maxLineLength));
            if (newSize == 0) {
                // EOF
                this.prevLine = null;
                this.prevSize = 0;
                this.pos = this.end;
                break;
            }

            if (newLine.getLength() > 0 && newLine.charAt(0) == READ_DELIMITER) {
                this.prevLine = newLine;
                this.prevSize = newSize;

                if (this.pos < this.end) {
                    foundNextRead = true;
                } else {
                    foundNextRead = false;
                }
                break;
            } else {
                sequences.add(newLine.toString());
                sequenceStarts.add(this.pos);
            }

            this.pos += newSize;
        }

        long newReadStartOffset = this.pos;
        long readLen = newReadStartOffset - readStartOffset;
        long sequenceLen = newReadStartOffset - sequenceStartOffset;

        this.value.setReadOffset(readStartOffset);
        this.value.setDescriptionOffset(descriptionStartOffset);
        this.value.setSequenceOffset(sequenceStartOffset);
        this.value.setReadLen(readLen);
        this.value.setDescriptionLen(descriptionLen);
        this.value.setSequenceLen(sequenceLen);
        this.value.setDescription(description.toString());
        if (this.firstRead) {
            this.value.setContinuousRead(false);
            this.firstRead = false;
        } else {
            this.value.setContinuousRead(true);
        }

        FastaRawReadLine[] readLines = new FastaRawReadLine[sequences.size()];
        for (int i = 0; i < sequences.size(); i++) {
            readLines[i] = new FastaRawReadLine(sequenceStarts.get(i), sequences.get(i));
        }

        this.value.setRawSequence(readLines);

        this.hasNextRead = foundNextRead;
        return true;
    } else {
        this.pos = this.end;
        this.prevLine = null;
        this.prevSize = 0;
        this.key = null;
        this.value = null;
        this.hasNextRead = false;
        return false;
    }
}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchCombiner.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    MapWritable finalStripe = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Iterator<MapWritable> it = stripes.iterator();
        int seqlength = it.next().size();
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    context.write(key, sumOfStripes);

}

From source file:org.apache.mahout.classifier.sequencelearning.baumwelchmapreduce.BaumWelchReducer.java

License:Apache License

@Override
protected void reduce(Text key, Iterable<MapWritable> stripes, Context context)
        throws IOException, InterruptedException {

    log.info("Entering Reducer. Key = {}", key.toString());
    MapWritable sumOfStripes = new MapWritable();
    MapWritable finalStripe = new MapWritable();
    boolean isInitial = false;
    boolean isTransit = false;
    boolean isEmit = false;
    int stateID = -1;

    if (key.charAt(0) == 'I') {
        isInitial = true;
    } else if (key.charAt(0) == 'E') {
        isEmit = true;
        stateID = Character.getNumericValue(key.charAt(5));
    } else if (key.charAt(0) == 'T') {
        isTransit = true;
        stateID = Character.getNumericValue(key.charAt(8));
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error Determining the Key Type");
    }

    if (isInitial) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Initial Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Initial Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Initial. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isEmit) {
        Iterator<MapWritable> it = stripes.iterator();
        int seqlength = it.next().size();
        Double[] val = new Double[nrOfEmittedStates];
        for (int i = 0; i < nrOfEmittedStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Emission Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Emission Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfEmittedStates; i++) {
            log.info("Reducer adding to sumOfStripes for Emission. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else if (isTransit) {
        Double[] val = new Double[nrOfHiddenStates];
        for (int i = 0; i < nrOfHiddenStates; i++) {
            val[i] = 0.0;
        }
        for (MapWritable stripe : stripes) {
            log.info("Reducer Processing Transition Distribution Stripe.");
            for (MapWritable.Entry<Writable, Writable> stripeEntry : stripe.entrySet()) {
                log.info("Reducer Getting Transition Distribution Stripe Entry. Key = {}  Value = {} ",
                        Integer.toString(((IntWritable) stripeEntry.getKey()).get()),
                        Double.toString(((DoubleWritable) stripeEntry.getValue()).get()));
                val[((IntWritable) stripeEntry.getKey()).get()] += ((DoubleWritable) stripeEntry.getValue())
                        .get();
            }
        }
        for (int i = 0; i < nrOfHiddenStates; i++) {
            log.info("Reducer adding to sumOfStripes for Transition. Key = {}  Value ={}", Integer.toString(i),
                    Double.toString(val[i]));
            sumOfStripes.put(new IntWritable(i), new DoubleWritable(val[i]));
        }
    } else {
        throw new IllegalStateException("Baum Welch Reducer Error: Unable to aggregate distribution stripes.");
    }

    double sum = 0.0;
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        sum += ((DoubleWritable) sumEntry.getValue()).get();
    }

    //DoubleWritable normalizedSum = new DoubleWritable(0.0);
    //double[] innerValues = new double[sumOfStripes.size()];
    int index = 0;
    MapWritable distributionStripe = new MapWritable();
    for (MapWritable.Entry<Writable, Writable> sumEntry : sumOfStripes.entrySet()) {
        IntWritable state = (IntWritable) sumEntry.getKey();
        double innerValue = ((DoubleWritable) sumEntry.getValue()).get();
        double normalizedSum = innerValue / sum;
        //innerValues[index++] = normalizedSum;
        distributionStripe.put(state, new DoubleWritable(normalizedSum));
        //finalStripe.put(((IntWritable)sumEntry.getKey()), val);
    }

    log.info("Reducer Writing:  Key = {} Value (Stripe) Size = {}", key.toString(), finalStripe.size());
    for (MapWritable.Entry<Writable, Writable> entry : finalStripe.entrySet()) {
        log.info("Distribution Stripe Detail Key = {}, Value ={}", ((IntWritable) entry.getKey()).get(),
                ((DoubleWritable) entry.getValue()).get());
    }
    context.write(key, distributionStripe);

}