libra.common.kmermatch.KmerJoiner.java Source code

Java tutorial

Introduction

Here is the source code for libra.common.kmermatch.KmerJoiner.java

Source

/*
 * Copyright 2016 iychoi.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package libra.common.kmermatch;

import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import libra.common.hadoop.io.datatypes.CompressedSequenceWritable;
import libra.common.helpers.SequenceHelper;
import libra.preprocess.common.kmerhistogram.KmerRangePartition;
import libra.preprocess.common.kmerindex.AKmerIndexReader;
import libra.preprocess.common.kmerindex.KmerIndexReader;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

/**
 *
 * @author iychoi
 */
public class KmerJoiner {

    private static final Log LOG = LogFactory.getLog(KmerJoiner.class);

    private Path[] kmerIndexPath;
    private KmerRangePartition partition;
    private Configuration conf;

    private AKmerIndexReader[] readers;
    private BigInteger partitionSize;
    private CompressedSequenceWritable progressKey;
    private boolean eof;
    private BigInteger beginSequence;

    private CompressedSequenceWritable[] stepKeys;
    private IntWritable[] stepVals;
    private List<Integer> stepMinKeys;
    private boolean stepStarted;

    public KmerJoiner(Path[] kmerIndexPath, KmerRangePartition partition, TaskAttemptContext context)
            throws IOException {
        initialize(kmerIndexPath, partition, context.getConfiguration());
    }

    public KmerJoiner(Path[] kmerIndexPath, KmerRangePartition partition, Configuration conf) throws IOException {
        initialize(kmerIndexPath, partition, conf);
    }

    private void initialize(Path[] kmerIndexPath, KmerRangePartition partition, Configuration conf)
            throws IOException {
        this.kmerIndexPath = kmerIndexPath;
        this.partition = partition;
        this.conf = conf;

        this.readers = new AKmerIndexReader[this.kmerIndexPath.length];
        LOG.info("# of KmerIndexReader : " + this.readers.length);
        for (int i = 0; i < this.readers.length; i++) {
            FileSystem fs = this.kmerIndexPath[i].getFileSystem(this.conf);
            this.readers[i] = new KmerIndexReader(fs, this.kmerIndexPath[i], this.partition.getPartitionBeginKmer(),
                    this.partition.getPartitionEndKmer(), this.conf);
        }

        this.partitionSize = partition.getPartitionSize();
        this.progressKey = null;
        this.eof = false;
        this.beginSequence = this.partition.getPartitionBegin();
        this.stepKeys = new CompressedSequenceWritable[this.readers.length];
        this.stepVals = new IntWritable[this.readers.length];
        this.stepStarted = false;

        LOG.info("Matcher is initialized");
        LOG.info(
                "> Range " + this.partition.getPartitionBeginKmer() + " ~ " + this.partition.getPartitionEndKmer());
        LOG.info("> Num of Slice Entries : " + this.partition.getPartitionSize().longValue());
    }

    public KmerMatchResult stepNext() throws IOException {
        List<Integer> minKeyIndice = getNextMinKeys();
        if (minKeyIndice.size() > 0) {
            CompressedSequenceWritable minKey = this.stepKeys[minKeyIndice.get(0)];
            this.progressKey = minKey;

            // check matching
            IntWritable[] minVals = new IntWritable[minKeyIndice.size()];
            Path[] minIndexPaths = new Path[minKeyIndice.size()];

            int valIdx = 0;
            for (int idx : minKeyIndice) {
                minVals[valIdx] = this.stepVals[idx];
                minIndexPaths[valIdx] = this.readers[idx].getIndexPath();
                valIdx++;
            }

            return new KmerMatchResult(minKey, minVals, minIndexPaths);
        }

        // step failed and no match
        this.eof = true;
        this.progressKey = null;
        return null;
    }

    private List<Integer> findMinKeys() throws IOException {
        CompressedSequenceWritable minKey = null;
        List<Integer> minKeyIndice = new ArrayList<Integer>();
        for (int i = 0; i < this.readers.length; i++) {
            if (this.stepKeys[i] != null) {
                if (minKey == null) {
                    minKey = this.stepKeys[i];
                    minKeyIndice.clear();
                    minKeyIndice.add(i);
                } else {
                    int comp = minKey.compareTo(this.stepKeys[i]);
                    if (comp == 0) {
                        // found same min key
                        minKeyIndice.add(i);
                    } else if (comp > 0) {
                        // found smaller one
                        minKey = this.stepKeys[i];
                        minKeyIndice.clear();
                        minKeyIndice.add(i);
                    }
                }
            }
        }

        return minKeyIndice;
    }

    private List<Integer> getNextMinKeys() throws IOException {
        if (!this.stepStarted) {
            for (int i = 0; i < this.readers.length; i++) {
                // fill first
                CompressedSequenceWritable key = new CompressedSequenceWritable();
                IntWritable val = new IntWritable();
                if (this.readers[i].next(key, val)) {
                    this.stepKeys[i] = key;
                    this.stepVals[i] = val;
                } else {
                    this.stepKeys[i] = null;
                    this.stepVals[i] = null;
                }
            }

            this.stepStarted = true;
            this.stepMinKeys = findMinKeys();
            return this.stepMinKeys;
        } else {
            // find min key
            if (this.stepMinKeys.size() == 0) {
                //EOF
                return this.stepMinKeys;
            }

            // move min pointers
            for (int idx : this.stepMinKeys) {
                CompressedSequenceWritable key = new CompressedSequenceWritable();
                IntWritable val = new IntWritable();
                if (this.readers[idx].next(key, val)) {
                    this.stepKeys[idx] = key;
                    this.stepVals[idx] = val;
                } else {
                    this.stepKeys[idx] = null;
                    this.stepVals[idx] = null;
                }
            }

            this.stepMinKeys = findMinKeys();
            return this.stepMinKeys;
        }
    }

    public float getProgress() {
        if (this.progressKey == null) {
            if (this.eof) {
                return 1.0f;
            } else {
                return 0.0f;
            }
        } else {
            BigInteger seq = SequenceHelper.convertToBigInteger(this.progressKey.getSequence());
            BigInteger prog = seq.subtract(this.beginSequence);
            int comp = this.partitionSize.compareTo(prog);
            if (comp <= 0) {
                return 1.0f;
            } else {
                BigDecimal progDecimal = new BigDecimal(prog);
                BigDecimal rate = progDecimal.divide(new BigDecimal(this.partitionSize), 3,
                        BigDecimal.ROUND_HALF_UP);

                float f = rate.floatValue();
                return Math.min(1.0f, f);
            }
        }
    }

    public void close() throws IOException {
        for (AKmerIndexReader reader : this.readers) {
            reader.close();
        }
    }
}