blast.specification.cutoff.TUITCutoffSet.java Source code

Java tutorial

Introduction

Here is the source code for blast.specification.cutoff.TUITCutoffSet.java

Source

package blast.specification.cutoff;

import blast.ncbi.output.Hsp;
import blast.normal.hit.NormalizedHit;
import logger.Log;
import org.apache.commons.math3.stat.inference.TestUtils;

import java.util.logging.Level;

/**
 * Taxonomic Unit Identification Tool (TUIT) is a free open source platform independent
 * software for accurate taxonomic classification of nucleotide sequences.
 * Copyright (C) 2013  Alexander Tuzhikov, Alexander Panchin and Valery Shestopalov.
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * A representation of a set of cutoffs. As long as every {@link blast.normal.hit.NormalizedHit} has to meet some certain requirements to qualify
 * as a pivotal hit for the taxonomic identification, this class helps to check the hits pIdent, query coverage and the E-value
 * ratio between a hit and any other hit within the specification list.
 */

public class TUITCutoffSet {
    /**
     * A symbol that BLAST uses to mark a gap
     */
    private static final char gap = '-';
    /**
     * A cutoff for pIdent
     */
    @SuppressWarnings("WeakerAccess")
    protected final double pIdentCutoff;
    /**
     * A cutoff for query coverage
     */
    @SuppressWarnings("WeakerAccess")
    protected final double queryCoverageCutoff;
    /**
     * A cutoff for the E-value ratio
     */
    @SuppressWarnings("WeakerAccess")
    protected final double alpha;

    /**
     * A protected constructor for the use via factories
     *
     * @param pIdentCutoff           {@code double} A cutoff for pIdent
     * @param queryCoverageCutoff    {@code double} A cutoff for query coverage
     * @param alpha {@code double} alpha cutoff for the p-value
     */
    @SuppressWarnings("WeakerAccess")
    protected TUITCutoffSet(final double pIdentCutoff, final double queryCoverageCutoff, final double alpha) {
        this.pIdentCutoff = pIdentCutoff;
        this.queryCoverageCutoff = queryCoverageCutoff;
        this.alpha = alpha;
    }

    /**
     * A getter for the percent Identity cutoff
     * @return {@code double} percent identity cutoff
     */
    public double getpIdentCutoff() {
        return pIdentCutoff;
    }

    /**
     * A getter for the query coverage cutoff
     * @return {@code double} query coverage cutoff
     */
    public double getQueryCoverageCutoff() {
        return queryCoverageCutoff;
    }

    /**
     * A getter for the alpha level cutoff
     * @return {@code double} alpha level of the p-value cutoff
     */
    public double getAlpha() {
        return alpha;
    }

    /**
     * As long as BLAST outputs treats every gap as an independent event, this method allows to correct for this by calculating gapopen.
     * A standard BLAST XMP output reports only gaps, but not gapopens. Note: gaps&lt;=gapopens
     *
     * @param seq {@link String} Nucleotide sequence with gaps, marked as '-'
     * @return {@code int} number of gapopens
     */
    public static int calculateNumberOfGapOpens(final String seq) {

        int numGapOpen = 0;
        for (int i = 1; i < seq.length(); i++) {
            if (seq.charAt(i) == gap) {
                if (seq.charAt(i - 1) != gap) {
                    numGapOpen++;
                }
            }
        }
        return numGapOpen;
    }

    /**
     * A static factory that returns a new instance of the {@link TUITCutoffSet}
     *
     * @param pIdentCutoff           {@code double} A cutoff for pIdent
     * @param queryCoverageCutoff    {@code double} A cutoff for query coverage
     * @param evalueDifferenceCutoff {@code double} A cutoff for E-value ratio
     * @return a new instance of {@link TUITCutoffSet} from the given parameters
     */
    public static TUITCutoffSet newDefaultInstance(final double pIdentCutoff, final double queryCoverageCutoff,
            final double evalueDifferenceCutoff) {
        return new TUITCutoffSet(pIdentCutoff, queryCoverageCutoff, evalueDifferenceCutoff);
    }

    /**
     * Checks whether the given {@link blast.normal.hit.NormalizedHit} passes the cutoffs for pIdent and query coverage
     *
     * @param normalizedHit {@link blast.normal.hit.NormalizedHit} that is being checked
     * @return {@code true} if the {@link blast.normal.hit.NormalizedHit} passes checks, {@code false} otherwise or if a pointer
     *         to {@code null} was given
     */
    public boolean normalizedHitPassesCheck(final NormalizedHit normalizedHit) {
        return !(normalizedHit == null || normalizedHit.getpIdent() < this.pIdentCutoff
                || normalizedHit.getHitQueryCoverage() < this.queryCoverageCutoff);
    }

    /**
     * Checks whether the given {@link NormalizedHit}s E-value ratio is higher than the cutoff value
     *
     * @param oneNormalizedHit     {@link NormalizedHit} (assuming the hit with a worse (higher) e-value)
     * @param anotherNormalizedHit {@link NormalizedHit} (assuming the hit with a better (lower) e-value)
     * @return {@code true} Performs a Chi Squared test on a contingency table <br>
     *         <table class="tg-table-plain">
     *         <tr>
     *         <th></th>
     *         <th>Better hit</th>
     *         <th>Other hit</th>
     *         <th>Sum</th>
     *         </tr>
     *         <tr class="tg-even">
     *         <td>Corrected Match</td>
     *         <td>Number of identities</td>
     *         <td>Number of identities</td>
     *         <td></td>
     *         </tr>
     *         <tr>
     *         <td>Corrected Mismatch</td>
     *         <td>Align.length-(Total Gaps - Gapopen)</td>
     *         <td>Align.length-(Total Gaps - Gapopen)</td>
     *         <td></td>
     *         </tr>
     *         <tr class="tg-even">
     *         <td>Sum</td>
     *         <td></td>
     *         <td></td>
     *         <td></td>
     *         </tr>
     *         <caption>TUIT cont. table</caption>
     *         </table>
     *         , returns true if the test has shown the statistical significant prevalence of one alignment over another
     *         , {@code false} otherwise or if either of the pointers
     *         point to {@code null}
     */
    public boolean hitsAreStatisticallyDifferent(final NormalizedHit oneNormalizedHit,
            final NormalizedHit anotherNormalizedHit) {

        int oneAlignLen = 0;
        for (Hsp hsp : oneNormalizedHit.getHit().getHitHsps().getHsp()) {
            oneAlignLen += Integer.valueOf(hsp.getHspAlignLen());
        }

        int anotherAlignLen = 0;
        for (Hsp hsp : anotherNormalizedHit.getHit().getHitHsps().getHsp()) {
            anotherAlignLen += Integer.valueOf(hsp.getHspAlignLen());
        }

        int oneNumIdents = 0;
        for (Hsp hsp : oneNormalizedHit.getHit().getHitHsps().getHsp()) {
            oneNumIdents += Integer.valueOf(hsp.getHspIdentity());
        }

        int anotherNumIdents = 0;
        for (Hsp hsp : anotherNormalizedHit.getHit().getHitHsps().getHsp()) {
            anotherNumIdents += Integer.valueOf(hsp.getHspIdentity());
        }

        int oneNumGaps = 0;
        for (Hsp hsp : oneNormalizedHit.getHit().getHitHsps().getHsp()) {
            oneNumGaps += Integer.valueOf(hsp.getHspGaps());
        }

        int anotherNumGaps = 0;
        for (Hsp hsp : anotherNormalizedHit.getHit().getHitHsps().getHsp()) {
            anotherNumGaps += Integer.valueOf(hsp.getHspGaps());
        }

        int oneNumGapOpens = 0;
        for (Hsp hsp : oneNormalizedHit.getHit().getHitHsps().getHsp()) {
            oneNumGapOpens += calculateNumberOfGapOpens(hsp.getHspQseq())
                    + calculateNumberOfGapOpens(hsp.getHspHseq());
        }

        int anotherNumGapOpens = 0;
        for (Hsp hsp : anotherNormalizedHit.getHit().getHitHsps().getHsp()) {
            anotherNumGapOpens += calculateNumberOfGapOpens(hsp.getHspQseq())
                    + calculateNumberOfGapOpens(hsp.getHspHseq());
        }

        return TestUtils.chiSquareTest(
                new long[][] { { oneNumIdents, anotherNumIdents },
                        { (oneAlignLen - (oneNumGaps - oneNumGapOpens)) - oneNumIdents,
                                (anotherAlignLen - (anotherNumGaps - anotherNumGapOpens)) - anotherNumIdents } },
                this.alpha);
    }
}