com.clarkparsia.geneious.SequenceVerificationAnnotation.java Source code

Java tutorial

Introduction

Here is the source code for com.clarkparsia.geneious.SequenceVerificationAnnotation.java

Source

/*
 * Copyright (c) 2012 - 2015, Clark & Parsia, LLC. <http://www.clarkparsia.com>
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.clarkparsia.geneious;

import java.net.URI;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.biomatters.geneious.publicapi.documents.sequence.SequenceAnnotation;
import com.biomatters.geneious.publicapi.documents.sequence.SequenceAnnotationInterval;
import com.biomatters.geneious.publicapi.documents.sequence.SequenceCharSequence;
import com.biomatters.geneious.publicapi.documents.sequence.SequenceDocument;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;

/**
 * Representation of sequence verification annotation. 
 * 
 * @author Evren Sirin
 */
public class SequenceVerificationAnnotation {
    private static Logger LOGGER = LoggerFactory.getLogger(SequenceVerificationAnnotation.class.getName());

    private final SublimeAlignmentDocument alignmentDoc;
    private final SequenceAnnotationInterval interval;

    private SequenceVariantType variantType;
    private char[] substitution;
    private URI soType;
    private String identifier;
    private boolean isAmbiguous;

    public SequenceVerificationAnnotation(SequenceAnnotationInterval interval,
            SublimeAlignmentDocument alignmentDoc) {
        this.interval = interval;

        this.alignmentDoc = alignmentDoc;

        variantType = guessSequenceVariant();

        identifier = generateIdentifier();

        isAmbiguous = guessAmbiguous();
    }

    public SequenceVariantType getVariantType() {
        return variantType;
    }

    public void setVariantType(SequenceVariantType variantType) {
        this.variantType = variantType;
        this.identifier = generateIdentifier();
    }

    public boolean isAmbiguous() {
        return isAmbiguous;
    }

    public void setAmbiguous(boolean isAmbiguous) {
        this.isAmbiguous = isAmbiguous;
    }

    public SequenceAnnotationInterval getInterval() {
        return interval;
    }

    public SublimeAlignmentDocument getAligmentDoc() {
        return alignmentDoc;
    }

    public String getIdentifier() {
        return identifier;
    }

    private SequenceVariantType guessSequenceVariant() {
        int start = interval.getMinimumIndex() - 1;
        int end = interval.getMaximumIndex();

        SequenceCharSequence expectedSequence = alignmentDoc.getDesign().getCharSequence();
        int gapCount = expectedSequence.countGaps(start, end);
        if (gapCount == interval.getLength()) {
            // TODO check if this is duplication
            return SequenceVariantType.INSERTION;
        }

        int possibleDeletion = 0;
        for (SequenceDocument actualSequenceDoc : alignmentDoc.getSequencingData()) {
            SequenceCharSequence actualSequence = actualSequenceDoc.getCharSequence();
            gapCount = actualSequence.countGaps(start, end);
            if (gapCount == interval.getLength()) {
                possibleDeletion++;
            }
        }

        return possibleDeletion > alignmentDoc.getSequencingData().size() / 2 ? SequenceVariantType.DELETION
                : SequenceVariantType.SUBSTITUTION;
    }

    private boolean guessAmbiguous() {
        int from = interval.getMinimumIndex() - 1;
        int to = interval.getTo();

        SequenceCharSequence sequence = null;

        for (SequenceDocument actualSequenceDoc : alignmentDoc.getSequencingData()) {
            SequenceCharSequence anotherSequence = actualSequenceDoc.getCharSequence().subSequence(from, to);
            if (sequence == null) {
                sequence = anotherSequence;
            } else if (!sequence.equalsIgnoreCase(anotherSequence)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Generate an identifier based on http://www.hgvs.org/mutnomen/recs.html
     */
    private String generateIdentifier() {
        guessSOType();

        StringBuilder sb = new StringBuilder();
        sb.append("c.");

        boolean isSingleSubstitution = (substitution != null);
        if (isSingleSubstitution) {
            sb.append(alignmentDoc.originalPosition(interval.getFrom()));
            sb.append(substitution[0]);
            sb.append('>');
            sb.append(substitution[1]);
        } else {
            sb.append(alignmentDoc.originalPosition(interval.getFrom()));
            if (interval.getLength() > 1) {
                sb.append('_');
                sb.append(alignmentDoc.originalPosition(interval.getTo()));
            }
            sb.append(variantType.symbol());
        }

        return sb.toString();

    }

    private void guessSOType() {
        SequenceCharSequence expectedSequence = alignmentDoc.getCharSequence();

        substitution = null;
        soType = null;

        if (interval.getLength() == 1 && variantType == SequenceVariantType.SUBSTITUTION) {
            int position = interval.getMinimumIndex() - 1;
            SequenceCharSequence e = expectedSequence.subSequence(position - 10, position + 10);
            char expected = expectedSequence.charAt(position);
            LOGGER.debug("          **********V*********");
            LOGGER.debug("Expected: " + e + " " + expected);
            char mostLikelySubstitution = '-';
            int maxOccurence = 0;
            Multiset<Character> possibleSubstitutions = HashMultiset.create();
            for (SequenceDocument actualSequenceDoc : alignmentDoc.getSequencingData()) {
                SequenceCharSequence actualSequence = actualSequenceDoc.getCharSequence();
                char substitution = actualSequence.charAt(position);
                LOGGER.debug("Actual  : " + actualSequence.subSequence(position - 10, position + 10) + " "
                        + substitution);
                if (substitution != '-' && substitution != expected) {
                    possibleSubstitutions.add(substitution);
                    int occurences = possibleSubstitutions.count(substitution);
                    if (occurences > maxOccurence || (occurences == maxOccurence && isValidChar(substitution)
                            && !isValidChar(mostLikelySubstitution))) {
                        mostLikelySubstitution = substitution;
                        maxOccurence = occurences;
                    }
                }
            }
            LOGGER.debug("          **********^*********");

            if (isValidChar(mostLikelySubstitution)) {
                substitution = new char[] { expected, mostLikelySubstitution };
                soType = SequenceOntologyUtil.getSOSubstitutionType(expected, mostLikelySubstitution);
            }
        }

        if (substitution == null) {
            soType = SequenceOntologyUtil.getSOType(variantType);
        }
    }

    private static boolean isValidChar(char c) {
        return c == 'A' || c == 'C' || c == 'G' || c == 'T';
    }

    public SequenceAnnotation generateAnnotation() {
        SequenceAnnotation result = new SequenceAnnotation(identifier, variantType.toString(), interval);

        if (soType != null) {
            String soName = SequenceOntologyUtil.getSOLabel(soType);
            if (soName != null) {
                result.addQualifier(GeneiousQualifiers.SO_TYPE_NAME, soName);
            }
            result.addQualifier(GeneiousQualifiers.SO_TYPE, soType.toString());
        }

        result.addQualifier(GeneiousQualifiers.AMBIGUOUS_VERIFICATION, isAmbiguous ? "yes" : "no");

        SequenceAnnotation affectedComponent = getAffectedComponent();
        if (affectedComponent != null) {
            result.addQualifier(GeneiousQualifiers.AFFECTED_COMPONENT, affectedComponent.getName());
        }

        return result;
    }

    public SequenceAnnotation getAffectedComponent() {
        return GeneiousUtils.findParentAnnotation(interval, alignmentDoc.getDesign().getSequenceAnnotations());
    }

    @Override
    public String toString() {
        return "SequenceVerificationResult [interval=" + interval + ", variantType=" + variantType + ", identifier="
                + identifier + ", isAmbiguous=" + isAmbiguous + "]";
    }
}