FDREstimator.ExportWithEstimatedFDR.java Source code

Java tutorial

Introduction

Here is the source code for FDREstimator.ExportWithEstimatedFDR.java, the command-line entry point of the Umpire search result parser.

Source

/* 
 * Author: Chih-Chiang Tsou <chihchiang.tsou@gmail.com>
 *             Nesvizhskii Lab, Department of Computational Medicine and Bioinformatics, 
 *             University of Michigan, Ann Arbor
 *
 * Copyright 2014 University of Michigan, Ann Arbor, MI
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package FDREstimator;

import MSUmpire.BaseDataStructure.UmpireInfo;
import MSUmpire.PSMDataStructure.LCMSID;
import MSUmpire.PSMDataStructure.PepIonID;
import MSUmpire.SearchResultParser.PepXMLParser;
import MSUmpire.SearchResultParser.ProtXMLParser;
import MSUmpire.Utility.ConsoleLogger;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.commons.io.FilenameUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.xml.sax.SAXException;
import org.xmlpull.v1.XmlPullParserException;

/**
 * Command-line entry point of the Umpire search result parser.
 *
 * <p>Takes any number of PepXML files and, optionally, one combined ProtXML file, applies the
 * probability / decoy-FDR filters selected on the command line and calls the export methods of
 * {@link LCMSID} for the resulting peptide and protein identifications.
 *
 * @author Chih-Chiang Tsou
 */
public class ExportWithEstimatedFDR {

    /**
     * Parses the command line, loads and filters the identifications, and exports them.
     * Prints the usage text and returns when called without arguments.
     *
     * @param args the command line arguments: {@code [Options] [Combined ProtXML file] [PepXML files...]}
     * @throws NumberFormatException if a numeric option carries a value that is not a valid float
     */
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException,
            ClassNotFoundException, XmlPullParserException, InterruptedException, SQLException {

        System.out.println(
                "=================================================================================================");
        System.out.println("Umpire search result parser(version: " + UmpireInfo.GetInstance().Version + ")");
        if (args.length == 0) {
            printUsage();
            return;
        }

        // Loggers are configured before option parsing so that the parsed values are logged too.
        ConsoleLogger.SetConsoleLogger(Level.INFO);
        ConsoleLogger.SetFileLogger(Level.DEBUG, resolveLogDirectory(args[0]) + "parser_debug.log");

        Options options = Options.parse(args);
        if (options.pepXmlFiles.isEmpty()) {
            Logger.getRootLogger().warn("No PepXML input file (*.pep.xml or *.PepXML) was given");
        }

        String outputName = options.outputName;
        if (!outputName.isEmpty()) {
            outputName = outputName + "_";
        }
        outputName = outputName + MSUmpire.Utility.DateTimeTag.GetTag();

        // Combined container that collects the peptide ions of every PepXML file.
        LCMSID lcmsid = new LCMSID(outputName, options.decoyTag, options.fasta);
        for (String pepxml : options.pepXmlFiles) {
            LCMSID pepxmlid = new LCMSID(pepxml, options.decoyTag, options.fasta);
            // The parser instance itself is not needed afterwards; it is constructed for its
            // effect on pepxmlid (presumably it loads the file into it - verify in PepXMLParser).
            new PepXMLParser(pepxmlid, pepxml, options.minPepProb, options.correctMassDiff);
            if (options.pepFdr != -1f) {
                pepxmlid.FilterByPepDecoyFDR(options.decoyTag, options.pepFdr);
            }
            Logger.getRootLogger().info("peptide No.:" + pepxmlid.GetPepIonList().size()
                    + "; Peptide level threshold: " + pepxmlid.PepProbThreshold);
            for (PepIonID pepID : pepxmlid.GetPepIonList().values()) {
                lcmsid.AddPeptideID(pepID);
            }
        }

        if (!options.protXml.isEmpty()) {
            // As above: constructed only for its effect on lcmsid.
            new ProtXMLParser(lcmsid, options.protXml, options.minProtProb);
            lcmsid.DecoyTag = options.decoyTag;
            if (options.protProb != -1f) {
                lcmsid.RemoveLowLocalPWProtein(options.protProb);
            }
            // An R factor of 1 leaves the requested protein FDR unchanged.
            float rf = 1f;
            if (options.rfThreshold != -1f) {
                rf = lcmsid.GetRFactor(options.rfThreshold);
            }
            if (options.protFdr != -1f) {
                lcmsid.FilterByProteinDecoyFDRUsingMaxIniProb(lcmsid.DecoyTag, options.protFdr / rf);
            }
            if (!options.fasta.isEmpty()) {
                lcmsid.LoadSequence();
            }
            lcmsid.ReMapProPep();
            lcmsid.ExportProtID();
        }
        lcmsid.CreateInstanceForAllPepIon();
        lcmsid.ExportPepID();
        Logger.getRootLogger().info("Protein No.:" + lcmsid.ProteinList.size() + "; All peptide ions.:"
                + lcmsid.GetPepIonList().size());
    }

    /** Prints the command line synopsis and the list of supported options to standard output. */
    private static void printUsage() {
        System.out.println(
                "command : java -jar -Xmx1G Umpire-SearchResultParser.jar [Options] [Combined ProtXML file] [PepXML files...]");
        System.out.println("");
        System.out.println("ProtXML extension: *.prot.xml or *.ProtXML");
        System.out.println("PepXML extension: *.pep.xml or *.PepXML");
        System.out.println("\nOptions");
        System.out.println("\t-MP\tMin protein parsing probability\tex: -MP0.1f (default: -1, no filtering)");
        System.out.println("\t-Mp\tMin PSM parsing probability\tex: -Mp0.1f (default: -1, no filtering)");
        System.out.println("\t-fP\tProtein FDR\tex: -fP0.01 (default: 0.01, no filtering: -1)");
        System.out.println("\t-fp\tPeptide FDR\tex: -fp0.05 (default: 0.01, no filtering: -1)");
        System.out.println("\t-d\tDecoy tag prefix\tex: -dDECOY (default: rev_)");
        System.out.println(
                "\t-C\t(0 or 1) Correct mass diff derived from isotope error\tex:-C0 (default:0, no correction)");
        System.out.println("\t-fa\tFasta file");
        System.out.println("\t-N\tOutput filename");
        System.out.println(
                "\t-pt\tInitial protein probability filtering threshold\tex: -pt0.5 (default: 0.5, no filtering : -1)");
        System.out.println(
                "\t-rf\tR factor threshold, proteins with protein probablity less than the threshold will be used to estimate the R factor \n\t\tex: -rf0.2 (default: 0.2, do not use R factor: -1)");
    }

    /**
     * Determines the directory prefix for the debug log from the first command line argument.
     *
     * <p>When the first argument is a file, the log is placed next to it. When it is an option
     * (the usual case, since options come first) the empty prefix is returned so the log goes to
     * the working directory; previously the option text itself was run through
     * {@code getFullPath}, which produced a bogus directory for values containing a path
     * separator such as {@code -fa/data/db.fasta}.
     *
     * @param firstArg the first command line argument
     * @return a directory prefix ending in a separator, or the empty string for the working directory
     */
    private static String resolveLogDirectory(String firstArg) {
        if (firstArg.startsWith("-")) {
            return "";
        }
        // getFullPath returns null for an invalid path; fall back to the working directory.
        String directory = FilenameUtils.getFullPath(firstArg);
        return directory == null ? "" : directory;
    }

    /** Settings read from the command line, initialised with the defaults listed in the usage text. */
    private static final class Options {

        float protFdr = 0.01f;
        float pepFdr = 0.01f;
        float minPepProb = -1f;
        float minProtProb = -1f;
        boolean correctMassDiff = false;
        String decoyTag = "rev_";
        String fasta = "";
        String outputName = "";
        float protProb = 0.5f;
        float rfThreshold = 0.2f;
        String protXml = "";
        final ArrayList<String> pepXmlFiles = new ArrayList<>();

        /**
         * Builds the settings from the raw arguments. A recognised option is never additionally
         * interpreted as an input file, even when its value happens to end in a PepXML/ProtXML
         * extension. Arguments that are neither a known option nor an input file are reported.
         */
        static Options parse(String[] args) {
            Options options = new Options();
            for (String arg : args) {
                if (arg.startsWith("-") && options.applyOption(arg)) {
                    continue;
                }
                boolean isInputFile = false;
                if (arg.endsWith(".pep.xml") || arg.endsWith(".PepXML")) {
                    options.pepXmlFiles.add(arg);
                    isInputFile = true;
                }
                if (arg.endsWith(".prot.xml") || arg.endsWith(".ProtXML")) {
                    options.protXml = arg;
                    isInputFile = true;
                }
                if (!isInputFile) {
                    Logger.getRootLogger().warn("Ignoring unrecognized argument: " + arg);
                }
            }
            return options;
        }

        /**
         * Applies a single {@code -xxVALUE} style option and logs the resulting value.
         *
         * @return {@code true} if the argument matched a known option prefix
         */
        private boolean applyOption(String arg) {
            Logger log = Logger.getRootLogger();
            if (arg.startsWith("-fP")) {
                protFdr = parseFloatOption(arg, 3);
                log.info("Protein FDR: " + protFdr);
            } else if (arg.startsWith("-fp")) {
                pepFdr = parseFloatOption(arg, 3);
                log.info("Peptide FDR: " + pepFdr);
            } else if (arg.startsWith("-MP")) {
                minProtProb = parseFloatOption(arg, 3);
                log.info("Min protein parsing probability: " + minProtProb);
            } else if (arg.startsWith("-Mp")) {
                minPepProb = parseFloatOption(arg, 3);
                log.info("Min PSM parsing probability: " + minPepProb);
            } else if (arg.startsWith("-fa")) {
                fasta = arg.substring(3);
                log.info("Fasta file: " + fasta);
            } else if (arg.startsWith("-pt")) {
                protProb = parseFloatOption(arg, 3);
                log.info("Initial protein probablity filtering threshold: " + protProb);
            } else if (arg.startsWith("-rf")) {
                rfThreshold = parseFloatOption(arg, 3);
                log.info("R factor threshold: " + rfThreshold);
            } else if (arg.startsWith("-d")) {
                decoyTag = arg.substring(2);
                log.info("Decoy tag: " + decoyTag);
            } else if (arg.startsWith("-N")) {
                outputName = arg.substring(2);
                log.info("Output filename: " + outputName);
            } else if (arg.startsWith("-C")) {
                // Assign instead of only ever setting to true, so that the last -C on the
                // command line wins like for every other option.
                correctMassDiff = "1".equals(arg.substring(2));
                log.info("Correct mass diff: " + correctMassDiff);
            } else {
                return false;
            }
            return true;
        }

        /**
         * Parses the numeric value that follows an option prefix.
         *
         * @param arg the complete argument, e.g. {@code -fP0.01}
         * @param prefixLength number of leading characters that form the option name
         * @throws NumberFormatException naming the option if the value is not a valid float
         */
        private static float parseFloatOption(String arg, int prefixLength) {
            String value = arg.substring(prefixLength);
            try {
                return Float.parseFloat(value);
            } catch (NumberFormatException e) {
                NumberFormatException detailed = new NumberFormatException(
                        "Invalid numeric value \"" + value + "\" for option " + arg.substring(0, prefixLength));
                detailed.initCause(e);
                throw detailed;
            }
        }
    }

}