com.act.biointerpretation.analytics.ReactionDeletion.java Source code

Java tutorial

Introduction

Here is the source code for com.act.biointerpretation.analytics.ReactionDeletion.java

Source

/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program.  If not, see <http://www.gnu.org/licenses/>. *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.analytics;

import act.server.DBIterator;
import act.server.NoSQLAPI;
import act.shared.Reaction;
import com.act.utils.TSVWriter;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Command-line analysis that lists reactions found in a source (read) DB for which no reaction in the
 * sink (write) DB carries a protein entry whose {@code source_reaction_id} points back at them.
 *
 * <p>The result is written as a TSV file with the columns in {@link #OUTPUT_HEADER}.  Source reactions
 * that have no protein data at all are counted and logged but not written, since they cannot be
 * referenced through a protein's {@code source_reaction_id}.
 */
public class ReactionDeletion {
    private static final Logger LOGGER = LogManager.getFormatterLogger(ReactionDeletion.class);
    public static final String OPTION_OUTPUT_PATH = "o";
    public static final String OPTION_SOURCE_DB = "r";
    public static final String OPTION_SINK_DB = "k";

    public static final String HELP_MESSAGE = StringUtils.join(new String[] {
            "This class is used to find all reactions that were not carried forward from a read to a write DB. ",
            "This analysis only applies to a pair of consecutive DBs in the biointepretation pipeline, and does not span ",
            "multiple processing steps." }, "");

    /** Builders for every command-line option accepted by {@link #main(String[])}. */
    public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<>(Arrays.asList(
            Option.builder(OPTION_OUTPUT_PATH).argName("output path")
                    .desc("A path to where output should be written").hasArg().required().longOpt("output"),
            Option.builder(OPTION_SOURCE_DB).argName("db name").desc("DB from which reactions were read")
                    .hasArg().required().longOpt("source"),
            Option.builder(OPTION_SINK_DB).argName("db name").desc("DB to which reactions were written")
                    .hasArg().required().longOpt("sink"),
            Option.builder("h").argName("help").desc("Prints this help message").longOpt("help")));

    public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();

    static {
        HELP_FORMATTER.setWidth(100);
    }

    /**
     * Parses the command line, connects to the source and sink DBs, and runs the dropped-reaction search.
     *
     * <p>On a parse failure the error and the usage text are printed and the JVM exits with status 1.
     *
     * @param args command-line arguments; see {@link #OPTION_BUILDERS}
     * @throws Exception if the search itself fails
     */
    public static void main(String[] args) throws Exception {
        Options opts = new Options();
        for (Option.Builder b : OPTION_BUILDERS) {
            opts.addOption(b.build());
        }

        CommandLine cl = null;
        try {
            CommandLineParser parser = new DefaultParser();
            cl = parser.parse(opts, args);
        } catch (ParseException e) {
            // LOGGER is a formatter logger: pass the message as an argument rather than pre-formatting it,
            // so a '%' inside the parser's message is not interpreted as a format specifier.
            LOGGER.error("Argument parsing failed: %s\n", e.getMessage());
            HELP_FORMATTER.printHelp(ReactionDeletion.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
            System.exit(1);
            return; // Not reached in practice; keeps `cl` provably non-null below.
        }

        if (cl.hasOption("help")) {
            HELP_FORMATTER.printHelp(ReactionDeletion.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
            return;
        }

        // The output option is declared as required above; this is a defensive second check.
        if (!cl.hasOption(OPTION_OUTPUT_PATH)) {
            LOGGER.error("Input -o prefix");
            return;
        }

        // Each API is given the same DB name for both of its constructor arguments.
        NoSQLAPI srcApi = new NoSQLAPI(cl.getOptionValue(OPTION_SOURCE_DB), cl.getOptionValue(OPTION_SOURCE_DB));
        NoSQLAPI sinkApi = new NoSQLAPI(cl.getOptionValue(OPTION_SINK_DB), cl.getOptionValue(OPTION_SINK_DB));

        searchForDroppedReactions(srcApi, sinkApi, new File(cl.getOptionValue(OPTION_OUTPUT_PATH)));
    }

    /** Column names, in order, of the TSV written by {@link #searchForDroppedReactions}. */
    private static final List<String> OUTPUT_HEADER = Arrays.asList("id", "substrates", "products", "ecnum",
            "easy_desc");

    /**
     * Finds source-DB reactions that no sink-DB protein references and writes them to a TSV file.
     *
     * <p>If every source id is accounted for, nothing is written and the output file is not opened.
     *
     * @param srcApi     API whose read DB holds the reactions that were the input of a pipeline step
     * @param sinkApi    API whose reactions are scanned for {@code source_reaction_id} references
     * @param outputFile destination of the TSV report
     * @throws IOException      if the report cannot be written
     * @throws RuntimeException if a source reaction has an {@code _id} that is not an {@code Integer}
     */
    public static void searchForDroppedReactions(NoSQLAPI srcApi, NoSQLAPI sinkApi, File outputFile)
            throws IOException {
        Set<Long> srcIds = collectSourceReactionIds(srcApi);
        removeIdsReferencedBySink(sinkApi, srcIds);

        if (srcIds.isEmpty()) {
            LOGGER.info(
                    "No source read DB ids were unaccounted for in the write DB.  Exiting without writing output.");
            return;
        }

        List<Long> sortedSrcIds = new ArrayList<>(srcIds);
        Collections.sort(sortedSrcIds);

        writeUnaccountedReactions(srcApi, sortedSrcIds, outputFile);
    }

    /** Reads the {@code _id} of every reaction in the source read DB into a set. */
    private static Set<Long> collectSourceReactionIds(NoSQLAPI srcApi) {
        Set<Long> srcIds = new HashSet<>();
        // Only the _id field is requested, ordered by _id.
        DBIterator iterator = srcApi.getReadDB().getIteratorOverReactions(
                new BasicDBObject("$query", new BasicDBObject()).append("$orderby", new BasicDBObject("_id", 1)),
                new BasicDBObject("_id", true));

        try {
            while (iterator.hasNext()) {
                DBObject obj = iterator.next();
                Object id = obj.get("_id");
                if (id instanceof Integer) {
                    srcIds.add(((Integer) id).longValue());
                } else {
                    // Guard against a null _id so the intended error is reported instead of an NPE.
                    String typeName = id == null ? "null" : id.getClass().getName();
                    String msg = String.format("Found unexpected %s value for _id in src DB: %s", typeName, id);
                    LOGGER.error(msg);
                    throw new RuntimeException(msg);
                }
            }
        } finally {
            // Release the cursor even when an unexpected id aborts the scan.
            iterator.close();
        }
        return srcIds;
    }

    /** Removes from {@code srcIds} every id named by a {@code source_reaction_id} in the sink's protein data. */
    private static void removeIdsReferencedBySink(NoSQLAPI sinkApi, Set<Long> srcIds) {
        Iterator<Reaction> sinkRxns = sinkApi.readRxnsFromInKnowledgeGraph();
        while (sinkRxns.hasNext()) {
            Reaction rxn = sinkRxns.next();
            for (JSONObject protein : rxn.getProteinData()) {
                if (protein.has("source_reaction_id")) {
                    Long srcId = protein.getLong("source_reaction_id");
                    srcIds.remove(srcId);
                } else {
                    LOGGER.error("Found protein without source id for reaction %d", rxn.getUUID());
                }
            }
        }
    }

    /** Writes one TSV row per unaccounted-for source reaction that has protein data; counts those without. */
    private static void writeUnaccountedReactions(NoSQLAPI srcApi, List<Long> sortedSrcIds, File outputFile)
            throws IOException {
        try (TSVWriter<String, String> writer = new TSVWriter<>(OUTPUT_HEADER)) {
            writer.open(outputFile);

            int noProteinReactions = 0;

            for (Long id : sortedSrcIds) {
                Reaction rxn = srcApi.readReactionFromInKnowledgeGraph(id);
                if (rxn == null) {
                    LOGGER.error("Could not read reaction %d from source DB", id);
                    continue;
                }
                if (rxn.getProteinData().size() == 0) {
                    LOGGER.debug("Reaction %d has no proteins, and so cannot participate in the provenance chain",
                            rxn.getUUID());
                    noProteinReactions++;
                    continue;
                }

                Map<String, String> row = new HashMap<>(OUTPUT_HEADER.size());
                row.put("id", Long.valueOf(rxn.getUUID()).toString());
                row.put("substrates", "{" + StringUtils.join(rxn.getSubstrates(), ",") + "}");
                row.put("products", "{" + StringUtils.join(rxn.getProducts(), ",") + "}");
                row.put("ecnum", rxn.getECNum());
                row.put("easy_desc", rxn.getReactionName());

                writer.append(row);
                // Flush per row so partial results survive a later failure.
                writer.flush();
            }

            LOGGER.info("Found %d reactions with no proteins of %d reactions that might have been deleted",
                    noProteinReactions, sortedSrcIds.size());
        }
    }
}