Java tutorial
/* * Copyright 2011 The Scripps Research Institute * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.scripps.fl.pubchem.promiscuity; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.collections.CollectionUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import edu.emory.mathcs.backport.java.util.Arrays; import edu.scripps.fl.pubchem.promiscuity.model.CompoundPromiscuityInfo; import edu.scripps.fl.pubchem.promiscuity.model.OverallListsAndMaps; import edu.scripps.fl.pubchem.promiscuity.model.PCPromiscuityParameters; import edu.scripps.fl.pubchem.promiscuity.model.PromiscuityCount; import edu.scripps.fl.pubchem.promiscuity.model.Protein; import edu.scripps.fl.pubchem.web.ELinkResult; import edu.scripps.fl.pubchem.web.entrez.ELinkWebSession; public class PCPromiscuityMain { private static String[] searchTerms = { "\"NIH Molecular Libraries Program\"[SourceCategory]", "\"ChEMBL\"[SourceName]", "(\"beta lactamase\"[AssayProtocol] OR \"beta lactamase\"[AssayDescription] " + "OR \"B lactamase\"[AssayDescription] OR \"B lactamase\"[AssayProtocol]) NOT \"Summary\"[ActivityOutcomeMethod]", "(\"luciferase\"[AssayProtocol] OR \"luciferase\"[AssayDescription]) NOT \"Summary\"[ActivityOutcomeMethod]", "(\"fluorescence\"[AssayProtocol] OR \"fluorescent\"[AssayProtocol] OR \"fluorescence\"[AssayDescription] OR \"fluorescent\"[AssayDescription]) NOT \"Summary\"[ActivityOutcomeMethod]" }; private static String[] searchNames = { PCPromiscuityFactory.mlpAssaysName, PCPromiscuityFactory.chemblName, PCPromiscuityFactory.betaLactamaseName, PCPromiscuityFactory.luciferaseName, PCPromiscuityFactory.fluorescentName }; public PCPromiscuityParameters params; private static final Logger log = LoggerFactory.getLogger(PCPromiscuityMain.class); public PCPromiscuityMain(PCPromiscuityParameters uParams) { this.params = uParams; } public Map<Long, CompoundPromiscuityInfo> getCompoundPromiscuityInfoMap() throws Exception { OverallListsAndMaps overall = setOverallVariables(); Map<Long, CompoundPromiscuityInfo> compoundMap = setCompoundsPromiscuityInfo(overall); return compoundMap; } private Map<Long, CompoundPromiscuityInfo> setCompoundsPromiscuityInfo(OverallListsAndMaps overall) throws Exception { PCPromiscuityFactory promFactory = new PCPromiscuityFactory(); Map<Long, CompoundPromiscuityInfo> compoundMap = promFactory.getCompoundsWithDescriptors(params.getIds(), params.getDatabase()); new FunctionalGroupDetectionFactory().calculateFunctionalGroups(compoundMap); for (Long keyId : compoundMap.keySet()) { log.info("Setting counts for compound: " + keyId); CompoundPromiscuityInfo compound = compoundMap.get(keyId); if (compound.getOnHold()) continue; if (params.getDatabase().equalsIgnoreCase("pccompound")) { compound.setCID(keyId); } else { ELinkResult result = (ELinkResult) overall.getSIDToCIDMap().get(keyId); List<Long> CIDs = result.getIds("pccompound", "pcsubstance_pccompound_same"); if (CIDs != null && CIDs.size() > 0) compound.setCID(CIDs.get(0)); } promFactory.addRuleOfFiveViolations(compound); Map<String, PromiscuityCount<?>> countMap = compound.getCounts(); promFactory.addAllAssayCount(keyId, overall.getCompoundToAIDsMap(), params.getDatabase(), countMap); promFactory.addAllProteinCount(overall, countMap); promFactory.addAllNoProteinAIDCount(countMap, overall); if (!params.getPerProteinMode()) { promFactory.addProjectCountsPerCompound(overall, countMap); promFactory.addAdvancedCounts(overall, countMap); } else { Map<Protein, Map<String, PromiscuityCount<?>>> perProteinCountMap = promFactory .allAssayCountPerProtein(countMap, overall.getProteinAIDMap()); Map<String, PromiscuityCount<?>> noProteinCountMap = new HashMap<String, PromiscuityCount<?>>(); PromiscuityCount<Long> allAssayNoProteinCount = (PromiscuityCount<Long>) countMap .get(PCPromiscuityFactory.noProteinsName); noProteinCountMap.put(PCPromiscuityFactory.allAssayName, allAssayNoProteinCount); noProteinCountMap.put(PCPromiscuityFactory.noProteinsName, allAssayNoProteinCount); if (!params.getSimpleMode()) { promFactory.addProjectCountsPerCompound(overall, countMap); for (Protein keyProtein : perProteinCountMap.keySet()) { Map<String, PromiscuityCount<?>> countMapPerProtein = perProteinCountMap.get(keyProtein); promFactory.addAdvancedCountsPerProtein(overall, countMapPerProtein, countMap); promFactory.addAllNoProteinAIDCount(countMapPerProtein, overall); perProteinCountMap.put(keyProtein, countMapPerProtein); } promFactory.addAdvancedCountsPerProtein(overall, noProteinCountMap, countMap); promFactory.addAllNoProteinAIDCount(noProteinCountMap, overall); } compound.setPerProteinCounts(perProteinCountMap); compound.setNoProteinCounts(noProteinCountMap); } compound.setCounts(countMap); compoundMap.put(compound.getId(), compound); } log.info("Memory usage after compound map completely set up: " + promFactory.memUsage()); return compoundMap; } private OverallListsAndMaps setOverallVariables() throws Exception { OverallListsAndMaps overall = new OverallListsAndMaps(); String db = params.getDatabase(); List<String> linkNames = Arrays.asList(new String[] { db + "_pcassay", db + "_pcassay_active" }); ELinkWebSession session = ELinkWebSession.newInstance(db, "pcassay", linkNames, params.getIds(), ""); log.info("Number of ids in compound to pcassay link request: " + String.valueOf(params.getIds().size())); session.run(); overall.setCompoundToAIDsMap(session.getELinkResultsAsMap()); OverallListsAndMapsFactory overallFactory = new OverallListsAndMapsFactory(); overall.setAllAIDs(session.getAllIds(db + "_pcassay")); log.info("Number of All AIDs: " + overall.getAllAIDs().size()); String shortDB = db.substring(2, db.length()); List<Long> proteinAssays = overallFactory.aidListEsearch(params.getIds(), "pcassay_protein_target[Filter]", shortDB); overall.setAllProteinAIDs(proteinAssays); overall.setAidProteinMap(overallFactory.getAIDProteinMap(new ArrayList<Long>(proteinAssays))); List<Long> noProteinAIDs = (List<Long>) CollectionUtils.subtract(overall.getAllAIDs(), proteinAssays); overall.setAllNoProteinAIDs(noProteinAIDs); log.info("Number of aids returned from eSummary request: " + overall.getAidProteinMap().keySet().size()); if (params.getPerProteinMode()) { overall.setAllProteins(overallFactory.allProteinSet(overall.getAidProteinMap())); overall.setProteinAIDMap( overallFactory.proteinAIDMap(overall.getAidProteinMap(), overall.getAllProteins())); if (!params.getSimpleMode()) advancedCounts(overallFactory, overall, session, shortDB); } else advancedCounts(overallFactory, overall, session, shortDB); if (db.equalsIgnoreCase("pcsubstance")) { session = ELinkWebSession.newInstance(db, "pccompound", Arrays.asList(new String[] { "pcsubstance_pccompound_same" }), params.getIds(), ""); session.run(); overall.setSIDToCIDMap(session.getELinkResultsAsMap()); } log.info("Set up all overall lists"); return overall; } private void advancedCounts(OverallListsAndMapsFactory overallFactory, OverallListsAndMaps overall, ELinkWebSession session, String shortDB) throws Exception { // all aids in each desired assay count category Map<String, List<Long>> overallTotalAIDMap = new HashMap<String, List<Long>>(searchNames.length); for (int ii = 0; ii < searchTerms.length; ii++) { String searchTerm = searchTerms[ii]; log.info("Advanced search: " + searchTerm); List<Long> list = overallFactory.aidListEsearch(params.getIds(), searchTerm, shortDB); overallTotalAIDMap.put(searchNames[ii], list); } overall.setAdvancedCountTotalAIDMap(overallTotalAIDMap); // eLink request for summary aids related to not ChEMBL aids String linkNeighbor = "pcassay_pcassay_neighbor_list"; log.info("Number of ChEMBL AIDs: " + overall.getAdvancedCountTotalAIDMap().get(PCPromiscuityFactory.chemblName).size()); List<Long> notChEMBLAIDs = (List<Long>) CollectionUtils.subtract(overall.getAllAIDs(), overall.getAdvancedCountTotalAIDMap().get(PCPromiscuityFactory.chemblName)); session = ELinkWebSession.newInstance("pcassay", "pcassay", Arrays.asList(new String[] { linkNeighbor }), notChEMBLAIDs, "summary[activityoutcomemethod]"); log.info("Number of ids in link request: " + String.valueOf(notChEMBLAIDs.size())); session.run(); overall.setAllSummaries(session.getAllIds(linkNeighbor)); overall.setSummaryProteinMap( overallFactory.getAIDProteinMap(new ArrayList<Long>(overall.getAllSummaries()))); Map<Long, List<ELinkResult>> aidToSummaryMap = session.getELinkResultsAsMap(); overall.setSummaryToAIDsMap(overallFactory.getSummaryToAIDsMap(aidToSummaryMap)); overall.setMlpSummaries( overallFactory.aidtoAIDListEsearch(new ArrayList<Long>(overall.getAllSummaries()), searchTerms[0])); } }