001 /** 002 * Copyright (C) 2010 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of the Ensembl Java API demonstration project developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * This is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (version 3) as published by 010 * the Free Software Foundation. 011 * 012 * This software is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * in this software distribution. If not, see <http://www.gnu.org/licenses/gpl-3.0.html/>. 019 */ 020 package uk.ac.roslin.ensembl.config; 021 022 import java.util.Collection; 023 import java.util.HashMap; 024 import java.util.Locale; 025 import uk.ac.roslin.ensembl.model.database.DatabaseType; 026 027 /** 028 * In 'ensembl.org' there is a single 'ensembl_compara' database which contains 029 * the pairwise comparison data on all the species in 'ensembl.org. However, in 030 * 'ensemblgenomes.org', there are separate 'ensembl_compara' databases for the 031 * different taxonomic groupings: 032 * <ul><li>ensembl_compara_plants</li> 033 * <li>ensembl_compara_bacteria</li> 034 * <li>ensembl_compara_fungi</li> 035 * <li>ensembl_compara_metazoa</li> 036 * <li>ensembl_compara_protozoa</li> 037 * <li>ensembl_compara_pan_homology</li></ul> 038 * In ensembl.org all species are compared in the ensembl_compara database, and 039 * belong to the compara group 'multi'. 
In each core database (species) in 040 * ensemblgenomes.org the 'species.division' key in table 'meta' specifies the 041 * group: 042 * <ul><li>EnsemblPlants</li> 043 * <li>EnsemblBacteria</li> 044 * <li>EnsemblFungi</li> 045 * <li>EnsemblMetazoa</li> 046 * <li>EnsemblProtozoa</li></ul> 047 * Orthology / paralogy (peptide) predictions are done between all species of a database, 048 * but genomic alignments only between certain combinations of species. Bacteria 049 * is a slightly special case in that we compare members of each collection to 050 * each other, but not between the collections (it's actually 10 comparas merged 051 * into one). 052 * <p>ensembl_compara_pan_homology is a peptide compara database produced from 053 * a set of selected species that are taken from all EnsemblGenomes divisions and 054 * from Ensembl (but doesn't include all species from all divisions). See list at 055 * <a href="http://metazoa.ensembl.org/info/docs/compara/homology_method.html"> 056 * http://metazoa.ensembl.org/info/docs/compara/homology_method.html</a>. 057 * One thing to note here is that pan also contains protein family data as well: 058 * <a href="http://metazoa.ensembl.org/info/docs/compara/family.html"> 059 * http://metazoa.ensembl.org/info/docs/compara/family.html</a>. This means that 060 * the genome_db table ends up with more entries than are used in the peptide 061 * homology comparisons alone and cannot be used to determine membership, so you 062 * need to use the species_set/method_link tables as well rather than just relying 063 * on genome_db. 
064 */ 065 public class EnsemblComparaDivision extends EnsemblType implements DatabaseType, Comparable<EnsemblComparaDivision> { 066 067 public static EnsemblComparaDivision MULTI; 068 public static EnsemblComparaDivision PLANTS; 069 public static EnsemblComparaDivision PROTISTS; 070 public static EnsemblComparaDivision BACTERIA; 071 public static EnsemblComparaDivision FUNGI; 072 public static EnsemblComparaDivision METAZOA; 073 public static EnsemblComparaDivision PAN_HOMOLOGY; 074 075 private EnsemblComparaDivision(String value) { 076 this.label = value; 077 } 078 079 public static String getDBName(String label) { 080 String out = "ensembl_compara"; 081 if (label.equalsIgnoreCase("multi")) { 082 return out; 083 } else { 084 return out+"_"+label; 085 } 086 } 087 088 private static HashMap<String, EnsemblComparaDivision> typeListHash = EnsemblComparaDivision.initialize(); 089 090 public static Collection<EnsemblComparaDivision> getAllTypes() { 091 return typeListHash.values(); 092 } 093 094 private static HashMap<String, EnsemblComparaDivision> initialize() { 095 096 HashMap<String, EnsemblComparaDivision> out = new HashMap<String, EnsemblComparaDivision>(); 097 098 MULTI = new EnsemblComparaDivision("multi"); 099 PLANTS = new EnsemblComparaDivision("plants"); 100 PROTISTS = new EnsemblComparaDivision("protists"); 101 BACTERIA = new EnsemblComparaDivision("bacteria"); 102 FUNGI = new EnsemblComparaDivision("fungi"); 103 METAZOA = new EnsemblComparaDivision("metazoa"); 104 PAN_HOMOLOGY = new EnsemblComparaDivision("pan_homology"); 105 106 107 out.put(MULTI.toString(),MULTI ); 108 out.put(PLANTS.toString(), PLANTS); 109 out.put(PROTISTS.toString(),PROTISTS ); 110 out.put(BACTERIA.toString(), BACTERIA ); 111 out.put(FUNGI.toString(),FUNGI ); 112 out.put(METAZOA.toString(),METAZOA ); 113 out.put(PAN_HOMOLOGY.toString(), PAN_HOMOLOGY ); 114 115 return out; 116 } 117 118 public static EnsemblComparaDivision getEnsemblComparaDivision(String value) { 119 return 
typeListHash.get(value); 120 } 121 122 public static EnsemblComparaDivision getEnsemblComparaDivisionByMetaValue(String value) { 123 String v = null; 124 if (value != null) { 125 v = value.replace("Ensembl","").toLowerCase(); 126 } 127 return typeListHash.get(v); 128 } 129 130 131 132 public int compareTo(EnsemblComparaDivision other) { 133 if (other == null || other.toString() == null) { 134 return -1; 135 } 136 137 return this.toString().compareTo(other.toString()); 138 } 139 }