001 /** 002 * Copyright (C) 2010 The Roslin Institute <contact andy.law@roslin.ed.ac.uk> 003 * 004 * This file is part of the Ensembl Java API demonstration project developed by the 005 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of 006 * Veterinary Studies, University of Edinburgh. 007 * 008 * This is free software: you can redistribute it and/or modify 009 * it under the terms of the GNU General Public License (version 3) as published by 010 * the Free Software Foundation. 011 * 012 * This software is distributed in the hope that it will be useful, 013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 015 * GNU General Public License for more details. 016 * 017 * You should have received a copy of the GNU General Public License 018 * in this software distribution. If not, see <http://www.gnu.org/licenses/gpl-3.0.html/>. 019 */ 020 021 package uk.ac.roslin.ensembl.datasourceaware.core; 022 023 import java.beans.beancontext.BeanContextSupport; 024 import java.lang.ref.SoftReference; 025 import java.util.ArrayList; 026 import java.util.Iterator; 027 import java.util.LinkedHashMap; 028 import java.util.List; 029 import java.util.Scanner; 030 import org.biojava3.core.exceptions.CompoundNotFoundError; 031 import org.biojava3.core.sequence.AccessionID; 032 import org.biojava3.core.sequence.DNASequence; 033 import org.biojava3.core.sequence.Strand; 034 import org.biojava3.core.sequence.compound.NucleotideCompound; 035 import org.biojava3.core.sequence.storage.ArrayListSequenceReader; 036 import org.biojava3.core.sequence.template.CompoundSet; 037 import org.biojava3.core.sequence.template.SequenceMixin; 038 import org.biojava3.core.sequence.template.SequenceProxyView; 039 import org.biojava3.core.sequence.template.SequenceReader; 040 import org.biojava3.core.sequence.template.SequenceView; 041 import uk.ac.roslin.ensembl.biojava3.EnsemblDNACompoundSet; 042 import uk.ac.roslin.ensembl.biojava3.EnsemblDNASequenceReader; 043 import uk.ac.roslin.ensembl.dao.factory.DAOCoreFactory; 044 import uk.ac.roslin.ensembl.exception.DAOException; 045 046 /** 047 * 048 * @author paterson 049 */ 050 public class DAEnsemblDNASequenceReader implements EnsemblDNASequenceReader { 051 052 protected String sequence; 053 protected EnsemblDNACompoundSet compoundSet; 054 protected DADNASequence dASequence; 055 protected Integer coordSystemID; 056 057 protected Integer seqRegionID; 058 protected Integer length; 059 protected String name; 060 061 protected DAOCoreFactory factory; 062 protected DADNASequence parent; 063 064 065 066 private LinkedHashMap<SoftReference<Integer>, SoftReference<DNASequence>> cache; 067 // private SoftReference<Map<Integer,SoftReference<DNASequence>>> cacheMap; 068 // private Map<Integer,SoftReference<DNASequence>> cache; 069 private Integer chunkSize = 250000; 070 private boolean usesCache = false; 071 private boolean nonCacheLazyLoaded = false; 072 private Integer chunks = 0; 073 074 075 private SequenceReader<NucleotideCompound> sequenceReader; 076 077 public DAEnsemblDNASequenceReader(){ 078 this.setCompoundSet(EnsemblDNACompoundSet.getDNACompoundSet()); 079 } 080 081 public DAEnsemblDNASequenceReader(Integer coordSystemID, Integer seqRegionID, Integer length, String name, DAOCoreFactory factory){ 082 this.setCompoundSet(EnsemblDNACompoundSet.getDNACompoundSet()); 083 this.setCoordSystemID(coordSystemID); 084 this.setLength(length); 085 if (this.getLengthInteger()>2*chunkSize) { 086 usesCache=true; 087 chunks = (this.getLengthInteger()/chunkSize) -1; 088 cache = new LinkedHashMap<SoftReference<Integer> ,SoftReference<DNASequence>>(); 089 } 090 this.setSeqRegionID(seqRegionID); 091 this.setName(name); 092 this.setFactory(factory); 093 } 094 095 private void initCache() { 096 097 } 098 099 private void lazyLoadNonCache() throws DAOException, CompoundNotFoundError { 100 101 String out = null; 102 if (this.getFactory()!=null 103 && this.getFactory().getSequenceDAO()!=null) { 104 out = this.getFactory().getSequenceDAO().getFullSequence(this); 105 setNonCacheLazyLoaded(true); 106 } 107 if (out!=null) { 108 this.setContents(out); 109 } 110 111 } 112 113 public DADNASequence getParent() { 114 return parent; 115 } 116 117 public void setParent(DADNASequence parent) { 118 this.parent = parent; 119 if (parent!=null) { 120 this.setFactory(parent.getDaoFactory()); 121 } 122 } 123 124 //************************************* 125 126 127 128 public DAOCoreFactory getFactory() { 129 if (factory == null) { 130 if (this.getParent() != null) { 131 factory = this.getParent().getDaoFactory(); 132 } 133 } 134 return factory; 135 } 136 137 public void setFactory(DAOCoreFactory factory) { 138 this.factory = factory; 139 } 140 141 public String getName() { 142 return name; 143 } 144 145 public void setName(String name) { 146 this.name = name; 147 } 148 149 public Integer getCoordSystemID() { 150 return coordSystemID; 151 } 152 153 public void setCoordSystemID(Integer coordSystemID) { 154 this.coordSystemID = coordSystemID; 155 } 156 157 public int getLength() { 158 return (length!=null)?length:0; 159 } 160 161 public Integer getLengthInteger() { 162 return length; 163 } 164 165 public void setLength(Integer length) { 166 this.length = length; 167 } 168 169 public Integer getSeqRegionID() { 170 return seqRegionID; 171 } 172 173 public void setSeqRegionID(Integer seqRegionID) { 174 this.seqRegionID = seqRegionID; 175 } 176 177 //***************************** 178 179 public void setCompoundSet(CompoundSet compoundSet) { 180 this.compoundSet = (EnsemblDNACompoundSet) compoundSet; 181 } 182 183 public void setContents(String sequence) { 184 sequenceReader = new ArrayListSequenceReader<NucleotideCompound>(sequence, getCompoundSet()); 185 setNonCacheLazyLoaded(true); 186 length = sequenceReader.getLength(); 187 } 188 189 //****************************** 190 191 public NucleotideCompound getCompoundAt(int position) { 192 triggerLoad(); 193 if (this.getLength()==0 || sequenceReader == null ) { 194 return null; 195 } 196 return sequenceReader.getCompoundAt(position); 197 } 198 199 public int getIndexOf(NucleotideCompound compound) { 200 triggerLoad(); 201 if (this.getLength()==0) { 202 return 0; 203 } 204 return SequenceMixin.indexOf(this, compound); 205 } 206 207 public int getLastIndexOf(NucleotideCompound compound) { 208 triggerLoad(); 209 if (this.getLength()==0) { 210 return 0; 211 } 212 return SequenceMixin.lastIndexOf(this, compound); 213 } 214 215 public String getSequenceAsString() { 216 triggerLoad(); 217 if (this.getLength()==0) { 218 return ""; 219 } 220 return SequenceMixin.toString(this); 221 } 222 223 public String getSequenceAsString(Integer start, Integer end, Strand strand) { 224 triggerLoad(); 225 if (this.getLength()==0) { 226 return ""; 227 } 228 if(Strand.NEGATIVE.equals(strand)) { 229 return getSubSequence(start, end).getInverse().getSequenceAsString(); 230 } 231 else { 232 return getSubSequence(start, end).getSequenceAsString(); 233 } 234 } 235 236 private void triggerLoad() { 237 if (usesCache) { 238 //TODO Need to support caching version 239 } 240 else { 241 if(! nonCacheLazyLoaded) { 242 try { 243 //initialize the sequence 244 this.lazyLoadNonCache(); 245 } catch (DAOException ex) { 246 System.out.println("Error in loading data from remote source:\n" + ex.getMessage()); 247 248 } catch (CompoundNotFoundError ex) { 249 System.out.println("Error parsing sequence:\n" + ex.getMessage()); 250 251 } 252 } 253 } 254 255 } 256 257 public String getReverseComplementSequenceAsString(Integer start, Integer end) { 258 triggerLoad(); 259 if (this.getLength()==0) { 260 return ""; 261 } 262 return getSubSequence(start, end).getInverse().getSequenceAsString(); 263 } 264 265 public List<NucleotideCompound> getAsList() { 266 triggerLoad(); 267 if (this.getLength()==0) { 268 return new ArrayList<NucleotideCompound>(); 269 } 270 return SequenceMixin.toList(this); 271 } 272 273 public SequenceView<NucleotideCompound> getSubSequence(Integer start, Integer end) { 274 triggerLoad(); 275 if (this.getLength()==0) { 276 return null; 277 } 278 return new SequenceProxyView<NucleotideCompound>(this, start, end); 279 } 280 281 public CompoundSet<NucleotideCompound> getCompoundSet() { 282 return compoundSet; 283 } 284 285 public AccessionID getAccession() { 286 throw new UnsupportedOperationException("Not supported yet."); 287 } 288 289 public int countCompounds(NucleotideCompound... compounds) { 290 triggerLoad(); 291 if (this.getLength()==0) { 292 return 0; 293 } 294 return SequenceMixin.countCompounds(this, compounds); 295 } 296 297 public Iterator<NucleotideCompound> iterator() { 298 triggerLoad(); 299 return (sequenceReader!= null) ? SequenceMixin.createIterator(sequenceReader): 300 new EmptyIterator(); 301 } 302 303 public SequenceView<NucleotideCompound> getInverse() { 304 triggerLoad(); 305 if (this.getLength()==0) { 306 return null; 307 } 308 return SequenceMixin.inverse(this); 309 } 310 311 312 private void setNonCacheLazyLoaded(boolean nonCacheLazyLoaded) { 313 this.nonCacheLazyLoaded = nonCacheLazyLoaded; 314 } 315 316 public static class EmptyIterator<NucleotideCompound> implements 317 Iterator<NucleotideCompound> { 318 319 public EmptyIterator() { 320 321 } 322 323 public boolean hasNext() { 324 return false; 325 } 326 327 public NucleotideCompound next() { 328 return null; 329 } 330 331 public void remove() { 332 333 } 334 335 } 336 337 }