/**
 * Copyright (C) 2010 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
 *
 * This file is part of the Ensembl Java API demonstration project developed by the
 * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
 * Veterinary Studies, University of Edinburgh.
 *
 * This is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License (version 3) as published by
 * the Free Software Foundation.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * in this software distribution. If not, see <http://www.gnu.org/licenses/gpl-3.0.html/>.
 */
package uk.ac.roslin.ensembl.datasourceaware.core;

import java.util.Iterator;
import java.util.LinkedList;
import org.biojava3.core.sequence.Strand;
import uk.ac.roslin.ensembl.exception.DAOException;
import uk.ac.roslin.ensembl.exception.RangeException;
import uk.ac.roslin.ensembl.model.Coordinate;
import uk.ac.roslin.ensembl.model.Mapping;
import uk.ac.roslin.ensembl.model.MappingSet;
import uk.ac.roslin.ensembl.model.core.AssembledDNASequence;
import uk.ac.roslin.ensembl.model.core.Assembly;

/**
 * An assembly of an assembled DNA sequence: the set of component mappings
 * covering a start/stop range of the parent sequence, plus a "stitched" view of
 * those mappings in which uncovered stretches are filled with gap sequences.
 *
 * <p>Component mappings and stitched mappings are fetched/built lazily on first
 * request and then cached on the instance.
 *
 * @author paterson
 */
public class DAAssembly implements Assembly {

    DAAssembledDNASequence parent = null;
    protected MappingSet componentMappings = null;
    protected MappingSet stitchedComponentMappings = null;
    protected Integer assemblyStart = null;
    protected Integer assemblyStop = null;

    /**
     * Creates an assembly for the given parent with no explicit range.
     *
     * @param parent the assembled sequence this assembly describes
     * @return a new assembly
     */
    public static DAAssembly getAssembly(DAAssembledDNASequence parent) {
        return new DAAssembly(parent);
    }

    /**
     * Creates an assembly for the given parent restricted to an explicit range.
     *
     * @param parent the assembled sequence this assembly describes
     * @param start  assembly start coordinate
     * @param stop   assembly stop coordinate
     * @return a new assembly
     */
    public static DAAssembly getAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) {
        return new DAAssembly(parent, start, stop);
    }

    /** Creates an assembly with no parent and no range. */
    public DAAssembly() {
    }

    private DAAssembly(DAAssembledDNASequence parent) {
        this.parent = parent;
    }

    private DAAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) {
        this.parent = parent;
        this.assemblyStart = start;
        this.assemblyStop = stop;
    }

    public DAAssembledDNASequence getParent() {
        return parent;
    }

    /**
     * Sets the parent sequence.
     *
     * @param parent must be a {@link DAAssembledDNASequence}; any other
     *               implementation causes a {@link ClassCastException}
     */
    public void setParent(AssembledDNASequence parent) {
        this.parent = (DAAssembledDNASequence) parent;
    }

    /**
     * Returns the assembly start, resolving it lazily. If no start has been set
     * it is taken from the parent's bio-begin; if that is unavailable it
     * defaults to 1. The resolved value is cached.
     *
     * @return the assembly start, never {@code null}
     */
    public Integer getAssemblyStart() {
        if (assemblyStart == null && parent != null) {
            try {
                assemblyStart = parent.getBioBegin();
            } catch (Exception ignored) {
                // Best effort: if the parent cannot report its begin position
                // we fall through to the default of 1 below.
            }
        }
        if (assemblyStart == null) {
            assemblyStart = 1;
        }
        return assemblyStart;
    }

    public void setAssemblyStart(Integer assemblyStart) {
        this.assemblyStart = assemblyStart;
    }

    /**
     * Returns the assembly stop, resolving it lazily from the parent's bio-end
     * when it has not been set explicitly.
     *
     * @return the assembly stop; may be {@code null} when it was never set and
     *         there is no parent (or the parent reports no end)
     */
    public Integer getAssemblyStop() {
        if (assemblyStop == null && parent != null) {
            assemblyStop = parent.getBioEnd();
        }
        return assemblyStop;
    }

    public void setAssemblyStop(Integer assemblyStop) {
        this.assemblyStop = assemblyStop;
    }

    /**
     * Fetches the component mappings for the current assembly range through the
     * parent's assembly DAO.
     *
     * <p>The field is only assigned on success, so a failed fetch is not cached
     * as an empty set and a later call will retry.
     *
     * @throws DAOException wrapping any failure of the lookup (including a
     *                      missing parent)
     */
    private void setMappings() throws DAOException {
        try {
            componentMappings = (MappingSet) parent.getDaoFactory().getAssemblyDAO()
                    .getComponentMappingsByStartStop(parent, this.getAssemblyStart(), this.getAssemblyStop());
        } catch (Exception e) {
            throw new DAOException("Failed to create the stitched mappings for a DAAssembly", e);
        }
    }

    /**
     * Returns the component mappings of this assembly, fetching them on first use.
     *
     * @return the component mappings
     * @throws DAOException if the mappings cannot be fetched
     */
    public MappingSet getMappings() throws DAOException {
        if (this.componentMappings == null) {
            this.setMappings();
        }
        return this.componentMappings;
    }

    /**
     * Returns the stitched mappings (components plus gap fillers) covering the
     * whole assembly range, building them on first use.
     *
     * @return the cached stitched mappings
     * @throws DAOException if the component mappings cannot be fetched or the
     *                      assembly coordinates are unresolved
     */
    public MappingSet getStitchedMappings() throws DAOException {
        if (this.stitchedComponentMappings == null) {
            this.stitchedComponentMappings = this.stitchComponents(
                    this.getAssemblyStart(), this.getAssemblyStop(), this.getMappings());
        }
        return this.stitchedComponentMappings;
    }

    /**
     * Returns the stitched mappings overlapping {@code start..stop}. Mappings
     * lying wholly inside the range are returned as-is; mappings that straddle
     * a boundary are returned as trimmed copies.
     *
     * <p>Trimmed copies are given freshly built coordinates, so the cached
     * stitched mappings are never modified by this call.
     *
     * @param start first source position wanted (must not be {@code null})
     * @param stop  last source position wanted (must not be {@code null})
     * @return the mappings restricted to the requested range
     * @throws DAOException if the stitched mappings cannot be built
     */
    public MappingSet getStitchedMappings(Integer start, Integer stop) throws DAOException {
        MappingSet out = new MappingSet();

        for (Mapping mapping : this.getStitchedMappings()) {
            int sourceStart = mapping.getSourceCoordinates().getStart();
            int sourceEnd = mapping.getSourceCoordinates().getEnd();

            if (sourceStart > stop) {
                // past the requested range - nothing further can overlap
                break;
            }
            if (sourceEnd < start) {
                // not yet at the requested range
                continue;
            }

            // negative upstream / positive downstream = amount hanging outside the range
            int upstream = sourceStart - start;
            int downstream = sourceEnd - stop;

            if (upstream >= 0 && downstream <= 0) {
                out.add(mapping);
                continue;
            }

            int targetStart = mapping.getTargetCoordinates().getStart();
            int targetEnd = mapping.getTargetCoordinates().getEnd();
            if (upstream < 0) {
                targetStart -= upstream;
                sourceStart = start;
            }
            if (downstream > 0) {
                targetEnd -= downstream;
                sourceEnd = stop;
            }

            Mapping trimmed = new Mapping();
            trimmed.setSource(mapping.getSource());
            trimmed.setTarget(mapping.getTarget());
            trimmed.setSourceCoordinates(sourceStart, sourceEnd,
                    mapping.getSourceCoordinates().getStrand());
            trimmed.setTargetCoordinates(targetStart, targetEnd,
                    mapping.getTargetCoordinates().getStrand());
            out.add(trimmed);
        }

        return out;
    }

    /**
     * Builds a contiguous tiling of {@code _start.._end} from the supplied
     * component mappings: components are clipped to the range and any uncovered
     * stretch (including a trailing one) is filled with a gap-sequence mapping.
     * If {@code _start} is greater than {@code _end} the two are swapped. Source
     * coordinates of every produced mapping are written on the forward strand.
     *
     * <p>The mappings are consumed in iteration order and are expected to be
     * ordered by source start.
     *
     * @param _start   one end of the range
     * @param _end     the other end of the range
     * @param mappings the component mappings to stitch
     * @return the stitched mappings
     * @throws DAOException if either coordinate is {@code null}
     */
    private MappingSet stitchComponents(Integer _start, Integer _end, MappingSet mappings) throws DAOException {

        if (_start == null || _end == null) {
            throw new DAOException("invalid assembly coordinates");
        }

        MappingSet out = new MappingSet();

        // Primitives throughout: comparing boxed Integers with == tests identity, not value.
        final int start = Math.min(_start, _end);
        final int end = Math.max(_start, _end);
        int currentPosition = start;

        for (Mapping mapping : mappings) {

            int sourceStart = mapping.getSourceCoordinates().getStart();
            int sourceEnd = mapping.getSourceCoordinates().getEnd();
            DADNASequence source = (DADNASequence) mapping.getSource();
            DADNASequence target = (DADNASequence) mapping.getTarget();

            // skip anything wholly outside the part of the range still to be filled
            if (sourceStart > end || sourceEnd < currentPosition) {
                continue;
            }

            int targetStart = mapping.getTargetCoordinates().getStart();

            if (sourceStart <= currentPosition) {
                // component overlaps the current position: clip its leading part
                int overlap = currentPosition - sourceStart;
                Mapping m = new Mapping();
                m.setSource(source);
                m.setTarget(target);

                if (sourceEnd > end) {
                    // ...and it also runs past the end: clip the tail too
                    m.setTargetCoordinates(
                            targetStart + overlap,
                            targetStart + overlap + (end - currentPosition),
                            mapping.getTargetCoordinates().getStrand());
                    m.setSourceCoordinates(currentPosition, end, Coordinate.Strand.FORWARD_STRAND);
                    out.add(m);
                    currentPosition = end + 1;
                    break;
                }

                m.setTargetCoordinates(
                        targetStart + overlap,
                        mapping.getTargetCoordinates().getEnd(),
                        mapping.getTargetCoordinates().getStrand());
                m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
                out.add(m);
                currentPosition = sourceEnd + 1;
                if (sourceEnd == end) {
                    break;
                }

            } else {
                // currentPosition < sourceStart: plug the gap before this component
                int gapLength = sourceStart - currentPosition;
                Mapping gap = new Mapping();
                gap.setSource(source);
                gap.setTarget(GapSequence.makeGap(gapLength));
                gap.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
                gap.setSourceCoordinates(currentPosition, currentPosition + gapLength - 1,
                        Coordinate.Strand.FORWARD_STRAND);
                out.add(gap);
                currentPosition = sourceStart;

                Mapping m = new Mapping();
                m.setSource(source);
                m.setTarget(target);

                if (end >= sourceEnd) {
                    // whole component fits inside the range
                    m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
                    m.setTargetCoordinates(mapping.getTargetCoordinates());
                    out.add(m);
                    currentPosition = sourceEnd + 1;
                    if (sourceEnd == end) {
                        break;
                    }
                } else {
                    // component runs past the end: keep only its leading part,
                    // offset from the component's own target start (not assumed to be 1)
                    m.setSourceCoordinates(currentPosition, end, Coordinate.Strand.FORWARD_STRAND);
                    m.setTargetCoordinates(
                            targetStart,
                            targetStart + (end - sourceStart),
                            mapping.getTargetCoordinates().getStrand());
                    out.add(m);
                    currentPosition = end + 1;
                    break;
                }
            }
        }

        if (currentPosition <= end) {
            // plug a terminal gap
            int gapLength = end - currentPosition + 1;
            Mapping m = new Mapping();
            m.setSource(parent);
            m.setTarget(GapSequence.makeGap(gapLength));
            m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
            m.setSourceCoordinates(currentPosition, end, Coordinate.Strand.FORWARD_STRAND);
            out.add(m);
        }

        return out;
    }

    /**
     * Returns the forward sequence of the assembly between {@code start} and
     * {@code stop}, concatenated from the stitched mappings. Components mapped
     * on the forward strand contribute their sequence directly; all others
     * contribute their reverse complement.
     *
     * @param start  first position wanted
     * @param stop   last position wanted
     * @param strand not used by this implementation
     * @return the sequence string
     * @throws RangeException if either bound is {@code null} or lies outside the
     *                        assembly extent (or the extent is unresolved)
     * @throws DAOException   if the stitched mappings cannot be built
     */
    public String getSequenceAsString(Integer start, Integer stop, Strand strand) throws RangeException, DAOException {

        // make sure range is not greater than the assembly extent
        if (stop == null || this.getAssemblyStop() == null || stop > this.getAssemblyStop()) {
            throw new RangeException("Requested range greater than assembly range.");
        }
        if (start == null || this.getAssemblyStart() == null || start < this.getAssemblyStart()) {
            throw new RangeException("Requested range greater than assembly range.");
        }

        StringBuilder sb = new StringBuilder();
        int nextStart = start;

        for (Mapping mapping : this.getStitchedMappings()) {

            if (nextStart > stop) {
                // we're done
                break;
            }

            int sourceStart = mapping.getSourceCoordinates().getStart();
            if (sourceStart > stop) {
                // we're past the end - so quit
                break;
            }
            if (mapping.getSourceCoordinates().getEnd() < nextStart) {
                // we're not yet at the position we want so skip
                continue;
            }

            // this mapping is at least partially in range: translate the wanted
            // source window into target coordinates, capped at the target end
            int targetStart = mapping.getTargetCoordinates().getStart();
            Integer desiredOutputStart = targetStart + (nextStart - sourceStart);
            Integer desiredOutputEnd = Math.min(
                    targetStart + (stop - sourceStart),
                    mapping.getTargetCoordinates().getEnd());

            nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;

            DADNASequence seq = (DADNASequence) mapping.getTarget();
            if (mapping.getTargetCoordinates().getStrand().equals(Coordinate.Strand.FORWARD_STRAND)) {
                sb.append(seq.getSequenceAsString(desiredOutputStart, desiredOutputEnd, null));
            } else {
                // window is mirrored against the component length before reverse-complementing
                sb.append(seq.getReverseComplementSequenceAsString(
                        seq.getLength() - desiredOutputEnd + 1,
                        seq.getLength() - desiredOutputStart + 1));
            }
        }
        return sb.toString();
    }

    /**
     * Returns the reverse complement of the assembly between {@code start} and
     * {@code stop}. Each overlapping stitched mapping contributes a piece
     * (reverse complement for forward-strand components, plain sequence for the
     * others) and the pieces are concatenated in reverse mapping order.
     *
     * @param start first position wanted
     * @param stop  last position wanted
     * @return the reverse-complement sequence string
     * @throws RangeException if either bound is {@code null} or lies outside the
     *                        assembly extent (or the extent is unresolved)
     * @throws DAOException   if the stitched mappings cannot be built
     */
    public String getReverseComplementSequenceAsString(Integer start, Integer stop) throws RangeException, DAOException {

        // make sure range is not greater than the assembly extent
        // (same checks as getSequenceAsString, including an unresolved extent)
        if (stop == null || this.getAssemblyStop() == null || stop > this.getAssemblyStop()) {
            throw new RangeException("Requested range greater than assembly range.");
        }
        if (start == null || this.getAssemblyStart() == null || start < this.getAssemblyStart()) {
            throw new RangeException("Requested range greater than assembly range.");
        }

        LinkedList<String> pieces = new LinkedList<String>();
        int nextStart = start;

        for (Mapping mapping : this.getStitchedMappings()) {

            if (nextStart > stop) {
                // we're done
                break;
            }

            int sourceStart = mapping.getSourceCoordinates().getStart();
            if (sourceStart > stop) {
                // we're past the end - so quit
                break;
            }
            if (mapping.getSourceCoordinates().getEnd() < nextStart) {
                // we're not yet at the position we want so skip
                continue;
            }

            // this mapping is at least partially in range
            int targetStart = mapping.getTargetCoordinates().getStart();
            Integer desiredOutputStart = targetStart + (nextStart - sourceStart);
            Integer desiredOutputEnd = Math.min(
                    targetStart + (stop - sourceStart),
                    mapping.getTargetCoordinates().getEnd());

            nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;

            DADNASequence seq = (DADNASequence) mapping.getTarget();
            if (mapping.getTargetCoordinates().getStrand().equals(Coordinate.Strand.FORWARD_STRAND)) {
                pieces.add(seq.getReverseComplementSequenceAsString(desiredOutputStart, desiredOutputEnd));
            } else {
                pieces.add(seq.getSequenceAsString(
                        seq.getLength() - desiredOutputEnd + 1,
                        seq.getLength() - desiredOutputStart + 1));
            }
        }

        // pieces were collected 5'->3' along the forward strand; emit them last-first
        StringBuilder outerBuffer = new StringBuilder();
        Iterator<String> it = pieces.descendingIterator();
        while (it.hasNext()) {
            outerBuffer.append(it.next());
        }
        return outerBuffer.toString();
    }
}