001    /**
002     * Copyright (C) 2010 The Roslin Institute <contact andy.law@roslin.ed.ac.uk>
003     *
004     * This file is part of the Ensembl Java API demonstration project developed by the
005     * Bioinformatics Group at The Roslin Institute, The Royal (Dick) School of
006     * Veterinary Studies, University of Edinburgh.
007     *
008     * This is free software: you can redistribute it and/or modify
009     * it under the terms of the GNU General Public License (version 3) as published by
010     * the Free Software Foundation.
011     *
012     * This software is distributed in the hope that it will be useful,
013     * but WITHOUT ANY WARRANTY; without even the implied warranty of
014     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
015     * GNU General Public License for more details.
016     *
017     * You should have received a copy of the GNU General Public License
018     * in this software distribution. If not, see <http://www.gnu.org/licenses/gpl-3.0.html/>.
019     */
020    package uk.ac.roslin.ensembl.datasourceaware.core;
021    
022    import java.util.Iterator;
023    import java.util.LinkedList;
024    import org.biojava3.core.sequence.Strand;
025    import uk.ac.roslin.ensembl.model.Mapping;
026    import uk.ac.roslin.ensembl.model.MappingSet;
027    import uk.ac.roslin.ensembl.exception.DAOException;
028    import uk.ac.roslin.ensembl.exception.RangeException;
029    import uk.ac.roslin.ensembl.model.Coordinate;
030    import uk.ac.roslin.ensembl.model.Mapping;
031    import uk.ac.roslin.ensembl.model.core.AssembledDNASequence;
032    import uk.ac.roslin.ensembl.model.core.Assembly;
033    
034    /**
035     *
036     * @author paterson
037     */
038    public class DAAssembly implements Assembly {
039    
040        DAAssembledDNASequence parent = null;
041        protected MappingSet componentMappings = null;
042        protected MappingSet stitchedComponentMappings = null;
043        protected Integer assemblyStart = null;
044        protected Integer assemblyStop = null;
045    
046        public static DAAssembly getAssembly(DAAssembledDNASequence parent) {
047            return new DAAssembly(parent);
048        }
049    
050        public static DAAssembly getAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) {
051            return new DAAssembly(parent, start, stop);
052        }
053    
054        public DAAssembly() {
055        }
056    
057        private DAAssembly(DAAssembledDNASequence parent) {
058            this.parent = parent;
059        }
060    
061        private DAAssembly(DAAssembledDNASequence parent, Integer start, Integer stop) {
062            this.parent = parent;
063            this.assemblyStart = start;
064            this.assemblyStop = stop;
065        }
066    
067        public DAAssembledDNASequence getParent() {
068            return parent;
069        }
070    
071        public void setParent(AssembledDNASequence parent) {
072            this.parent = (DAAssembledDNASequence) parent;
073        }
074    
075        public Integer getAssemblyStart() {
076            if (assemblyStart == null) {
077                try {
078                    assemblyStart = (parent.getBioBegin() != null) ? parent.getBioBegin() : 1;
079                } catch (Exception e) {
080                }
081            }
082            if (assemblyStart == null) {
083                assemblyStart = 1;
084            }
085            return assemblyStart;
086        }
087    
088        public void setAssemblyStart(Integer assemblyStart) {
089            this.assemblyStart = assemblyStart;
090        }
091    
092        public Integer getAssemblyStop() {
093            if (assemblyStop != null || parent==null) {
094                return assemblyStop;
095            }
096            assemblyStop = parent.getBioEnd() ;
097            return assemblyStop;
098        }
099    
100        public void setAssemblyStop(Integer assemblyStop) {
101            this.assemblyStop = assemblyStop;
102        }
103    
104        private void setMappings() throws DAOException {
105            componentMappings = new MappingSet();
106            try {
107                componentMappings = (MappingSet) parent.getDaoFactory().getAssemblyDAO().getComponentMappingsByStartStop(parent, this.getAssemblyStart(), this.getAssemblyStop());
108            } catch (Exception e) {
109                    throw new DAOException("Failed to create the stitched mappings for a DAAssembly", e);
110            }
111         }
112    
113        public MappingSet getMappings() throws DAOException {
114            if (this.componentMappings == null) {
115                this.setMappings();
116            }
117            return this.componentMappings;
118        }
119    
120        public MappingSet getStitchedMappings() throws DAOException {
121            if (this.stitchedComponentMappings == null) {
122            this.stitchedComponentMappings = this.stitchComponents(
123                            this.getAssemblyStart(), this.getAssemblyStop(), this.getMappings());
124            }
125            return this.stitchedComponentMappings;
126        }
127    
128        public MappingSet getStitchedMappings(Integer start, Integer stop) throws DAOException {
129            if (this.stitchedComponentMappings == null) {
130                this.stitchedComponentMappings = this.stitchComponents(this.getAssemblyStart(), this.getAssemblyStop(), this.getMappings());
131            }
132    
133            MappingSet out = new MappingSet();
134    
135            for (Mapping mapping : this.getStitchedMappings()) {
136    
137    
138                if (mapping.getSourceCoordinates().getStart() > stop) {
139                    break;
140                } else if (mapping.getSourceCoordinates().getEnd() < start) {
141                    continue;
142                } else {
143                    Integer upstream = mapping.getSourceCoordinates().getStart() - start;
144                    Integer downstream = mapping.getSourceCoordinates().getEnd() - stop;
145    
146                    if (upstream >= 0 && downstream <= 0) {
147                        out.add((Mapping) mapping);
148                        continue;
149                    }
150                    Mapping newMapping = new Mapping();
151                    newMapping.setSource(mapping.getSource());
152                    newMapping.setSourceCoordinates(mapping.getSourceCoordinates());
153                    newMapping.setTarget(mapping.getTarget());
154                    newMapping.setTargetCoordinates(mapping.getTargetCoordinates());
155    
156                    if (upstream < 0) {
157                        newMapping.getTargetCoordinates().setStart(mapping.getTargetCoordinates().getStart() - upstream);
158                        newMapping.getSourceCoordinates().setStart(start);
159                    }
160                    if (downstream > 0) {
161                        newMapping.getTargetCoordinates().setEnd(mapping.getTargetCoordinates().getEnd() - downstream);
162                        newMapping.getSourceCoordinates().setEnd(stop);
163                    }
164                    out.add(newMapping);
165                    continue;
166                }
167            }
168    
169            return out;
170        }
171    
172        private MappingSet stitchComponents(Integer _start, Integer _end, MappingSet mappings) throws DAOException {
173    
174            MappingSet out = new MappingSet();
175    
176            boolean reverse = false;
177            Integer start;
178            Integer end;
179    
180            if (_start==null || _end ==null) {
181                throw new DAOException("invalid assembly coordinates");
182            }
183    
184            if (_start > _end) {
185                reverse = true;
186                start = _end;
187                end = _start;
188            } else {
189                start = _start;
190                end = _end;
191            }
192    
193            Integer currentPosition = start;
194    
195    
196            for (Mapping mapping : mappings) {
197    
198                Integer sourceStart = mapping.getSourceCoordinates().getStart();
199                Integer sourceEnd = mapping.getSourceCoordinates().getEnd();
200                DADNASequence source = (DADNASequence) mapping.getSource();
201                DADNASequence target = (DADNASequence) mapping.getTarget();
202                Integer distanceLeft = end - currentPosition;
203    
204                //skip ones completely before the desired start
205                if (sourceStart > end || sourceEnd < currentPosition) {
206                    continue;
207                } else if (sourceStart <= currentPosition) {
208    
209                    //we overlap the start
210                    Integer overlap = currentPosition - sourceStart;
211                    Mapping m = new Mapping();
212                    m.setSource(source);
213                    m.setTarget(target);
214    
215                    //if we also overlap the end
216                    if (sourceEnd > end) {
217                        m.setTargetCoordinates(
218                                mapping.getTargetCoordinates().getStart() + overlap,
219                                mapping.getTargetCoordinates().getStart() + overlap + distanceLeft,
220                                mapping.getTargetCoordinates().getStrand());
221                        m.setSourceCoordinates(currentPosition, end, Coordinate.Strand.FORWARD_STRAND);
222                        out.add(m);
223                        //we're done
224                        currentPosition = end + 1;
225                        break;
226                    } else {
227                        m.setTargetCoordinates(
228                                mapping.getTargetCoordinates().getStart() + overlap,
229                                mapping.getTargetCoordinates().getEnd(),
230                                mapping.getTargetCoordinates().getStrand());
231                        m.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
232                        out.add(m);
233                        if (sourceEnd == end) {
234                            //we're done
235                            currentPosition = end + 1;
236                            break;
237                        } else {
238                            currentPosition = sourceEnd + 1;
239                            continue;
240                        }
241                    }
242    
243    
244    
245                } else if (currentPosition < sourceStart) {
246    
247                    //plug a gap
248                    Integer gapLength = sourceStart - currentPosition;
249                    Mapping m = new Mapping();
250                    m.setSource(source);
251                    m.setTarget(GapSequence.makeGap(gapLength));
252                    m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
253                    m.setSourceCoordinates(currentPosition, currentPosition + gapLength - 1, Coordinate.Strand.FORWARD_STRAND);
254                    out.add(m);
255                    currentPosition = currentPosition + gapLength;
256    
257    
258                    //if we are within bounds
259                    if (end >= sourceEnd) {
260                        Mapping m1 = new Mapping();
261                        m1.setSource(source);
262                        m1.setTarget(target);
263                        m1.setSourceCoordinates(currentPosition, sourceEnd, Coordinate.Strand.FORWARD_STRAND);
264                        m1.setTargetCoordinates(mapping.getTargetCoordinates());
265                        out.add(m1);
266                        if (end == sourceEnd) {
267                            // we're done
268                            currentPosition = end + 1;
269                            break;
270                        } else {
271                            currentPosition = sourceEnd + 1;
272                            continue;
273                        }
274                    } //if we run past the desired end
275                    else {
276                        Mapping m2 = new Mapping();
277                        m2.setSource(source);
278                        m2.setTarget(target);
279                        m2.setSourceCoordinates(currentPosition, end, Coordinate.Strand.FORWARD_STRAND);
280                        m2.setTargetCoordinates(1, end - sourceStart + 1, mapping.getTargetCoordinates().getStrand());
281                        out.add(m2);
282                        //we're done
283                        currentPosition = end + 1;
284                        break;
285                    }
286    
287                }
288    
289            }
290    
291            if (currentPosition <= end) {
292                //plug a terminal gap
293                Integer gapLength = end - currentPosition + 1;
294                Mapping m = new Mapping();
295                m.setSource(parent);
296                m.setTarget(GapSequence.makeGap(gapLength));
297                m.setTargetCoordinates(1, gapLength, Coordinate.Strand.FORWARD_STRAND);
298                m.setSourceCoordinates(currentPosition, end, Coordinate.Strand.FORWARD_STRAND);
299                out.add(m);
300    
301            }
302    
303            return out;
304        }
305    
306        public String getSequenceAsString(Integer start, Integer stop, Strand strand) throws RangeException, DAOException {
307    
308            StringBuffer sb = new StringBuffer();
309            String out = "";
310    
311            //range of sequence I want
312            Integer begin = start;
313            Integer end = stop;
314    
315            //make sure range is not greater than the assembly extent
316            if (end == null || this.getAssemblyStop() ==null || end > this.getAssemblyStop()) {
317                // end = this.getAssemblyStop();
318                throw new RangeException("Requested range greater than assembly range.");
319            }
320            if (begin == null || this.getAssemblyStart() ==null || begin < this.getAssemblyStart()) {
321                //begin = this.getAssemblyStart();
322                throw new RangeException("Requested range greater than assembly range.");
323            }
324    
325            Integer nextStart = start;
326    
327            for (Mapping mapping : this.getStitchedMappings()) {
328    
329                if (nextStart > stop) {
330                    //we're done
331                    break;
332                }
333    
334                //////////////////////////////////////
335                if (mapping.getSourceCoordinates().getStart() > stop) {
336                    //we're past the end - so quit
337                    break;
338                } else if (mapping.getSourceCoordinates().getEnd() < nextStart) {
339                    //we're not yet at the position we want so skip
340                    continue;
341                } else {
342    
343                    //this mapping is at least partially in range
344    
345                    Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() + (nextStart - mapping.getSourceCoordinates().getStart());
346                    Integer desiredOutputEnd = mapping.getTargetCoordinates().getStart() + (stop - mapping.getSourceCoordinates().getStart());
347    
348    
349                    if (desiredOutputEnd > mapping.getTargetCoordinates().getEnd()) {
350                        desiredOutputEnd = mapping.getTargetCoordinates().getEnd();
351                    } else {
352                        //this will complete the sequence
353                    }
354    
355                    nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;
356    
357                    DADNASequence seq = (DADNASequence) mapping.getTarget();
358                    if (mapping.getTargetCoordinates().getStrand().equals(uk.ac.roslin.ensembl.model.Coordinate.Strand.FORWARD_STRAND)) {
359                        sb.append(((DADNASequence) ((Mapping) mapping).getTarget()).getSequenceAsString(
360                                desiredOutputStart, desiredOutputEnd, null));
361                    } else {
362                        sb.append(((DADNASequence) ((Mapping) mapping).getTarget()).getReverseComplementSequenceAsString(seq.getLength() - desiredOutputEnd + 1, seq.getLength() - desiredOutputStart + 1));
363                    }
364                }
365            }
366            return sb.toString();
367        }
368    
369        public String getReverseComplementSequenceAsString(Integer start, Integer stop) throws RangeException, DAOException {
370    
371            StringBuffer outerBuffer = new StringBuffer();
372            LinkedList<StringBuffer> buffers = new LinkedList<StringBuffer>();
373    
374            String out = "";
375    
376            //range of sequence I want
377            Integer begin = start;
378            Integer end = stop;
379    
380            //make sure range is not greater than the assembly extent
381            if (end == null || end > this.getAssemblyStop()) {
382                // end = this.getAssemblyStop();
383                throw new RangeException("Requested range greater than assembly range.");
384            }
385            if (begin == null || begin < this.getAssemblyStart()) {
386                //begin = this.getAssemblyStart();
387                throw new RangeException("Requested range greater than assembly range.");
388            }
389    
390            Integer nextStart = start;
391    
392            for (Mapping mapping : this.getStitchedMappings()) {
393    
394                if (nextStart > stop) {
395                    //we're done
396                    break;
397                }
398    
399                if (mapping.getSourceCoordinates().getStart() > stop) {
400                    //we're past the end - so quit
401                    break;
402                } else if (mapping.getSourceCoordinates().getEnd() < nextStart) {
403                    //we're not yet at the position we want so skip
404                    continue;
405                } else {
406    
407                    //this mapping is at least partially in range
408                    Integer desiredOutputStart = mapping.getTargetCoordinates().getStart() + (nextStart - mapping.getSourceCoordinates().getStart());
409                    Integer desiredOutputEnd = mapping.getTargetCoordinates().getStart() + (stop - mapping.getSourceCoordinates().getStart());
410    
411                    if (desiredOutputEnd > mapping.getTargetCoordinates().getEnd()) {
412                        desiredOutputEnd = mapping.getTargetCoordinates().getEnd();
413                    } else {
414                        //this will complete the sequence
415                    }
416                    nextStart = nextStart + desiredOutputEnd - desiredOutputStart + 1;
417                    StringBuffer innerBuffer = new StringBuffer();
418                    DADNASequence seq = (DADNASequence) mapping.getTarget();
419    
420                    if (mapping.getTargetCoordinates().getStrand().equals(uk.ac.roslin.ensembl.model.Coordinate.Strand.FORWARD_STRAND)) {
421    
422                        innerBuffer.append(((DADNASequence) ((Mapping) mapping).getTarget()).getReverseComplementSequenceAsString(
423                                desiredOutputStart,desiredOutputEnd));
424                                
425                    } else {
426                        innerBuffer.append(((DADNASequence) ((Mapping) mapping).getTarget()).getSequenceAsString(
427                                //desiredOutputStart, desiredOutputEnd, null));
428                                seq.getLength() - desiredOutputEnd + 1, seq.getLength() - desiredOutputStart + 1));
429                    }
430                    buffers.add(innerBuffer);
431                }
432            }
433    
434            Iterator<StringBuffer> it = buffers.descendingIterator();
435    
436            while (it.hasNext()) {
437                outerBuffer.append(it.next());
438            }
439    
440    
441    
442    
443            return outerBuffer.toString();
444        }
445    }