Example usage for org.apache.commons.collections.buffer CircularFifoBuffer toArray

List of usage examples for org.apache.commons.collections.buffer CircularFifoBuffer toArray

Introduction

On this page you can find example usages of org.apache.commons.collections.buffer.CircularFifoBuffer.toArray.

Prototype

@SuppressWarnings("unchecked")
public <T> T[] toArray(T[] a) 

Source Link

Usage

From source file: edu.berkeley.compbio.sequtils.strings.MarkovTreeNode.java

/**
 * Computes the total log probability of generating the given sequence fragment under the model.  This differs from
 * {@link #totalProbability(byte[])} in that the sequence fragment is not given explicitly but only as metadata.  Thus
 * its probability may be computed from summary statistics that are already available in the given SequenceFragment
 * rather than from the raw sequence.  Also, because these probabilities are typically very small, the result is
 * returned in log space (indeed implementations will likely compute them in log space).
 * <p>
 * This implementation reads the fragment symbol by symbol, scoring each symbol against the preceding symbols (at
 * most {@code getMaxDepth() - 1} of them) and summing the approximate logs of the conditional probabilities.  A
 * symbol for which the conditional probability cannot be computed is skipped and the accumulated context is
 * discarded.
 *
 * @param sequenceFragment the SequenceFragment whose probability is to be computed
 * @param perSample        if true, the summed log probability is divided by the number of symbols that were
 *                         scored, giving the log of the geometric mean of the per-symbol probabilities.  If no
 *                         symbol was scored the division is 0.0/0 and the result is NaN.
 * @return the (approximate) log of the probability of the fragment; being the log of a probability, this is
 *         expected to be at most 0
 * @throws SequenceSpectrumException        if reading the fragment fails with an IOException or a FilterException
 * @throws SequenceSpectrumRuntimeException if the fragment's reader cannot be reset
 */
public double fragmentLogProbability(final SequenceFragment sequenceFragment, final boolean perSample)
        throws SequenceSpectrumException {
    // the RonPSA implementation uses backlinks and so is vastly more efficient.
    // We can't use backlinks here because they might point to nodes outside of this subtree

    synchronized (sequenceFragment.getReaderForSynchronizing()) // because of resetting the reader
    {
        final SequenceReader in;
        try {
            in = sequenceFragment.getResetReader();
        } catch (NotEnoughSequenceException e) {
            throw new SequenceSpectrumRuntimeException(e);
        }
        final int requiredPrefixLength = getMaxDepth() - 1;
        double logprob = 0;

        // CircularFifoBuffer rejects a capacity below 1 with an IllegalArgumentException, so a model that
        // needs no context (requiredPrefixLength <= 0) gets no buffer at all; every symbol is then scored
        // against an empty prefix, exactly as the first symbol of a fragment always is.
        final CircularFifoBuffer prefix = requiredPrefixLength > 0
                ? new CircularFifoBuffer(requiredPrefixLength)
                : null;
        final byte[] emptyPrefix = new byte[0];

        int samples = 0;
        while (true) {
            try {
                final byte c = in.read();

                try {
                    // PERF converting array prefix from circularFifoBuffer to byte[] is terribly inefficient
                    final byte[] prefixAsBytes;
                    if (prefix == null) {
                        prefixAsBytes = emptyPrefix;
                    } else {
                        prefixAsBytes = DSArrayUtils
                                .toPrimitive((Byte[]) prefix.toArray(new Byte[prefix.size()]));
                    }

                    // these log probabilities could be cached, e.g. logConditionalProbability(c, prefix)
                    logprob += MathUtils.approximateLog(conditionalProbability(c, prefixAsBytes));

                    samples++;

                    // only reached when the symbol was scored successfully; the buffer drops its oldest
                    // entry by itself once it is full
                    if (prefix != null) {
                        prefix.add(c);
                    }
                } catch (SequenceSpectrumException e) {
                    // probably just an invalid character
                    logger.debug("Invalid character " + (char) c);
                    // ignore this character as far as the probability is concerned,
                    // and start over with an empty context for the next symbol
                    if (prefix != null) {
                        prefix.clear();
                    }
                }
            } catch (NotEnoughSequenceException e) {
                // the reader is exhausted: this is the normal way out of the loop
                break;
            } catch (IOException e) {
                logger.error("Error", e);
                throw new SequenceSpectrumException(e);
            } catch (FilterException e) {
                logger.error("Error", e);
                throw new SequenceSpectrumException(e);
            }
        }

        if (perSample) {
            // we have ln(product(p)) == sum(ln(p)).
            // The geometric mean is exp(sum(ln(p))/n), so to get ln(geometric mean) we need only divide by n.
            // NOTE: when samples == 0 this is 0.0/0, i.e. NaN.
            logprob /= samples;
        }

        return logprob;
    }
}