Example usage for org.apache.commons.math3.distribution PoissonDistribution PoissonDistribution

Introduction

On this page you can find example usage for org.apache.commons.math3.distribution PoissonDistribution PoissonDistribution.

Prototype

public PoissonDistribution(double p) throws NotStrictlyPositiveException 

Document

Creates a new Poisson distribution with specified mean.
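
A minimal, self-contained sketch of the constructor (the mean 4.5 is an arbitrary example value; the constructor throws NotStrictlyPositiveException if the mean is not strictly positive):

import org.apache.commons.math3.distribution.PoissonDistribution;

public class PoissonConstructorExample {
    public static void main(String[] args) {
        // Create a Poisson distribution with mean 4.5
        PoissonDistribution poisson = new PoissonDistribution(4.5);

        System.out.println("P(X = 3)  = " + poisson.probability(3)); // probability mass at 3
        System.out.println("P(X <= 3) = " + poisson.cumulativeProbability(3)); // CDF at 3
        System.out.println("sample    = " + poisson.sample()); // one random draw
    }
}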

Usage

From source file:SimpleGui.QueryGenerator.java

/**
 * Creates the specified number (numOfQuerySegments) of sub-queries.
 * This should be run first, to keep the sub-query set fixed for all inputs.
 */
public ArrayList<String> generateSubQuerySamples(int numOfQuerySegments) throws IOException {
    /* File file = new File("//home//santhilata//Dropbox//CacheLearning//QGen//src//main//java//QueryInput" +
        "//SubQueries.txt");
            
            
     FileWriter fw = new FileWriter(file);
            
     fw.flush();
     */
    DatabaseDetails[] ddetails = this.getConfiguration().getDistributedEnvironment().getDatabaseSchema();

    int querySegmentSample = 0;
    String tempSTR = "";
    ArrayList<String> queries = new ArrayList<>(numOfQuerySegments);
    createSQSS();
    createPQSS();

    for (int i = 0; i < numOfQuerySegments; i++) {
        String query = "";

        ArrayList<String> tempListSQS = new ArrayList<>();

        // Select at least one SQS so we get data from multiple databases
        int noOfSQSInQuery = new Random().nextInt(ddetails.length) + 1; // 1 .. number of databases

        int seed = new Random().nextInt(7) + 1; // 1 .. 7; never 0, since it is used as a divisor below

        // Note the integer division: clamp so the Poisson mean stays strictly positive
        PoissonDistribution poisson = new PoissonDistribution(Math.max(1, noOfSQS / seed));
        UniformIntegerDistribution uid = new UniformIntegerDistribution(noOfSQS / (seed + 1), noOfSQS / seed);

        //adding SQS in the query
        for (int j = 0; j < noOfSQSInQuery; j++) {
            //  String tempSTR = sqss.get(poisson.sample());
            if (querySegmentSample > sqss.size() - 1)
                querySegmentSample = 0;
            else {
                tempSTR = sqss.get(querySegmentSample);
                querySegmentSample++;
            }
            if (!tempListSQS.contains(tempSTR))
                tempListSQS.add(tempSTR);
            else
                j--;
        }

        for (int j = 0; j < noOfSQSInQuery; j++) {
            query = query + "<" + tempListSQS.get(j) + ">";

        }
        //till here added sqs in a query

        int tempAttr = new Random().nextInt(noOfSQSInQuery);
        //to ensure that first attribute is taken from the chosen subject query segments
        while ((tempAttr == 0)) {
            if (noOfSQSInQuery == 1)
                tempAttr = 1;
            else
                tempAttr = new Random().nextInt(noOfSQSInQuery);
        }

        String attr1 = "";
        if (tempAttr == 1)
            attr1 = tempListSQS.get(0);
        else
            attr1 = tempListSQS.get(tempAttr);

        String attr2 = attr1 + "*"; // seeded to contain attr1 so the loop below executes at least once
        while (attr2.contains(attr1)) { //this is to avoid repetition of attributes in the predicates
            // System.out.println(attr2 +" "+attr1);
            // Clamp the mean: integer division could otherwise yield 0
            PoissonDistribution poisson_PQS = new PoissonDistribution(Math.max(1, pqss.size() / seed));
            int poisson_sample = poisson_PQS.sample();

            while (poisson_sample >= pqss.size()) {
                poisson_sample = poisson_PQS.sample();
            }

            attr2 = pqss.get(poisson_sample);
            //  System.out.println("new attr2 "+attr2);
        }

        if (attr2 == null) { // defensive only: the loop above always assigns attr2 from pqss
            attr1 = attr1 + "," + conditionArray[new Random().nextInt(conditionArray.length)];
            long cardinality = Math.round(Math.abs(new Random().nextGaussian() * 1000));
            attr1 = attr1 + "," + conditionArray[new Random().nextInt(conditionArray.length)] + "-"
                    + cardinality;

        }

        String str = "<" + attr1 + attr2 + ">";

        query = query + "#" + str;

        queries.add(query);

    }
    /*
    for (String query: queries
     ) {
     fw.append(query+"\n");
    }
    fw.close();
    */
    this.sub_queries = queries;

    return queries;
}
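
Since PoissonDistribution(double) throws NotStrictlyPositiveException for a mean of zero or less, and integer divisions such as noOfSQS / seed can evaluate to 0, a defensive construction helper can be useful. A hedged sketch (the names count and divisor are placeholders mirroring noOfSQS and seed above):

// Sketch only: clamp an integer-derived mean so construction cannot fail.
static PoissonDistribution poissonForCounts(int count, int divisor) {
    double mean = Math.max(1.0, (double) count / divisor); // strictly positive mean
    return new PoissonDistribution(mean);
}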

From source file:SimpleGui.QueryGenerator.java

/**
 * Returns an array of values drawn from the given distribution.
 * @param distribution name of the distribution ("Poisson", "Random", "Uniform", "Exponential" or "Grading")
 * @param numQs number of values to generate
 * @param mean mean of the Poisson/Normal distributions
 * @param variance variance of the Normal distribution
 * @param exponentialMean mean of the exponential distribution
 * @param u_lower lower bound of the uniform distribution
 * @param u_upper upper bound of the uniform distribution
 * @param slope slope for the "Grading" case (y = mx + c)
 * @param upperBoundary exclusive upper bound on the generated values
 * @return the generated values
 */
public int[] getDistributionValues(String distribution, int numQs, double mean, double variance,
        double exponentialMean, int u_lower, int u_upper, double slope, int upperBoundary) {

    int[] values = new int[numQs];

    switch (distribution) {
    case "Poisson": {
        RandomEngine engine = new DRand();
        Poisson poisson = new Poisson(mean, engine);
        int poissonObs = poisson.nextInt(); // drawn but unused below
        Normal normal = new Normal(mean, variance, engine);

        for (int i = 0; i < numQs; i++) {
            double normalObs = normal.nextDouble();
            int sample = (int) Math.abs(normalObs);

            while (sample >= upperBoundary) {
                normalObs = normal.nextDouble();
                sample = (int) Math.abs(normalObs);
            }

            values[i] = sample;
            //System.out.println(values[i]+"from poisson");
            //=============================================
            // Commons Math alternative (currently unused; the Normal sample above is kept instead)
            PoissonDistribution p = new PoissonDistribution(mean);
            int randomInt = p.sample();
            // values[i] = randomInt;
        }
        // Arrays.sort(values);

        break;
    }
    case "Random": {

        Random randomGenerator = new Random();
        for (int i = 0; i < numQs; i++) {
            int randomInt = randomGenerator.nextInt(numQs - 1);
            values[i] = randomInt + 1; // to avoid '0'
        }

        break;
    }
    case "Uniform": {
        for (int i = 0; i < numQs; i++) {
            UniformIntegerDistribution u = new UniformIntegerDistribution(u_lower, u_upper);
            int randomInt = u.sample();
            while (randomInt >= upperBoundary) {
                randomInt = u.sample();
            }
            if (randomInt == 0)
                values[i] = randomInt + 1; // to avoid random '0'
            else
                values[i] = randomInt;

        }
        break;
    }

    case "Exponential": {
        for (int i = 0; i < numQs; i++) {
            ExponentialDistribution e = new ExponentialDistribution(exponentialMean);
            double exponential = e.sample();
            while (exponential >= upperBoundary - 1) {
                exponential = e.sample();
            }
            values[i] = (int) exponential + 1; // to avoid random value '0'
        }

        break;
    }

    case "Grading": { // y=mx+c. increment and then decrement for positive slopes.
        int constant = 0;
        for (int i = 0; i < numQs; i++) {
            constant += slope;
            int nextVal = new Double(constant).intValue();
            System.out.println(upperBoundary);
            if (nextVal >= upperBoundary || nextVal < 0) {
                slope = -1 * slope;
                i--;
            } else
                values[i] = nextVal;
        }
        break;
    }
    }
    // Arrays.sort(values);

    return values;
}
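
In the "Poisson" case above, values[i] is actually filled from the Normal sample and the PoissonDistribution draw is discarded. For comparison, a sketch of filling the array directly from the PoissonDistribution with the same redraw-until-below-boundary rejection pattern (mean, numQs and upperBoundary as in the method signature above):

PoissonDistribution p = new PoissonDistribution(mean);
int[] values = new int[numQs];
for (int i = 0; i < numQs; i++) {
    int sample = p.sample();
    while (sample >= upperBoundary) { // reject out-of-range draws
        sample = p.sample();
    }
    values[i] = sample;
}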

From source file:uk.ac.babraham.SeqMonk.ProbeGenerators.MacsPeakCaller.java

public void run() {

    //      for (int i=0;i<selectedChIPStores.length;i++) {
    //         System.err.println("Selected ChIP="+selectedChIPStores[i]);
    //      }
    //      for (int i=0;i<selectedInputStores.length;i++) {
    //         System.err.println("Selected Input="+selectedInputStores[i]);
    //      }

    // First find the tag offsets between the watson and crick strands

    // Work out the total average coverage for all of the combined ChIP samples
    long totalChIPCoverage = 0;

    for (int i = 0; i < selectedChIPStores.length; i++) {
        totalChIPCoverage += selectedChIPStores[i].getTotalReadLength();
    }

    if (cancel) {
        generationCancelled();
        return;
    }

    double averageChIPCoveragePerBase = totalChIPCoverage / (double) collection.genome().getTotalGenomeLength();

    double lowerCoverage = averageChIPCoveragePerBase * minFoldEnrichment;
    double upperCoverage = averageChIPCoveragePerBase * maxFoldEnrichment;

    System.err.println("Coverage range for high confidence peaks is " + lowerCoverage + " - " + upperCoverage);

    // Now we go through the data to find locations for our high confidence peaks so we can
    // randomly select 1000 of these to use to find the offset between the two strands

    Chromosome[] chromosomes = collection.genome().getAllChromosomes();

    Vector<Probe> potentialHighConfidencePeaks = new Vector<Probe>();

    for (int c = 0; c < chromosomes.length; c++) {

        if (cancel) {
            generationCancelled();
            return;
        }

        // Time for an update
        updateGenerationProgress("Finding high confidence peaks on chromosome " + chromosomes[c].name(), c,
                chromosomes.length);

        Probe lastValidProbe = null;

        for (int startPosition = 1; startPosition < chromosomes[c].length()
                - fragmentSize; startPosition += fragmentSize / 2) {

            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }

            long totalLength = 0;
            Probe probe = new Probe(chromosomes[c], startPosition, startPosition + fragmentSize);

            for (int s = 0; s < selectedChIPStores.length; s++) {
                long[] reads = selectedChIPStores[s].getReadsForProbe(probe);
                for (int j = 0; j < reads.length; j++) {
                    totalLength += SequenceRead.length(reads[j]);
                }
            }

            if (totalLength >= (lowerCoverage * probe.length())
                    && totalLength <= upperCoverage * probe.length()) {

                // We have a potential high quality peak.

                // See if we can merge it with the last valid probe

                if (lastValidProbe != null
                        && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {

                    lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());

                } else if (lastValidProbe != null) {

                    // Check that the overall density over the region falls within our limits
                    totalLength = 0;
                    for (int s = 0; s < selectedChIPStores.length; s++) {
                        long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
                        for (int j = 0; j < reads.length; j++) {
                            totalLength += SequenceRead.length(reads[j]);
                        }
                    }

                    if (totalLength >= (lowerCoverage * lastValidProbe.length())
                            && totalLength <= upperCoverage * lastValidProbe.length()) {
                        potentialHighConfidencePeaks.add(lastValidProbe);
                    }

                    lastValidProbe = probe;
                } else {
                    lastValidProbe = probe;
                }
            }

        }

        if (lastValidProbe != null) {
            long totalLength = 0;
            for (int s = 0; s < selectedChIPStores.length; s++) {
                long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
                for (int j = 0; j < reads.length; j++) {
                    totalLength += SequenceRead.length(reads[j]);
                }
            }

            if (totalLength >= (lowerCoverage * lastValidProbe.length())
                    && totalLength <= upperCoverage * lastValidProbe.length()) {
                potentialHighConfidencePeaks.add(lastValidProbe);
            }
        }
    }

    if (potentialHighConfidencePeaks.size() == 0) {

        JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No high confidence peaks found",
                "Quitting generator", JOptionPane.INFORMATION_MESSAGE);
        generationCancelled();
        return;
    }

    //      System.err.println("Found "+potentialHighConfidencePeaks.size()+" high confidence peaks");

    // Now we select 1000 random probes from this set
    Probe[] highConfidencePeaks = potentialHighConfidencePeaks.toArray(new Probe[0]);

    Collections.shuffle(Arrays.asList(highConfidencePeaks));

    Probe[] randomHighConfidenceProbes = new Probe[Math.min(highConfidencePeaks.length, 1000)];

    for (int i = 0; i < randomHighConfidenceProbes.length; i++) {
        randomHighConfidenceProbes[i] = highConfidencePeaks[i];
    }

    // Now find the average distance between forward / reverse reads in the candidate peaks

    int[] distances = new int[randomHighConfidenceProbes.length]; // one distance per sampled probe

    // Sort the candidates so we don't do stupid stuff with the cache
    Arrays.sort(randomHighConfidenceProbes);

    for (int p = 0; p < randomHighConfidenceProbes.length; p++) {

        // See if we need to quit
        if (cancel) {
            generationCancelled();
            return;
        }

        distances[p] = getInterStrandDistance(randomHighConfidenceProbes[p], selectedChIPStores);
    }

    int medianInterStrandDistance = (int) SimpleStats.median(distances);

    if (medianInterStrandDistance < 0)
        medianInterStrandDistance = 0;

    //      System.err.println("Median inter strand difference = "+medianInterStrandDistance);

    // Now we find the depth cutoff for overrepresented single tags using a binomial distribution
    int totalReadCount = 0;
    for (int i = 0; i < selectedChIPStores.length; i++) {
        totalReadCount += selectedChIPStores[i].getTotalReadCount();
    }

    BinomialDistribution bin = new BinomialDistribution(totalReadCount,
            1d / collection.genome().getTotalGenomeLength());

    // We want to know what depth has a probability of less than 10^-5
    int redundantThreshold = bin.inverseCumulativeProbability(1 - 0.00001d);

    if (redundantThreshold < 1)
        redundantThreshold = 1;

    //      System.err.println("Redundancy threshold is "+redundantThreshold);

    // Now we construct a Poisson distribution to work out the threshold to use for
    // constructing a full candidate peak set.

    updateGenerationProgress("Counting non-redundant reads", 0, 1);

    // To do this we need to get the full non-redundant read count from the whole set
    int totalNonRedCount = getNonRedundantReadCount(selectedChIPStores, redundantThreshold);

    //      System.err.println("Total non-redundant sequences is "+totalNonRedCount);

    // We need to know the median read length for the data
    int readLength = 0;
    for (int i = 0; i < selectedChIPStores.length; i++) {
        readLength += selectedChIPStores[i].getTotalReadLength() / selectedChIPStores[i].getTotalReadCount();
    }
    readLength /= selectedChIPStores.length;

    double expectedCountsPerWindow = getExpectedCountPerWindow(totalNonRedCount,
            collection.genome().getTotalGenomeLength(), fragmentSize, readLength);

    PoissonDistribution poisson = new PoissonDistribution(expectedCountsPerWindow);

    int readCountCutoff = poisson.inverseCumulativeProbability(1 - pValue);

    //      System.err.println("Threshold for enrichment in a window is "+readCountCutoff+" reads using a p-value of "+pValue+" and a mean of "+(totalNonRedCount/(collection.genome().getTotalGenomeLength()/(double)fragmentSize)));

    // Now we go back through the whole dataset to do a search for all possible candidate probes 

    // We re-use the peak vector we came up with before.
    potentialHighConfidencePeaks.clear();

    for (int c = 0; c < chromosomes.length; c++) {
        // Time for an update
        updateGenerationProgress("Finding candidate peaks on chromosome " + chromosomes[c].name(), c,
                chromosomes.length);

        Probe lastValidProbe = null;

        for (int startPosition = 1; startPosition < chromosomes[c].length()
                - fragmentSize; startPosition += fragmentSize / 2) {

            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }

            // We expand the region we're looking at by the inter-strand distance as we're going to
            // be adjusting the read positions
            Probe probe = new Probe(chromosomes[c], startPosition, (startPosition + fragmentSize - 1));

            long[] mergedProbeReads = getReadsFromDataStoreCollection(probe, selectedChIPStores,
                    medianInterStrandDistance);

            mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);

            SequenceRead.sort(mergedProbeReads);

            int thisProbeOverlapCount = 0;
            for (int i = 0; i < mergedProbeReads.length; i++) {
                if (SequenceRead.overlaps(mergedProbeReads[i], probe.packedPosition())) {
                    ++thisProbeOverlapCount;
                }
            }

            if (thisProbeOverlapCount > readCountCutoff) {

                // We have a potential high quality peak.

                // See if we can merge it with the last valid probe

                if (lastValidProbe != null
                        && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {

                    lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());

                } else if (lastValidProbe != null) {
                    potentialHighConfidencePeaks.add(lastValidProbe);
                    lastValidProbe = probe;
                } else {
                    lastValidProbe = probe;
                }
            }

        }

        if (lastValidProbe != null) {
            potentialHighConfidencePeaks.add(lastValidProbe);
        }
    }

    // Finally we re-filter the peaks we have using local Poisson distributions with densities taken
    // from either the input samples (if there are any) or the local region.  The densities are
    // estimated over 1, 5 and 10 kb around the peak and genome-wide, and the max of these is taken.
    // If there is no input then the 1 kb region is not used.

    Probe[] allCandidateProbes = potentialHighConfidencePeaks.toArray(new Probe[0]);

    // Work out which stores we're using to validate against.
    DataStore[] validationStores;
    boolean useInput = false;
    double inputCorrection = 1;

    int validationNonRedCount;

    if (selectedInputStores != null && selectedInputStores.length > 0) {

        // See if we need to quit
        if (cancel) {
            generationCancelled();
            return;
        }

        validationStores = selectedInputStores;
        useInput = true;

        // We also need to work out the total number of non-redundant sequences
        // in the input so we can work out a scaling factor so that the densities
        // for input and ChIP are comparable.
        validationNonRedCount = getNonRedundantReadCount(validationStores, redundantThreshold);

        inputCorrection = totalNonRedCount / (double) validationNonRedCount;

        System.err.println("From chip=" + totalNonRedCount + " input=" + validationNonRedCount
                + " correction is " + inputCorrection);

    } else {
        validationStores = selectedChIPStores;
        validationNonRedCount = totalNonRedCount;
    }

    Vector<Probe> finalValidatedProbes = new Vector<Probe>();

    for (int p = 0; p < allCandidateProbes.length; p++) {

        // See if we need to quit
        if (cancel) {
            generationCancelled();
            return;
        }

        if (p % 100 == 0) {
            updateGenerationProgress("Validated " + p + " out of " + allCandidateProbes.length + " raw peaks",
                    p, allCandidateProbes.length);
        }

        //         System.err.println("Validating "+allCandidateProbes[p].chromosome()+":"+allCandidateProbes[p].start()+"-"+allCandidateProbes[p].end());

        // We now need to find the maximum read density per 2*bandwidth against which
        // we're going to validate this peak

        // We're going to get all reads within 10kb of the peak, and then we can subselect from there

        int midPoint = allCandidateProbes[p].middle();

        Probe region10kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 5000, 1),
                Math.min(midPoint + 4999, allCandidateProbes[p].chromosome().length()),
                allCandidateProbes[p].strand());
        Probe region5kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 2500, 1),
                Math.min(midPoint + 2499, allCandidateProbes[p].chromosome().length()),
                allCandidateProbes[p].strand());
        Probe region1kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 500, 1),
                Math.min(midPoint + 499, allCandidateProbes[p].chromosome().length()),
                allCandidateProbes[p].strand());

        // Get the probes for the largest region
        long[] thisRegionReads = getReadsFromDataStoreCollection(region10kb, validationStores, 0);

        // Deduplicate so it's a fair comparison
        thisRegionReads = deduplicateReads(thisRegionReads, redundantThreshold); // Should we recalculate the redundant threshold based on the input coverage?

        int region10kbcount = thisRegionReads.length;
        int region5kbcount = 0;
        int region1kbcount = 0;

        // Go through the reads seeing if they fit into the 5 or 1kb regions
        for (int r = 0; r < thisRegionReads.length; r++) {
            if (SequenceRead.overlaps(region5kb.packedPosition(), thisRegionReads[r]))
                ++region5kbcount;
            if (SequenceRead.overlaps(region1kb.packedPosition(), thisRegionReads[r]))
                ++region1kbcount;
        }

        //         System.err.println("Input counts 10kb="+region10kbcount+" 5kb="+region5kbcount+" 1kb="+region1kbcount);

        // Convert to densities per window and adjust for global coverage

        double globalDensity = getExpectedCountPerWindow(validationNonRedCount,
                collection.genome().getTotalGenomeLength(), allCandidateProbes[p].length(), readLength)
                * inputCorrection;
        double density10kb = getExpectedCountPerWindow(region10kbcount, region10kb.length(),
                allCandidateProbes[p].length(), readLength) * inputCorrection;
        double density5kb = getExpectedCountPerWindow(region5kbcount, region5kb.length(),
                allCandidateProbes[p].length(), readLength) * inputCorrection;
        double density1kb = getExpectedCountPerWindow(region1kbcount, region1kb.length(),
                allCandidateProbes[p].length(), readLength) * inputCorrection;

        // Find the highest density to use for the validation
        double highestDensity = globalDensity;
        if (density10kb > highestDensity)
            highestDensity = density10kb;
        if (density5kb > highestDensity)
            highestDensity = density5kb;
        if (useInput && density1kb > highestDensity)
            highestDensity = density1kb;

        //         System.err.println("Global="+globalDensity+" 10kb="+density10kb+" 5kb="+density5kb+" 1kb="+density1kb+" using="+highestDensity);

        // Construct a poisson distribution with this density
        PoissonDistribution localPoisson = new PoissonDistribution(highestDensity);

        //         System.err.println("Cutoff from global="+(new PoissonDistribution(globalDensity)).inverseCumulativeProbability(1-pValue)+" 10kb="+(new PoissonDistribution(density10kb)).inverseCumulativeProbability(1-pValue)+" 5kb="+(new PoissonDistribution(density5kb)).inverseCumulativeProbability(1-pValue)+" 1kb="+(new PoissonDistribution(density1kb)).inverseCumulativeProbability(1-pValue));
        // Now check to see if the actual count from this peak is enough to still pass
        long[] mergedProbeReads = getReadsFromDataStoreCollection(allCandidateProbes[p], selectedChIPStores,
                medianInterStrandDistance);
        mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);

        SequenceRead.sort(mergedProbeReads);

        int thisProbeOverlapCount = 0;
        for (int i = 0; i < mergedProbeReads.length; i++) {
            if (SequenceRead.overlaps(mergedProbeReads[i], allCandidateProbes[p].packedPosition())) {
                ++thisProbeOverlapCount;
            }
        }

        //         System.err.println("Read count in ChIP is "+thisProbeOverlapCount);

        if (thisProbeOverlapCount > localPoisson.inverseCumulativeProbability(1 - pValue)) {
            finalValidatedProbes.add(allCandidateProbes[p]);
            //            System.err.println("Adding probe to final set");
        }

    }

    //      System.err.println("From "+allCandidateProbes.length+" candidates "+finalValidatedProbes.size()+" peaks were validated");

    ProbeSet finalSet = new ProbeSet(getDescription(), finalValidatedProbes.toArray(new Probe[0]));

    generationComplete(finalSet);

}
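
The recurring pattern in this generator is to use the Poisson inverse CDF as an enrichment threshold: given the expected read count per window, a window is a candidate peak when its observed count exceeds the smallest k with P(X <= k) >= 1 - p. A standalone sketch of just that step (the density 2.3 and the p-value are placeholder values, not figures from the code above):

import org.apache.commons.math3.distribution.PoissonDistribution;

public class PoissonPeakThreshold {
    public static void main(String[] args) {
        double expectedCountsPerWindow = 2.3; // placeholder window density
        double pValue = 0.00001;              // placeholder significance level

        PoissonDistribution poisson = new PoissonDistribution(expectedCountsPerWindow);

        // Smallest count k with P(X <= k) >= 1 - pValue; windows with
        // more reads than this are treated as candidate peaks.
        int readCountCutoff = poisson.inverseCumulativeProbability(1 - pValue);

        System.out.println("Read count cutoff: " + readCountCutoff);
    }
}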