List of usage examples for the org.apache.commons.math3.distribution.PoissonDistribution constructor
public PoissonDistribution(double p) throws NotStrictlyPositiveException
Creates a new Poisson distribution with the specified mean p; the constructor throws NotStrictlyPositiveException when p <= 0.
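Before the longer listings below, a minimal self-contained sketch of the constructor and the query methods those listings rely on (the class name and the values are illustrative):

import org.apache.commons.math3.distribution.PoissonDistribution;

public class PoissonDistributionDemo {
    public static void main(String[] args) {
        // The mean (lambda) must be strictly positive, otherwise the
        // constructor throws NotStrictlyPositiveException
        PoissonDistribution poisson = new PoissonDistribution(4.0);

        int draw = poisson.sample();                       // one random variate
        double pFive = poisson.probability(5);             // P(X = 5)
        double cdfFive = poisson.cumulativeProbability(5); // P(X <= 5)
        // Smallest k with P(X <= k) >= 0.99, the usual way to turn a
        // Poisson model into an integer count cutoff
        int cutoff = poisson.inverseCumulativeProbability(0.99);

        System.out.println(draw + " " + pFive + " " + cdfFive + " " + cutoff);
    }
}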
From source file:SimpleGui.QueryGenerator.java
/**
 * Creates the specified number (numOfQuerySegments) of sub-queries.
 * This should run first so that the sub-query set stays fixed for all inputs.
 */
public ArrayList<String> generateSubQuerySamples(int numOfQuerySegments) throws IOException {
    /*
    File file = new File("//home//santhilata//Dropbox//CacheLearning//QGen//src//main//java//QueryInput"
            + "//SubQueries.txt");
    FileWriter fw = new FileWriter(file);
    fw.flush();
    */
    DatabaseDetails[] ddetails = this.getConfiguration().getDistributedEnvironment().getDatabaseSchema();
    int querySegmentSample = 0;
    String tempSTR = "";
    ArrayList<String> queries = new ArrayList<>(numOfQuerySegments);
    createSQSS();
    createPQSS();

    for (int i = 0; i < numOfQuerySegments; i++) {
        String query = "";
        ArrayList<String> tempListSQS = new ArrayList<>();

        // Select more than one SQS so that we get data from multiple databases
        int noOfSQSInQuery = 0;
        while (noOfSQSInQuery == 0) {
            noOfSQSInQuery = new Random().nextInt(ddetails.length) + 1; // up to the no. of databases
        }

        int seed = new Random().nextInt(8);
        while (seed == 0) {
            seed = new Random().nextInt(8);
        }

        // Caution: noOfSQS / seed is integer division; if noOfSQS < seed the mean
        // is 0 and the constructor throws NotStrictlyPositiveException
        PoissonDistribution poisson = new PoissonDistribution(noOfSQS / seed);
        UniformIntegerDistribution uid = new UniformIntegerDistribution(noOfSQS / (seed + 1), noOfSQS / seed);

        // Add SQS segments to the query
        for (int j = 0; j < noOfSQSInQuery; j++) {
            // String tempSTR = sqss.get(poisson.sample());
            if (querySegmentSample > sqss.size() - 1) {
                querySegmentSample = 0;
            } else {
                tempSTR = sqss.get(querySegmentSample);
                querySegmentSample++;
            }
            if (!tempListSQS.contains(tempSTR)) {
                tempListSQS.add(tempSTR);
            } else {
                j--;
            }
        }
        for (int j = 0; j < noOfSQSInQuery; j++) {
            query = query + "<" + tempListSQS.get(j) + ">";
        }
        // All SQS segments are in the query at this point

        // Ensure the first attribute is taken from the chosen subject query segments
        int tempAttr = new Random().nextInt(noOfSQSInQuery);
        while (tempAttr == 0) {
            if (noOfSQSInQuery == 1) {
                tempAttr = 1;
            } else {
                tempAttr = new Random().nextInt(noOfSQSInQuery);
            }
        }
        String attr1 = "";
        if (tempAttr == 1) {
            attr1 = tempListSQS.get(0);
        } else {
            attr1 = tempListSQS.get(tempAttr);
        }

        String attr2 = attr1 + "*"; // what is this star for?
        while (attr2.contains(attr1)) { // avoid repeating attributes in the predicates
            PoissonDistribution poisson_PQS = new PoissonDistribution(pqss.size() / seed);
            int poisson_sample = poisson_PQS.sample();
            while (poisson_sample >= pqss.size()) { // resample until the index is in range
                poisson_sample = poisson_PQS.sample();
            }
            attr2 = pqss.get(poisson_sample);
        }
        if (attr2 == null) {
            attr1 = attr1 + "," + conditionArray[new Random().nextInt(conditionArray.length)];
            long cardinality = Math.round(Math.abs(new Random().nextGaussian() * 1000));
            attr1 = attr1 + "," + conditionArray[new Random().nextInt(conditionArray.length)] + "-" + cardinality;
        }
        String str = "<" + attr1 + attr2 + ">";
        query = query + "#" + str;
        queries.add(query);
    }
    /*
    for (String query : queries) {
        fw.append(query + "\n");
    }
    fw.close();
    */
    this.sub_queries = queries;
    return queries;
}
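As flagged in the comments above, noOfSQS / seed and pqss.size() / seed are integer divisions, so whenever the numerator is smaller than seed the computed mean is 0 and the constructor throws NotStrictlyPositiveException. A minimal guard might look like the following sketch (safePoisson is a hypothetical helper, not part of QueryGenerator):

// Hypothetical helper: compute the mean in floating point and clamp it so the
// PoissonDistribution constructor never sees a non-positive value.
static PoissonDistribution safePoisson(int numerator, int divisor) {
    double mean = (double) numerator / divisor; // avoid integer division
    return new PoissonDistribution(Math.max(mean, 1e-6));
}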
From source file:SimpleGui.QueryGenerator.java
/**
 * Returns an array of values drawn from the given distribution.
 * @param distribution one of "Poisson", "Random", "Uniform", "Exponential" or "Grading"
 * @param numQs number of values to generate
 * @param mean mean for the Poisson/Normal case
 * @param variance variance for the Normal case
 * @param exponentialMean mean for the Exponential case
 * @param u_lower lower bound for the Uniform case
 * @param u_upper upper bound for the Uniform case
 * @param slope slope for the Grading case
 * @param upperBoundary values at or above this bound are rejected and redrawn
 * @return the generated values
 */
public int[] getDistributionValues(String distribution, int numQs, double mean, double variance,
        double exponentialMean, int u_lower, int u_upper, double slope, int upperBoundary) {
    int[] values = new int[numQs];
    switch (distribution) {
    case "Poisson": {
        RandomEngine engine = new DRand();
        Poisson poisson = new Poisson(mean, engine);
        int poissonObs = poisson.nextInt();
        Normal normal = new Normal(mean, variance, engine);
        for (int i = 0; i < numQs; i++) {
            // Note: despite the case label, the values actually kept come from the
            // absolute normal draw; the Commons Math Poisson sample below is discarded
            double normalObs = normal.nextDouble();
            int sample = (int) Math.abs(normalObs);
            while (sample >= upperBoundary) {
                normalObs = normal.nextDouble();
                sample = (int) Math.abs(normalObs);
            }
            values[i] = sample;
            //=============================================
            PoissonDistribution p = new PoissonDistribution(mean);
            int randomInt = p.sample();
            // values[i] = randomInt;
        }
        // Arrays.sort(values);
        break;
    }
    case "Random": {
        Random randomGenerator = new Random();
        for (int i = 0; i < numQs; i++) {
            int randomInt = randomGenerator.nextInt(numQs - 1);
            values[i] = randomInt + 1; // to avoid '0'
        }
        break;
    }
    case "Uniform": {
        for (int i = 0; i < numQs; i++) {
            UniformIntegerDistribution u = new UniformIntegerDistribution(u_lower, u_upper);
            int randomInt = u.sample();
            while (randomInt >= upperBoundary) {
                randomInt = u.sample();
            }
            if (randomInt == 0) {
                values[i] = randomInt + 1; // to avoid a random '0'
            } else {
                values[i] = randomInt;
            }
        }
        break;
    }
    case "Exponential": {
        for (int i = 0; i < numQs; i++) {
            ExponentialDistribution e = new ExponentialDistribution(exponentialMean);
            double exponential = e.sample();
            while (exponential >= upperBoundary - 1) {
                exponential = e.sample();
            }
            values[i] = (int) exponential + 1; // to avoid a random '0'
        }
        break;
    }
    case "Grading": {
        // y = mx + c: increment, then decrement once a positive slope hits the boundary
        int constant = 0;
        for (int i = 0; i < numQs; i++) {
            constant += slope;
            int nextVal = constant;
            System.out.println(upperBoundary);
            if (nextVal >= upperBoundary || nextVal < 0) {
                slope = -1 * slope;
                i--;
            } else {
                values[i] = nextVal;
            }
        }
        break;
    }
    }
    // Arrays.sort(values);
    return values;
}
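The recurring pattern in the method above is rejection sampling: draw from a distribution and redraw while the value falls outside an upper boundary. A standalone sketch of that loop for a plain Commons Math Poisson (the method name and bound handling are illustrative, not taken from the class above):

import org.apache.commons.math3.distribution.PoissonDistribution;

// Draws numQs Poisson(mean) samples, redrawing any value >= upperBoundary.
// Caution: if the mean sits far above the boundary the loop rarely terminates,
// so the boundary should lie well into the distribution's upper tail.
static int[] boundedPoissonSamples(int numQs, double mean, int upperBoundary) {
    PoissonDistribution p = new PoissonDistribution(mean);
    int[] values = new int[numQs];
    for (int i = 0; i < numQs; i++) {
        int s = p.sample();
        while (s >= upperBoundary) {
            s = p.sample();
        }
        values[i] = s;
    }
    return values;
}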
From source file:uk.ac.babraham.SeqMonk.ProbeGenerators.MacsPeakCaller.java
public void run() {
    // for (int i = 0; i < selectedChIPStores.length; i++) {
    //     System.err.println("Selected ChIP=" + selectedChIPStores[i]);
    // }
    // for (int i = 0; i < selectedInputStores.length; i++) {
    //     System.err.println("Selected Input=" + selectedInputStores[i]);
    // }

    // First find the tag offsets between the Watson and Crick strands.
    // Work out the total average coverage for all of the combined ChIP samples
    long totalChIPCoverage = 0;
    for (int i = 0; i < selectedChIPStores.length; i++) {
        totalChIPCoverage += selectedChIPStores[i].getTotalReadLength();
    }
    if (cancel) {
        generationCancelled();
        return;
    }
    double averageChIPCoveragePerBase = totalChIPCoverage / (double) collection.genome().getTotalGenomeLength();
    double lowerCoverage = averageChIPCoveragePerBase * minFoldEnrichment;
    double upperCoverage = averageChIPCoveragePerBase * maxFoldEnrichment;
    System.err.println("Coverage range for high confidence peaks is " + lowerCoverage + " - " + upperCoverage);

    // Now we go through the data to find locations for our high confidence peaks so we can
    // randomly select 1000 of these to use to find the offset between the two strands
    Chromosome[] chromosomes = collection.genome().getAllChromosomes();
    Vector<Probe> potentialHighConfidencePeaks = new Vector<Probe>();
    for (int c = 0; c < chromosomes.length; c++) {
        if (cancel) {
            generationCancelled();
            return;
        }
        // Time for an update
        updateGenerationProgress("Finding high confidence peaks on chromosome " + chromosomes[c].name(), c,
                chromosomes.length);
        Probe lastValidProbe = null;
        for (int startPosition = 1; startPosition < chromosomes[c].length() - fragmentSize;
                startPosition += fragmentSize / 2) {
            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }
            long totalLength = 0;
            Probe probe = new Probe(chromosomes[c], startPosition, startPosition + fragmentSize);
            for (int s = 0; s < selectedChIPStores.length; s++) {
                long[] reads = selectedChIPStores[s].getReadsForProbe(probe);
                for (int j = 0; j < reads.length; j++) {
                    totalLength += SequenceRead.length(reads[j]);
                }
            }
            if (totalLength >= (lowerCoverage * probe.length())
                    && totalLength <= upperCoverage * probe.length()) {
                // We have a potential high quality peak.
                // See if we can merge it with the last valid probe
                if (lastValidProbe != null
                        && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {
                    lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());
                } else if (lastValidProbe != null) {
                    // Check that the overall density over the region falls within our limits
                    totalLength = 0;
                    for (int s = 0; s < selectedChIPStores.length; s++) {
                        long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
                        for (int j = 0; j < reads.length; j++) {
                            totalLength += SequenceRead.length(reads[j]);
                        }
                    }
                    if (totalLength >= (lowerCoverage * lastValidProbe.length())
                            && totalLength <= upperCoverage * lastValidProbe.length()) {
                        potentialHighConfidencePeaks.add(lastValidProbe);
                    }
                    lastValidProbe = probe;
                } else {
                    lastValidProbe = probe;
                }
            }
        }
        if (lastValidProbe != null) {
            long totalLength = 0;
            for (int s = 0; s < selectedChIPStores.length; s++) {
                long[] reads = selectedChIPStores[s].getReadsForProbe(lastValidProbe);
                for (int j = 0; j < reads.length; j++) {
                    totalLength += SequenceRead.length(reads[j]);
                }
            }
            if (totalLength >= (lowerCoverage * lastValidProbe.length())
                    && totalLength <= upperCoverage * lastValidProbe.length()) {
                potentialHighConfidencePeaks.add(lastValidProbe);
            }
        }
    }

    if (potentialHighConfidencePeaks.size() == 0) {
        JOptionPane.showMessageDialog(SeqMonkApplication.getInstance(), "No high confidence peaks found",
                "Quitting generator", JOptionPane.INFORMATION_MESSAGE);
        generationCancelled();
        return;
    }
    // System.err.println("Found " + potentialHighConfidencePeaks.size() + " high confidence peaks");

    // Now we select 1000 random probes from this set
    Probe[] highConfidencePeaks = potentialHighConfidencePeaks.toArray(new Probe[0]);
    Collections.shuffle(Arrays.asList(highConfidencePeaks));
    Probe[] randomHighConfidenceProbes = new Probe[Math.min(highConfidencePeaks.length, 1000)];
    for (int i = 0; i < randomHighConfidenceProbes.length; i++) {
        randomHighConfidenceProbes[i] = highConfidencePeaks[i];
    }

    // Now find the average distance between forward / reverse reads in the candidate peaks.
    // Note: this array is sized to the full candidate set, so when more than 1000
    // candidates exist the unfilled tail of zeros pulls the median down.
    int[] distances = new int[highConfidencePeaks.length];
    // Sort the candidates so we don't do stupid stuff with the cache
    Arrays.sort(randomHighConfidenceProbes);
    for (int p = 0; p < randomHighConfidenceProbes.length; p++) {
        // See if we need to quit
        if (cancel) {
            generationCancelled();
            return;
        }
        distances[p] = getInterStrandDistance(randomHighConfidenceProbes[p], selectedChIPStores);
    }
    int medianInterStrandDistance = (int) SimpleStats.median(distances);
    if (medianInterStrandDistance < 0)
        medianInterStrandDistance = 0;
    // System.err.println("Median inter strand difference = " + medianInterStrandDistance);

    // Now we find the depth cutoff for overrepresented single tags using a binomial distribution
    int totalReadCount = 0;
    for (int i = 0; i < selectedChIPStores.length; i++) {
        totalReadCount += selectedChIPStores[i].getTotalReadCount();
    }
    BinomialDistribution bin = new BinomialDistribution(totalReadCount,
            1d / collection.genome().getTotalGenomeLength());
    // We want to know what depth has a chance of less than 10^-5
    int redundantThreshold = bin.inverseCumulativeProbability(1 - 0.00001d);
    if (redundantThreshold < 1)
        redundantThreshold = 1;
    // System.err.println("Redundancy threshold is " + redundantThreshold);

    // Now we construct a poisson distribution to work out the threshold to use for
    // constructing a full candidate peak set.
    updateGenerationProgress("Counting non-redundant reads", 0, 1);

    // To do this we need to get the full non-redundant length from the whole set
    int totalNonRedCount = getNonRedundantReadCount(selectedChIPStores, redundantThreshold);
    // System.err.println("Total non-redundant sequences is " + totalNonRedCount);

    // We need to know the median read length for the data
    int readLength = 0;
    for (int i = 0; i < selectedChIPStores.length; i++) {
        readLength += selectedChIPStores[i].getTotalReadLength() / selectedChIPStores[i].getTotalReadCount();
    }
    readLength /= selectedChIPStores.length;

    double expectedCountsPerWindow = getExpectedCountPerWindow(totalNonRedCount,
            collection.genome().getTotalGenomeLength(), fragmentSize, readLength);
    PoissonDistribution poisson = new PoissonDistribution(expectedCountsPerWindow);
    int readCountCutoff = poisson.inverseCumulativeProbability(1 - pValue);
    // System.err.println("Threshold for enrichment in a window is " + readCountCutoff + " reads using a p-value of " + pValue + " and a mean of " + (totalNonRedCount / (collection.genome().getTotalGenomeLength() / (double) fragmentSize)));

    // Now we go back through the whole dataset to do a search for all possible candidate probes.
    // We re-use the peak vector we came up with before.
    potentialHighConfidencePeaks.clear();
    for (int c = 0; c < chromosomes.length; c++) {
        // Time for an update
        updateGenerationProgress("Finding candidate peaks on chromosome " + chromosomes[c].name(), c,
                chromosomes.length);
        Probe lastValidProbe = null;
        for (int startPosition = 1; startPosition < chromosomes[c].length() - fragmentSize;
                startPosition += fragmentSize / 2) {
            // See if we need to quit
            if (cancel) {
                generationCancelled();
                return;
            }
            // We expand the region we're looking at by the inter-strand distance as we're going to
            // be adjusting the read positions
            Probe probe = new Probe(chromosomes[c], startPosition, (startPosition + fragmentSize - 1));
            long[] mergedProbeReads = getReadsFromDataStoreCollection(probe, selectedChIPStores,
                    medianInterStrandDistance);
            mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);
            SequenceRead.sort(mergedProbeReads);
            int thisProbeOverlapCount = 0;
            for (int i = 0; i < mergedProbeReads.length; i++) {
                if (SequenceRead.overlaps(mergedProbeReads[i], probe.packedPosition())) {
                    ++thisProbeOverlapCount;
                }
            }
            if (thisProbeOverlapCount > readCountCutoff) {
                // We have a potential high quality peak.
                // See if we can merge it with the last valid probe
                if (lastValidProbe != null
                        && SequenceRead.overlaps(lastValidProbe.packedPosition(), probe.packedPosition())) {
                    lastValidProbe = new Probe(chromosomes[c], lastValidProbe.start(), probe.end());
                } else if (lastValidProbe != null) {
                    potentialHighConfidencePeaks.add(lastValidProbe);
                    lastValidProbe = probe;
                } else {
                    lastValidProbe = probe;
                }
            }
        }
        if (lastValidProbe != null) {
            potentialHighConfidencePeaks.add(lastValidProbe);
        }
    }

    // Finally we re-filter the peaks we have using local poisson distributions with densities taken
    // from either the input samples (if there are any), or the local region. The densities are
    // estimated over 1, 5 and 10 kb around the peak and genome wide, and the max of these is taken.
    // If there is no input then the 1 kb region is not used.
    Probe[] allCandidateProbes = potentialHighConfidencePeaks.toArray(new Probe[0]);

    // Work out which stores we're using to validate against
    DataStore[] validationStores;
    boolean useInput = false;
    double inputCorrection = 1;
    int validationNonRedCount;
    if (selectedInputStores != null && selectedInputStores.length > 0) {
        // See if we need to quit
        if (cancel) {
            generationCancelled();
            return;
        }
        validationStores = selectedInputStores;
        useInput = true;
        // We also need to work out the total number of non-redundant sequences
        // in the input so we can work out a scaling factor so that the densities
        // for input and ChIP are comparable
        validationNonRedCount = getNonRedundantReadCount(validationStores, redundantThreshold);
        inputCorrection = totalNonRedCount / (double) validationNonRedCount;
        System.err.println("From chip=" + totalNonRedCount + " input=" + validationNonRedCount
                + " correction is " + inputCorrection);
    } else {
        validationStores = selectedChIPStores;
        validationNonRedCount = totalNonRedCount;
    }

    Vector<Probe> finalValidatedProbes = new Vector<Probe>();
    for (int p = 0; p < allCandidateProbes.length; p++) {
        // See if we need to quit
        if (cancel) {
            generationCancelled();
            return;
        }
        if (p % 100 == 0) {
            updateGenerationProgress("Validated " + p + " out of " + allCandidateProbes.length + " raw peaks",
                    p, allCandidateProbes.length);
        }
        // System.err.println("Validating " + allCandidateProbes[p].chromosome() + ":" + allCandidateProbes[p].start() + "-" + allCandidateProbes[p].end());

        // We now need to find the maximum read density per 2*bandwidth against which
        // we're going to validate this peak.
        // We're going to get all reads within 10kb of the peak, and then we can subselect from there
        int midPoint = allCandidateProbes[p].middle();
        Probe region10kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 5000, 1),
                Math.min(midPoint + 4999, allCandidateProbes[p].chromosome().length()),
                allCandidateProbes[p].strand());
        Probe region5kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 2500, 1),
                Math.min(midPoint + 2499, allCandidateProbes[p].chromosome().length()),
                allCandidateProbes[p].strand());
        Probe region1kb = new Probe(allCandidateProbes[p].chromosome(), Math.max(midPoint - 500, 1),
                Math.min(midPoint + 499, allCandidateProbes[p].chromosome().length()),
                allCandidateProbes[p].strand());

        // Get the reads for the largest region
        long[] thisRegionReads = getReadsFromDataStoreCollection(region10kb, validationStores, 0);
        // Deduplicate so it's a fair comparison
        thisRegionReads = deduplicateReads(thisRegionReads, redundantThreshold);
        // Should we recalculate the redundant threshold based on the input coverage?
        int region10kbcount = thisRegionReads.length;
        int region5kbcount = 0;
        int region1kbcount = 0;
        // Go through the reads seeing if they fit into the 5 or 1kb regions
        for (int r = 0; r < thisRegionReads.length; r++) {
            if (SequenceRead.overlaps(region5kb.packedPosition(), thisRegionReads[r]))
                ++region5kbcount;
            if (SequenceRead.overlaps(region1kb.packedPosition(), thisRegionReads[r]))
                ++region1kbcount;
        }
        // System.err.println("Input counts 10kb=" + region10kbcount + " 5kb=" + region5kbcount + " 1kb=" + region1kbcount);

        // Convert to densities per window and adjust for global coverage
        double globalDensity = getExpectedCountPerWindow(validationNonRedCount,
                collection.genome().getTotalGenomeLength(), allCandidateProbes[p].length(), readLength)
                * inputCorrection;
        double density10kb = getExpectedCountPerWindow(region10kbcount, region10kb.length(),
                allCandidateProbes[p].length(), readLength) * inputCorrection;
        double density5kb = getExpectedCountPerWindow(region5kbcount, region5kb.length(),
                allCandidateProbes[p].length(), readLength) * inputCorrection;
        double density1kb = getExpectedCountPerWindow(region1kbcount, region1kb.length(),
                allCandidateProbes[p].length(), readLength) * inputCorrection;

        // Find the highest density to use for the validation
        double highestDensity = globalDensity;
        if (density10kb > highestDensity)
            highestDensity = density10kb;
        if (density5kb > highestDensity)
            highestDensity = density5kb;
        if (useInput && density1kb > highestDensity)
            highestDensity = density1kb;
        // System.err.println("Global=" + globalDensity + " 10kb=" + density10kb + " 5kb=" + density5kb + " 1kb=" + density1kb + " using=" + highestDensity);

        // Construct a poisson distribution with this density
        PoissonDistribution localPoisson = new PoissonDistribution(highestDensity);
        // System.err.println("Cutoff from global=" + (new PoissonDistribution(globalDensity)).inverseCumulativeProbability(1 - pValue) + " 10kb=" + (new PoissonDistribution(density10kb)).inverseCumulativeProbability(1 - pValue) + " 5kb=" + (new PoissonDistribution(density5kb)).inverseCumulativeProbability(1 - pValue) + " 1kb=" + (new PoissonDistribution(density1kb)).inverseCumulativeProbability(1 - pValue));

        // Now check to see if the actual count from this peak is enough to still pass
        long[] mergedProbeReads = getReadsFromDataStoreCollection(allCandidateProbes[p], selectedChIPStores,
                medianInterStrandDistance);
        mergedProbeReads = deduplicateReads(mergedProbeReads, redundantThreshold);
        SequenceRead.sort(mergedProbeReads);
        int thisProbeOverlapCount = 0;
        for (int i = 0; i < mergedProbeReads.length; i++) {
            if (SequenceRead.overlaps(mergedProbeReads[i], allCandidateProbes[p].packedPosition())) {
                ++thisProbeOverlapCount;
            }
        }
        // System.err.println("Read count in ChIP is " + thisProbeOverlapCount);
        if (thisProbeOverlapCount > localPoisson.inverseCumulativeProbability(1 - pValue)) {
            finalValidatedProbes.add(allCandidateProbes[p]);
            // System.err.println("Adding probe to final set");
        }
    }
    // System.err.println("From " + allCandidateProbes.length + " candidates " + finalValidatedProbes.size() + " peaks were validated");

    ProbeSet finalSet = new ProbeSet(getDescription(), finalValidatedProbes.toArray(new Probe[0]));
    generationComplete(finalSet);
}
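The statistical core of this peak caller is the cutoff calculation: model the background read count per window as Poisson with the expected count as its mean, then take inverseCumulativeProbability(1 - pValue) as the smallest read count that is significantly enriched. A self-contained sketch of that step with made-up numbers (it also ignores the read-length adjustment that getExpectedCountPerWindow applies in the listing above):

import org.apache.commons.math3.distribution.PoissonDistribution;

public class EnrichmentCutoffDemo {
    public static void main(String[] args) {
        // Illustrative numbers, not taken from any real dataset
        double nonRedundantReads = 2_000_000;
        double genomeLength = 3_000_000_000d;
        double windowSize = 300;
        double pValue = 1e-5;

        // Expected reads falling in one window under a uniform background
        double expectedPerWindow = nonRedundantReads * windowSize / genomeLength;

        // Smallest count k with P(X <= k) >= 1 - pValue; counts above k have
        // probability < pValue under the background model
        PoissonDistribution background = new PoissonDistribution(expectedPerWindow);
        int readCountCutoff = background.inverseCumulativeProbability(1 - pValue);

        System.out.println("Expected per window = " + expectedPerWindow
                + ", cutoff = " + readCountCutoff);
    }
}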