Example usage for org.apache.commons.math3.distribution.BetaDistribution.sample(int)

Introduction

On this page you can find an example usage of the sample(int) method of org.apache.commons.math3.distribution.BetaDistribution.

Prototype

public double[] sample(int sampleSize)

Source Link

Document

The default implementation generates the sample by calling the no-argument sample() method in a loop, once per requested value.

Usage

From source file: edu.byu.nlp.crowdsourcing.AnnotatorAccuracySetting.java

/**
 * Lazily populates {@code confusionMatrices} (and the matching {@code annotatorRates})
 * for this accuracy setting. If the matrices have already been generated, this is a no-op.
 *
 * <ul>
 *   <li>{@code FILE}: annotators are deserialized from {@code filename}; their rates and
 *       confusion matrices are taken from the file.</li>
 *   <li>{@code INDEPENDENT}: every row of every matrix is sampled from a symmetric
 *       Dirichlet.</li>
 *   <li>{@code CFBETA}: {@code accuracies} is overwritten with samples from a Beta(3.6, 5.1),
 *       then matrices are derived from those accuracies.</li>
 *   <li>any other setting: matrices are derived from the predetermined {@code accuracies}.</li>
 * </ul>
 *
 * @param rnd source of randomness used for all sampling
 * @param varyAnnotatorRates whether to keep the per-annotator rates read from the file;
 *        only supported for {@code FILE}. When {@code false}, uniform rates are forced.
 * @param numLabels number of label classes in the dataset being labeled
 * @param filename location of the serialized annotators; only read for {@code FILE}
 * @throws IllegalArgumentException if the annotator file cannot be read ({@code FILE} only);
 *         the underlying {@link IOException} is attached as the cause
 * @throws IllegalStateException if {@code varyAnnotatorRates} is requested for a non-FILE
 *         setting (presumably also raised by {@code Preconditions.checkState} when the
 *         file's class count differs from {@code numLabels} -- confirm against the
 *         Preconditions implementation in use)
 */
public void generateConfusionMatrices(RandomGenerator rnd, boolean varyAnnotatorRates, int numLabels,
        String filename) {

    // matrices are generated at most once
    if (confusionMatrices != null) {
        return;
    }

    // Validate before touching any state: failing after the switch would leave
    // confusionMatrices populated, so a retried call would silently skip this check.
    if (varyAnnotatorRates && this != FILE) {
        throw new IllegalStateException(
                "Varying annotator rates are ONLY implemented for FILE annotators. Not " + this);
    }

    switch (this) {

    // read annotator confusion matrices from file
    case FILE:
        try {
            List<SimulatedAnnotator> annotators = SimulatedAnnotators
                    .deserialize(Files2.toString(filename, Charsets.UTF_8));
            annotatorRates = SimulatedAnnotators.annotationRatesOf(annotators);
            confusionMatrices = SimulatedAnnotators.confusionsOf(annotators);
        } catch (IOException e) {
            // keep the original I/O failure as the cause so it is not lost
            throw new IllegalArgumentException("could not parse annotator file: " + filename, e);
        }
        // ensure that the simulated annotators we are reading in have the same number
        // of classes as the dataset we are labeling
        Preconditions.checkState(confusionMatrices[0].length == numLabels,
                "mismatch between the number of label classes in the simulated annotator file "
                        + confusionMatrices[0].length + " and the number in the dataset " + numLabels);
        break;

    // annotators are drawn from independent dirichlets
    case INDEPENDENT:
        annotatorRates = uniformAnnotatorRates(accuracies.length);
        confusionMatrices = new double[accuracies.length][numLabels][numLabels];
        // a matrix where all rows are sampled from a dirichlet
        for (int a = 0; a < accuracies.length; a++) {
            for (int i = 0; i < numLabels; i++) {
                confusionMatrices[a][i] = DirichletDistribution.sampleSymmetric(symmetricDirichletParam,
                        numLabels, rnd);
            }
        }
        break;

    // annotator accuracies drawn from Beta(shape1,shape2) parameters MLE-fit to CFGroups data
    case CFBETA:
        annotatorRates = uniformAnnotatorRates(accuracies.length);
        BetaDistribution accGen = new BetaDistribution(rnd, 3.6, 5.1);
        // replaces the configured accuracies with the same number of sampled ones
        accuracies = accGen.sample(accuracies.length);
        logger.info("sampled " + accuracies.length
                + " simulated annotator accuracies from a Beta(3.6,5.1). min="
                + DoubleArrays.min(accuracies) + " max=" + DoubleArrays.max(accuracies) + " mean="
                + DoubleArrays.mean(accuracies));
        confusionMatrices = confusionMatricesFromAccuracyAndDirichlet(accuracies, numLabels,
                symmetricDirichletParam, rnd);
        break;

    // predetermined annotator accuracies
    default:
        annotatorRates = uniformAnnotatorRates(accuracies.length);
        confusionMatrices = confusionMatricesFromAccuracyAndDirichlet(accuracies, numLabels,
                symmetricDirichletParam, rnd);
        break;
    }

    // force uniform annotator rates
    if (!varyAnnotatorRates) {
        annotatorRates = uniformAnnotatorRates(confusionMatrices.length);
    }
}