List of usage examples for the sample method of org.apache.commons.math3.distribution.BetaDistribution
public double[] sample(int sampleSize)
From source file: edu.byu.nlp.crowdsourcing.AnnotatorAccuracySetting.java
public void generateConfusionMatrices(RandomGenerator rnd, boolean varyAnnotatorRates, int numLabels, String filename) {//w ww . j a va 2s . co m if (confusionMatrices == null) { switch (this) { // read annotator confusion matrices from file case FILE: try { List<SimulatedAnnotator> annotators = SimulatedAnnotators .deserialize(Files2.toString(filename, Charsets.UTF_8)); annotatorRates = SimulatedAnnotators.annotationRatesOf(annotators); confusionMatrices = SimulatedAnnotators.confusionsOf(annotators); } catch (IOException e) { throw new IllegalArgumentException("could not parse annotator file: " + filename); } // ensure that the simulated annotators we are reading in have the same number of classes as the dataset we are labeling Preconditions.checkState(confusionMatrices[0].length == numLabels, "mismatch between the number of label classes " + "in the simulated annotator file " + confusionMatrices[0].length + " and the number in the dataset " + numLabels); break; // annotators are drawn from independent dirichlets case INDEPENDENT: annotatorRates = uniformAnnotatorRates(accuracies.length); confusionMatrices = new double[accuracies.length][numLabels][numLabels]; // a matrix where all rows are sampled from a dirichlet for (int a = 0; a < accuracies.length; a++) { for (int i = 0; i < numLabels; i++) { confusionMatrices[a][i] = DirichletDistribution.sampleSymmetric(symmetricDirichletParam, numLabels, rnd); } } break; // annotator accuracies drawn from Beta(shape1,shape2) parameters MLE-fit to CFGroups data case CFBETA: annotatorRates = uniformAnnotatorRates(accuracies.length); BetaDistribution accGen = new BetaDistribution(rnd, 3.6, 5.1); accuracies = accGen.sample(accuracies.length); // sample accuracies logger.info("sampled " + accuracies.length + " simulated annotator accuracies from a Beta(3.6,5.1). 
min=" + DoubleArrays.min(accuracies) + " max=" + DoubleArrays.max(accuracies) + " mean=" + DoubleArrays.mean(accuracies)); confusionMatrices = confusionMatricesFromAccuracyAndDirichlet(accuracies, numLabels, symmetricDirichletParam, rnd); break; // predetermined annotator accuracies default: annotatorRates = uniformAnnotatorRates(accuracies.length); confusionMatrices = confusionMatricesFromAccuracyAndDirichlet(accuracies, numLabels, symmetricDirichletParam, rnd); break; } if (varyAnnotatorRates && this != FILE) { throw new IllegalStateException( "Varying annotator rates are ONLY implemented for FILE annotators. Not " + this); } // force uniform annotator rates if (!varyAnnotatorRates) { annotatorRates = uniformAnnotatorRates(confusionMatrices.length); } } }