Example usage for org.apache.commons.math3.stat.descriptive.moment Mean getN

List of usage examples for org.apache.commons.math3.stat.descriptive.moment Mean getN

Introduction

On this page you can find example usages of the getN() method of org.apache.commons.math3.stat.descriptive.moment.Mean.

Prototype

public long getN() 

Source Link

Usage

From source file:com.cloudera.oryx.rdf.common.rule.CategoricalDecision.java

/**
 * Builds candidate decisions over a categorical feature when the target is numeric.
 *
 * <p>For every category of the feature, the mean of the numeric target over the examples
 * having that category is accumulated. Examples whose feature value is {@code null} are
 * skipped. The category backed by the most examples is also determined, and both pieces of
 * information are handed to {@code sortAndGetDecisionsOverSubset} to produce the decisions.
 *
 * @param featureNumber index of the categorical feature to split on
 * @param examples examples to compute per-category target means from
 * @param suggestedMaxSplitCandidates passed through to {@code sortAndGetDecisionsOverSubset}
 * @return the decisions produced by {@code sortAndGetDecisionsOverSubset}
 * @throws IllegalArgumentException if no example has a non-null value for the feature
 */
private static List<Decision> categoricalDecisionsForNumericTarget(int featureNumber, ExampleSet examples,
        int suggestedMaxSplitCandidates) {
    // PLANET paper claims this is optimal:
    int categoryCount = examples.getCategoryCount(featureNumber);
    // Entries stay null for categories that never occur in the examples.
    Mean[] averageTargetForCategory = new Mean[categoryCount];
    for (Example example : examples) {
        CategoricalFeature feature = (CategoricalFeature) example.getFeature(featureNumber);
        if (feature == null) {
            continue;
        }
        int category = feature.getValueID();
        Mean categoryAverage = averageTargetForCategory[category];
        if (categoryAverage == null) {
            categoryAverage = new Mean();
            averageTargetForCategory[category] = categoryAverage;
        }
        categoryAverage.increment(((NumericFeature) example.getTarget()).getValue());
    }

    // Find the category with the most examples. The count is tracked as a long because
    // Mean.getN() returns long; narrowing it to int could truncate and select the wrong category.
    int maxCategory = -1;
    long maxCount = -1L;
    for (int i = 0; i < averageTargetForCategory.length; i++) {
        Mean average = averageTargetForCategory[i];
        if (average != null && average.getN() > maxCount) {
            maxCount = average.getN();
            maxCategory = i;
        }
    }
    // Fails when every example was skipped, i.e. no category was ever observed.
    Preconditions.checkArgument(maxCategory >= 0);

    // Pair each observed category with its mean target value.
    List<Pair<Double, Integer>> byScore = Lists.newArrayListWithCapacity(averageTargetForCategory.length);
    for (int featureCategory = 0; featureCategory < averageTargetForCategory.length; featureCategory++) {
        StorelessUnivariateStatistic mean = averageTargetForCategory[featureCategory];
        if (mean != null) {
            byScore.add(new Pair<Double, Integer>(mean.getResult(), featureCategory));
        }
    }
    return sortAndGetDecisionsOverSubset(featureNumber, categoryCount, byScore, maxCategory,
            suggestedMaxSplitCandidates);
}

From source file:com.cloudera.oryx.app.speed.rdf.RDFSpeedModelManager.java

/**
 * Turns newly arrived data into a list of JSON-encoded updates, one per (tree, node) pair
 * that the new examples were assigned to by {@code ToTreeNodeFeatureFn}.
 *
 * <p>For a classification schema each update carries the tree ID, the node ID and the
 * result of {@code TargetCategoryCountFn}; otherwise it carries the tree ID, the node ID,
 * the mean of the new target values and the number of values behind that mean.
 *
 * @param newData new input; only the values of the pairs are used
 * @return the update strings, or an empty collection when no model is set
 */
@Override
public Iterable<String> buildUpdates(JavaPairRDD<String, String> newData) {
    if (model == null) {
        return Collections.emptyList();
    }

    JavaRDD<Example> parsedExamples = newData.values()
            .map(MLFunctions.PARSE_FN)
            .map(new ToExampleFn(inputSchema, model.getEncodings()));

    DecisionForest forest = model.getForest();
    JavaPairRDD<Pair<Integer, String>, Iterable<Feature>> targetsByTreeAndID = parsedExamples
            .flatMapToPair(new ToTreeNodeFeatureFn(forest))
            .groupByKey();

    List<String> result = new ArrayList<>();

    if (!inputSchema.isClassification()) {
        // Regression: emit the mean and count of the new targets per (tree, node).
        List<Tuple2<Pair<Integer, String>, Mean>> meansByKey = targetsByTreeAndID
                .mapValues(new MeanNewTargetFn()).collect();
        for (Tuple2<Pair<Integer, String>, Mean> entry : meansByKey) {
            Pair<Integer, String> treeAndNode = entry._1();
            Integer treeID = treeAndNode.getFirst();
            String nodeID = treeAndNode.getSecond();
            Mean targetMean = entry._2();
            result.add(TextUtils.joinJSON(
                    Arrays.asList(treeID, nodeID, targetMean.getResult(), targetMean.getN())));
        }
    } else {
        // Classification: emit the per-category counts per (tree, node).
        List<Tuple2<Pair<Integer, String>, Map<Integer, Long>>> countsByKey = targetsByTreeAndID
                .mapValues(new TargetCategoryCountFn()).collect();
        for (Tuple2<Pair<Integer, String>, Map<Integer, Long>> entry : countsByKey) {
            Pair<Integer, String> treeAndNode = entry._1();
            Integer treeID = treeAndNode.getFirst();
            String nodeID = treeAndNode.getSecond();
            result.add(TextUtils.joinJSON(Arrays.asList(treeID, nodeID, entry._2())));
        }
    }

    return result;
}

From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java

/**
 * Checks the descriptive statistics needed for mean/mean and mean/sigma scale linking
 * on the Form X items.
 *
 * <p>Item parameters and true values were obtained from example 2 of the STUIRT program
 * by Michael Kolen and colleagues. Note that the original example used the PARSCALE
 * version of the item parameters. These were converted to ICL type parameters by
 * subtracting a step from the item difficulty.
 */
@Test
public void mixedFormatDescriptiveStatisticsTestFormX() {
    System.out.println("Mixed format descriptive statistics test Form X");

    // Twelve 3PL items followed by five GPCM items.
    ItemResponseModel[] irm = {
            new Irm3PL(0.751335, -0.897391, 0.244001, 1.7),
            new Irm3PL(0.955947, -0.811477, 0.242883, 1.7),
            new Irm3PL(0.497206, -0.858681, 0.260893, 1.7),
            new Irm3PL(0.724000, -0.123911, 0.243497, 1.7),
            new Irm3PL(0.865200, 0.205889, 0.319135, 1.7),
            new Irm3PL(0.658129, 0.555228, 0.277826, 1.7),
            new Irm3PL(1.082118, 0.950549, 0.157979, 1.7),
            new Irm3PL(0.988294, 1.377501, 0.084828, 1.7),
            new Irm3PL(1.248923, 1.614355, 0.181874, 1.7),
            new Irm3PL(1.116682, 2.353932, 0.246856, 1.7),
            new Irm3PL(0.438171, 3.217965, 0.309243, 1.7),
            new Irm3PL(1.082206, 4.441864, 0.192339, 1.7),
            new IrmGPCM(0.269994, new double[] { 0, -1.09327, 1.101266 }, 1.7),
            new IrmGPCM(0.972506, new double[] { 0, 1.526148, 1.739176 }, 1.7),
            new IrmGPCM(0.378812, new double[] { 0, 1.362356, 5.566958 }, 1.7),
            new IrmGPCM(0.537706, new double[] { 0, 1.486566, -0.071229, 1.614823 }, 1.7),
            new IrmGPCM(0.554506, new double[] { 0, 1.425413, 2.630705, 3.242696 }, 1.7) };

    // Accumulators for the mean/mean method.
    Mean meanMeanDiscrimination = new Mean();
    Mean meanMeanDifficulty = new Mean();

    // Accumulators for the mean/sigma method.
    Mean meanSigmaDifficulty = new Mean();
    // Do not correct for bias. Use N in the denominator, not N-1.
    StandardDeviation meanSigmaDifficultySd = new StandardDeviation(false);

    for (ItemResponseModel item : irm) {
        item.incrementMeanMean(meanMeanDiscrimination, meanMeanDifficulty);
        item.incrementMeanSigma(meanSigmaDifficulty, meanSigmaDifficultySd);
    }

    // NOTE(review): the expected count of 24 presumably is the 12 binary-item difficulties
    // plus the 12 non-zero step parameters - confirm against the model implementations.
    assertEquals("Mean/mean check: discrimination mean", 0.7719,
            Precision.round(meanMeanDiscrimination.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: difficulty mean", 1.3566,
            Precision.round(meanMeanDifficulty.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24,
            meanMeanDifficulty.getN(), 1e-3);

    assertEquals("Mean/sigma check: difficulty mean", 1.3566,
            Precision.round(meanSigmaDifficulty.getResult(), 4), 1e-5);
    assertEquals("Mean/sigma check: difficulty sd", 1.6372,
            Precision.round(meanSigmaDifficultySd.getResult(), 4), 1e-5);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24,
            meanSigmaDifficulty.getN(), 1e-3);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24,
            meanSigmaDifficultySd.getN(), 1e-3);

}

From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java

/**
 * Checks the descriptive statistics needed for mean/mean and mean/sigma scale linking
 * on the Form Y items.
 *
 * <p>Item parameters and true values were obtained from example 2 of the STUIRT program
 * by Michael Kolen and colleagues. Note that the original example used the PARSCALE
 * version of the item parameters. These were converted to ICL type parameters by
 * subtracting a step from the item difficulty.
 */
@Test
public void mixedFormatDescriptiveStatisticsTestFormY() {
    System.out.println("Mixed format descriptive statistics test Form Y");

    // Twelve 3PL items followed by five GPCM items.
    ItemResponseModel[] irm = {
            new Irm3PL(0.887276, -1.334798, 0.134406, 1.7),
            new Irm3PL(1.184412, -1.129004, 0.237765, 1.7),
            new Irm3PL(0.609412, -1.464546, 0.15139, 1.7),
            new Irm3PL(0.923812, -0.576435, 0.240097, 1.7),
            new Irm3PL(0.822776, -0.476357, 0.192369, 1.7),
            new Irm3PL(0.707818, -0.235189, 0.189557, 1.7),
            new Irm3PL(1.306976, 0.242986, 0.165553, 1.7),
            new Irm3PL(1.295471, 0.598029, 0.090557, 1.7),
            new Irm3PL(1.366841, 0.923206, 0.172993, 1.7),
            new Irm3PL(1.389624, 1.380666, 0.238008, 1.7),
            new Irm3PL(0.293806, 2.02807, 0.203448, 1.7),
            new Irm3PL(0.885347, 3.152928, 0.195473, 1.7),
            new IrmGPCM(0.346324, new double[] { 0, -1.387347, 0.399117 }, 1.7),
            new IrmGPCM(1.252012, new double[] { 0, 0.756514, 0.956014 }, 1.7),
            new IrmGPCM(0.392282, new double[] { 0, 0.975303, 4.676299 }, 1.7),
            new IrmGPCM(0.660841, new double[] { 0, 0.643405, -0.418869, 0.804394 }, 1.7),
            new IrmGPCM(0.669612, new double[] { 0, 0.641293, 1.750488, 2.53802 }, 1.7) };

    // Accumulators for the mean/mean method.
    Mean discriminationY = new Mean();
    Mean difficultyY = new Mean();

    // Accumulators for the mean/sigma method.
    Mean difficultyMeanY = new Mean();
    // Do not correct for bias. Use N in the denominator, not N-1.
    StandardDeviation difficultySdY = new StandardDeviation(false);

    for (ItemResponseModel item : irm) {
        item.incrementMeanMean(discriminationY, difficultyY);
        item.incrementMeanSigma(difficultyMeanY, difficultySdY);
    }

    // NOTE(review): the expected count of 24 presumably is the 12 binary-item difficulties
    // plus the 12 non-zero step parameters - confirm against the model implementations.
    assertEquals("Mean/mean check: discrimination mean", 0.8820,
            Precision.round(discriminationY.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: difficulty mean", 0.6435,
            Precision.round(difficultyY.getResult(), 4), 1e-5);
    assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24,
            difficultyY.getN(), 1e-3);

    assertEquals("Mean/sigma check: difficulty mean", 0.6435,
            Precision.round(difficultyMeanY.getResult(), 4), 1e-5);
    assertEquals("Mean/sigma check: difficulty sd", 1.4527,
            Precision.round(difficultySdY.getResult(), 4), 1e-5);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24,
            difficultyMeanY.getN(), 1e-3);
    assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24,
            difficultySdY.getN(), 1e-3);

}

From source file:net.myrrix.online.eval.PrecisionRecallEvaluator.java

/**
 * Evaluates a recommender against held-out test data and reports precision, recall,
 * nDCG and mean average precision, each averaged over the evaluated users.
 *
 * <p>For every user in {@code testData}, as many recommendations are requested as the user
 * has test items. Users with no test items, and users for which the recommender throws,
 * are skipped and do not contribute to the averages.
 *
 * @param recommender recommender to evaluate
 * @param provider source of a rescorer applied to recommendations; may be {@code null},
 *  in which case no rescorer is used
 * @param testData held-out items per user ID
 * @return the averaged statistics, or {@code null} if no user could be evaluated
 * @throws TasteException if the evaluation is interrupted or a worker fails
 */
@Override
public EvaluationResult evaluate(final MyrrixRecommender recommender, final RescorerProvider provider,
        final Multimap<Long, RecommendedItem> testData) throws TasteException {

    final Mean precision = new Mean();
    final Mean recall = new Mean();
    final Mean ndcg = new Mean();
    final Mean meanAveragePrecision = new Mean();

    Processor<Long> processor = new Processor<Long>() {
        @Override
        public void process(Long userID, long count) {

            Collection<RecommendedItem> values = testData.get(userID);
            int numValues = values.size();
            if (numValues == 0) {
                return;
            }

            IDRescorer rescorer = provider == null ? null
                    : provider.getRecommendRescorer(new long[] { userID }, recommender);

            List<RecommendedItem> recs;
            try {
                recs = recommender.recommend(userID, numValues, rescorer);
            } catch (NoSuchUserException nsue) {
                // Probably OK, just removed all data for this user from training
                log.warn("User only in test data: {}", userID);
                return;
            } catch (TasteException te) {
                log.warn("Unexpected exception", te);
                return;
            }
            int numRecs = recs.size();

            // IDs of the items this user actually has in the test data.
            Collection<Long> valueIDs = Sets.newHashSet();
            for (RecommendedItem rec : values) {
                valueIDs.add(rec.getItemID());
            }

            int intersectionSize = 0;
            double score = 0.0;
            double maxScore = 0.0;
            // Running precision over the first i+1 recommendations (1.0 for a hit, 0.0 for a miss).
            Mean precisionAtI = new Mean();
            double averagePrecision = 0.0;

            for (int i = 0; i < numRecs; i++) {
                RecommendedItem rec = recs.get(i);
                double value = LN2 / Math.log(2.0 + i); // 1 / log_2(1 + (i+1))
                if (valueIDs.contains(rec.getItemID())) {
                    intersectionSize++;
                    score += value;
                    precisionAtI.increment(1.0);
                    // Average precision only sums precision at the positions of hits.
                    averagePrecision += precisionAtI.getResult();
                } else {
                    precisionAtI.increment(0.0);
                }
                maxScore += value;
            }
            averagePrecision /= numValues;

            // All four accumulators are updated under the same lock on 'precision'.
            synchronized (precision) {
                precision.increment(numRecs == 0 ? 0.0 : (double) intersectionSize / numRecs);
                recall.increment((double) intersectionSize / numValues);
                ndcg.increment(maxScore == 0.0 ? 0.0 : score / maxScore);
                meanAveragePrecision.increment(averagePrecision);
                if (count % 10000 == 0) {
                    log.info(new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(),
                            meanAveragePrecision.getResult()).toString());
                }
            }
        }
    };

    Paralleler<Long> paralleler = new Paralleler<Long>(testData.keySet().iterator(), processor, "PREval");
    try {
        if (Boolean.parseBoolean(System.getProperty("eval.parallel", "true"))) {
            paralleler.runInParallel();
        } else {
            paralleler.runInSerial();
        }
    } catch (InterruptedException ie) {
        // Restore the interrupt status so code further up the stack can still observe it;
        // wrapping the exception alone would silently clear the interruption.
        Thread.currentThread().interrupt();
        throw new TasteException(ie);
    } catch (ExecutionException e) {
        throw new TasteException(e.getCause());
    }

    EvaluationResult result;
    if (precision.getN() > 0) {
        result = new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(),
                meanAveragePrecision.getResult());
    } else {
        // No user contributed a measurement, so there is nothing to report.
        result = null;
    }
    log.info(String.valueOf(result));
    return result;
}

From source file:org.briljantframework.data.vector.DoubleVector.java

/**
 * Computes the arithmetic mean of the non-NA values in this vector.
 *
 * @return the mean of all values that are not NA, or {@code Na.DOUBLE} if there is no
 *  such value
 */
@Override
public double mean() {
    Mean accumulator = new Mean();
    int length = size();
    for (int index = 0; index < length; index++) {
        double value = getAsDoubleAt(index);
        if (Is.NA(value)) {
            // NA entries do not take part in the mean.
            continue;
        }
        accumulator.increment(value);
    }
    if (accumulator.getN() > 0) {
        return accumulator.getResult();
    }
    return Na.DOUBLE;
}