List of usage examples for org.apache.commons.math3.stat.descriptive.moment Mean getN
public long getN()
From source file:com.cloudera.oryx.rdf.common.rule.CategoricalDecision.java
private static List<Decision> categoricalDecisionsForNumericTarget(int featureNumber, ExampleSet examples, int suggestedMaxSplitCandidates) { // PLANET paper claims this is optimal: int categoryCount = examples.getCategoryCount(featureNumber); Mean[] averageTargetForCategory = new Mean[categoryCount]; for (Example example : examples) { CategoricalFeature feature = (CategoricalFeature) example.getFeature(featureNumber); if (feature == null) { continue; }/*from ww w .j a v a 2 s . c o m*/ int category = feature.getValueID(); Mean categoryAverage = averageTargetForCategory[category]; if (categoryAverage == null) { categoryAverage = new Mean(); averageTargetForCategory[category] = categoryAverage; } categoryAverage.increment(((NumericFeature) example.getTarget()).getValue()); } int maxCategory = -1; int maxCount = -1; for (int i = 0; i < averageTargetForCategory.length; i++) { Mean average = averageTargetForCategory[i]; if (average != null && average.getN() > maxCount) { maxCount = (int) averageTargetForCategory[i].getN(); maxCategory = i; } } Preconditions.checkArgument(maxCategory >= 0); List<Pair<Double, Integer>> byScore = Lists.newArrayListWithCapacity(averageTargetForCategory.length); for (int featureCategory = 0; featureCategory < averageTargetForCategory.length; featureCategory++) { StorelessUnivariateStatistic mean = averageTargetForCategory[featureCategory]; if (mean != null) { byScore.add(new Pair<Double, Integer>(mean.getResult(), featureCategory)); } } return sortAndGetDecisionsOverSubset(featureNumber, categoryCount, byScore, maxCategory, suggestedMaxSplitCandidates); }
From source file:com.cloudera.oryx.app.speed.rdf.RDFSpeedModelManager.java
/**
 * Translates newly arrived examples into per-tree-node model updates.
 * For classification models the update carries target-category counts;
 * for regression models it carries the mean target and its count.
 *
 * @param newData newly arrived (key, value) input lines
 * @return JSON update strings of the form [treeID, nodeID, payload...]
 */
@Override
public Iterable<String> buildUpdates(JavaPairRDD<String, String> newData) {
    // Without a model there is nothing to attribute updates to.
    if (model == null) {
        return Collections.emptyList();
    }
    JavaRDD<Example> examples = newData.values().map(MLFunctions.PARSE_FN)
            .map(new ToExampleFn(inputSchema, model.getEncodings()));
    DecisionForest forest = model.getForest();
    // Route each example's target feature to the (tree, leaf-node) it lands in.
    JavaPairRDD<Pair<Integer, String>, Iterable<Feature>> targetsByTreeAndID =
            examples.flatMapToPair(new ToTreeNodeFeatureFn(forest)).groupByKey();
    List<String> updates = new ArrayList<>();
    if (inputSchema.isClassification()) {
        List<Tuple2<Pair<Integer, String>, Map<Integer, Long>>> counts =
                targetsByTreeAndID.mapValues(new TargetCategoryCountFn()).collect();
        for (Tuple2<Pair<Integer, String>, Map<Integer, Long>> entry : counts) {
            Integer treeID = entry._1().getFirst();
            String nodeID = entry._1().getSecond();
            updates.add(TextUtils.joinJSON(Arrays.asList(treeID, nodeID, entry._2())));
        }
    } else {
        List<Tuple2<Pair<Integer, String>, Mean>> meanTargets =
                targetsByTreeAndID.mapValues(new MeanNewTargetFn()).collect();
        for (Tuple2<Pair<Integer, String>, Mean> entry : meanTargets) {
            Integer treeID = entry._1().getFirst();
            String nodeID = entry._1().getSecond();
            Mean mean = entry._2();
            // Send both the mean and N so the serving layer can merge incrementally.
            updates.add(TextUtils.joinJSON(Arrays.asList(treeID, nodeID, mean.getResult(), mean.getN())));
        }
    }
    return updates;
}
From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java
/** * Tests the calculations needed for mean/mean and mean/sigma scale linking. * Item parameters and true values obtained from example 2 from the STUIRT * program by Michael Kolen and colleagues. Note that the original example * used teh PARSCALE version of item parameters. These were converted to * ICL type parameters by subtracting a step from the item difficulty. * *//*from w w w . j a va2 s .com*/ @Test public void mixedFormatDescriptiveStatisticsTestFormX() { System.out.println("Mixed format descriptive statistics test Form X"); ItemResponseModel[] irm = new ItemResponseModel[17]; irm[0] = new Irm3PL(0.751335, -0.897391, 0.244001, 1.7); irm[1] = new Irm3PL(0.955947, -0.811477, 0.242883, 1.7); irm[2] = new Irm3PL(0.497206, -0.858681, 0.260893, 1.7); irm[3] = new Irm3PL(0.724000, -0.123911, 0.243497, 1.7); irm[4] = new Irm3PL(0.865200, 0.205889, 0.319135, 1.7); irm[5] = new Irm3PL(0.658129, 0.555228, 0.277826, 1.7); irm[6] = new Irm3PL(1.082118, 0.950549, 0.157979, 1.7); irm[7] = new Irm3PL(0.988294, 1.377501, 0.084828, 1.7); irm[8] = new Irm3PL(1.248923, 1.614355, 0.181874, 1.7); irm[9] = new Irm3PL(1.116682, 2.353932, 0.246856, 1.7); irm[10] = new Irm3PL(0.438171, 3.217965, 0.309243, 1.7); irm[11] = new Irm3PL(1.082206, 4.441864, 0.192339, 1.7); double[] step1 = { 0, -1.09327, 1.101266 }; irm[12] = new IrmGPCM(0.269994, step1, 1.7); double[] step2 = { 0, 1.526148, 1.739176 }; irm[13] = new IrmGPCM(0.972506, step2, 1.7); double[] step3 = { 0, 1.362356, 5.566958 }; irm[14] = new IrmGPCM(0.378812, step3, 1.7); double[] step4 = { 0, 1.486566, -0.071229, 1.614823 }; irm[15] = new IrmGPCM(0.537706, step4, 1.7); double[] step5 = { 0, 1.425413, 2.630705, 3.242696 }; irm[16] = new IrmGPCM(0.554506, step5, 1.7); Mean discriminationX = new Mean(); Mean difficultyX = new Mean(); Mean difficultyMeanX = new Mean(); StandardDeviation difficultySdX = new StandardDeviation(false);//Do not correct for bias. Use N in the denominator, not N-1. 
for (int j = 0; j < 17; j++) { irm[j].incrementMeanMean(discriminationX, difficultyX); irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX); } // System.out.println("Mean/mean descriptive statistics for Form X"); // System.out.println("a-mean: " + discriminationX.getResult()); // System.out.println("b-mean: " + difficultyX.getResult()); assertEquals("Mean/mean check: discrimination mean", 0.7719, Precision.round(discriminationX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: difficulty mean", 1.3566, Precision.round(difficultyX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3); // System.out.println(); // System.out.println("Mean/sigma descriptive statistics for Form X"); // System.out.println("b-mean: " + difficultyMeanX.getResult()); // System.out.println("b-sd: " + difficultySdX.getResult()); // System.out.println("b-N: " + difficultyMeanX.getN() + ", " + difficultySdX.getN()); assertEquals("Mean/sigma check: difficulty mean", 1.3566, Precision.round(difficultyMeanX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: difficulty sd", 1.6372, Precision.round(difficultySdX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(), 1e-3); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3); }
From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java
/** * Tests the calculations needed for mean/mean and mean/sigma scale linking. * Item parameters and true values obtained from example 2 from the STUIRT * program by Michael Kolen and colleagues. Note that the original example * used teh PARSCALE version of item parameters. These were converted to * ICL type parameters by subtracting a step from the item difficulty. * */// w ww . ja v a 2s .c o m @Test public void mixedFormatDescriptiveStatisticsTestFormY() { System.out.println("Mixed format descriptive statistics test Form Y"); ItemResponseModel[] irm = new ItemResponseModel[17]; irm[0] = new Irm3PL(0.887276, -1.334798, 0.134406, 1.7); irm[1] = new Irm3PL(1.184412, -1.129004, 0.237765, 1.7); irm[2] = new Irm3PL(0.609412, -1.464546, 0.15139, 1.7); irm[3] = new Irm3PL(0.923812, -0.576435, 0.240097, 1.7); irm[4] = new Irm3PL(0.822776, -0.476357, 0.192369, 1.7); irm[5] = new Irm3PL(0.707818, -0.235189, 0.189557, 1.7); irm[6] = new Irm3PL(1.306976, 0.242986, 0.165553, 1.7); irm[7] = new Irm3PL(1.295471, 0.598029, 0.090557, 1.7); irm[8] = new Irm3PL(1.366841, 0.923206, 0.172993, 1.7); irm[9] = new Irm3PL(1.389624, 1.380666, 0.238008, 1.7); irm[10] = new Irm3PL(0.293806, 2.02807, 0.203448, 1.7); irm[11] = new Irm3PL(0.885347, 3.152928, 0.195473, 1.7); double[] step1 = { 0, -1.387347, 0.399117 }; irm[12] = new IrmGPCM(0.346324, step1, 1.7); double[] step2 = { 0, 0.756514, 0.956014 }; irm[13] = new IrmGPCM(1.252012, step2, 1.7); double[] step3 = { 0, 0.975303, 4.676299 }; irm[14] = new IrmGPCM(0.392282, step3, 1.7); double[] step4 = { 0, 0.643405, -0.418869, 0.804394 }; irm[15] = new IrmGPCM(0.660841, step4, 1.7); double[] step5 = { 0, 0.641293, 1.750488, 2.53802 }; irm[16] = new IrmGPCM(0.669612, step5, 1.7); Mean discriminationX = new Mean(); Mean difficultyX = new Mean(); Mean difficultyMeanX = new Mean(); StandardDeviation difficultySdX = new StandardDeviation(false);//Do not correct for bias. Use N in the denominator, not N-1. 
for (int j = 0; j < 17; j++) { irm[j].incrementMeanMean(discriminationX, difficultyX); irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX); } // System.out.println("Mean/mean descriptive statistics for Form X"); // System.out.println("a-mean: " + discriminationX.getResult()); // System.out.println("b-mean: " + difficultyX.getResult()); assertEquals("Mean/mean check: discrimination mean", 0.8820, Precision.round(discriminationX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: difficulty mean", 0.6435, Precision.round(difficultyX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3); // System.out.println(); // System.out.println("Mean/sigma descriptive statistics for Form X"); // System.out.println("b-mean: " + difficultyMeanX.getResult()); // System.out.println("b-sd: " + difficultySdX.getResult()); // System.out.println("b-N: " + difficultyMeanX.getN() + ", " + difficultySdX.getN()); assertEquals("Mean/sigma check: difficulty mean", 0.6435, Precision.round(difficultyMeanX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: difficulty sd", 1.4527, Precision.round(difficultySdX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(), 1e-3); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3); }
From source file:net.myrrix.online.eval.PrecisionRecallEvaluator.java
@Override public EvaluationResult evaluate(final MyrrixRecommender recommender, final RescorerProvider provider, final Multimap<Long, RecommendedItem> testData) throws TasteException { final Mean precision = new Mean(); final Mean recall = new Mean(); final Mean ndcg = new Mean(); final Mean meanAveragePrecision = new Mean(); Processor<Long> processor = new Processor<Long>() { @Override/* ww w. j a v a2s .co m*/ public void process(Long userID, long count) { Collection<RecommendedItem> values = testData.get(userID); int numValues = values.size(); if (numValues == 0) { return; } IDRescorer rescorer = provider == null ? null : provider.getRecommendRescorer(new long[] { userID }, recommender); List<RecommendedItem> recs; try { recs = recommender.recommend(userID, numValues, rescorer); } catch (NoSuchUserException nsue) { // Probably OK, just removed all data for this user from training log.warn("User only in test data: {}", userID); return; } catch (TasteException te) { log.warn("Unexpected exception", te); return; } int numRecs = recs.size(); Collection<Long> valueIDs = Sets.newHashSet(); for (RecommendedItem rec : values) { valueIDs.add(rec.getItemID()); } int intersectionSize = 0; double score = 0.0; double maxScore = 0.0; Mean precisionAtI = new Mean(); double averagePrecision = 0.0; for (int i = 0; i < numRecs; i++) { RecommendedItem rec = recs.get(i); double value = LN2 / Math.log(2.0 + i); // 1 / log_2(1 + (i+1)) if (valueIDs.contains(rec.getItemID())) { intersectionSize++; score += value; precisionAtI.increment(1.0); averagePrecision += precisionAtI.getResult(); } else { precisionAtI.increment(0.0); } maxScore += value; } averagePrecision /= numValues; synchronized (precision) { precision.increment(numRecs == 0 ? 0.0 : (double) intersectionSize / numRecs); recall.increment((double) intersectionSize / numValues); ndcg.increment(maxScore == 0.0 ? 
0.0 : score / maxScore); meanAveragePrecision.increment(averagePrecision); if (count % 10000 == 0) { log.info(new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(), meanAveragePrecision.getResult()).toString()); } } } }; Paralleler<Long> paralleler = new Paralleler<Long>(testData.keySet().iterator(), processor, "PREval"); try { if (Boolean.parseBoolean(System.getProperty("eval.parallel", "true"))) { paralleler.runInParallel(); } else { paralleler.runInSerial(); } } catch (InterruptedException ie) { throw new TasteException(ie); } catch (ExecutionException e) { throw new TasteException(e.getCause()); } EvaluationResult result; if (precision.getN() > 0) { result = new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(), meanAveragePrecision.getResult()); } else { result = null; } log.info(String.valueOf(result)); return result; }
From source file:org.briljantframework.data.vector.DoubleVector.java
/**
 * Computes the arithmetic mean of the non-NA values in this vector.
 *
 * @return the mean of non-NA elements, or {@code Na.DOUBLE} if every element is NA
 */
@Override
public double mean() {
    Mean accumulator = new Mean();
    int n = size();
    for (int index = 0; index < n; index++) {
        double value = getAsDoubleAt(index);
        // Skip missing values entirely; they contribute neither to the sum nor the count.
        if (Is.NA(value)) {
            continue;
        }
        accumulator.increment(value);
    }
    // An all-NA (or empty) vector has no defined mean.
    return accumulator.getN() == 0 ? Na.DOUBLE : accumulator.getResult();
}