List of usage examples for org.apache.commons.math3.stat.descriptive.moment Mean getN
public long getN()
From source file:com.cloudera.oryx.rdf.common.rule.CategoricalDecision.java
private static List<Decision> categoricalDecisionsForNumericTarget(int featureNumber, ExampleSet examples, int suggestedMaxSplitCandidates) { // PLANET paper claims this is optimal: int categoryCount = examples.getCategoryCount(featureNumber); Mean[] averageTargetForCategory = new Mean[categoryCount]; for (Example example : examples) { CategoricalFeature feature = (CategoricalFeature) example.getFeature(featureNumber); if (feature == null) { continue; }/*from ww w .j a v a 2 s . c o m*/ int category = feature.getValueID(); Mean categoryAverage = averageTargetForCategory[category]; if (categoryAverage == null) { categoryAverage = new Mean(); averageTargetForCategory[category] = categoryAverage; } categoryAverage.increment(((NumericFeature) example.getTarget()).getValue()); } int maxCategory = -1; int maxCount = -1; for (int i = 0; i < averageTargetForCategory.length; i++) { Mean average = averageTargetForCategory[i]; if (average != null && average.getN() > maxCount) { maxCount = (int) averageTargetForCategory[i].getN(); maxCategory = i; } } Preconditions.checkArgument(maxCategory >= 0); List<Pair<Double, Integer>> byScore = Lists.newArrayListWithCapacity(averageTargetForCategory.length); for (int featureCategory = 0; featureCategory < averageTargetForCategory.length; featureCategory++) { StorelessUnivariateStatistic mean = averageTargetForCategory[featureCategory]; if (mean != null) { byScore.add(new Pair<Double, Integer>(mean.getResult(), featureCategory)); } } return sortAndGetDecisionsOverSubset(featureNumber, categoryCount, byScore, maxCategory, suggestedMaxSplitCandidates); }
From source file:com.cloudera.oryx.app.speed.rdf.RDFSpeedModelManager.java
/**
 * Translates newly arrived examples into per-tree-node model updates.
 * For classification models the update carries target-category counts;
 * for regression models it carries the mean target and its count.
 *
 * @param newData newly arrived (key, value) input lines
 * @return JSON update strings of the form [treeID, nodeID, payload...]
 */
@Override
public Iterable<String> buildUpdates(JavaPairRDD<String, String> newData) {
    // Without a model there is nothing to attribute updates to.
    if (model == null) {
        return Collections.emptyList();
    }
    JavaRDD<Example> examples = newData.values().map(MLFunctions.PARSE_FN)
            .map(new ToExampleFn(inputSchema, model.getEncodings()));
    DecisionForest forest = model.getForest();
    // Route each example's target feature to the (tree, leaf-node) it lands in.
    JavaPairRDD<Pair<Integer, String>, Iterable<Feature>> targetsByTreeAndID =
            examples.flatMapToPair(new ToTreeNodeFeatureFn(forest)).groupByKey();
    List<String> updates = new ArrayList<>();
    if (inputSchema.isClassification()) {
        List<Tuple2<Pair<Integer, String>, Map<Integer, Long>>> counts =
                targetsByTreeAndID.mapValues(new TargetCategoryCountFn()).collect();
        for (Tuple2<Pair<Integer, String>, Map<Integer, Long>> entry : counts) {
            Integer treeID = entry._1().getFirst();
            String nodeID = entry._1().getSecond();
            updates.add(TextUtils.joinJSON(Arrays.asList(treeID, nodeID, entry._2())));
        }
    } else {
        List<Tuple2<Pair<Integer, String>, Mean>> meanTargets =
                targetsByTreeAndID.mapValues(new MeanNewTargetFn()).collect();
        for (Tuple2<Pair<Integer, String>, Mean> entry : meanTargets) {
            Integer treeID = entry._1().getFirst();
            String nodeID = entry._1().getSecond();
            Mean mean = entry._2();
            // Send both the mean and N so the serving layer can merge incrementally.
            updates.add(TextUtils.joinJSON(Arrays.asList(treeID, nodeID, mean.getResult(), mean.getN())));
        }
    }
    return updates;
}
From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java
/** * Tests the calculations needed for mean/mean and mean/sigma scale linking. * Item parameters and true values obtained from example 2 from the STUIRT * program by Michael Kolen and colleagues. Note that the original example * used teh PARSCALE version of item parameters. These were converted to * ICL type parameters by subtracting a step from the item difficulty. * *//*from w w w . j a va2 s .com*/ @Test public void mixedFormatDescriptiveStatisticsTestFormX() { System.out.println("Mixed format descriptive statistics test Form X"); ItemResponseModel[] irm = new ItemResponseModel[17]; irm[0] = new Irm3PL(0.751335, -0.897391, 0.244001, 1.7); irm[1] = new Irm3PL(0.955947, -0.811477, 0.242883, 1.7); irm[2] = new Irm3PL(0.497206, -0.858681, 0.260893, 1.7); irm[3] = new Irm3PL(0.724000, -0.123911, 0.243497, 1.7); irm[4] = new Irm3PL(0.865200, 0.205889, 0.319135, 1.7); irm[5] = new Irm3PL(0.658129, 0.555228, 0.277826, 1.7); irm[6] = new Irm3PL(1.082118, 0.950549, 0.157979, 1.7); irm[7] = new Irm3PL(0.988294, 1.377501, 0.084828, 1.7); irm[8] = new Irm3PL(1.248923, 1.614355, 0.181874, 1.7); irm[9] = new Irm3PL(1.116682, 2.353932, 0.246856, 1.7); irm[10] = new Irm3PL(0.438171, 3.217965, 0.309243, 1.7); irm[11] = new Irm3PL(1.082206, 4.441864, 0.192339, 1.7); double[] step1 = { 0, -1.09327, 1.101266 }; irm[12] = new IrmGPCM(0.269994, step1, 1.7); double[] step2 = { 0, 1.526148, 1.739176 }; irm[13] = new IrmGPCM(0.972506, step2, 1.7); double[] step3 = { 0, 1.362356, 5.566958 }; irm[14] = new IrmGPCM(0.378812, step3, 1.7); double[] step4 = { 0, 1.486566, -0.071229, 1.614823 }; irm[15] = new IrmGPCM(0.537706, step4, 1.7); double[] step5 = { 0, 1.425413, 2.630705, 3.242696 }; irm[16] = new IrmGPCM(0.554506, step5, 1.7); Mean discriminationX = new Mean(); Mean difficultyX = new Mean(); Mean difficultyMeanX = new Mean(); StandardDeviation difficultySdX = new StandardDeviation(false);//Do not correct for bias. Use N in the denominator, not N-1. 
for (int j = 0; j < 17; j++) { irm[j].incrementMeanMean(discriminationX, difficultyX); irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX); } // System.out.println("Mean/mean descriptive statistics for Form X"); // System.out.println("a-mean: " + discriminationX.getResult()); // System.out.println("b-mean: " + difficultyX.getResult()); assertEquals("Mean/mean check: discrimination mean", 0.7719, Precision.round(discriminationX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: difficulty mean", 1.3566, Precision.round(difficultyX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3); // System.out.println(); // System.out.println("Mean/sigma descriptive statistics for Form X"); // System.out.println("b-mean: " + difficultyMeanX.getResult()); // System.out.println("b-sd: " + difficultySdX.getResult()); // System.out.println("b-N: " + difficultyMeanX.getN() + ", " + difficultySdX.getN()); assertEquals("Mean/sigma check: difficulty mean", 1.3566, Precision.round(difficultyMeanX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: difficulty sd", 1.6372, Precision.round(difficultySdX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(), 1e-3); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3); }
From source file:com.itemanalysis.psychometrics.irt.equating.MeanSigmaMethodTest.java
/** * Tests the calculations needed for mean/mean and mean/sigma scale linking. * Item parameters and true values obtained from example 2 from the STUIRT * program by Michael Kolen and colleagues. Note that the original example * used teh PARSCALE version of item parameters. These were converted to * ICL type parameters by subtracting a step from the item difficulty. * */// w ww . ja v a 2s .c o m @Test public void mixedFormatDescriptiveStatisticsTestFormY() { System.out.println("Mixed format descriptive statistics test Form Y"); ItemResponseModel[] irm = new ItemResponseModel[17]; irm[0] = new Irm3PL(0.887276, -1.334798, 0.134406, 1.7); irm[1] = new Irm3PL(1.184412, -1.129004, 0.237765, 1.7); irm[2] = new Irm3PL(0.609412, -1.464546, 0.15139, 1.7); irm[3] = new Irm3PL(0.923812, -0.576435, 0.240097, 1.7); irm[4] = new Irm3PL(0.822776, -0.476357, 0.192369, 1.7); irm[5] = new Irm3PL(0.707818, -0.235189, 0.189557, 1.7); irm[6] = new Irm3PL(1.306976, 0.242986, 0.165553, 1.7); irm[7] = new Irm3PL(1.295471, 0.598029, 0.090557, 1.7); irm[8] = new Irm3PL(1.366841, 0.923206, 0.172993, 1.7); irm[9] = new Irm3PL(1.389624, 1.380666, 0.238008, 1.7); irm[10] = new Irm3PL(0.293806, 2.02807, 0.203448, 1.7); irm[11] = new Irm3PL(0.885347, 3.152928, 0.195473, 1.7); double[] step1 = { 0, -1.387347, 0.399117 }; irm[12] = new IrmGPCM(0.346324, step1, 1.7); double[] step2 = { 0, 0.756514, 0.956014 }; irm[13] = new IrmGPCM(1.252012, step2, 1.7); double[] step3 = { 0, 0.975303, 4.676299 }; irm[14] = new IrmGPCM(0.392282, step3, 1.7); double[] step4 = { 0, 0.643405, -0.418869, 0.804394 }; irm[15] = new IrmGPCM(0.660841, step4, 1.7); double[] step5 = { 0, 0.641293, 1.750488, 2.53802 }; irm[16] = new IrmGPCM(0.669612, step5, 1.7); Mean discriminationX = new Mean(); Mean difficultyX = new Mean(); Mean difficultyMeanX = new Mean(); StandardDeviation difficultySdX = new StandardDeviation(false);//Do not correct for bias. Use N in the denominator, not N-1. 
for (int j = 0; j < 17; j++) { irm[j].incrementMeanMean(discriminationX, difficultyX); irm[j].incrementMeanSigma(difficultyMeanX, difficultySdX); } // System.out.println("Mean/mean descriptive statistics for Form X"); // System.out.println("a-mean: " + discriminationX.getResult()); // System.out.println("b-mean: " + difficultyX.getResult()); assertEquals("Mean/mean check: discrimination mean", 0.8820, Precision.round(discriminationX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: difficulty mean", 0.6435, Precision.round(difficultyX.getResult(), 4), 1e-5); assertEquals("Mean/mean check: Number of difficulties (including steps) ", 24, difficultyX.getN(), 1e-3); // System.out.println(); // System.out.println("Mean/sigma descriptive statistics for Form X"); // System.out.println("b-mean: " + difficultyMeanX.getResult()); // System.out.println("b-sd: " + difficultySdX.getResult()); // System.out.println("b-N: " + difficultyMeanX.getN() + ", " + difficultySdX.getN()); assertEquals("Mean/sigma check: difficulty mean", 0.6435, Precision.round(difficultyMeanX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: difficulty sd", 1.4527, Precision.round(difficultySdX.getResult(), 4), 1e-5); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultyMeanX.getN(), 1e-3); assertEquals("Mean/sigma check: Number of difficulties (including steps) ", 24, difficultySdX.getN(), 1e-3); }
From source file:net.myrrix.online.eval.PrecisionRecallEvaluator.java
@Override public EvaluationResult evaluate(final MyrrixRecommender recommender, final RescorerProvider provider, final Multimap<Long, RecommendedItem> testData) throws TasteException { final Mean precision = new Mean(); final Mean recall = new Mean(); final Mean ndcg = new Mean(); final Mean meanAveragePrecision = new Mean(); Processor<Long> processor = new Processor<Long>() { @Override/* ww w. j a v a2s .co m*/ public void process(Long userID, long count) { Collection<RecommendedItem> values = testData.get(userID); int numValues = values.size(); if (numValues == 0) { return; } IDRescorer rescorer = provider == null ? null : provider.getRecommendRescorer(new long[] { userID }, recommender); List<RecommendedItem> recs; try { recs = recommender.recommend(userID, numValues, rescorer); } catch (NoSuchUserException nsue) { // Probably OK, just removed all data for this user from training log.warn("User only in test data: {}", userID); return; } catch (TasteException te) { log.warn("Unexpected exception", te); return; } int numRecs = recs.size(); Collection<Long> valueIDs = Sets.newHashSet(); for (RecommendedItem rec : values) { valueIDs.add(rec.getItemID()); } int intersectionSize = 0; double score = 0.0; double maxScore = 0.0; Mean precisionAtI = new Mean(); double averagePrecision = 0.0; for (int i = 0; i < numRecs; i++) { RecommendedItem rec = recs.get(i); double value = LN2 / Math.log(2.0 + i); // 1 / log_2(1 + (i+1)) if (valueIDs.contains(rec.getItemID())) { intersectionSize++; score += value; precisionAtI.increment(1.0); averagePrecision += precisionAtI.getResult(); } else { precisionAtI.increment(0.0); } maxScore += value; } averagePrecision /= numValues; synchronized (precision) { precision.increment(numRecs == 0 ? 0.0 : (double) intersectionSize / numRecs); recall.increment((double) intersectionSize / numValues); ndcg.increment(maxScore == 0.0 ? 
0.0 : score / maxScore); meanAveragePrecision.increment(averagePrecision); if (count % 10000 == 0) { log.info(new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(), meanAveragePrecision.getResult()).toString()); } } } }; Paralleler<Long> paralleler = new Paralleler<Long>(testData.keySet().iterator(), processor, "PREval"); try { if (Boolean.parseBoolean(System.getProperty("eval.parallel", "true"))) { paralleler.runInParallel(); } else { paralleler.runInSerial(); } } catch (InterruptedException ie) { throw new TasteException(ie); } catch (ExecutionException e) { throw new TasteException(e.getCause()); } EvaluationResult result; if (precision.getN() > 0) { result = new IRStatisticsImpl(precision.getResult(), recall.getResult(), ndcg.getResult(), meanAveragePrecision.getResult()); } else { result = null; } log.info(String.valueOf(result)); return result; }
From source file:org.briljantframework.data.vector.DoubleVector.java
/**
 * Computes the arithmetic mean of the non-NA values in this vector.
 *
 * @return the mean of non-NA elements, or {@code Na.DOUBLE} if every element is NA
 */
@Override
public double mean() {
    Mean accumulator = new Mean();
    int n = size();
    for (int index = 0; index < n; index++) {
        double value = getAsDoubleAt(index);
        // Skip missing values entirely; they contribute neither to the sum nor the count.
        if (Is.NA(value)) {
            continue;
        }
        accumulator.increment(value);
    }
    // An all-NA (or empty) vector has no defined mean.
    return accumulator.getN() == 0 ? Na.DOUBLE : accumulator.getResult();
}