List of usage examples for org.apache.commons.math3.stat.descriptive.moment.Mean#increment
@Override public void increment(final double d)
Note that when the Mean(FirstMoment) constructor is used to create a Mean, this method does nothing. In that case, the FirstMoment should be incremented directly.
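A minimal, self-contained sketch of both behaviors described above. The class name MeanIncrementDemo and the sample values are illustrative only, not taken from any of the source files below:

import org.apache.commons.math3.stat.descriptive.moment.FirstMoment;
import org.apache.commons.math3.stat.descriptive.moment.Mean;

public class MeanIncrementDemo {
    public static void main(String[] args) {
        // Standalone Mean: increment(d) updates the internal first moment.
        Mean mean = new Mean();
        for (double x : new double[] { 1.0, 2.0, 3.0, 4.0 }) {
            mean.increment(x);
        }
        System.out.println(mean.getResult()); // 2.5
        System.out.println(mean.getN());      // 4

        // Mean wrapping an externally managed FirstMoment: calling
        // increment(d) on the Mean is a no-op by design, so the shared
        // moment must be updated directly instead.
        FirstMoment moment = new FirstMoment();
        Mean view = new Mean(moment);
        view.increment(100.0);                // ignored; see the note above
        moment.increment(1.0);
        moment.increment(3.0);
        System.out.println(view.getResult()); // 2.0
    }
}

The no-op exists so that several statistics can share one externally updated moment without double-counting values. All of the examples below use the plain Mean() constructor, for which increment(d) behaves normally.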
From source file: com.cloudera.oryx.rdf.computation.RDFDistributedGenerationRunner.java

private static void updateMeanImportances(Map<String, Mean> columnNameToMeanImportance, Model model) {
    for (MiningField field : model.getMiningSchema().getMiningFields()) {
        Double importance = field.getImportance();
        if (importance != null) {
            String fieldName = field.getName().getValue();
            Mean mean = columnNameToMeanImportance.get(fieldName);
            if (mean == null) {
                mean = new Mean();
                columnNameToMeanImportance.put(fieldName, mean);
            }
            mean.increment(importance);
        }
    }
}
From source file: com.cloudera.oryx.rdf.common.rule.CategoricalDecision.java

private static List<Decision> categoricalDecisionsForNumericTarget(int featureNumber, ExampleSet examples,
                                                                   int suggestedMaxSplitCandidates) {
    // PLANET paper claims this is optimal:
    int categoryCount = examples.getCategoryCount(featureNumber);
    Mean[] averageTargetForCategory = new Mean[categoryCount];
    for (Example example : examples) {
        CategoricalFeature feature = (CategoricalFeature) example.getFeature(featureNumber);
        if (feature == null) {
            continue;
        }
        int category = feature.getValueID();
        Mean categoryAverage = averageTargetForCategory[category];
        if (categoryAverage == null) {
            categoryAverage = new Mean();
            averageTargetForCategory[category] = categoryAverage;
        }
        categoryAverage.increment(((NumericFeature) example.getTarget()).getValue());
    }
    int maxCategory = -1;
    int maxCount = -1;
    for (int i = 0; i < averageTargetForCategory.length; i++) {
        Mean average = averageTargetForCategory[i];
        if (average != null && average.getN() > maxCount) {
            maxCount = (int) averageTargetForCategory[i].getN();
            maxCategory = i;
        }
    }
    Preconditions.checkArgument(maxCategory >= 0);
    List<Pair<Double, Integer>> byScore = Lists.newArrayListWithCapacity(averageTargetForCategory.length);
    for (int featureCategory = 0; featureCategory < averageTargetForCategory.length; featureCategory++) {
        StorelessUnivariateStatistic mean = averageTargetForCategory[featureCategory];
        if (mean != null) {
            byScore.add(new Pair<Double, Integer>(mean.getResult(), featureCategory));
        }
    }
    return sortAndGetDecisionsOverSubset(featureNumber, categoryCount, byScore, maxCategory,
                                         suggestedMaxSplitCandidates);
}
From source file: com.sciaps.utils.Util.java

public static Spectrum createAverage(Collection<? extends Spectrum> shots, double sampleRate) {
    Min minWL = new Min();
    Max maxWL = new Max();
    for (Spectrum shot : shots) {
        minWL.increment(shot.getValidRange().getMinimumDouble());
        maxWL.increment(shot.getValidRange().getMaximumDouble());
    }
    double range = maxWL.getResult() - minWL.getResult();
    int numSamples = (int) Math.floor(range * sampleRate);
    double[][] data = new double[2][numSamples];
    Mean avgy = new Mean();
    for (int i = 0; i < numSamples; i++) {
        double x = minWL.getResult() + i * (1 / sampleRate);
        avgy.clear();
        for (Spectrum shot : shots) {
            if (shot.getValidRange().containsDouble(x)) {
                UnivariateFunction iv = shot.getIntensityFunction();
                double y = iv.value(x);
                avgy.increment(y);
            }
        }
        data[0][i] = x;
        data[1][i] = avgy.getResult();
    }
    RawDataSpectrum newSpectrum = new RawDataSpectrum(data);
    return newSpectrum;
}
From source file: eu.crisis_economics.abm.algorithms.series.TestAbstractSeries.java

/**
 * Assert that the long term mean of values drawn from a {@link RandomSeries} object
 * is as expected.
 *
 * @param series (<code>S</code>) <br>
 *        The {@link RandomSeries} object to test.
 * @param numberOfSamples <br>
 *        The number of samples to draw from <code>S</code>.
 * @param expectedLongTermMean <br>
 *        The expected long term mean of the series.
 */
protected void assertLongTermMean(RandomSeries series, final int numberOfSamples,
                                  final double expectedLongTermMean) {
    final Mean mean = new Mean();
    for (int i = 0; i < numberOfSamples; ++i) {
        mean.increment(series.next());
    }
    Assert.assertEquals(mean.getResult(), expectedLongTermMean, 1.e-2);
}
From source file: com.cloudera.oryx.als.computation.iterate.row.RowStep.java

@Override
protected MRPipeline createPipeline() throws IOException {
    IterationState iterationState = getIterationState();
    String iterationKey = iterationState.getIterationKey();
    boolean x = iterationState.isComputingX();
    int lastIteration = iterationState.getIteration() - 1;
    Store store = Store.get();
    JobStepConfig config = getConfig();
    String instanceDir = config.getInstanceDir();
    int generationID = config.getGenerationID();
    if (store.exists(Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "X/", false)) {
        // Actually, looks like whole computation of X/Y finished -- just proceed
        return null;
    }
    // Take the opportunity to clean out iteration before last, if computing X
    if (x) {
        String lastLastIterationKey = Namespaces.getIterationsPrefix(instanceDir, generationID)
            + (lastIteration - 1) + '/';
        if (store.exists(lastLastIterationKey, false)) {
            log.info("Deleting old iteration data from {}", lastLastIterationKey);
            store.recursiveDelete(lastLastIterationKey);
        }
    }
    String yKey;
    if (x) {
        yKey = Namespaces.getIterationsPrefix(instanceDir, generationID) + lastIteration + "/Y/";
    } else {
        yKey = iterationKey + "X/";
    }
    String xKey = iterationKey + (x ? "X/" : "Y/");
    String tempKey = Namespaces.getTempPrefix(instanceDir, generationID);
    String rKey = tempKey + (x ? "userVectors/" : "itemVectors/");
    if (!validOutputPath(xKey)) {
        return null;
    }
    MRPipeline p = createBasicPipeline(RowReduceFn.class);
    Configuration conf = p.getConfiguration();
    conf.set(Y_KEY_KEY, yKey);
    String popularKey = tempKey + (x ? "popularItemsByUserPartition/" : "popularUsersByItemPartition/");
    conf.set(POPULAR_KEY, popularKey);
    String testPrefix = Namespaces.getInstanceGenerationPrefix(instanceDir, generationID) + "test/";
    conf.set(MAP_KEY, testPrefix);
    YState yState = new YState(ALSTypes.DENSE_ROW_MATRIX); // Shared Y-Matrix state
    GroupingOptions opts = groupingOptions();
    PCollection<MatrixRow> matrix = PTables.asPTable(p.read(input(rKey, ALSTypes.SPARSE_ROW_MATRIX)))
        .groupByKey(opts)
        .parallelDo("rowReduce", new RowReduceFn(yState), ALSTypes.DENSE_ROW_MATRIX)
        .write(output(xKey));
    if (!x) {
        matrix.parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
            .parallelDo("convergenceSample", new ConvergenceSampleFn(yState), Avros.strings())
            .write(compressedTextOutput(p.getConfiguration(), iterationKey + "Yconvergence"));
    }
    if (x && ConfigUtils.getDefaultConfig().getDouble("model.test-set-fraction") > 0.0
          && store.exists(testPrefix, false)) {
        PCollection<Double> aps = matrix
            .parallelDo("asPair", MatrixRow.AS_PAIR, Avros.tableOf(Avros.longs(), ALSTypes.FLOAT_ARRAY))
            .parallelDo("computeAP", new ComputeUserAPFn(yState), Avros.doubles());
        Mean meanAveragePrecision = new Mean();
        for (double ap : aps.materialize()) {
            meanAveragePrecision.increment(ap);
        }
        log.info("Mean average precision: {}", meanAveragePrecision.getResult());
        File tempMAPFile = File.createTempFile("MAP", ".txt");
        tempMAPFile.deleteOnExit();
        Files.write(Double.toString(meanAveragePrecision.getResult()), tempMAPFile, StandardCharsets.UTF_8);
        store.upload(iterationKey + "MAP", tempMAPFile, false);
        IOUtils.delete(tempMAPFile);
    }
    return p;
}
From source file: com.itemanalysis.psychometrics.polycor.AbstractPolyserialCorrelation.java

public void summarize(double[] x, int[] y) {
    if (x.length != y.length) {
        throw new IllegalArgumentException("X and Y are of different lengths.");
    }
    N = (double) x.length;
    Mean meanX = new Mean();
    StandardDeviation sdX = new StandardDeviation();
    PearsonCorrelation rxy = new PearsonCorrelation();
    Frequency table = new Frequency();
    for (int i = 0; i < N; i++) {
        meanX.increment(x[i]);
        sdX.increment(x[i]);
        rxy.increment(x[i], (double) y[i]);
        table.addValue(y[i]);
    }
    // compute thresholds
    int nrow = table.getUniqueCount();
    double[] freqDataY = new double[nrow];
    double ntotal = table.getSumFreq();
    for (int i = 0; i < (nrow - 1); i++) {
        freqDataY[i] = table.getCumFreq(i + 1);
        thresholds[i] = norm.inverseCumulativeProbability(freqDataY[i] / ntotal);
    }
    thresholds[nrow - 1] = 10; // set last threshold to a large number less than infinity
}
From source file: com.cloudera.oryx.als.computation.iterate.row.ComputeUserAPFn.java

@Override
public void process(Pair<Long, float[]> input, Emitter<Double> emitter) {
    LongSet ids = testData.get(input.first());
    if (ids == null || ids.isEmpty()) {
        return;
    }
    float[] userVector = input.second();
    LongObjectMap<float[]> Y = yState.getY();
    long[] itemIDs = ids.toArray();
    double[] scores = new double[itemIDs.length];
    for (int i = 0; i < itemIDs.length; i++) {
        long itemID = itemIDs[i];
        float[] itemVector = Y.get(itemID);
        if (itemVector == null) {
            continue;
        }
        scores[i] = SimpleVectorMath.dot(userVector, itemVector);
    }
    int[] rank = new int[itemIDs.length];
    for (LongObjectMap.MapEntry<float[]> entry : Y.entrySet()) {
        double score = SimpleVectorMath.dot(userVector, entry.getValue());
        for (int i = 0; i < itemIDs.length; i++) {
            if (score > scores[i]) {
                rank[i]++;
            }
        }
    }
    Arrays.sort(rank);
    Mean precision = new Mean();
    double totalPrecisionTimesRelevance = 0.0;
    for (int i = 0; i < rank.length; i++) {
        int relevantRetrieved = i + 1;
        int precisionAt = rank[i] + 1;
        precision.increment((double) relevantRetrieved / precisionAt);
        totalPrecisionTimesRelevance += precision.getResult();
    }
    double averagePrecision = totalPrecisionTimesRelevance / rank.length;
    log.info("Average precision: {}", averagePrecision);
    emitter.emit(averagePrecision);
}
From source file: com.cloudera.oryx.als.computation.local.ComputeMAP.java

@Override
public Object call() throws IOException {
    LongObjectMap<LongSet> testData = new LongObjectMap<>();
    File[] files = testDir.listFiles(IOUtils.NOT_HIDDEN);
    if (files != null) {
        for (File file : files) {
            for (CharSequence line : new FileLineIterable(file)) {
                String[] columns = DelimitedDataUtils.decode(line);
                long userID = StringLongMapping.toLong(columns[0]);
                long itemID = StringLongMapping.toLong(columns[1]);
                LongSet itemIDs = testData.get(userID);
                if (itemIDs == null) {
                    itemIDs = new LongSet();
                    testData.put(userID, itemIDs);
                }
                itemIDs.add(itemID);
            }
        }
    }
    Mean meanAveragePrecision = new Mean();
    LongPrimitiveIterator it = X.keySetIterator();
    while (it.hasNext()) {
        long userID = it.nextLong();
        float[] userVector = X.get(userID);
        LongSet ids = testData.get(userID);
        if (ids == null || ids.isEmpty()) {
            continue;
        }
        long[] itemIDs = ids.toArray();
        double[] scores = new double[itemIDs.length];
        for (int i = 0; i < itemIDs.length; i++) {
            long itemID = itemIDs[i];
            float[] itemVector = Y.get(itemID);
            if (itemVector == null) {
                continue;
            }
            scores[i] = SimpleVectorMath.dot(userVector, itemVector);
        }
        int[] rank = new int[itemIDs.length];
        for (LongObjectMap.MapEntry<float[]> entry : Y.entrySet()) {
            double score = SimpleVectorMath.dot(userVector, entry.getValue());
            for (int i = 0; i < itemIDs.length; i++) {
                if (score > scores[i]) {
                    rank[i]++;
                }
            }
        }
        Arrays.sort(rank);
        Mean precision = new Mean();
        double totalPrecisionTimesRelevance = 0.0;
        for (int i = 0; i < rank.length; i++) {
            int relevantRetrieved = i + 1;
            int precisionAt = rank[i] + 1;
            precision.increment((double) relevantRetrieved / precisionAt);
            totalPrecisionTimesRelevance += precision.getResult();
        }
        double averagePrecision = totalPrecisionTimesRelevance / rank.length;
        meanAveragePrecision.increment(averagePrecision);
    }
    log.info("Mean average precision: {}", meanAveragePrecision.getResult());
    return null;
}
From source file: com.itemanalysis.psychometrics.irt.equating.RobustZEquatingTest.java

private void testA() throws IllegalArgumentException {
    double[] aDiff = new double[nA];
    za = new RobustZ[nA];
    for (int i = 0; i < nA; i++) {
        aDiff[i] = Math.log(aX[i]) - Math.log(aY[i]);
    }
    double median = percentile.evaluate(aDiff, 50);
    double q3 = percentile.evaluate(aDiff, 75);
    double q1 = percentile.evaluate(aDiff, 25);
    double iqr = q3 - q1;
    Mean mean = new Mean();
    for (int i = 0; i < nA; i++) {
        za[i] = new RobustZ(aDiff[i], median, iqr);
        if (!za[i].significant(significanceLevel)) {
            mean.increment(aDiff[i]);
        }
    }
    slope = Math.exp(mean.getResult());
}
From source file: com.itemanalysis.psychometrics.irt.equating.RobustZEquatingTest.java

private void testB() {
    double[] bDiff = new double[nB];
    zb = new RobustZ[nB];
    for (int i = 0; i < nB; i++) {
        bDiff[i] = bY[i] - slope * bX[i];
    }
    double median = percentile.evaluate(bDiff, 50);
    double q3 = percentile.evaluate(bDiff, 75);
    double q1 = percentile.evaluate(bDiff, 25);
    double iqr = q3 - q1;
    Mean mean = new Mean();
    for (int i = 0; i < nB; i++) {
        zb[i] = new RobustZ(bDiff[i], median, iqr);
        if (!zb[i].significant(significanceLevel)) {
            mean.increment(bDiff[i]);
        }
    }
    intercept = mean.getResult();
}