List of usage examples for the getResult() method of org.apache.commons.math3.stat.descriptive.StorelessUnivariateStatistic
/**
 * Returns the current value of this statistic.
 *
 * @return the value of the statistic
 *         NOTE(review): the result when no values have been added is not visible here
 *         (presumably NaN) -- confirm against the Commons Math Javadoc.
 */
double getResult();
From source file:com.cloudera.oryx.rdf.common.rule.NumericPrediction.java
/**
 * Builds a prediction from the mean of the numeric target values of the given examples.
 *
 * @param examples examples whose targets are cast to {@link NumericFeature}; must yield at least one
 * @return prediction carrying the mean target value (narrowed to {@code float}) and the number of
 *  examples that contributed to it (narrowed to {@code int})
 * @throws IllegalStateException if {@code examples} yields no elements
 */
static NumericPrediction buildNumericPrediction(Iterable<Example> examples) {
  StorelessUnivariateStatistic targetMean = new Mean();
  for (Example each : examples) {
    NumericFeature target = (NumericFeature) each.getTarget();
    targetMean.increment(target.getValue());
  }
  long count = targetMean.getN();
  // A mean over zero examples is meaningless, so refuse to build a prediction from it
  Preconditions.checkState(count > 0);
  float prediction = (float) targetMean.getResult();
  return new NumericPrediction(prediction, (int) count);
}
From source file:com.cloudera.oryx.rdf.common.eval.Evaluation.java
/** * @param testSet test set to evaluate on * @return average absolute value of numeric target value in the test set */// w ww . j av a2s . c o m private static double meanAbs(Iterable<Example> testSet) { StorelessUnivariateStatistic mean = new Mean(); for (Example test : testSet) { NumericFeature actual = (NumericFeature) test.getTarget(); mean.increment(FastMath.abs(actual.getValue())); } return mean.getResult(); }
From source file:com.cloudera.oryx.rdf.common.rule.CategoricalDecision.java
private static List<Decision> categoricalDecisionsForNumericTarget(int featureNumber, ExampleSet examples, int suggestedMaxSplitCandidates) { // PLANET paper claims this is optimal: int categoryCount = examples.getCategoryCount(featureNumber); Mean[] averageTargetForCategory = new Mean[categoryCount]; for (Example example : examples) { CategoricalFeature feature = (CategoricalFeature) example.getFeature(featureNumber); if (feature == null) { continue; }// w w w.j a v a2 s . co m int category = feature.getValueID(); Mean categoryAverage = averageTargetForCategory[category]; if (categoryAverage == null) { categoryAverage = new Mean(); averageTargetForCategory[category] = categoryAverage; } categoryAverage.increment(((NumericFeature) example.getTarget()).getValue()); } int maxCategory = -1; int maxCount = -1; for (int i = 0; i < averageTargetForCategory.length; i++) { Mean average = averageTargetForCategory[i]; if (average != null && average.getN() > maxCount) { maxCount = (int) averageTargetForCategory[i].getN(); maxCategory = i; } } Preconditions.checkArgument(maxCategory >= 0); List<Pair<Double, Integer>> byScore = Lists.newArrayListWithCapacity(averageTargetForCategory.length); for (int featureCategory = 0; featureCategory < averageTargetForCategory.length; featureCategory++) { StorelessUnivariateStatistic mean = averageTargetForCategory[featureCategory]; if (mean != null) { byScore.add(new Pair<Double, Integer>(mean.getResult(), featureCategory)); } } return sortAndGetDecisionsOverSubset(featureNumber, categoryCount, byScore, maxCategory, suggestedMaxSplitCandidates); }
From source file:com.cloudera.oryx.rdf.common.eval.Evaluation.java
/**
 * Computes the root mean squared error of a classifier's numeric predictions.
 *
 * @param classifier a {@link com.cloudera.oryx.rdf.common.tree.TreeBasedClassifier}
 *  (e.g. {@link com.cloudera.oryx.rdf.common.tree.DecisionForest}) trained on data with a numeric target
 * @param testSet test set to evaluate on
 * @return root mean squared error over the test set: the square root of the mean squared difference
 *  between actual and predicted numeric target value
 */
public static double rootMeanSquaredError(TreeBasedClassifier classifier, Iterable<Example> testSet) {
  StorelessUnivariateStatistic meanSquaredError = new Mean();
  for (Example example : testSet) {
    NumericFeature target = (NumericFeature) example.getTarget();
    NumericPrediction predicted = (NumericPrediction) classifier.classify(example);
    double error = target.getValue() - predicted.getPrediction();
    meanSquaredError.increment(error * error);
  }
  double mse = meanSquaredError.getResult();
  return FastMath.sqrt(mse);
}
From source file:gedi.util.datastructure.array.functions.StorelessUnivariateStatisticAdapter.java
/**
 * Evaluates the wrapped statistic over all elements of the given array.
 * The computation runs on a copy obtained from {@code commons.copy()}, so this call does not
 * feed values into the {@code commons} instance itself.
 *
 * @param value array whose elements are fed to the statistic in index order
 * @return the statistic's result after all elements have been added
 */
@Override
public double applyAsDouble(NumericArray value) {
  StorelessUnivariateStatistic statistic = commons.copy();
  int index = 0;
  while (index < value.length()) {
    statistic.increment(value.getDouble(index));
    index++;
  }
  return statistic.getResult();
}
From source file:com.cloudera.oryx.rdf.common.rule.NumericDecision.java
static List<Decision> numericDecisionsFromExamples(int featureNumber, Iterable<Example> examples, int suggestedMaxSplitCandidates) { Multiset<Float> sortedFeatureValueCounts = TreeMultiset.create(); StorelessUnivariateStatistic mean = new Mean(); int numExamples = 0; for (Example example : examples) { NumericFeature feature = (NumericFeature) example.getFeature(featureNumber); if (feature == null) { continue; }//from ww w . j a v a2s . c o m numExamples++; float value = feature.getValue(); sortedFeatureValueCounts.add(value, 1); mean.increment(value); } // Make decisions from split points that divide up input into roughly equal amounts of examples List<Decision> decisions = Lists.newArrayListWithExpectedSize(suggestedMaxSplitCandidates); int approxExamplesPerSplit = FastMath.max(1, numExamples / suggestedMaxSplitCandidates); int examplesInSplit = 0; float lastValue = Float.NaN; // This will iterate in order of value by nature of TreeMap for (Multiset.Entry<Float> entry : sortedFeatureValueCounts.entrySet()) { float value = entry.getElement(); if (examplesInSplit >= approxExamplesPerSplit) { decisions.add( new NumericDecision(featureNumber, (value + lastValue) / 2.0f, (float) mean.getResult())); examplesInSplit = 0; } examplesInSplit += entry.getCount(); lastValue = value; } // The vital condition here is that if decision n decides an example is positive, then all subsequent // decisions in the list will also find it positive. So we need to order from highest threshold to lowest Collections.reverse(decisions); return decisions; }
From source file:com.dasasian.chok.testutil.loadtest.LoadTestMasterOperation.java
@Override public void nodeOperationsComplete(MasterContext context, List<OperationResult> nodeResults) throws Exception { try {//from w ww. j a va 2 s .co m final int queryRate = calculateCurrentQueryRate(); LOG.info("collecting results for iteration " + currentIteration + " and query rate " + queryRate + " after " + (System.currentTimeMillis() - currentIterationStartTime) + " ms ..."); List<LoadTestQueryResult> queryResults = new ArrayList<>(); for (OperationResult operationResult : nodeResults) { if (operationResult == null || operationResult.getUnhandledError() != null) { Exception rootException = null; if (operationResult != null) { //rootException = operationResult.getUnhandledError(); } throw new IllegalStateException( "at least one node operation did not completed properly: " + nodeResults, rootException); } LoadTestNodeOperationResult nodeOperationResult = (LoadTestNodeOperationResult) operationResult; queryResults.addAll(nodeOperationResult.getQueryResults()); } LOG.info("Received " + queryResults.size() + " queries, expected " + queryRate * runTime / 1000); File statisticsFile = new File(resultDir, "load-test-log-" + startTime + ".log"); File resultsFile = new File(resultDir, "load-test-results-" + startTime + ".log"); Writer statisticsWriter = new OutputStreamWriter(new FileOutputStream(statisticsFile, true)); Writer resultWriter = new OutputStreamWriter(new FileOutputStream(resultsFile, true)); if (currentIteration == 0) { // print headers statisticsWriter.append("#queryRate \tnode \tstartTime \tendTime \telapseTime \tquery \n"); resultWriter.append( "#requestedQueryRate \tachievedQueryRate \tfiredQueries \tqueryErrors \tavarageQueryDuration \tstandardDeviation \n"); } try { StorelessUnivariateStatistic timeStandardDeviation = new StandardDeviation(); StorelessUnivariateStatistic timeMean = new Mean(); int errors = 0; for (LoadTestQueryResult result : queryResults) { long elapsedTime = result.getEndTime() > 0 ? 
result.getEndTime() - result.getStartTime() : -1; statisticsWriter.write(queryRate + "\t" + result.getNodeId() + "\t" + result.getStartTime() + "\t" + result.getEndTime() + "\t" + elapsedTime + "\t" + result.getQuery() + "\n"); if (elapsedTime != -1) { timeStandardDeviation.increment(elapsedTime); timeMean.increment(elapsedTime); } else { ++errors; } } resultWriter.write(queryRate + "\t" + ((double) queryResults.size() / (runTime / 1000)) + "\t" + queryResults.size() + "\t" + errors + "\t" + (int) timeMean.getResult() + "\t" + (int) timeStandardDeviation.getResult() + "\n"); } catch (IOException e) { throw new IllegalStateException("Failed to write statistics data.", e); } try { LOG.info("results written to " + resultsFile.getAbsolutePath()); LOG.info("statistics written to " + statisticsFile.getAbsolutePath()); statisticsWriter.close(); resultWriter.close(); } catch (IOException e) { LOG.warn("Failed to close statistics file."); } if (queryRate + step <= endRate) { currentIteration++; LOG.info("triggering next iteration " + currentIteration); context.getMasterQueue().add(this); } else { LOG.info("finish load test in iteration " + currentIteration + " after " + (System.currentTimeMillis() - startTime) + " ms"); context.getProtocol().removeFlag(getName()); } } catch (Exception e) { context.getProtocol().removeFlag(getName()); } }
From source file:com.cloudera.oryx.als.computation.LoadRunner.java
public void runLoad() throws InterruptedException { final StorelessUnivariateStatistic recommendedBecause = new Mean(); final StorelessUnivariateStatistic setPreference = new Mean(); final StorelessUnivariateStatistic removePreference = new Mean(); final StorelessUnivariateStatistic ingest = new Mean(); final StorelessUnivariateStatistic refresh = new Mean(); final StorelessUnivariateStatistic estimatePreference = new Mean(); final StorelessUnivariateStatistic mostSimilarItems = new Mean(); final StorelessUnivariateStatistic similarityToItem = new Mean(); final StorelessUnivariateStatistic mostPopularItems = new Mean(); final StorelessUnivariateStatistic recommendToMany = new Mean(); final StorelessUnivariateStatistic recommend = new Mean(); final RandomGenerator random = RandomManager.getRandom(); int numCores = Runtime.getRuntime().availableProcessors(); final int stepsPerWorker = steps / numCores; Collection<Callable<Object>> workers = Lists.newArrayListWithCapacity(numCores); for (int i = 0; i < numCores; i++) { workers.add(new Callable<Object>() { @Override//ww w. jav a 2 s. 
c om public Void call() throws Exception { for (int i = 0; i < stepsPerWorker; i++) { double r; String userID; String itemID; String itemID2; float value; synchronized (random) { r = random.nextDouble(); userID = uniqueUserIDs[random.nextInt(uniqueUserIDs.length)]; itemID = uniqueItemIDs[random.nextInt(uniqueItemIDs.length)]; itemID2 = uniqueItemIDs[random.nextInt(uniqueItemIDs.length)]; value = random.nextInt(10); } long stepStart = System.currentTimeMillis(); if (r < 0.05) { client.recommendedBecause(userID, itemID, 10); recommendedBecause.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.07) { client.setPreference(userID, itemID); setPreference.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.08) { client.setPreference(userID, itemID, value); setPreference.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.11) { client.removePreference(userID, itemID); removePreference.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.12) { Reader reader = new StringReader( DelimitedDataUtils.encode(userID, itemID, Float.toString(value)) + '\n'); client.ingest(reader); ingest.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.13) { client.refresh(); refresh.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.14) { client.similarityToItem(itemID, itemID2); similarityToItem.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.15) { client.mostPopularItems(10); mostPopularItems.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.19) { client.estimatePreference(userID, itemID); estimatePreference.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.20) { client.estimateForAnonymous(itemID, new String[] { itemID2 }); estimatePreference.increment(System.currentTimeMillis() - stepStart); } else if (r < 0.25) { client.mostSimilarItems(new String[] { itemID }, 10); mostSimilarItems.increment(System.currentTimeMillis() - stepStart); } else 
if (r < 0.30) { client.recommendToMany(new String[] { userID, userID }, 10, true, null); recommendToMany.increment(System.currentTimeMillis() - stepStart); } else { client.recommend(userID, 10); recommend.increment(System.currentTimeMillis() - stepStart); } } return null; } }); } log.info("Starting load test..."); long start = System.currentTimeMillis(); ExecutorService executor = Executors.newFixedThreadPool(numCores); Iterable<Future<Object>> futures; try { futures = executor.invokeAll(workers); } finally { ExecutorUtils.shutdownNowAndAwait(executor); } long end = System.currentTimeMillis(); ExecutorUtils.checkExceptions(futures); log.info("Finished {} steps in {}ms", steps, end - start); log.info("recommendedBecause: {}", recommendedBecause.getResult()); log.info("setPreference: {}", setPreference.getResult()); log.info("removePreference: {}", removePreference.getResult()); log.info("ingest: {}", ingest.getResult()); log.info("refresh: {}", refresh.getResult()); log.info("estimatePreference: {}", estimatePreference.getResult()); log.info("mostSimilarItems: {}", mostSimilarItems.getResult()); log.info("similarityToItem: {}", similarityToItem.getResult()); log.info("mostPopularItems: {}", mostPopularItems.getResult()); log.info("recommendToMany: {}", recommendToMany.getResult()); log.info("recommend: {}", recommend.getResult()); }
From source file:org.apereo.portal.events.aggr.JpaStatisticalSummaryTest.java
public void testStorelessUnivariateStatistic(StorelessUnivariateStatistic sus, double expected) throws Exception { assertEquals(expected, sus.getResult(), 0.1); final ObjectMapper mapper = new ObjectMapper(); mapper.findAndRegisterModules();/* www . ja v a2s . c o m*/ //Configure Jackson to just use fields mapper.setVisibility(PropertyAccessor.FIELD, Visibility.ANY); mapper.setVisibility(PropertyAccessor.GETTER, Visibility.NONE); mapper.setVisibility(PropertyAccessor.IS_GETTER, Visibility.NONE); mapper.setVisibility(PropertyAccessor.SETTER, Visibility.NONE); mapper.setVisibility(PropertyAccessor.CREATOR, Visibility.NONE); mapper.addMixInAnnotations(Object.class, IgnoreTypeMixIn.class); final FilterProvider filters = new SimpleFilterProvider().addFilter("storedDataFilter", SimpleBeanPropertyFilter.serializeAllExcept("storedData")); final ObjectWriter ssWriter = mapper.writer(filters); final ObjectReader ssReader = mapper.reader(sus.getClass()); final String susString = ssWriter.writeValueAsString(sus); System.out.println(susString); final StorelessUnivariateStatistic newSus = ssReader.readValue(susString); assertEquals(expected, newSus.getResult(), 0.1); }
From source file:org.drugis.mtc.summary.SummaryUtil.java
public static double evaluate(StorelessUnivariateStatistic stat, List<Double> vals) { stat.clear();//from w ww . j a v a 2 s .co m for (Double d : vals) { stat.increment(d); } return stat.getResult(); }