List of usage examples for org.apache.commons.math3.stat.StatUtils.populationVariance
public static double populationVariance(final double[] values, final double mean) throws MathIllegalArgumentException
From source file: com.netflix.lipstick.warnings.JobWarnings.java
public List<String> findSkewedReducers(List<ReducerDuration> reducerTimes) { if (!(MIN_REDUCERS_FOR_SKEW < reducerTimes.size())) { return Lists.newLinkedList(); }// www .j a va 2 s . co m int numPotentialOutliers = (int) Math.ceil(reducerTimes.size() / 10.0); int inflection = reducerTimes.size() - numPotentialOutliers; List<ReducerDuration> potentialOutliers = reducerTimes.subList(inflection, reducerTimes.size()); List<ReducerDuration> referenceReducers = reducerTimes.subList(0, inflection); /* List of reducer duration values that we will compare the potential outliers to. */ double[] referenceDurations = new double[referenceReducers.size()]; for (int i = 0; i < referenceReducers.size(); i++) { referenceDurations[i] = referenceReducers.get(i).duration; } double refMean = StatUtils.mean(referenceDurations); double refVariance = StatUtils.populationVariance(referenceDurations, refMean); double refStdDev = Math.sqrt(refVariance); /* If the time to complete the task is more than this far from the mean of all task completion times, we consider it skewed */ double distToMeanThreshold = Math.max((refStdDev * 2), (MIN_STDDEV_DELTA_MINUTES * 60)) + refMean; /* Now collect and return any of the outliers whose distance from the mean is great than the computed threshold. */ List<String> skewedReducerIds = Lists.newArrayList(); for (ReducerDuration r : potentialOutliers) { if ((r.duration - refMean) > distToMeanThreshold) { skewedReducerIds.add(r.reducerTaskId); } } return skewedReducerIds; }