List of usage examples for the no-argument constructor of org.apache.commons.math.stat.regression.SimpleRegression
public SimpleRegression()
From source file:ch.usi.inf.lidr.merging.SSL.java
/** * Creates and returns a {@link SimpleRegression} * for a given list of scored documents <code>scoredDocs</code>. * This regression maps unnormalized scores in <code>scoredDocs</code> * to normalized/centralized scores in <code>centrScores</code>. * Documents that appear both in <code>scoredDocs</code> * and <code>centrScores</code> are used as a training for the regression. * According to the original paper, only first 10 * documents are considered for training. * /*from w w w . j av a 2s .c o m*/ * @param scoredDocs The list of scored documents. * * @return The {@link SimpleRegression} with filled-in training data. */ private <T> SimpleRegression getRegression(List<ScoredEntity<T>> scoredDocs) { SimpleRegression regression = new SimpleRegression(); Set<Double> xData = new HashSet<Double>(); for (ScoredEntity<T> scoredDocument : scoredDocs) { Object docId = scoredDocument.getEntity(); double specificScore = scoredDocument.getScore(); if (centrScores.containsKey(docId) && !xData.contains(specificScore)) { regression.addData(specificScore, centrScores.get(docId)); xData.add(specificScore); if (regression.getN() >= 10) { return regression; } } } return regression; }
From source file:com.userweave.module.methoden.iconunderstandability.service.ComputeIconTestStatisticsImpl.java
/** * return regression, if regression can be computed * @return/*from ww w. jav a 2 s.c o m*/ */ private OverallStatistics computeOverallStatistics() { SimpleRegression regression = new SimpleRegression(); DescriptiveStatistics overallStatistics = DescriptiveStatistics.newInstance(); Map<Integer, DescriptiveStatistics> iconCount2Statistics = new HashMap<Integer, DescriptiveStatistics>(); List<Object[]> executionTimesIconCount = testResultDao.findAllValidExecutionTimesAndIconCount(); if (!executionTimesIconCount.isEmpty()) { // check, if there is variation in x (only one x value for all observation yield NaN!) boolean canComputeRegression = false; int iconCountForFirstResult = ((Long) executionTimesIconCount.get(0)[1]).intValue(); for (Object[] executionTimeIconCount : executionTimesIconCount) { int iconCount = ((Long) executionTimeIconCount[1]).intValue(); if (iconCount != iconCountForFirstResult) { canComputeRegression = true; } double executionTime = (Long) executionTimeIconCount[0]; if (isValid(executionTime)) { regression.addData(iconCount, executionTime); overallStatistics.addValue(executionTime); getStatisticsForIconCount(iconCount2Statistics, iconCount).addValue(executionTime); } } if (canComputeRegression) { return new OverallStatistics(regression, overallStatistics.getMean(), iconCount2Statistics); } else { return new OverallStatistics(null, overallStatistics.getMean(), iconCount2Statistics); } } else { return null; } }
From source file:com.griddynamics.jagger.engine.e1.scenario.DefaultWorkloadSuggestionMaker.java
private static Integer findClosestPoint(BigDecimal desiredTps, Map<Integer, Pair<Long, BigDecimal>> stats) { final int MAX_POINTS_FOR_REGRESSION = 10; SortedMap<Long, Integer> map = Maps.newTreeMap(new Comparator<Long>() { @Override/* w w w . j a v a2 s. c o m*/ public int compare(Long first, Long second) { return second.compareTo(first); } }); for (Map.Entry<Integer, Pair<Long, BigDecimal>> entry : stats.entrySet()) { map.put(entry.getValue().getFirst(), entry.getKey()); } if (map.size() < 2) { throw new IllegalArgumentException("Not enough stats to calculate point"); } // <time><number of threads> - sorted by time Iterator<Map.Entry<Long, Integer>> iterator = map.entrySet().iterator(); SimpleRegression regression = new SimpleRegression(); Integer tempIndex; double previousValue = -1.0; double value; double measuredTps; log.debug("Selecting next point for balancing"); int indx = 0; while (iterator.hasNext()) { tempIndex = iterator.next().getValue(); if (previousValue < 0.0) { previousValue = tempIndex.floatValue(); } value = tempIndex.floatValue(); measuredTps = stats.get(tempIndex).getSecond().floatValue(); regression.addData(value, measuredTps); log.debug(String.format(" %7.2f %7.2f", value, measuredTps)); indx++; if (indx > MAX_POINTS_FOR_REGRESSION) { break; } } double intercept = regression.getIntercept(); double slope = regression.getSlope(); double approxPoint; // if no slope => use previous number of threads if (Math.abs(slope) > 1e-12) { approxPoint = (desiredTps.doubleValue() - intercept) / slope; } else { approxPoint = previousValue; } // if approximation point is negative - ignore it if (approxPoint < 0) { approxPoint = previousValue; } log.debug(String.format("Next point %7d (target tps: %7.2f)", (int) Math.round(approxPoint), desiredTps.doubleValue())); return (int) Math.round(approxPoint); }
From source file:guineu.modules.filter.Alignment.RANSAC.AlignmentRansacPlot.java
private List<RTs> smooth(List<RTs> list) { // Add points to the model in between of the real points to smooth the regression model Collections.sort(list, new RTs()); for (int i = 0; i < list.size() - 1; i++) { RTs point1 = list.get(i);/* ww w. j a v a 2s.c o m*/ RTs point2 = list.get(i + 1); if (point1.RT < point2.RT - 2) { SimpleRegression regression = new SimpleRegression(); regression.addData(point1.RT, point1.RT2); regression.addData(point2.RT, point2.RT2); double rt = point1.RT + 1; while (rt < point2.RT) { RTs newPoint = new RTs(rt, regression.predict(rt)); list.add(newPoint); rt++; } } } return list; }
From source file:guineu.modules.filter.Alignment.RANSAC.RANSAC.java
/** * Build the model creating a line with the 2 points * * @param data vector with the points which represent all possible * alignments./*from w ww . j a va 2s . c o m*/ */ private void getAllModelPoints(List<AlignStructMol> data) { // Create the regression line using the two points SimpleRegression regression = new SimpleRegression(); for (int i = 0; i < data.size(); i++) { AlignStructMol point = data.get(i); if (point.ransacMaybeInLiers) { regression.addData(point.RT, point.RT2); } } // Add all the points which fit the model (the difference between the point // and the regression line is less than "t" for (AlignStructMol point : data) { double y = point.RT2; double bestY = regression.predict(point.RT); if (Math.abs(y - bestY) < t) { point.ransacAlsoInLiers = true; AlsoNumber++; } else { point.ransacAlsoInLiers = false; } } }
From source file:guineu.modules.filter.Alignment.RANSACGCGC.RANSACGCGC.java
/** * Build the model creating a line with the 2 points * @param data vector with the points which represent all possible alignments. *///from ww w.j a v a 2s . c o m private void getAllModelPoints(List<AlignGCGCStructMol> data) { // Create the regression line using the two points SimpleRegression regression = new SimpleRegression(); for (int i = 0; i < data.size(); i++) { AlignGCGCStructMol point = data.get(i); if (point.ransacMaybeInLiers) { regression.addData(point.RT, point.RT2); } } // Add all the points which fit the model (the difference between the point // and the regression line is less than "t" for (AlignGCGCStructMol point : data) { double y = point.RT2; double bestY = regression.predict(point.RT); if (Math.abs(y - bestY) < t) { point.ransacAlsoInLiers = true; AlsoNumber++; } else { point.ransacAlsoInLiers = false; } } }
From source file:guineu.modules.filter.Alignment.RANSACGCGC.AlignmentGCGCRansacPlot.java
private List<GCGCRTs> smooth(List<GCGCRTs> list) { // Add points to the model in between of the real points to smooth the regression model Collections.sort(list, new GCGCRTs()); for (int i = 0; i < list.size() - 1; i++) { GCGCRTs point1 = list.get(i);//from w w w . j av a 2 s .c om GCGCRTs point2 = list.get(i + 1); if (point1.RT < point2.RT - 2) { SimpleRegression regression = new SimpleRegression(); regression.addData(point1.RT, point1.RT2); regression.addData(point2.RT, point2.RT2); double rt = point1.RT + 1; while (rt < point2.RT) { GCGCRTs newPoint = new GCGCRTs(rt, regression.predict(rt)); list.add(newPoint); rt++; } } } return list; }
From source file:com.netxforge.netxstudio.common.math.NativeFunctions.java
/** * Return a {@link GenericsTuple tuple} with a key being the slope and the * value being the intercept of the trendline. * /*from www .ja v a 2 s. c o m*/ */ public GenericsTuple<Double, Double> trend(double[][] dataPair) { SimpleRegression regression = new SimpleRegression(); regression.addData(dataPair); double slope = regression.getSlope(); double intercept = regression.getIntercept(); return new GenericsTuple<Double, Double>(slope, intercept); }
From source file:edu.indiana.soic.ts.crunch.CrunchDataReader.java
public PTable<String, String> extractText(PTable<ImmutableBytesWritable, Result> tableContent) { return tableContent.parallelDo("Read data", new DoFn<Pair<ImmutableBytesWritable, Result>, Pair<String, String>>() { @Override// ww w .jav a 2 s. c o m public void process(Pair<ImmutableBytesWritable, Result> row, Emitter<Pair<String, String>> emitter) { SimpleRegression regression; NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = row.second() .getMap(); System.out.println(map.size()); for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> columnFamilyMap : map .entrySet()) { regression = new SimpleRegression(); int count = 1; for (Map.Entry<byte[], NavigableMap<Long, byte[]>> entryVersion : columnFamilyMap .getValue().entrySet()) { for (Map.Entry<Long, byte[]> entry : entryVersion.getValue().entrySet()) { String rowKey = Bytes.toString(row.second().getRow()); String column = Bytes.toString(entryVersion.getKey()); byte[] val = entry.getValue(); String valOfColumn = new String(val); System.out.println("RowKey : " + rowKey + " Column Key : " + column + " Column Val : " + valOfColumn); if (!valOfColumn.isEmpty()) { String[] priceAndCap = valOfColumn.split("_"); if (priceAndCap.length > 1) { String pr = priceAndCap[0]; if (pr != null && !pr.equals("null")) { double price = Double.valueOf(pr); if (price < 0) { price = price - 2 * price; } System.out.println("Price : " + price + " count : " + count); regression.addData(count, price); } } } } count++; } // displays intercept of regression line System.out.println("Intercept : " + regression.getIntercept()); // displays slope of regression line System.out.println("Slope : " + regression.getSlope()); // displays slope standard error System.out.println("Slope STD Error : " + regression.getSlopeStdErr()); emitter.emit(new Pair<String, String>(String.valueOf(regression.getIntercept()), String.valueOf(regression.getSlope()))); } } }, Writables.tableOf(Writables.strings(), 
Writables.strings())); }
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
/** * Split rows if they're particularly high, and contain considerable white space in the middle. * Shapes causing the join will be removed if too high, or attached to the closest row otherwise. * @param sourceImage/*from w w w.j a v a2 s . c o m*/ * @param regressions * @return */ void splitRows(SourceImage sourceImage) { LOG.debug("########## splitRows #########"); // Calculate the min row height to be considered for splitting double minHeightForSplit = sourceImage.getAverageShapeHeight(); LOG.debug("minHeightForSplit: " + minHeightForSplit); double slopeMean = sourceImage.getMeanHorizontalSlope(); List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>(); for (RowOfShapes row : sourceImage.getRows()) { if (row.getRight() == row.getLeft()) continue; int height = row.getBottom() - row.getTop(); if (height >= minHeightForSplit) { LOG.debug("Adding candidate " + row.toString()); candidateRows.add(row); } } // For each row to be considered for splitting, see if there are lines of white space inside it. 
Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>(); for (RowOfShapes row : candidateRows) { SimpleRegression regression = new SimpleRegression(); // y = intercept + slope * x LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")"); regression.addData(row.getLeft(), row.getTop()); double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean); LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")"); regression.addData(row.getRight(), rightHandY); int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop())); int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta; LOG.debug("yDelta: " + yDelta); LOG.debug("yInterval: " + yInterval); // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically) int[] pixelCounts = new int[yInterval]; for (Shape shape : row.getShapes()) { LOG.trace("Shape " + shape); int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft())); LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft); // the shape offset + the offset between the regression line and the row top // + the delta we left at the start in case the line slopes upwards to the right int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta; LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - " + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex); for (int x = 0; x < shape.getWidth(); x++) { for (int y = 0; y < shape.getHeight(); y++) { if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) { pixelCounts[topIndex + y]++; } } } } Mean pixelCountMean = new Mean(); StandardDeviation pixelCountStdDev = new StandardDeviation(); for (int i = 0; i < yInterval; i++) { LOG.debug("Pixel count " + i + ": " + pixelCounts[i]); pixelCountMean.increment(pixelCounts[i]); pixelCountStdDev.increment(pixelCounts[i]); } LOG.debug("pixel count mean: 
" + pixelCountMean.getResult() + ", std dev: " + pixelCountStdDev.getResult()); // If there's a split required, we're going to go considerably above and below the mean several times double lowThreshold = pixelCountMean.getResult() / 2.0; double highThreshold = pixelCountMean.getResult() * 2.0; boolean inRow = false; List<Integer> switches = new ArrayList<Integer>(); for (int i = 0; i < yInterval; i++) { if (!inRow && pixelCounts[i] > highThreshold) { LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]); inRow = true; switches.add(i); } else if (inRow && pixelCounts[i] < lowThreshold) { LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]); inRow = false; switches.add(i); } } if (switches.size() > 2) { // we have more than one row List<Integer> rowSeparations = new ArrayList<Integer>(); // find the row separators for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) { int outOfRow = switches.get(switchIndex); int intoRow = switches.get(switchIndex + 1); int minPixelCount = (int) Math.ceil(highThreshold); int minIndex = -1; // find the row with the lowest pixel count for (int i = outOfRow; i <= intoRow; i++) { if (pixelCounts[i] < minPixelCount) { minPixelCount = pixelCounts[i]; minIndex = i; } } rowSeparations.add(minIndex); } // separate the shapes among the rows List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1); for (int i = 0; i <= rowSeparations.size(); i++) { newRows.add(graphicsService.getEmptyRow(sourceImage)); } // add a separator at the beginning and end rowSeparations.add(0, 0); rowSeparations.add(yInterval + 1); for (Shape shape : row.getShapes()) { int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft())); int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta; int firstSepAfterShapeBottom = rowSeparations.size(); int lastSepBeforeShapeTop = -1; for (int i = rowSeparations.size() - 1; i >= 0; i--) { int 
rowSeparation = rowSeparations.get(i); if (rowSeparation <= topIndex) { lastSepBeforeShapeTop = i; break; } } for (int i = 0; i < rowSeparations.size(); i++) { int rowSeparation = rowSeparations.get(i); if (rowSeparation >= topIndex + shape.getHeight()) { firstSepAfterShapeBottom = i; break; } } if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) { // shape clearly belongs to one row RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop); newRow.addShape(shape); } else { // is the shape much closer to one row than another? // if yes, add it to then add it to this row int[] yPixelsPerRow = new int[newRows.size()]; for (int i = 0; i < newRows.size(); i++) { int separatorTop = rowSeparations.get(i); int separatorBottom = rowSeparations.get(i + 1); int top = topIndex < separatorTop ? separatorTop : topIndex; int bottom = topIndex + shape.getHeight() < separatorBottom ? topIndex + shape.getHeight() : separatorBottom; yPixelsPerRow[i] = bottom - top; } int pixelsInMaxRow = 0; int maxPixelRowIndex = -1; for (int i = 0; i < newRows.size(); i++) { if (yPixelsPerRow[i] > pixelsInMaxRow) { pixelsInMaxRow = yPixelsPerRow[i]; maxPixelRowIndex = i; } } double minPercentage = 0.8; if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) { RowOfShapes newRow = newRows.get(maxPixelRowIndex); newRow.addShape(shape); } else { // otherwise, the shape needs to be got rid of // as it's causing massive confusion // do this by simply not adding it anywhere } } // is the shape in one row exactly? } // next shape splitRows.put(row, newRows); } // do we have more than one row? } // next row for (RowOfShapes row : splitRows.keySet()) { List<RowOfShapes> newRows = splitRows.get(row); sourceImage.replaceRow(row, newRows); } }