Example usage for the org.apache.commons.math.stat.regression.SimpleRegression constructor SimpleRegression()

List of usage examples for the org.apache.commons.math.stat.regression.SimpleRegression constructor SimpleRegression()

Introduction

On this page you can find example usages of the no-argument constructor SimpleRegression() of the class org.apache.commons.math.stat.regression.SimpleRegression.

Prototype

public SimpleRegression() 

Source Link

Document

Create an empty SimpleRegression instance

Usage

From source file:ch.usi.inf.lidr.merging.SSL.java

/**
 * Creates and returns a {@link SimpleRegression}
 * for a given list of scored documents <code>scoredDocs</code>.
 * This regression maps unnormalized scores in <code>scoredDocs</code>
 * to normalized/centralized scores in <code>centrScores</code>.
 * Documents that appear both in <code>scoredDocs</code>
 * and <code>centrScores</code> are used as a training for the regression.
 * According to the original paper, only first 10
 * documents are considered for training.
 * /*from  w w  w . j  av  a  2s .c o m*/
 * @param scoredDocs The list of scored documents.
 * 
 * @return The {@link SimpleRegression} with filled-in training data.
 */
private <T> SimpleRegression getRegression(List<ScoredEntity<T>> scoredDocs) {
    SimpleRegression regression = new SimpleRegression();

    Set<Double> xData = new HashSet<Double>();
    for (ScoredEntity<T> scoredDocument : scoredDocs) {
        Object docId = scoredDocument.getEntity();
        double specificScore = scoredDocument.getScore();

        if (centrScores.containsKey(docId) && !xData.contains(specificScore)) {
            regression.addData(specificScore, centrScores.get(docId));
            xData.add(specificScore);

            if (regression.getN() >= 10) {
                return regression;
            }
        }
    }

    return regression;
}

From source file:com.userweave.module.methoden.iconunderstandability.service.ComputeIconTestStatisticsImpl.java

/**
 * return regression, if regression can be computed
 * @return/*from   ww w.  jav a 2 s.c o m*/
 */
private OverallStatistics computeOverallStatistics() {

    SimpleRegression regression = new SimpleRegression();

    DescriptiveStatistics overallStatistics = DescriptiveStatistics.newInstance();

    Map<Integer, DescriptiveStatistics> iconCount2Statistics = new HashMap<Integer, DescriptiveStatistics>();

    List<Object[]> executionTimesIconCount = testResultDao.findAllValidExecutionTimesAndIconCount();

    if (!executionTimesIconCount.isEmpty()) {

        // check, if there is variation in x (only one x value for all observation yield NaN!)
        boolean canComputeRegression = false;

        int iconCountForFirstResult = ((Long) executionTimesIconCount.get(0)[1]).intValue();

        for (Object[] executionTimeIconCount : executionTimesIconCount) {

            int iconCount = ((Long) executionTimeIconCount[1]).intValue();
            if (iconCount != iconCountForFirstResult) {
                canComputeRegression = true;
            }

            double executionTime = (Long) executionTimeIconCount[0];

            if (isValid(executionTime)) {
                regression.addData(iconCount, executionTime);
                overallStatistics.addValue(executionTime);
                getStatisticsForIconCount(iconCount2Statistics, iconCount).addValue(executionTime);
            }
        }

        if (canComputeRegression) {
            return new OverallStatistics(regression, overallStatistics.getMean(), iconCount2Statistics);
        } else {
            return new OverallStatistics(null, overallStatistics.getMean(), iconCount2Statistics);
        }
    } else {
        return null;
    }
}

From source file:com.griddynamics.jagger.engine.e1.scenario.DefaultWorkloadSuggestionMaker.java

/**
 * Estimates the number of threads expected to produce <code>desiredTps</code>.
 * A linear regression (x = number of threads, y = measured tps) is fitted over at most
 * {@code MAX_POINTS_FOR_REGRESSION} entries of <code>stats</code>, taken in descending order of
 * the first element of each pair (the time), and solved for <code>desiredTps</code>.
 *
 * @param desiredTps the target tps
 * @param stats      measured data: number of threads -> (time, measured tps)
 * @return the rounded estimate; the thread count of the entry with the largest time is used
 *         instead when the regression has (almost) no slope or the estimate is negative
 * @throws IllegalArgumentException if fewer than two entries with distinct times are available
 */
private static Integer findClosestPoint(BigDecimal desiredTps, Map<Integer, Pair<Long, BigDecimal>> stats) {
    final int MAX_POINTS_FOR_REGRESSION = 10;

    // <time><number of threads> - sorted by time, largest time first.
    // Entries sharing the same time overwrite each other.
    SortedMap<Long, Integer> map = Maps.newTreeMap(new Comparator<Long>() {
        @Override
        public int compare(Long first, Long second) {
            return second.compareTo(first);
        }
    });
    for (Map.Entry<Integer, Pair<Long, BigDecimal>> entry : stats.entrySet()) {
        map.put(entry.getValue().getFirst(), entry.getKey());
    }

    if (map.size() < 2) {
        throw new IllegalArgumentException("Not enough stats to calculate point");
    }

    SimpleRegression regression = new SimpleRegression();
    // Fallback result: thread count of the first entry visited (largest time).
    double previousValue = -1.0;
    int pointsUsed = 0;

    log.debug("Selecting next point for balancing");
    for (Integer threads : map.values()) {
        double value = threads.floatValue();
        if (previousValue < 0.0) {
            previousValue = value;
        }
        double measuredTps = stats.get(threads).getSecond().floatValue();

        regression.addData(value, measuredTps);

        log.debug(String.format("   %7.2f    %7.2f", value, measuredTps));

        pointsUsed++;
        // ">=" so that exactly MAX_POINTS_FOR_REGRESSION points are used, not one more.
        if (pointsUsed >= MAX_POINTS_FOR_REGRESSION) {
            break;
        }
    }

    double intercept = regression.getIntercept();
    double slope = regression.getSlope();

    double approxPoint;

    // if no slope => use previous number of threads
    if (Math.abs(slope) > 1e-12) {
        approxPoint = (desiredTps.doubleValue() - intercept) / slope;
    } else {
        approxPoint = previousValue;
    }

    // if approximation point is negative - ignore it
    if (approxPoint < 0) {
        approxPoint = previousValue;
    }

    int nextPoint = (int) Math.round(approxPoint);
    log.debug(String.format("Next point   %7d    (target tps: %7.2f)", nextPoint,
            desiredTps.doubleValue()));

    return nextPoint;
}

From source file:guineu.modules.filter.Alignment.RANSAC.AlignmentRansacPlot.java

/**
 * Adds points to the model in between the real points to smooth the regression model.
 * The list is sorted with the {@code RTs} comparator, then every pair of neighbouring
 * real points whose RT values are more than 2 apart is bridged with points at RT steps
 * of 1, whose second coordinate lies on the straight line through the two neighbours.
 * The new points are appended to the end of <code>list</code>.
 *
 * @param list the real points; sorted and extended in place
 * @return the same list instance, including the added points
 */
private List<RTs> smooth(List<RTs> list) {
    Collections.sort(list, new RTs());

    // Freeze the bound: points appended below must not be treated as real points,
    // otherwise gaps between interpolated runs would be "smoothed" as well.
    final int realPointCount = list.size();

    for (int i = 0; i < realPointCount - 1; i++) {
        RTs point1 = list.get(i);
        RTs point2 = list.get(i + 1);
        if (point1.RT < point2.RT - 2) {
            // Line through the two neighbouring real points.
            SimpleRegression regression = new SimpleRegression();
            regression.addData(point1.RT, point1.RT2);
            regression.addData(point2.RT, point2.RT2);
            for (double rt = point1.RT + 1; rt < point2.RT; rt++) {
                list.add(new RTs(rt, regression.predict(rt)));
            }
        }
    }

    return list;
}

From source file:guineu.modules.filter.Alignment.RANSAC.RANSAC.java

/**
 * Build the model creating a line with the 2 points
 *
 * @param data vector with the points which represent all possible
 * alignments./*from  w ww .  j  a va  2s  . c  o  m*/
 */
private void getAllModelPoints(List<AlignStructMol> data) {

    // Create the regression line using the two points
    SimpleRegression regression = new SimpleRegression();

    for (int i = 0; i < data.size(); i++) {
        AlignStructMol point = data.get(i);
        if (point.ransacMaybeInLiers) {
            regression.addData(point.RT, point.RT2);
        }
    }

    // Add all the points which fit the model (the difference between the point
    // and the regression line is less than "t"
    for (AlignStructMol point : data) {
        double y = point.RT2;
        double bestY = regression.predict(point.RT);
        if (Math.abs(y - bestY) < t) {
            point.ransacAlsoInLiers = true;
            AlsoNumber++;
        } else {
            point.ransacAlsoInLiers = false;
        }
    }

}

From source file:guineu.modules.filter.Alignment.RANSACGCGC.RANSACGCGC.java

/**
 * Fits a regression line to the points marked {@code ransacMaybeInLiers} and then
 * marks each point of <code>data</code> as {@code ransacAlsoInLiers} when it lies
 * closer than {@code t} to that line, incrementing {@code AlsoNumber} for each such point.
 *
 * @param data vector with the points which represent all possible alignments.
 */
private void getAllModelPoints(List<AlignGCGCStructMol> data) {

    // Regression line through the candidate points
    SimpleRegression model = new SimpleRegression();
    for (AlignGCGCStructMol candidate : data) {
        if (!candidate.ransacMaybeInLiers) {
            continue;
        }
        model.addData(candidate.RT, candidate.RT2);
    }

    // Classify every point by its distance to the regression line
    for (int idx = 0; idx < data.size(); idx++) {
        AlignGCGCStructMol current = data.get(idx);
        double observed = current.RT2;
        double predicted = model.predict(current.RT);
        if (!(Math.abs(observed - predicted) < t)) {
            current.ransacAlsoInLiers = false;
            continue;
        }
        current.ransacAlsoInLiers = true;
        AlsoNumber++;
    }

}

From source file:guineu.modules.filter.Alignment.RANSACGCGC.AlignmentGCGCRansacPlot.java

/**
 * Adds points to the model in between the real points to smooth the regression model.
 * The list is sorted with the {@code GCGCRTs} comparator, then every pair of neighbouring
 * real points whose RT values are more than 2 apart is bridged with points at RT steps
 * of 1, whose second coordinate lies on the straight line through the two neighbours.
 * The new points are appended to the end of <code>list</code>.
 *
 * @param list the real points; sorted and extended in place
 * @return the same list instance, including the added points
 */
private List<GCGCRTs> smooth(List<GCGCRTs> list) {
    Collections.sort(list, new GCGCRTs());

    // Freeze the bound: points appended below must not be treated as real points,
    // otherwise gaps between interpolated runs would be "smoothed" as well.
    final int realPointCount = list.size();

    for (int i = 0; i < realPointCount - 1; i++) {
        GCGCRTs point1 = list.get(i);
        GCGCRTs point2 = list.get(i + 1);
        if (point1.RT < point2.RT - 2) {
            // Line through the two neighbouring real points.
            SimpleRegression regression = new SimpleRegression();
            regression.addData(point1.RT, point1.RT2);
            regression.addData(point2.RT, point2.RT2);
            for (double rt = point1.RT + 1; rt < point2.RT; rt++) {
                list.add(new GCGCRTs(rt, regression.predict(rt)));
            }
        }
    }

    return list;
}

From source file:com.netxforge.netxstudio.common.math.NativeFunctions.java

/**
 * Return a {@link GenericsTuple tuple} with a key being the slope and the
 * value being the intercept of the trendline.
 * /*from www  .ja  v a 2 s. c  o m*/
 */
public GenericsTuple<Double, Double> trend(double[][] dataPair) {

    SimpleRegression regression = new SimpleRegression();
    regression.addData(dataPair);
    double slope = regression.getSlope();
    double intercept = regression.getIntercept();

    return new GenericsTuple<Double, Double>(slope, intercept);
}

From source file:edu.indiana.soic.ts.crunch.CrunchDataReader.java

/**
 * Fits one linear regression per column family of every row in <code>tableContent</code>
 * and emits its (intercept, slope) as a pair of strings.
 * Cell values are read as {@code "<price>_<cap>"}; the absolute value of the price is the
 * y value and the 1-based position of the column within its family is the x value.
 * Cells that are empty, contain no {@code "_"} separator, or whose price is the literal
 * {@code "null"} are skipped. Progress is printed to standard output.
 *
 * @param tableContent the table rows to process
 * @return a table of (intercept, slope) string pairs, one per row and column family
 */
public PTable<String, String> extractText(PTable<ImmutableBytesWritable, Result> tableContent) {
    return tableContent.parallelDo("Read data",
            new DoFn<Pair<ImmutableBytesWritable, Result>, Pair<String, String>>() {
                @Override
                public void process(Pair<ImmutableBytesWritable, Result> row,
                        Emitter<Pair<String, String>> emitter) {
                    Result result = row.second();
                    NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = result
                            .getMap();
                    System.out.println(map.size());
                    // The row key is the same for every cell of this row.
                    String rowKey = Bytes.toString(result.getRow());
                    for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> columnFamilyMap : map
                            .entrySet()) {
                        SimpleRegression regression = new SimpleRegression();
                        // x value: 1-based position of the column within the family
                        int count = 1;
                        for (Map.Entry<byte[], NavigableMap<Long, byte[]>> entryVersion : columnFamilyMap
                                .getValue().entrySet()) {
                            String column = Bytes.toString(entryVersion.getKey());
                            for (Map.Entry<Long, byte[]> entry : entryVersion.getValue().entrySet()) {
                                // Decode the value the same way as the row and column keys,
                                // not with the platform default charset.
                                String valOfColumn = Bytes.toString(entry.getValue());
                                System.out.println("RowKey : " + rowKey + " Column Key : " + column
                                        + " Column Val : " + valOfColumn);
                                if (valOfColumn.isEmpty()) {
                                    continue;
                                }
                                String[] priceAndCap = valOfColumn.split("_");
                                if (priceAndCap.length <= 1) {
                                    continue;
                                }
                                String pr = priceAndCap[0];
                                if (pr.equals("null")) {
                                    continue;
                                }
                                double price = Double.parseDouble(pr);
                                if (price < 0) {
                                    // negative prices are used with their sign flipped
                                    price = -price;
                                }
                                System.out.println("Price : " + price + " count : " + count);
                                regression.addData(count, price);
                            }
                            count++;
                        }
                        double intercept = regression.getIntercept();
                        double slope = regression.getSlope();

                        // displays intercept of regression line
                        System.out.println("Intercept : " + intercept);

                        // displays slope of regression line
                        System.out.println("Slope : " + slope);

                        // displays slope standard error
                        System.out.println("Slope STD Error : " + regression.getSlopeStdErr());
                        emitter.emit(new Pair<String, String>(String.valueOf(intercept),
                                String.valueOf(slope)));
                    }
                }
            }, Writables.tableOf(Writables.strings(), Writables.strings()));
}

From source file:com.joliciel.jochre.graphics.SegmenterImpl.java

/**
 * Split rows if they're particularly high, and contain considerable white space in the middle.
 * Shapes causing the join will be removed if too high, or attached to the closest row otherwise.
 * @param sourceImage/*from w  w w.j a  v a2 s . c  o m*/
 * @param regressions
 * @return
 */
void splitRows(SourceImage sourceImage) {
    LOG.debug("########## splitRows #########");

    // Calculate the min row height to be considered for splitting
    double minHeightForSplit = sourceImage.getAverageShapeHeight();
    LOG.debug("minHeightForSplit: " + minHeightForSplit);

    double slopeMean = sourceImage.getMeanHorizontalSlope();

    List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>();
    for (RowOfShapes row : sourceImage.getRows()) {
        if (row.getRight() == row.getLeft())
            continue;
        int height = row.getBottom() - row.getTop();
        if (height >= minHeightForSplit) {
            LOG.debug("Adding candidate " + row.toString());
            candidateRows.add(row);
        }
    }

    // For each row to be considered for splitting, see if there are lines of white space inside it.
    Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>();
    for (RowOfShapes row : candidateRows) {
        SimpleRegression regression = new SimpleRegression();
        // y = intercept + slope * x 
        LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")");
        regression.addData(row.getLeft(), row.getTop());
        double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean);
        LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")");
        regression.addData(row.getRight(), rightHandY);

        int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop()));
        int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta;

        LOG.debug("yDelta: " + yDelta);
        LOG.debug("yInterval: " + yInterval);
        // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically)
        int[] pixelCounts = new int[yInterval];
        for (Shape shape : row.getShapes()) {
            LOG.trace("Shape " + shape);
            int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
            LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft);
            // the shape offset + the offset between the regression line and the row top
            // + the delta we left at the start in case the line slopes upwards to the right
            int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
            LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - "
                    + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex);
            for (int x = 0; x < shape.getWidth(); x++) {
                for (int y = 0; y < shape.getHeight(); y++) {
                    if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) {
                        pixelCounts[topIndex + y]++;
                    }
                }
            }
        }

        Mean pixelCountMean = new Mean();
        StandardDeviation pixelCountStdDev = new StandardDeviation();
        for (int i = 0; i < yInterval; i++) {
            LOG.debug("Pixel count " + i + ": " + pixelCounts[i]);
            pixelCountMean.increment(pixelCounts[i]);
            pixelCountStdDev.increment(pixelCounts[i]);
        }
        LOG.debug("pixel count mean: " + pixelCountMean.getResult() + ", std dev: "
                + pixelCountStdDev.getResult());

        // If there's a split required, we're going to go considerably above and below the mean several times
        double lowThreshold = pixelCountMean.getResult() / 2.0;
        double highThreshold = pixelCountMean.getResult() * 2.0;
        boolean inRow = false;
        List<Integer> switches = new ArrayList<Integer>();
        for (int i = 0; i < yInterval; i++) {
            if (!inRow && pixelCounts[i] > highThreshold) {
                LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = true;
                switches.add(i);
            } else if (inRow && pixelCounts[i] < lowThreshold) {
                LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]);
                inRow = false;
                switches.add(i);
            }
        }
        if (switches.size() > 2) {
            // we have more than one row
            List<Integer> rowSeparations = new ArrayList<Integer>();

            // find the row separators
            for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) {
                int outOfRow = switches.get(switchIndex);
                int intoRow = switches.get(switchIndex + 1);
                int minPixelCount = (int) Math.ceil(highThreshold);
                int minIndex = -1;
                // find the row with the lowest pixel count
                for (int i = outOfRow; i <= intoRow; i++) {
                    if (pixelCounts[i] < minPixelCount) {
                        minPixelCount = pixelCounts[i];
                        minIndex = i;
                    }
                }
                rowSeparations.add(minIndex);
            }

            // separate the shapes among the rows
            List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1);
            for (int i = 0; i <= rowSeparations.size(); i++) {
                newRows.add(graphicsService.getEmptyRow(sourceImage));
            }

            // add a separator at the beginning and end
            rowSeparations.add(0, 0);
            rowSeparations.add(yInterval + 1);
            for (Shape shape : row.getShapes()) {
                int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft()));
                int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta;
                int firstSepAfterShapeBottom = rowSeparations.size();
                int lastSepBeforeShapeTop = -1;

                for (int i = rowSeparations.size() - 1; i >= 0; i--) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation <= topIndex) {
                        lastSepBeforeShapeTop = i;
                        break;
                    }
                }

                for (int i = 0; i < rowSeparations.size(); i++) {
                    int rowSeparation = rowSeparations.get(i);
                    if (rowSeparation >= topIndex + shape.getHeight()) {
                        firstSepAfterShapeBottom = i;
                        break;
                    }
                }

                if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) {
                    // shape clearly belongs to one row
                    RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop);
                    newRow.addShape(shape);
                } else {
                    // is the shape much closer to one row than another?
                    // if yes, add it to then add it to this row
                    int[] yPixelsPerRow = new int[newRows.size()];
                    for (int i = 0; i < newRows.size(); i++) {
                        int separatorTop = rowSeparations.get(i);
                        int separatorBottom = rowSeparations.get(i + 1);
                        int top = topIndex < separatorTop ? separatorTop : topIndex;
                        int bottom = topIndex + shape.getHeight() < separatorBottom
                                ? topIndex + shape.getHeight()
                                : separatorBottom;
                        yPixelsPerRow[i] = bottom - top;
                    }

                    int pixelsInMaxRow = 0;
                    int maxPixelRowIndex = -1;
                    for (int i = 0; i < newRows.size(); i++) {
                        if (yPixelsPerRow[i] > pixelsInMaxRow) {
                            pixelsInMaxRow = yPixelsPerRow[i];
                            maxPixelRowIndex = i;
                        }
                    }
                    double minPercentage = 0.8;
                    if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) {
                        RowOfShapes newRow = newRows.get(maxPixelRowIndex);
                        newRow.addShape(shape);
                    } else {
                        // otherwise, the shape needs to be got rid of
                        // as it's causing massive confusion
                        // do this by simply not adding it anywhere
                    }
                } // is the shape in one row exactly?
            } // next shape
            splitRows.put(row, newRows);
        } // do we have more than one row?
    } // next row

    for (RowOfShapes row : splitRows.keySet()) {
        List<RowOfShapes> newRows = splitRows.get(row);
        sourceImage.replaceRow(row, newRows);
    }
}