List of usage examples for org.apache.commons.math.stat.regression SimpleRegression addData
public void addData(double x, double y)
From source file:edu.indiana.soic.ts.crunch.CrunchDataReader.java
public PTable<String, String> extractText(PTable<ImmutableBytesWritable, Result> tableContent) { return tableContent.parallelDo("Read data", new DoFn<Pair<ImmutableBytesWritable, Result>, Pair<String, String>>() { @Override// www . java 2 s . co m public void process(Pair<ImmutableBytesWritable, Result> row, Emitter<Pair<String, String>> emitter) { SimpleRegression regression; NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> map = row.second() .getMap(); System.out.println(map.size()); for (Map.Entry<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> columnFamilyMap : map .entrySet()) { regression = new SimpleRegression(); int count = 1; for (Map.Entry<byte[], NavigableMap<Long, byte[]>> entryVersion : columnFamilyMap .getValue().entrySet()) { for (Map.Entry<Long, byte[]> entry : entryVersion.getValue().entrySet()) { String rowKey = Bytes.toString(row.second().getRow()); String column = Bytes.toString(entryVersion.getKey()); byte[] val = entry.getValue(); String valOfColumn = new String(val); System.out.println("RowKey : " + rowKey + " Column Key : " + column + " Column Val : " + valOfColumn); if (!valOfColumn.isEmpty()) { String[] priceAndCap = valOfColumn.split("_"); if (priceAndCap.length > 1) { String pr = priceAndCap[0]; if (pr != null && !pr.equals("null")) { double price = Double.valueOf(pr); if (price < 0) { price = price - 2 * price; } System.out.println("Price : " + price + " count : " + count); regression.addData(count, price); } } } } count++; } // displays intercept of regression line System.out.println("Intercept : " + regression.getIntercept()); // displays slope of regression line System.out.println("Slope : " + regression.getSlope()); // displays slope standard error System.out.println("Slope STD Error : " + regression.getSlopeStdErr()); emitter.emit(new Pair<String, String>(String.valueOf(regression.getIntercept()), String.valueOf(regression.getSlope()))); } } }, Writables.tableOf(Writables.strings(), 
Writables.strings())); }
From source file:net.sf.mzmine.modules.peaklistmethods.identification.metamsecorrelate.MetaMSEcorrelateTask.java
/** * correlates one row to another/*www . j av a 2s . c o m*/ * peak shape correlation (Pearson) (if negative or zero direct escape) * otherwise avg(corr)>= minCorrPeakShape * intensity profile correlation (Pearson) >= minCorrIProfile * TODO: maybe impute low values instead of 0 for not detected! * @param row * @param g * @return average correlation over both factors ranging from minR to 1 (or 0 if minimum correlation was not met) * @throws Exception */ private double corrRowToRow(final PeakList peakList, final RawDataFile raw[], PeakListRow row, PeakListRow row2) throws Exception { double corr = 0; SimpleRegression reg = new SimpleRegression(); // count int c = 0; // go through all raw files for (int r = 0; r < raw.length; r++) { Feature f1 = row.getPeak(raw[r]); Feature f2 = row2.getPeak(raw[r]); if (f1 != null && f2 != null) { // peak shape correlation FeatureShapeCorrelationData cFS = corrFeatureShape(f1, f2, true); if (cFS != null) { double tmpcorr = cFS.getR(); // escape if peak shapes are showing a negative correlation if (tmpcorr <= 0) return tmpcorr; corr += tmpcorr; c++; } else { // correlation was not possible // maybe due to a small peak in this raw file // escape if features would be high enough for a correlation // this means the features are not intercepting if (countDPHigherThanNoise(f1) >= minCorrelatedDataPoints && countDPHigherThanNoise(f2) >= minCorrelatedDataPoints) return 0; } } // I profile correlation // TODO: low value imputation? double I1 = f1 != null ? f1.getHeight() : 0; double I2 = f2 != null ? 
f2.getHeight() : 0; reg.addData(I1, I2); } // First search for isotopes TODO later fill in isotopes from raw int absCharge = AlignedIsotopeGrouperTask.find13CIsotope(peakList, row, row2, maxCharge, mzTolerance); boolean isIsotope = absCharge != -1; // TODO search for adducts and add correlation: IProfile doesnt have to be the same for adducts boolean isAdduct = false; if (!isIsotope) findAdducts(peakList, row, row2, row.getRowCharge(), row2.getRowCharge()); double adductBonus = (isIsotope || isAdduct) && useAdductBonusR ? adductBonusR : 0; // TODO weighting of intensity corr and feature shape corr // there was no correlation possible due to small peaks if (c == 0) { // return isAdduct || isIsotope ? 1 : 0; } else { corr = (corr / c); double corrIprofile = reg.getR(); if (corr + adductBonus < minShapeCorrR) return 0; else if (corrIprofile < minIntensityProfileR) return 0; else return (corr + corrIprofile) / 2; } }
From source file:com.joliciel.jochre.graphics.SegmenterImpl.java
/** * Split rows if they're particularly high, and contain considerable white space in the middle. * Shapes causing the join will be removed if too high, or attached to the closest row otherwise. * @param sourceImage/*from ww w . j a v a 2 s. co m*/ * @param regressions * @return */ void splitRows(SourceImage sourceImage) { LOG.debug("########## splitRows #########"); // Calculate the min row height to be considered for splitting double minHeightForSplit = sourceImage.getAverageShapeHeight(); LOG.debug("minHeightForSplit: " + minHeightForSplit); double slopeMean = sourceImage.getMeanHorizontalSlope(); List<RowOfShapes> candidateRows = new ArrayList<RowOfShapes>(); for (RowOfShapes row : sourceImage.getRows()) { if (row.getRight() == row.getLeft()) continue; int height = row.getBottom() - row.getTop(); if (height >= minHeightForSplit) { LOG.debug("Adding candidate " + row.toString()); candidateRows.add(row); } } // For each row to be considered for splitting, see if there are lines of white space inside it. 
Hashtable<RowOfShapes, List<RowOfShapes>> splitRows = new Hashtable<RowOfShapes, List<RowOfShapes>>(); for (RowOfShapes row : candidateRows) { SimpleRegression regression = new SimpleRegression(); // y = intercept + slope * x LOG.debug("Left point: (" + row.getLeft() + " , " + row.getTop() + ")"); regression.addData(row.getLeft(), row.getTop()); double rightHandY = row.getTop() + ((double) (row.getRight() - row.getLeft()) * slopeMean); LOG.debug("Right point: (" + row.getRight() + " , " + rightHandY + ")"); regression.addData(row.getRight(), rightHandY); int yDelta = (int) Math.ceil(Math.abs(rightHandY - (double) row.getTop())); int yInterval = yDelta + (row.getBottom() - row.getTop() + 1) + yDelta; LOG.debug("yDelta: " + yDelta); LOG.debug("yInterval: " + yInterval); // let's get pixel counts shape by shape, and leave out the rest (in case rows overlap vertically) int[] pixelCounts = new int[yInterval]; for (Shape shape : row.getShapes()) { LOG.trace("Shape " + shape); int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft())); LOG.trace("yDeltaAtLeft: " + yDeltaAtLeft); // the shape offset + the offset between the regression line and the row top // + the delta we left at the start in case the line slopes upwards to the right int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta; LOG.trace("topIndex: (" + shape.getTop() + " - " + row.getTop() + ") + (" + row.getTop() + " - " + yDeltaAtLeft + ") + " + yDelta + " = " + topIndex); for (int x = 0; x < shape.getWidth(); x++) { for (int y = 0; y < shape.getHeight(); y++) { if (shape.isPixelBlack(x, y, sourceImage.getBlackThreshold())) { pixelCounts[topIndex + y]++; } } } } Mean pixelCountMean = new Mean(); StandardDeviation pixelCountStdDev = new StandardDeviation(); for (int i = 0; i < yInterval; i++) { LOG.debug("Pixel count " + i + ": " + pixelCounts[i]); pixelCountMean.increment(pixelCounts[i]); pixelCountStdDev.increment(pixelCounts[i]); } LOG.debug("pixel count mean: 
" + pixelCountMean.getResult() + ", std dev: " + pixelCountStdDev.getResult()); // If there's a split required, we're going to go considerably above and below the mean several times double lowThreshold = pixelCountMean.getResult() / 2.0; double highThreshold = pixelCountMean.getResult() * 2.0; boolean inRow = false; List<Integer> switches = new ArrayList<Integer>(); for (int i = 0; i < yInterval; i++) { if (!inRow && pixelCounts[i] > highThreshold) { LOG.debug("In row at " + i + ", pixel count " + pixelCounts[i]); inRow = true; switches.add(i); } else if (inRow && pixelCounts[i] < lowThreshold) { LOG.debug("Out of row at " + i + ", pixel count " + pixelCounts[i]); inRow = false; switches.add(i); } } if (switches.size() > 2) { // we have more than one row List<Integer> rowSeparations = new ArrayList<Integer>(); // find the row separators for (int switchIndex = 1; switchIndex < switches.size() - 2; switchIndex = switchIndex + 2) { int outOfRow = switches.get(switchIndex); int intoRow = switches.get(switchIndex + 1); int minPixelCount = (int) Math.ceil(highThreshold); int minIndex = -1; // find the row with the lowest pixel count for (int i = outOfRow; i <= intoRow; i++) { if (pixelCounts[i] < minPixelCount) { minPixelCount = pixelCounts[i]; minIndex = i; } } rowSeparations.add(minIndex); } // separate the shapes among the rows List<RowOfShapes> newRows = new ArrayList<RowOfShapes>(rowSeparations.size() + 1); for (int i = 0; i <= rowSeparations.size(); i++) { newRows.add(graphicsService.getEmptyRow(sourceImage)); } // add a separator at the beginning and end rowSeparations.add(0, 0); rowSeparations.add(yInterval + 1); for (Shape shape : row.getShapes()) { int yDeltaAtLeft = (int) Math.round(regression.predict(shape.getLeft())); int topIndex = (shape.getTop() - row.getTop()) + (row.getTop() - yDeltaAtLeft) + yDelta; int firstSepAfterShapeBottom = rowSeparations.size(); int lastSepBeforeShapeTop = -1; for (int i = rowSeparations.size() - 1; i >= 0; i--) { int 
rowSeparation = rowSeparations.get(i); if (rowSeparation <= topIndex) { lastSepBeforeShapeTop = i; break; } } for (int i = 0; i < rowSeparations.size(); i++) { int rowSeparation = rowSeparations.get(i); if (rowSeparation >= topIndex + shape.getHeight()) { firstSepAfterShapeBottom = i; break; } } if (lastSepBeforeShapeTop == firstSepAfterShapeBottom - 1) { // shape clearly belongs to one row RowOfShapes newRow = newRows.get(lastSepBeforeShapeTop); newRow.addShape(shape); } else { // is the shape much closer to one row than another? // if yes, add it to then add it to this row int[] yPixelsPerRow = new int[newRows.size()]; for (int i = 0; i < newRows.size(); i++) { int separatorTop = rowSeparations.get(i); int separatorBottom = rowSeparations.get(i + 1); int top = topIndex < separatorTop ? separatorTop : topIndex; int bottom = topIndex + shape.getHeight() < separatorBottom ? topIndex + shape.getHeight() : separatorBottom; yPixelsPerRow[i] = bottom - top; } int pixelsInMaxRow = 0; int maxPixelRowIndex = -1; for (int i = 0; i < newRows.size(); i++) { if (yPixelsPerRow[i] > pixelsInMaxRow) { pixelsInMaxRow = yPixelsPerRow[i]; maxPixelRowIndex = i; } } double minPercentage = 0.8; if (((double) pixelsInMaxRow / (double) shape.getHeight()) >= minPercentage) { RowOfShapes newRow = newRows.get(maxPixelRowIndex); newRow.addShape(shape); } else { // otherwise, the shape needs to be got rid of // as it's causing massive confusion // do this by simply not adding it anywhere } } // is the shape in one row exactly? } // next shape splitRows.put(row, newRows); } // do we have more than one row? } // next row for (RowOfShapes row : splitRows.keySet()) { List<RowOfShapes> newRows = splitRows.get(row); sourceImage.replaceRow(row, newRows); } }
From source file:org.rascalmpl.library.analysis.statistics.SimpleRegressions.java
/**
 * Builds a {@code SimpleRegression} from a list of tuples.
 *
 * <p>Every element is cast to {@code ITuple}; its fields 0 and 1 are cast to {@code INumber}
 * and added as the x and y value of one observation.
 *
 * @param dataValues the observations; must contain more than 2 elements
 * @return a regression holding all observations of {@code dataValues}
 */
SimpleRegression make(IList dataValues) {
    boolean tooFewElements = dataValues.length() <= 2;
    if (tooFewElements) {
        throw RuntimeExceptionFactory.illegalArgument(dataValues, null, null,
                "SimpleRegression data should have more than 2 elements");
    }

    SimpleRegression regression = new SimpleRegression();
    for (IValue element : dataValues) {
        ITuple observation = (ITuple) element;
        INumber xNumber = (INumber) observation.get(0);
        INumber yNumber = (INumber) observation.get(1);
        double x = xNumber.toReal().doubleValue();
        double y = yNumber.toReal().doubleValue();
        regression.addData(x, y);
    }
    return regression;
}
From source file:playground.boescpa.converters.vissim.tools.TripMatcher.java
@Override public HashMap<Id<Trip>, Integer> matchTrips(HashMap<Id<Trip>, Long[]> msTrips, HashMap<Id<Trip>, Long[]> amTrips) { int matchesWithHighScores = 0; int matchesWithVeryHighScores = 0; int progressCounter = 0; int progressChecker = 2; HashMap<Id<Trip>, Integer> countsPerAnmTrip = new HashMap<>(); for (Id<Trip> amTrip : amTrips.keySet()) { countsPerAnmTrip.put(amTrip, 0); }//w w w.j ava 2 s . c o m List<Id<Trip>> amTripsKeySet = new ArrayList<>(amTrips.keySet()); for (Id<Trip> msTrip : msTrips.keySet()) { progressCounter++; Long[] msTripZones = msTrips.get(msTrip); Id<Trip> bestMatchingAmTrip = null; int bestMatchScore = Integer.MIN_VALUE; // Shuffle key set: Collections.shuffle(amTripsKeySet); for (Id<Trip> amTrip : amTripsKeySet) { Long[] amTripZones = amTrips.get(amTrip); // Linear regression between the to trips: SimpleRegression simpleRegression = new SimpleRegression(); for (int i = 0; i < msTripZones.length; i++) { boolean foundNone = true; for (int j = 0; j < amTripZones.length; j++) { if (msTripZones[i].equals(amTripZones[j])) { simpleRegression.addData(i, j); foundNone = false; } } if (foundNone) { int yPos = -(msTripZones.length - i) - NEG_OFFSET_IF_NOT_FOUNG; simpleRegression.addData(i, yPos); } } // Scoring: int matchScore = 0; // Criterion 1.1: Difference in length of trips not greater than 10%. if (((double) Math.abs(msTripZones.length - amTripZones.length)) / ((double) msTripZones.length) <= 0.1) { matchScore += TEN_PRCT_SCORE; } // Criterion 1.2: The smaller the difference in length, the better. matchScore -= (Math.abs(msTripZones.length - amTripZones.length) * LENGTH_SCORE); // Criterion 2: The closer the intercept to zero, the better. matchScore -= (int) (Math.abs(simpleRegression.getIntercept()) * INTERCEPT_SCORE); // Criterion 3: The closer the slope to one, the better. matchScore -= (int) (Math.abs(1 - simpleRegression.getSlope()) * SLOPE_SCORE); // Criterion 4: The smaller the mean square error of the regression, the better. 
matchScore -= (int) (Math.abs(simpleRegression.getMeanSquareError()) * MSE_SCORE); if (matchScore > bestMatchScore) { bestMatchScore = matchScore; bestMatchingAmTrip = amTrip; } } countsPerAnmTrip.put(bestMatchingAmTrip, (countsPerAnmTrip.get(bestMatchingAmTrip) + 1)); if (bestMatchScore >= 0.9 * TEN_PRCT_SCORE) { matchesWithHighScores++; if (bestMatchScore >= 0.99 * TEN_PRCT_SCORE) { matchesWithVeryHighScores++; } } // log progress: if (progressCounter >= progressChecker) { log.info(progressChecker + " trips matched."); progressChecker *= 2; } } log.info("Of total " + msTrips.size() + " trips, " + matchesWithHighScores + " were matched with a high score above " + 0.9 * TEN_PRCT_SCORE + " points."); log.info("Of total " + msTrips.size() + " trips, " + matchesWithVeryHighScores + " were matched with a very high score above " + 0.99 * TEN_PRCT_SCORE + " points."); return countsPerAnmTrip; }
From source file:playground.johannes.coopsim.analysis.ScoreTask.java
@Override public void analyze(Set<Trajectory> trajectories, Map<String, DescriptiveStatistics> results) { DescriptiveStatistics allScores = new DescriptiveStatistics(); for (Trajectory t : trajectories) allScores.addValue(t.getPerson().getSelectedPlan().getScore()); results.put("score", allScores); DescriptiveStatistics actScores = ActivityEvaluator.stopLogging(); results.put("score_act", actScores); DescriptiveStatistics legScores = LegEvaluator.stopLogging(); results.put("score_leg", legScores); Map<String, DescriptiveStatistics> jointScore = JointActivityEvaluator2.stopLogging(); // Map<String, DescriptiveStatistics> jointScore = JointActivityEvaluator.stopLogging(); for (Entry<String, DescriptiveStatistics> entry : jointScore.entrySet()) { results.put("score_join_" + entry.getKey(), entry.getValue()); }//ww w . j a v a 2 s .co m DescriptiveStatistics typeScore = ActivityTypeEvaluator.stopLogging(); results.put("score_type", typeScore); try { writeHistograms(allScores, "score", 50, 50); writeHistograms(actScores, "score_act", 50, 50); writeHistograms(legScores, "score_leg", 50, 50); for (Entry<String, DescriptiveStatistics> entry : jointScore.entrySet()) { writeHistograms(entry.getValue(), new LinearDiscretizer(0.5), "score_join_" + entry.getKey(), false); writeHistograms(entry.getValue(), "score_join_" + entry.getKey(), 50, 50); } writeHistograms(typeScore, new DummyDiscretizer(), "score_type", false); } catch (IOException e) { e.printStackTrace(); } scores.add(allScores.getMean()); if (scores.size() >= MIN_SAMPLES) { SimpleRegression reg = new SimpleRegression(); for (int i = scores.size() - MIN_SAMPLES; i < scores.size(); i++) { reg.addData(i, scores.get(i)); } if (reg.getSlope() < THRESHOLD) converged = true; } }
From source file:playground.johannes.studies.coopsim.Convergence.java
/** * @param args/*from ww w. j av a2s.c om*/ * @throws IOException */ public static void main(String[] args) throws IOException { String root = "/Volumes/cluster.math.tu-berlin.de/net/ils2/jillenberger/leisure/runs/run259/tasks/8/"; File outputDir = new File(root + "/output"); String property = "d_trip_culture"; File analysisDir = new File(root + "/analysis"); analysisDir.mkdirs(); BufferedWriter writer = new BufferedWriter( new FileWriter(analysisDir.getAbsolutePath() + "/" + property + ".txt")); writer.write("it\t"); writer.write(property); writer.newLine(); TDoubleArrayList yVals = new TDoubleArrayList(); TDoubleArrayList xVals = new TDoubleArrayList(); for (File file : outputDir.listFiles()) { if (file.isDirectory()) { File statsFile = new File(String.format("%1$s/statistics.txt", file.getAbsolutePath())); if (statsFile.exists()) { String iter = file.getName(); BufferedReader reader = new BufferedReader(new FileReader(statsFile)); String line = reader.readLine(); while ((line = reader.readLine()) != null) { String[] tokens = line.split("\t"); String key = tokens[0]; String val = tokens[1]; if (key.equals(property)) { writer.write(iter); writer.write("\t"); writer.write(val); writer.newLine(); xVals.add(Double.parseDouble(iter)); yVals.add(Double.parseDouble(val)); } } } } } writer.close(); for (int i = 40; i < yVals.size(); i++) { SimpleRegression reg = new SimpleRegression(); for (int k = i - 40; k < i; k++) { reg.addData(k, yVals.get(k)); } System.out.println(String.format("Slope after iteration %1$s: %2$s.", i, reg.getSlope())); } }
From source file:uk.ac.leeds.ccg.andyt.generic.visualisation.charts.Generic_ScatterPlotAndLinearRegression.java
/** * @param data double[2][] where: data[0][] are the y values data[1][] are * the x values/*from w w w . ja va2s . c om*/ * @return double[] result where: <ul> <li>result[0] is the y axis * intercept;</li> <li>result[1] is the change in y relative to x (gradient * or slope);</li> <li>result[2] is the rank correlation coefficient * (RSquare);</li> <li>result[3] is data[0].length.</li> </ul> */ public static double[] getSimpleRegressionParameters(double[][] data) { double[] result = new double[4]; // org.apache.commons.math.stat.regression.SimpleRegression; SimpleRegression a_SimpleRegression = new SimpleRegression(); //System.out.println("data.length " + data[0].length); for (int i = 0; i < data[0].length; i++) { a_SimpleRegression.addData(data[1][i], data[0][i]); //aSimpleRegression.addData(data[0][i], data[1][i]); } result[0] = a_SimpleRegression.getIntercept(); result[1] = a_SimpleRegression.getSlope(); result[2] = a_SimpleRegression.getRSquare(); result[3] = data[0].length; return result; }
From source file:uk.ac.leeds.ccg.andyt.projects.moses.process.RegressionReport.java
/** * data[0][] = observed SAR//from w w w. ja v a 2s . c om * data[1][] = expected CAS * * @param data * @return */ public static double[] printSimpleRegression(double[][] data) { double[] result = new double[3]; // org.apache.commons.math.stat.regression.SimpleRegression; SimpleRegression aSimpleRegression = new SimpleRegression(); System.out.println("data.length " + data[0].length); for (int i = 0; i < data[0].length; i++) { // aSimpleRegression.addData( data[1][i], data[0][i] ); aSimpleRegression.addData(data[0][i], data[1][i]); } double _Intercept = aSimpleRegression.getIntercept(); double _Slope = aSimpleRegression.getSlope(); double _RSquare = aSimpleRegression.getRSquare(); System.out.println(" y = " + _Slope + " * x + " + _Intercept); System.out.println(" RSquare " + _RSquare); result[0] = _Intercept; result[1] = _Slope; result[2] = _RSquare; return result; }