List of usage examples for the org.apache.commons.math.stat.descriptive.DescriptiveStatistics constructor DescriptiveStatistics()
public DescriptiveStatistics()
From source file:edu.usc.ee599.CommunityStats.java
/**
 * Reads every file in the {@code results5} directory, derives the balanced and
 * unbalanced fractions from the first two lines of each file, and writes the mean
 * and standard deviation of both fractions to {@code results5_stats.txt}.
 *
 * <p>The second comma-separated field of line 1 is parsed as the balanced count and
 * the second field of line 2 as the unbalanced count.
 *
 * @param args unused
 * @throws Exception if the directory cannot be listed, or a file cannot be read or parsed
 */
public static void main(String[] args) throws Exception {
    File dir = new File("results5");
    File[] files = dir.listFiles();
    if (files == null) {
        // listFiles() returns null when the path is not a directory or cannot be read
        throw new IllegalStateException("Cannot list directory: " + dir.getAbsolutePath());
    }

    DescriptiveStatistics balancedStats = new DescriptiveStatistics();
    DescriptiveStatistics unbalancedStats = new DescriptiveStatistics();

    try (PrintWriter writer = new PrintWriter(new FileWriter("results5_stats.txt"))) {
        for (File file : files) {
            // one reader per file, closed even when parsing fails
            try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
                String line1 = reader.readLine();
                String line2 = reader.readLine();
                if (line1 == null || line2 == null) {
                    throw new IllegalStateException("Expected at least two lines in " + file);
                }

                int balanced = Integer.parseInt(line1.split(",")[1]);
                int unbalanced = Integer.parseInt(line2.split(",")[1]);

                // add in double precision so that large counts cannot overflow int
                double total = (double) balanced + (double) unbalanced;
                balancedStats.addValue(balanced / total);
                unbalancedStats.addValue(unbalanced / total);
            }
        }

        writer.println("AVG Balanced %: " + balancedStats.getMean());
        writer.println("AVG Unbalanced %: " + unbalancedStats.getMean());
        writer.println("STD Balanced %: " + balancedStats.getStandardDeviation());
        writer.println("STD Unbalanced %: " + unbalancedStats.getStandardDeviation());
        writer.flush();
    }
}
From source file:com.mozilla.socorro.RawDumpSizeScan.java
public static void main(String[] args) throws ParseException { String startDateStr = args[0]; String endDateStr = args[1];/*from w w w . j a va2 s . c o m*/ // Set both start/end time and start/stop row Calendar startCal = Calendar.getInstance(); Calendar endCal = Calendar.getInstance(); SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd"); if (!StringUtils.isBlank(startDateStr)) { startCal.setTime(sdf.parse(startDateStr)); } if (!StringUtils.isBlank(endDateStr)) { endCal.setTime(sdf.parse(endDateStr)); } DescriptiveStatistics stats = new DescriptiveStatistics(); long numNullRawBytes = 0L; HTable table = null; Map<String, Integer> rowValueSizeMap = new HashMap<String, Integer>(); try { table = new HTable(TABLE_NAME_CRASH_REPORTS); Scan[] scans = generateScans(startCal, endCal); for (Scan s : scans) { ResultScanner rs = table.getScanner(s); Iterator<Result> iter = rs.iterator(); while (iter.hasNext()) { Result r = iter.next(); ImmutableBytesWritable rawBytes = r.getBytes(); //length = r.getValue(RAW_DATA_BYTES, DUMP_BYTES); if (rawBytes != null) { int length = rawBytes.getLength(); if (length > 20971520) { rowValueSizeMap.put(new String(r.getRow()), length); } stats.addValue(length); } else { numNullRawBytes++; } if (stats.getN() % 10000 == 0) { System.out.println("Processed " + stats.getN()); System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(), stats.getMax(), stats.getMean())); System.out.println( String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f", stats.getPercentile(25.0d), stats.getPercentile(50.0d), stats.getPercentile(75.0d))); System.out.println("Number of large entries: " + rowValueSizeMap.size()); } } rs.close(); } System.out.println("Finished Processing!"); System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(), stats.getMax(), stats.getMean())); System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f", 
stats.getPercentile(25.0d), stats.getPercentile(50.0d), stats.getPercentile(75.0d))); for (Map.Entry<String, Integer> entry : rowValueSizeMap.entrySet()) { System.out.println(String.format("RowId: %s => Length: %d", entry.getKey(), entry.getValue())); } } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } finally { if (table != null) { try { table.close(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } } }
From source file:graticules2wld.Main.java
/** * @param args//from www. j a va 2 s. co m * @throws Exception */ public static void main(String[] args) throws Exception { /* parse the command line arguments */ // create the command line parser CommandLineParser parser = new PosixParser(); // create the Options Options options = new Options(); options.addOption("x", "originx", true, "x component of projected coordinates of upper left pixel"); options.addOption("y", "originy", true, "y component of projected coordinates of upper left pixel"); options.addOption("u", "tometers", true, "multiplication factor to get source units into meters"); options.addOption("h", "help", false, "prints this usage page"); options.addOption("d", "debug", false, "prints debugging information to stdout"); double originNorthing = 0; double originEasting = 0; String inputFileName = null; String outputFileName = null; try { // parse the command line arguments CommandLine line = parser.parse(options, args); if (line.hasOption("help")) printUsage(0); // print usage then exit using a non error exit status if (line.hasOption("debug")) debug = true; // these arguments are required if (!line.hasOption("originy") || !line.hasOption("originx")) printUsage(1); originNorthing = Double.parseDouble(line.getOptionValue("originy")); originEasting = Double.parseDouble(line.getOptionValue("originx")); if (line.hasOption("tometers")) unitsToMeters = Double.parseDouble(line.getOptionValue("tometers")); // two args should be left. the input csv file name and the output wld file name. 
String[] iofiles = line.getArgs(); if (iofiles.length < 2) { printUsage(1); } inputFileName = iofiles[0]; outputFileName = iofiles[1]; } catch (ParseException exp) { System.err.println("Unexpected exception:" + exp.getMessage()); System.exit(1); } // try to open the input file for reading and the output file for writing File graticulesCsvFile; BufferedReader csvReader = null; File wldFile; BufferedWriter wldWriter = null; try { graticulesCsvFile = new File(inputFileName); csvReader = new BufferedReader(new FileReader(graticulesCsvFile)); } catch (IOException exp) { System.err.println("Could not open input file for reading: " + inputFileName); System.exit(1); } try { wldFile = new File(outputFileName); wldWriter = new BufferedWriter(new FileWriter(wldFile)); } catch (IOException exp) { System.err.println("Could not open output file for writing: " + outputFileName); System.exit(1); } // list of lon graticules and lat graticules ArrayList<Graticule> lonGrats = new ArrayList<Graticule>(); ArrayList<Graticule> latGrats = new ArrayList<Graticule>(); // read the source CSV and convert its information into the two ArrayList<Graticule> data structures readCSV(csvReader, lonGrats, latGrats); // we now need to start finding the world file paramaters DescriptiveStatistics stats = new DescriptiveStatistics(); // find theta and phi for (Graticule g : latGrats) { stats.addValue(g.angle()); } double theta = stats.getMean(); // we use the mean of the lat angles as theta if (debug) System.out.println("theta range = " + Math.toDegrees(stats.getMax() - stats.getMin())); stats.clear(); for (Graticule g : lonGrats) { stats.addValue(g.angle()); } double phi = stats.getMean(); // ... 
and the mean of the lon angles for phi if (debug) System.out.println("phi range = " + Math.toDegrees(stats.getMax() - stats.getMin())); stats.clear(); // print these if in debug mode if (debug) { System.out.println("theta = " + Math.toDegrees(theta) + "deg"); System.out.println("phi = " + Math.toDegrees(phi) + "deg"); } // find x and y (distance beteen pixels in map units) Collections.sort(latGrats); Collections.sort(lonGrats); int prevMapValue = 0; //fixme: how to stop warning about not being initilised? Line2D prevGratPixelSys = new Line2D.Double(); boolean first = true; for (Graticule g : latGrats) { if (!first) { int deltaMapValue = Math.abs(g.realValue() - prevMapValue); double deltaPixelValue = (g.l.ptLineDist(prevGratPixelSys.getP1()) + (g.l.ptLineDist(prevGratPixelSys.getP2()))) / 2; double delta = deltaMapValue / deltaPixelValue; stats.addValue(delta); } else { first = false; prevMapValue = g.realValue(); prevGratPixelSys = (Line2D) g.l.clone(); } } double y = stats.getMean(); if (debug) System.out.println("y range = " + (stats.getMax() - stats.getMin())); stats.clear(); first = true; for (Graticule g : lonGrats) { if (!first) { int deltaMapValue = g.realValue() - prevMapValue; double deltaPixelValue = (g.l.ptLineDist(prevGratPixelSys.getP1()) + (g.l.ptLineDist(prevGratPixelSys.getP2()))) / 2; double delta = deltaMapValue / deltaPixelValue; stats.addValue(delta); } else { first = false; prevMapValue = g.realValue(); prevGratPixelSys = (Line2D) g.l.clone(); } } double x = stats.getMean(); if (debug) System.out.println("x range = " + (stats.getMax() - stats.getMin())); stats.clear(); if (debug) { System.out.println("x = " + x); System.out.println("y = " + y); } SimpleRegression regression = new SimpleRegression(); // C, F are translation terms: x, y map coordinates of the center of the upper-left pixel for (Graticule g : latGrats) { // find perp dist to pixel space 0,0 Double perpPixelDist = g.l.ptLineDist(new Point2D.Double(0, 0)); // find the map space 
distance from this graticule to the center of the 0,0 pixel Double perpMapDist = perpPixelDist * y; // perpMapDist / perpPixelDist = y regression.addData(perpMapDist, g.realValue()); } double F = regression.getIntercept(); regression.clear(); for (Graticule g : lonGrats) { // find perp dist to pixel space 0,0 Double perpPixelDist = g.l.ptLineDist(new Point2D.Double(0, 0)); // find the map space distance from this graticule to the center of the 0,0 pixel Double perpMapDist = perpPixelDist * x; // perpMapDist / perpPixelDist = x regression.addData(perpMapDist, g.realValue()); } double C = regression.getIntercept(); regression.clear(); if (debug) { System.out.println("Upper Left pixel has coordinates " + C + ", " + F); } // convert to meters C *= unitsToMeters; F *= unitsToMeters; // C,F store the projected (in map units) coordinates of the upper left pixel. // originNorthing,originEasting is the offset we need to apply to 0,0 to push the offsets into our global coordinate system C = originEasting + C; F = originNorthing + F; // calculate the affine transformation matrix elements double D = -1 * x * unitsToMeters * Math.sin(theta); double A = x * unitsToMeters * Math.cos(theta); double B = y * unitsToMeters * Math.sin(phi); // if should be negative, it'll formed by negative sin double E = -1 * y * unitsToMeters * Math.cos(phi); /* * Line 1: A: pixel size in the x-direction in map units/pixel * Line 2: D: rotation about y-axis * Line 3: B: rotation about x-axis * Line 4: E: pixel size in the y-direction in map units, almost always negative[3] * Line 5: C: x-coordinate of the center of the upper left pixel * Line 6: F: y-coordinate of the center of the upper left pixel */ if (debug) { System.out.println("A = " + A); System.out.println("D = " + D); System.out.println("B = " + B); System.out.println("E = " + E); System.out.println("C = " + C); System.out.println("F = " + F); // write the world file System.out.println(); System.out.println("World File:"); 
System.out.println(A); System.out.println(D); System.out.println(B); System.out.println(E); System.out.println(C); System.out.println(F); } // write to the .wld file wldWriter.write(A + "\n"); wldWriter.write(D + "\n"); wldWriter.write(B + "\n"); wldWriter.write(E + "\n"); wldWriter.write(C + "\n"); wldWriter.write(F + "\n"); wldWriter.close(); }
From source file:com.linkedin.pinot.tools.query.comparison.StatsGenerator.java
public static void generateReport(String dataFileName) throws IOException { List<DescriptiveStatistics> statisticsList = new ArrayList<>(); String dataString;/*w w w.j a va 2s . com*/ BufferedReader dataReader = new BufferedReader(new FileReader(dataFileName)); // First line is treated as header String[] columns = dataReader.readLine().split("\\s+"); int numColumns = columns.length; for (int i = 0; i < numColumns; ++i) { statisticsList.add(new DescriptiveStatistics()); } while ((dataString = dataReader.readLine()) != null) { String[] dataArray = dataString.trim().split(" "); if (dataArray.length != numColumns) { throw new RuntimeException("Row has missing columns: " + Arrays.toString(dataArray) + " Expected: " + numColumns + " columns."); } for (int i = 0; i < dataArray.length; ++i) { double data = Double.valueOf(dataArray[i]); statisticsList.get(i).addValue(data); } } for (int i = 0; i < numColumns; i++) { LOGGER.info("Stats: {}: {}", columns[i], statisticsList.get(i).toString().replace("\n", "\t")); } }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step11GoldDataStatistics.java
/** * (1) Plain text with 4 columns: (1) the rank of the document in the list * (2) average agreement rate over queries (3) standard deviation of * agreement rate over queries. (4) average length of the document in the * rank.// www. ja v a 2 s .c o m */ public static void statistics1(File inputDir, File outputDir) throws Exception { SortedMap<Integer, DescriptiveStatistics> mapDocumentRankObservedAgreement = new TreeMap<>(); SortedMap<Integer, DescriptiveStatistics> mapDocumentRankDocLength = new TreeMap<>(); // iterate over query containers for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) { // add new entries if (!mapDocumentRankObservedAgreement.containsKey(rankedResult.rank)) { mapDocumentRankObservedAgreement.put(rankedResult.rank, new DescriptiveStatistics()); } if (!mapDocumentRankDocLength.containsKey(rankedResult.rank)) { mapDocumentRankDocLength.put(rankedResult.rank, new DescriptiveStatistics()); } Double observedAgreement = rankedResult.observedAgreement; if (observedAgreement == null) { System.err .println("Observed agreement is null; " + f.getName() + ", " + rankedResult.clueWebID); } else { // update value mapDocumentRankObservedAgreement.get(rankedResult.rank).addValue(observedAgreement); mapDocumentRankDocLength.get(rankedResult.rank).addValue(rankedResult.plainText.length()); } } } PrintWriter pw = new PrintWriter(new FileWriter(new File(outputDir, "stats1.csv"))); for (Map.Entry<Integer, DescriptiveStatistics> entry : mapDocumentRankObservedAgreement.entrySet()) { pw.printf(Locale.ENGLISH, "%d\t%.4f\t%.4f\t%.4f\t%.4f%n", entry.getKey(), entry.getValue().getMean(), entry.getValue().getStandardDeviation(), mapDocumentRankDocLength.get(entry.getKey()).getMean(), 
mapDocumentRankDocLength.get(entry.getKey()).getStandardDeviation()); } pw.close(); }
From source file:mecha.monitoring.Metric.java
/**
 * Creates a metric whose statistics are limited to a window of values.
 *
 * @param name       name stored on this metric
 * @param windowSize window size stored on this metric and applied to its statistics
 * @throws Exception declared by the constructor signature
 */
public Metric(String name, int windowSize) throws Exception {
    this.name = name;
    this.windowSize = windowSize;

    this.stats = new DescriptiveStatistics();
    this.stats.setWindowSize(windowSize);
}
From source file:cs.cirg.cida.analysis.ColumnBasedDescriptiveStatistics.java
/**
 * Builds one {@link DescriptiveStatistics} per table row from the values in the
 * selected columns and stores them, in row order, in
 * {@code iterationsDescriptiveStatistics}.
 *
 * @param dataTable the table to analyse
 * @return the same {@code dataTable} instance that was passed in
 * @throws CIlibIOException declared by the overridden method
 */
@Override
public DataTable operate(DataTable dataTable) throws CIlibIOException {
    iterationsDescriptiveStatistics = new ArrayList<DescriptiveStatistics>();

    List<Integer> columnIndices = this.getSelectedItems();
    int rowCount = dataTable.getNumRows();

    for (int r = 0; r < rowCount; r++) {
        List<Numeric> cells = (List<Numeric>) dataTable.getRow(r);

        DescriptiveStatistics rowStats = new DescriptiveStatistics();
        for (Integer column : columnIndices) {
            Numeric cell = cells.get(column);
            rowStats.addValue(cell.getReal());
        }

        iterationsDescriptiveStatistics.add(rowStats);
    }

    return dataTable;
}
From source file:guineu.modules.filter.Alignment.centering.mean.MeanCenteringTask.java
private void normalize(Dataset data) { DescriptiveStatistics stats = new DescriptiveStatistics(); for (String nameExperiment : data.getAllColumnNames()) { for (PeakListRow row : data.getRows()) { Object value = row.getPeak(nameExperiment); if (value != null && value instanceof Double) { stats.addValue((Double) value); }// www . j ava 2 s .c o m } for (PeakListRow row : data.getRows()) { Object value = row.getPeak(nameExperiment); if (value != null && value instanceof Double) { row.setPeak(nameExperiment, Math.abs((Double) value - stats.getMean())); } } stats.clear(); } }
From source file:guineu.modules.filter.Alignment.normalizationSTD.STDNormalizationTask.java
/**
 * For every column of the dataset, divides each {@code Double} peak value by the
 * column's standard deviation. Peaks that are {@code null} or not a {@code Double}
 * are ignored and left unchanged.
 *
 * <p>A column whose standard deviation is zero or NaN is left untouched, because
 * dividing by it would write Infinity or NaN into the dataset.
 *
 * @param data dataset whose rows are modified in place
 */
private void normalize(Dataset data) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (String nameExperiment : data.getAllColumnNames()) {
        // first pass: collect the column's values
        for (PeakListRow row : data.getRows()) {
            Object value = row.getPeak(nameExperiment);
            if (value instanceof Double) { // instanceof is already false for null
                stats.addValue((Double) value);
            }
        }

        // The standard deviation is constant for the whole column; compute it once
        // instead of once per row.
        double stdDev = stats.getStandardDeviation();
        stats.clear();

        if (stdDev == 0.0 || Double.isNaN(stdDev)) {
            // nothing meaningful to scale by; keep the column as it is
            continue;
        }

        // second pass: scale each value
        for (PeakListRow row : data.getRows()) {
            Object value = row.getPeak(nameExperiment);
            if (value instanceof Double) {
                row.setPeak(nameExperiment, (Double) value / stdDev);
            }
        }
    }
}
From source file:guineu.modules.dataanalysis.kstest.KSTestTask.java
public void run() { try {// w w w . ja v a 2s . com final Rengine rEngine; try { rEngine = RUtilities.getREngine(); } catch (Throwable t) { throw new IllegalStateException( "Kolmogorov-Smirnov test requires R but it couldn't be loaded (" + t.getMessage() + ')'); } synchronized (RUtilities.R_SEMAPHORE) { DescriptiveStatistics stats = new DescriptiveStatistics(); // assing the values to the matrix for (int row = 0; row < dataset.getNumberRows(); row++) { rEngine.eval("x <- vector(mode=\"numeric\",length=" + dataset.getNumberCols() + ")"); stats.clear(); PeakListRow peakListRow = dataset.getRow(row); for (int c = 0; c < dataset.getNumberCols(); c++) { int r = c + 1; double value = (Double) peakListRow.getPeak(dataset.getAllColumnNames().get(c)); rEngine.eval("x[" + r + "] <- " + value); stats.addValue(value); } rEngine.eval("y <- rnorm(" + dataset.getNumberCols() + ", mean= " + stats.getMean() + ", sd = " + stats.getStandardDeviation() + ")"); rEngine.eval("result <- ks.test(x,y)"); long e = rEngine.rniParse("result$p.value", 1); long r = rEngine.rniEval(e, 0); REXP x = new REXP(rEngine, r); double pValue = x.asDouble(); dataset.getRow(row).setVar("setPValue", pValue); if (peakListRow.getID() == 68) { rEngine.eval("write.csv(x, \"x.csv\""); } } } rEngine.end(); setStatus(TaskStatus.FINISHED); } catch (Exception ex) { Logger.getLogger(KSTestTask.class.getName()).log(Level.SEVERE, null, ex); setStatus(TaskStatus.ERROR); } }