List of usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics#addValue
public void addValue(double v)
From source file:guineu.modules.filter.Alignment.centering.mean.MeanCenteringTask.java
/**
 * Replaces every {@code Double} peak of each experiment column by the absolute value of its
 * distance from that column's mean.
 *
 * <p>Peaks that are {@code null} or not a {@code Double} are ignored when computing the mean
 * and are left untouched. Rows are modified in place through {@code setPeak}.
 *
 * @param data dataset whose columns are centered
 */
private void normalize(Dataset data) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (String nameExperiment : data.getAllColumnNames()) {
        // First pass: collect the numeric peaks of this column.
        for (PeakListRow row : data.getRows()) {
            Object value = row.getPeak(nameExperiment);
            if (value instanceof Double) {
                stats.addValue((Double) value);
            }
        }

        // The mean does not change during the second pass, so compute it once per column
        // instead of once per row.
        final double mean = stats.getMean();

        // Second pass: rewrite each numeric peak relative to the column mean.
        // NOTE(review): the absolute value discards the sign of the deviation - confirm
        // this is intended for a "mean centering" step.
        for (PeakListRow row : data.getRows()) {
            Object value = row.getPeak(nameExperiment);
            if (value instanceof Double) {
                row.setPeak(nameExperiment, Math.abs((Double) value - mean));
            }
        }
        stats.clear();
    }
}
From source file:guineu.modules.filter.Alignment.normalizationSTD.STDNormalizationTask.java
/**
 * Divides every {@code Double} peak of each experiment column by that column's standard
 * deviation.
 *
 * <p>Peaks that are {@code null} or not a {@code Double} are ignored when computing the
 * deviation and are left untouched. A column whose standard deviation is zero or NaN is
 * left unchanged, because dividing by it would fill the column with Infinity/NaN values.
 * Rows are modified in place through {@code setPeak}.
 *
 * @param data dataset whose columns are scaled
 */
private void normalize(Dataset data) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (String nameExperiment : data.getAllColumnNames()) {
        // First pass: collect the numeric peaks of this column.
        for (PeakListRow row : data.getRows()) {
            Object value = row.getPeak(nameExperiment);
            if (value instanceof Double) {
                stats.addValue((Double) value);
            }
        }

        // The deviation is constant during the second pass; compute it once per column.
        final double standardDeviation = stats.getStandardDeviation();

        // "> 0" is false for both 0.0 and NaN, so degenerate columns are skipped.
        if (standardDeviation > 0) {
            for (PeakListRow row : data.getRows()) {
                Object value = row.getPeak(nameExperiment);
                if (value instanceof Double) {
                    row.setPeak(nameExperiment, (Double) value / standardDeviation);
                }
            }
        }
        stats.clear();
    }
}
From source file:de.tudarmstadt.ukp.dkpro.keyphrases.core.evaluator.KeyphraseDatasetStatistics.java
public double median(final List<Integer> values) { final DescriptiveStatistics stats = new DescriptiveStatistics(); for (final Integer value : values) { stats.addValue(value); }// w w w. j a v a 2s . c o m return stats.getPercentile(0.5); }
From source file:com.linkedin.pinot.tools.PinotZKChanger.java
/**
 * Logs a segment assignment and how evenly segments are spread across servers.
 *
 * <p>The mapping is first logged as pretty-printed JSON. It is then inverted into a
 * server-to-segments view (sorted by server name), the number of segments per server is
 * logged, and summary statistics over those counts are logged last.
 *
 * @param mapping segment name to (server name to state) map
 * @throws Exception if the mapping cannot be serialised
 */
protected void printSegmentAssignment(Map<String, Map<String, String>> mapping) throws Exception {
    StringWriter prettyJson = new StringWriter();
    objectMapper.writerWithDefaultPrettyPrinter().writeValue(prettyJson, mapping);
    LOGGER.info(prettyJson.toString());

    // Invert segment -> servers into server -> segments.
    Map<String, List<String>> segmentsByServer = new TreeMap<>();
    for (Map.Entry<String, Map<String, String>> assignment : mapping.entrySet()) {
        String segment = assignment.getKey();
        for (String server : assignment.getValue().keySet()) {
            List<String> hostedSegments = segmentsByServer.get(server);
            if (hostedSegments == null) {
                hostedSegments = new ArrayList<String>();
                segmentsByServer.put(server, hostedSegments);
            }
            hostedSegments.add(segment);
        }
    }

    // One sample per server: the number of segments it hosts.
    DescriptiveStatistics stats = new DescriptiveStatistics();
    for (Map.Entry<String, List<String>> hosted : segmentsByServer.entrySet()) {
        int segmentCount = hosted.getValue().size();
        LOGGER.info("server " + hosted.getKey() + " has " + segmentCount + " segments");
        stats.addValue(segmentCount);
    }
    LOGGER.info("Segment Distrbution stat");
    LOGGER.info(stats.toString());
}
From source file:com.joliciel.jochre.lexicon.LexiconErrorWriter.java
static void mergeCrossValidation(File evalDir, String prefix) { try {//from w w w . j a v a 2s. c o m File[] files = evalDir.listFiles(new FilenameFilter() { @Override public boolean accept(File dir, String name) { if (name.endsWith(".csv")) return true; else return false; } }); List<String> groupNames = new ArrayList<String>(); Map<String, Writer> writers = new HashMap<String, Writer>(); Map<String, ErrorStatistics> errorMap = new LinkedHashMap<String, ErrorStatistics>(); Map<String, Map<String, DescriptiveStatistics>> statMap = new HashMap<String, Map<String, DescriptiveStatistics>>(); for (File file : files) { String filename = file.getName(); LOG.debug("Processing " + filename); int index = Integer.parseInt(filename.substring(prefix.length(), prefix.length() + 1)); String suffix = filename.substring(prefix.length() + 2, filename.lastIndexOf('_')); String fileType = filename.substring(filename.lastIndexOf('_') + 1, filename.lastIndexOf('.')); LOG.debug("Processing " + filename); LOG.debug("index: " + index); LOG.debug("suffix: " + suffix); LOG.debug("fileType: " + fileType); Writer writer = writers.get(fileType); boolean firstFile = false; if (writer == null) { writer = new BufferedWriter(new OutputStreamWriter( new FileOutputStream( new File(evalDir, prefix + "A_" + suffix + "_" + fileType + ".csv"), false), "UTF8")); writers.put(fileType, writer); firstFile = true; } if (fileType.equals("KEMatrix")) { Scanner scanner = new Scanner(file); int i = 0; List<String> myGroupNames = new ArrayList<String>(); Map<String, Boolean> haveCountMap = new HashMap<String, Boolean>(); while (scanner.hasNextLine()) { String line = scanner.nextLine(); List<String> cells = CSV.getCSVCells(line); if (i == 0) { for (int j = 0; j < cells.size(); j += 5) { String groupName = cells.get(j); if (!errorMap.containsKey(groupName)) { errorMap.put(groupName, new ErrorStatistics()); statMap.put(groupName, new HashMap<String, DescriptiveStatistics>()); groupNames.add(groupName); } 
myGroupNames.add(groupName); } } else if (i == 1) { // do nothing } else { String rowName = cells.get(0); int j = 0; for (String groupName : myGroupNames) { ErrorStatistics errorStats = errorMap.get(groupName); Map<String, DescriptiveStatistics> stats = statMap.get(groupName); double correctCount = Double.parseDouble(cells.get(j * 5 + 1)); double errorCount = Double.parseDouble(cells.get(j * 5 + 2)); double totalCount = Double.parseDouble(cells.get(j * 5 + 3)); Boolean haveCount = haveCountMap.get(groupName); if (rowName.equals("known")) { errorStats.knownWordCorrectCount += correctCount; errorStats.knownWordErrorCount += errorCount; } else if (rowName.equals("unknown")) { errorStats.unknownWordCorrectCount += correctCount; errorStats.unknownWordErrorCount += errorCount; } else if (rowName.equals("goodSeg")) { errorStats.goodSegCorrectCount += correctCount; errorStats.goodSegErrorCount += errorCount; } else if (rowName.equals("badSeg")) { errorStats.badSegCorrectCount += correctCount; errorStats.badSegErrorCount += errorCount; } else if (rowName.equals("knownLetters")) { errorStats.knownWordCorrectLetterCount += correctCount; errorStats.knownWordErrorLetterCount += errorCount; } else if (rowName.equals("unknownLetters")) { errorStats.unknownWordCorrectLetterCount += correctCount; errorStats.unknownWordErrorLetterCount += errorCount; } else if (rowName.equals("goodSegLetters")) { errorStats.goodSegCorrectLetterCount += correctCount; errorStats.goodSegErrorLetterCount += errorCount; } else if (rowName.equals("badSegLetters")) { errorStats.badSegCorrectLetterCount += correctCount; errorStats.badSegErrorLetterCount += errorCount; } else if (rowName.equals("inBeam")) { errorStats.answerInBeamCorrectCount += correctCount; errorStats.answerInBeamErrorCount += errorCount; } else if (rowName.equals("total")) { haveCountMap.put(groupName, totalCount > 0); } else if (rowName.endsWith("%")) { if (haveCount) { String keyPrefix = rowName.substring(0, rowName.length() - 1); 
String key = keyPrefix + "|correct"; DescriptiveStatistics correctStat = stats.get(key); if (correctStat == null) { correctStat = new DescriptiveStatistics(); stats.put(key, correctStat); } correctStat.addValue(correctCount); key = keyPrefix + "|error"; DescriptiveStatistics errorStat = stats.get(key); if (errorStat == null) { errorStat = new DescriptiveStatistics(); stats.put(key, errorStat); } errorStat.addValue(errorCount); key = keyPrefix + "|total"; DescriptiveStatistics totalStat = stats.get(key); if (totalStat == null) { totalStat = new DescriptiveStatistics(); stats.put(key, totalStat); } totalStat.addValue(totalCount); } } j++; } } i++; } } else { Scanner scanner = new Scanner(file); boolean firstLine = true; while (scanner.hasNextLine()) { String line = scanner.nextLine(); if (firstLine) { if (firstFile) writer.write(line + "\n"); firstLine = false; } else { writer.write(line + "\n"); } writer.flush(); } } // file type } // next file Writer statsWriter = writers.get("KEMatrix"); writeStats(statsWriter, errorMap); statsWriter.write("\n"); String[] statTypes = new String[] { "known", "unknown", "goodSeg", "badSeg", "inBeam", "total", "knownLetter", "unknownLetter", "goodSegLetter", "badSegLetter", "totalLetter" }; for (String statType : statTypes) { for (String groupName : groupNames) { Map<String, DescriptiveStatistics> statsMap = statMap.get(groupName); DescriptiveStatistics correctStat = statsMap.get(statType + "|correct"); DescriptiveStatistics errorStat = statsMap.get(statType + "|error"); DescriptiveStatistics totalStat = statsMap.get(statType + "|total"); statsWriter.write(CSV.format(statType + "%Avg") + CSV.format(correctStat.getMean()) + CSV.format(errorStat.getMean()) + CSV.format(totalStat.getMean()) + CSV.getCsvSeparator()); } // next group statsWriter.write("\n"); for (String groupName : groupNames) { Map<String, DescriptiveStatistics> statsMap = statMap.get(groupName); DescriptiveStatistics correctStat = statsMap.get(statType + "|correct"); 
DescriptiveStatistics errorStat = statsMap.get(statType + "|error"); DescriptiveStatistics totalStat = statsMap.get(statType + "|total"); statsWriter.write(CSV.format(statType + "%Dev") + CSV.format(correctStat.getStandardDeviation()) + CSV.format(errorStat.getStandardDeviation()) + CSV.format(totalStat.getStandardDeviation()) + CSV.getCsvSeparator()); } // next group statsWriter.write("\n"); statsWriter.flush(); } statsWriter.close(); } catch (IOException e) { LogUtils.logError(LOG, e); throw new RuntimeException(e); } }
From source file:guineu.modules.dataanalysis.Media.mediaFilterTask.java
/**
 * Computes, for every row of the dataset, the middle element of its sorted {@code Double}
 * peaks.
 *
 * <p>NOTE(review): despite the method name, the value computed is the element at index
 * {@code length / 2} of the sorted peaks (the upper middle element for an even count), not
 * a standard deviation - confirm which one callers expect.
 *
 * @param dataset dataset whose rows are summarised
 * @return one value per row, in row order; {@code NaN} for a row without any
 *         {@code Double} peak
 */
public double[] getSTDDev(Dataset dataset) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    double[] median = new double[dataset.getNumberRows()];
    int numRows = 0;
    for (PeakListRow peak : dataset.getRows()) {
        stats.clear();
        for (String nameExperiment : dataset.getAllColumnNames()) {
            // Skip null and non-Double peaks explicitly instead of relying on a swallowed
            // ClassCastException/NullPointerException.
            Object value = peak.getPeak(nameExperiment);
            if (value instanceof Double) {
                stats.addValue((Double) value);
            }
        }
        double[] values = stats.getSortedValues();
        // A row without numeric peaks has no middle element; report NaN rather than
        // failing with an ArrayIndexOutOfBoundsException.
        median[numRows++] = values.length == 0 ? Double.NaN : values[values.length / 2];
    }
    return median;
}
From source file:cal.binBased.BinMSnSpectrum.java
public double[] getBin_spectrum(int shift) { ArrayList<Double> bin_spec_al = new ArrayList<Double>(); double binSize = (fragment_tolerance * 2), upperLimit = max_value + 0.00001; for (double lowerLimit = min_value; lowerLimit < upperLimit; lowerLimit = lowerLimit + binSize) { double tmp_intensity_bin = 0; DescriptiveStatistics obj = new DescriptiveStatistics(); for (Peak p : peakList) { double mz = p.getMz() + shift; if (mz >= lowerLimit && mz < lowerLimit + binSize) { obj.addValue(p.intensity); }// w w w . j a v a 2 s. c o m } if (obj.getN() > 0) { if (intensities_sum_or_mean_or_median == 0) { tmp_intensity_bin = obj.getSum(); } else if (intensities_sum_or_mean_or_median == 1) { tmp_intensity_bin = obj.getMean(); } else if (intensities_sum_or_mean_or_median == 2) { tmp_intensity_bin = obj.getPercentile(50); } } // put every bin_pectrum bin_spec_al.add(tmp_intensity_bin); } // convert an arraylist to double array // initiate size of array bin_size = bin_spec_al.size(); double[] bin_spectrum = new double[bin_spec_al.size()]; for (int i = 0; i < bin_spec_al.size(); i++) { bin_spectrum[i] = bin_spec_al.get(i); } return bin_spectrum; }
From source file:cal.binBased.BinMSnSpectrum.java
private ArrayList<double[]> prepareBinSpectra() { // first prepare bin-spectrum to be filled with zero int size = (2 * correctionFactor) + 1; ArrayList<double[]> shiftedSpectra = new ArrayList<double[]>(size); for (int i = 0; i < size; i++) { double[] shiftedSpectrum = new double[bin_size]; shiftedSpectra.add(shiftedSpectrum); }//from w w w .jav a 2 s . c o m // now fill each bin spectrum with correct mz values. double binSize = (fragment_tolerance * 2), upperLimit = max_value + 0.00001; int current_index = 0; for (double lowerLimit = min_value + correctionFactor; lowerLimit < upperLimit - correctionFactor; lowerLimit = lowerLimit + binSize) { double tmp_intensity_bin = 0; DescriptiveStatistics obj = new DescriptiveStatistics(); for (Peak p : peakList) { double mz = p.getMz(); if (mz >= lowerLimit && mz < lowerLimit + binSize) { obj.addValue(p.intensity); } } if (obj.getN() > 0) { if (intensities_sum_or_mean_or_median == 0) { tmp_intensity_bin = obj.getSum(); } else if (intensities_sum_or_mean_or_median == 1) { tmp_intensity_bin = obj.getMean(); } else if (intensities_sum_or_mean_or_median == 2) { tmp_intensity_bin = obj.getPercentile(50); } } // put every bin_pectrum int filling_index = current_index; // check every bin spectrum for (double[] shifted : shiftedSpectra) { shifted[filling_index] = tmp_intensity_bin; filling_index++; } current_index++; } return shiftedSpectra; }
From source file:fantail.algorithms.BinaryART.java
private double getMedian(Instances data, int attIndex) throws Exception { if (false) {// w ww .ja v a 2 s .c o m return getMedian2(data, attIndex); // added 07-july 2013; actually they are the same // removed 17/07/2013 } DescriptiveStatistics stats = new DescriptiveStatistics(); for (int i = 0; i < data.numInstances(); i++) { Instance inst = (Instance) data.instance(i); stats.addValue(inst.value(attIndex)); } double median = stats.getPercentile(50); return median; }
From source file:com.linkedin.pinot.tools.PinotSegmentRebalancer.java
private void printSegmentAssignment(Map<String, Map<String, String>> mapping) throws Exception { StringWriter sw = new StringWriter(); objectMapper.writerWithDefaultPrettyPrinter().writeValue(sw, mapping); LOGGER.info(sw.toString());//from www .j ava 2 s . co m Map<String, List<String>> serverToSegmentMapping = new TreeMap<>(); for (String segment : mapping.keySet()) { Map<String, String> serverToStateMap = mapping.get(segment); for (String server : serverToStateMap.keySet()) { if (!serverToSegmentMapping.containsKey(server)) { serverToSegmentMapping.put(server, new ArrayList<String>()); } serverToSegmentMapping.get(server).add(segment); } } DescriptiveStatistics stats = new DescriptiveStatistics(); for (String server : serverToSegmentMapping.keySet()) { List<String> list = serverToSegmentMapping.get(server); LOGGER.info("server " + server + " has " + list.size() + " segments"); stats.addValue(list.size()); } LOGGER.info("Segment Distrbution stat"); LOGGER.info(stats.toString()); }