Usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.getN()
public long getN()
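getN() returns the number of values currently held by the DescriptiveStatistics instance (0 when nothing has been added). Before the project-specific examples below, here is a minimal self-contained sketch of the typical pattern; the class name and sample values are illustrative only.

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class GetNExample {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();

        // getN() is 0 before any values are added
        System.out.println("n = " + stats.getN()); // n = 0

        // add a few sample values (illustrative data)
        for (double v : new double[] { 1.5, 2.0, 3.25 }) {
            stats.addValue(v);
        }

        // getN() now reports the number of stored values; it is commonly used
        // to guard statistics that require data, such as the mean or standard deviation
        if (stats.getN() > 0) {
            System.out.println("n = " + stats.getN());        // n = 3
            System.out.println("mean = " + stats.getMean());  // mean of the 3 values
        }
    }
}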
From source file:gdsc.smlm.ij.plugins.PSFEstimator.java
private void setParams(int i, double[] params, double[] params_dev, DescriptiveStatistics sample) {
    if (sample.getN() > 0) {
        params[i] = sample.getMean();
        params_dev[i] = sample.getStandardDeviation();
    }
}
From source file:com.itemanalysis.jmetrik.stats.descriptives.DescriptiveAnalysis.java
public void publishTable(VariableAttributes v) {
    TextTable table = null;
    TextTableColumnFormat[] cformats = new TextTableColumnFormat[2];
    cformats[0] = new TextTableColumnFormat();
    cformats[0].setStringFormat(15, TextTableColumnFormat.OutputAlignment.LEFT);
    cformats[1] = new TextTableColumnFormat();
    cformats[1].setDoubleFormat(10, 4, TextTableColumnFormat.OutputAlignment.RIGHT);

    DescriptiveStatistics temp = data.get(v);

    table = new TextTable();
    table.addAllColumnFormats(cformats, 17);
    table.getRowAt(0).addHeader(0, 2, v.getName().toString(), TextTablePosition.CENTER);
    table.getRowAt(1).addHorizontalRule(0, 2, "=");
    table.getRowAt(2).addHeader(0, 1, "Statistic", TextTablePosition.CENTER);
    table.getRowAt(2).addHeader(1, 1, "Value", TextTablePosition.CENTER);
    table.getRowAt(3).addHorizontalRule(0, 2, "-");
    table.addStringAt(4, 0, "N");
    table.addDoubleAt(4, 1, maxProgress);
    table.addStringAt(5, 0, "Valid N");
    table.addDoubleAt(5, 1, temp.getN());
    table.addStringAt(6, 0, "Min");
    table.addDoubleAt(6, 1, temp.getMin());
    table.addStringAt(7, 0, "Max");
    table.addDoubleAt(7, 1, temp.getMax());
    table.addStringAt(8, 0, "Mean");
    table.addDoubleAt(8, 1, temp.getMean());
    table.addStringAt(9, 0, "Std. Dev.");
    table.addDoubleAt(9, 1, temp.getStandardDeviation());
    table.addStringAt(10, 0, "Skewness");
    table.addDoubleAt(10, 1, temp.getSkewness());
    table.addStringAt(11, 0, "Kurtosis");
    table.addDoubleAt(11, 1, temp.getKurtosis());
    table.addStringAt(12, 0, "First Quartile");
    table.addDoubleAt(12, 1, temp.getPercentile(25));
    table.addStringAt(13, 0, "Median");
    table.addDoubleAt(13, 1, temp.getPercentile(50));
    table.addStringAt(14, 0, "Third Quartile");
    table.addDoubleAt(14, 1, temp.getPercentile(75));
    table.addStringAt(15, 0, "IQR");
    table.addDoubleAt(15, 1, temp.getPercentile(75) - temp.getPercentile(25));
    table.getRowAt(16).addHorizontalRule(0, 2, "=");

    publish(table.toString() + "\n");
}
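In the table above, the "N" row reports the number of records processed (maxProgress), while "Valid N" reports temp.getN(), i.e. only the values that were actually added to the DescriptiveStatistics instance. A minimal sketch of how those two counts can diverge when invalid records are skipped; the missing-value check and the sample data are illustrative only, not taken from the jmetrik source.

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class ValidNExample {
    public static void main(String[] args) {
        // raw records, where NaN stands in for a missing value (illustrative data)
        double[] records = { 4.2, Double.NaN, 5.1, Double.NaN, 3.9 };

        DescriptiveStatistics stats = new DescriptiveStatistics();
        int processed = 0;
        for (double r : records) {
            processed++;               // total records seen ("N")
            if (!Double.isNaN(r)) {
                stats.addValue(r);     // only valid values contribute to getN() ("Valid N")
            }
        }

        System.out.println("N = " + processed);           // 5
        System.out.println("Valid N = " + stats.getN());  // 3
        System.out.println("Mean = " + stats.getMean());  // mean of the 3 valid values
    }
}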
From source file:gdsc.smlm.ij.plugins.PSFEstimator.java
private void getPairedP(DescriptiveStatistics sample1, DescriptiveStatistics sample2, int i, double[] p,
        boolean[] identical) throws IllegalArgumentException {
    if (sample1.getN() < 2)
        return;

    // The number returned is the smallest significance level at which one can reject the null
    // hypothesis that the mean of the paired differences is 0 in favor of the two-sided alternative
    // that the mean paired difference is not equal to 0. For a one-sided test, divide the returned value by 2
    p[i] = TestUtils.pairedTTest(sample1.getValues(), sample2.getValues());
    identical[i] = (p[i] > settings.pValue);
}
From source file:com.intuit.tank.persistence.databases.BucketDataItemTest.java
/**
 * Run the DescriptiveStatistics getStats() method test.
 *
 * @throws Exception
 *
 * @generatedBy CodePro at 9/10/14 10:32 AM
 */
@Test
public void testGetStats_1() throws Exception {
    BucketDataItem fixture = new BucketDataItem(1, new Date(), new DescriptiveStatistics());

    DescriptiveStatistics result = fixture.getStats();

    assertNotNull(result);
    assertEquals(
            "DescriptiveStatistics:\nn: 0\nmin: NaN\nmax: NaN\nmean: NaN\nstd dev: NaN\nmedian: NaN\nskewness: NaN\nkurtosis: NaN\n",
            result.toString());
    assertEquals(Double.NaN, result.getMax(), 1.0);
    assertEquals(Double.NaN, result.getVariance(), 1.0);
    assertEquals(Double.NaN, result.getMean(), 1.0);
    assertEquals(-1, result.getWindowSize());
    assertEquals(0.0, result.getSumsq(), 1.0);
    assertEquals(Double.NaN, result.getKurtosis(), 1.0);
    assertEquals(0.0, result.getSum(), 1.0);
    assertEquals(Double.NaN, result.getSkewness(), 1.0);
    assertEquals(Double.NaN, result.getPopulationVariance(), 1.0);
    assertEquals(Double.NaN, result.getStandardDeviation(), 1.0);
    assertEquals(Double.NaN, result.getGeometricMean(), 1.0);
    assertEquals(0L, result.getN());
    assertEquals(Double.NaN, result.getMin(), 1.0);
}
From source file:com.fpuna.preproceso.PreprocesoTS.java
private static TrainingSetFeature calculoFeaturesMagnitud(List<Registro> muestras, String activity) {

    TrainingSetFeature Feature = new TrainingSetFeature();
    DescriptiveStatistics stats_m = new DescriptiveStatistics();
    double[] fft_m;
    double[] AR_4;

    muestras = Util.calcMagnitud(muestras);

    for (int i = 0; i < muestras.size(); i++) {
        stats_m.addValue(muestras.get(i).getM_1());
    }

    //********* FFT *********
    //fft_m = Util.transform(stats_m.getValues());
    fft_m = FFTMixedRadix.fftPowerSpectrum(stats_m.getValues());

    //******************* Magnitude calculations *******************//

    //mean(s) - Arithmetic mean
    System.out.print(stats_m.getMean() + ",");
    Feature.setMeanX((float) stats_m.getMean());

    //std(s) - Standard deviation
    System.out.print(stats_m.getStandardDeviation() + ",");
    Feature.setStdX((float) stats_m.getStandardDeviation());

    //mad(s) - Median absolute deviation
    //

    //max(s) - Largest values in array
    System.out.print(stats_m.getMax() + ",");
    Feature.setMaxX((float) stats_m.getMax());

    //min(s) - Smallest value in array
    System.out.print(stats_m.getMin() + ",");
    Feature.setMinX((float) stats_m.getMin());

    //skewness(s) - Frequency signal Skewness
    System.out.print(stats_m.getSkewness() + ",");
    Feature.setSkewnessX((float) stats_m.getSkewness());

    //kurtosis(s) - Frequency signal Kurtosis
    System.out.print(stats_m.getKurtosis() + ",");
    Feature.setKurtosisX((float) stats_m.getKurtosis());

    //energy(s) - Average sum of the squares
    System.out.print(stats_m.getSumsq() / stats_m.getN() + ",");
    Feature.setEnergyX((float) (stats_m.getSumsq() / stats_m.getN()));

    //entropy(s) - Signal Entropy
    System.out.print(Util.calculateShannonEntropy(fft_m) + ",");
    Feature.setEntropyX(Util.calculateShannonEntropy(fft_m).floatValue());

    //iqr (s) Interquartile range
    System.out.print(stats_m.getPercentile(75) - stats_m.getPercentile(25) + ",");
    Feature.setIqrX((float) (stats_m.getPercentile(75) - stats_m.getPercentile(25)));

    try {
        //autoregression (s) - 4th order Burg Autoregression coefficients
        AR_4 = AutoRegression.calculateARCoefficients(stats_m.getValues(), 4, true);
        System.out.print(AR_4[0] + ",");
        System.out.print(AR_4[1] + ",");
        System.out.print(AR_4[2] + ",");
        System.out.print(AR_4[3] + ",");
        Feature.setArX1((float) AR_4[0]);
        Feature.setArX2((float) AR_4[1]);
        Feature.setArX3((float) AR_4[2]);
        Feature.setArX4((float) AR_4[3]);
    } catch (Exception ex) {
        Logger.getLogger(PreprocesoTS.class.getName()).log(Level.SEVERE, null, ex);
    }

    //meanFreq(s) - Frequency signal weighted average
    System.out.print(Util.meanFreq(fft_m, stats_m.getValues()) + ",");
    Feature.setMeanFreqx((float) Util.meanFreq(fft_m, stats_m.getValues()));

    //******************* Activity *******************/
    System.out.print(activity);
    System.out.print("\n");
    Feature.setEtiqueta(activity);

    return Feature;
}
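The energy feature above is computed as the average sum of squares, getSumsq() / getN(). A short sketch of that computation in isolation (the sample values are illustrative); using getN() rather than a hard-coded length keeps the formula correct for however many values were actually added.

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class EnergyFeatureExample {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.0, 3.0 }) {
            stats.addValue(v);
        }

        // energy = mean of the squared values = (1 + 4 + 9) / 3
        double energy = stats.getSumsq() / stats.getN();
        System.out.println("energy = " + energy); // 4.666...
    }
}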
From source file:com.fpuna.preproceso.PreprocesoTS.java
private static void calculoFeatures(Registro[] muestras, String activity) {

    DescriptiveStatistics stats_x = new DescriptiveStatistics();
    DescriptiveStatistics stats_y = new DescriptiveStatistics();
    DescriptiveStatistics stats_z = new DescriptiveStatistics();
    //DescriptiveStatistics stats_m1 = new DescriptiveStatistics();
    //DescriptiveStatistics stats_m2 = new DescriptiveStatistics();

    double[] fft_x;
    double[] fft_y;
    double[] fft_z;
    double[] AR_4;

    for (int i = 0; i < muestras.length; i++) {
        stats_x.addValue(muestras[i].getValor_x());
        stats_y.addValue(muestras[i].getValor_y());
        stats_z.addValue(muestras[i].getValor_z());
    }

    //********* FFT *********
    fft_x = Util.transform(stats_x.getValues());
    fft_y = Util.transform(stats_y.getValues());
    fft_z = Util.transform(stats_z.getValues());

    //******************* X axis *******************//

    //mean(s) - Arithmetic mean
    System.out.print(stats_x.getMean() + ",");

    //std(s) - Standard deviation
    System.out.print(stats_x.getStandardDeviation() + ",");

    //mad(s) - Median absolute deviation
    //

    //max(s) - Largest values in array
    System.out.print(stats_x.getMax() + ",");

    //min(s) - Smallest value in array
    System.out.print(stats_x.getMin() + ",");

    //skewness(s) - Frequency signal Skewness
    System.out.print(stats_x.getSkewness() + ",");

    //kurtosis(s) - Frequency signal Kurtosis
    System.out.print(stats_x.getKurtosis() + ",");

    //energy(s) - Average sum of the squares
    System.out.print(stats_x.getSumsq() / stats_x.getN() + ",");

    //entropy(s) - Signal Entropy
    System.out.print(Util.calculateShannonEntropy(fft_x) + ",");

    //iqr (s) Interquartile range
    System.out.print(stats_x.getPercentile(75) - stats_x.getPercentile(25) + ",");

    try {
        //autoregression (s) - 4th order Burg Autoregression coefficients
        AR_4 = AutoRegression.calculateARCoefficients(stats_x.getValues(), 4, true);
        System.out.print(AR_4[0] + ",");
        System.out.print(AR_4[1] + ",");
        System.out.print(AR_4[2] + ",");
        System.out.print(AR_4[3] + ",");
    } catch (Exception ex) {
        Logger.getLogger(PreprocesoTS.class.getName()).log(Level.SEVERE, null, ex);
    }

    //meanFreq(s) - Frequency signal weighted average
    System.out.print(Util.meanFreq(fft_x, stats_x.getValues()) + ",");

    //******************* Y axis *******************//

    //mean(s) - Arithmetic mean
    System.out.print(stats_y.getMean() + ",");

    //std(s) - Standard deviation
    System.out.print(stats_y.getStandardDeviation() + ",");

    //mad(s) - Median absolute deviation
    //

    //max(s) - Largest values in array
    System.out.print(stats_y.getMax() + ",");

    //min(s) - Smallest value in array
    System.out.print(stats_y.getMin() + ",");

    //skewness(s) - Frequency signal Skewness
    System.out.print(stats_y.getSkewness() + ",");

    //kurtosis(s) - Frequency signal Kurtosis
    System.out.print(stats_y.getKurtosis() + ",");

    //energy(s) - Average sum of the squares
    System.out.print(stats_y.getSumsq() / stats_y.getN() + ",");

    //entropy(s) - Signal Entropy
    System.out.print(Util.calculateShannonEntropy(fft_y) + ",");

    //iqr (s) Interquartile range
    System.out.print(stats_y.getPercentile(75) - stats_y.getPercentile(25) + ",");

    try {
        //autoregression (s) - 4th order Burg Autoregression coefficients
        AR_4 = AutoRegression.calculateARCoefficients(stats_y.getValues(), 4, true);
        System.out.print(AR_4[0] + ",");
        System.out.print(AR_4[1] + ",");
        System.out.print(AR_4[2] + ",");
        System.out.print(AR_4[3] + ",");
    } catch (Exception ex) {
        Logger.getLogger(PreprocesoTS.class.getName()).log(Level.SEVERE, null, ex);
    }

    //meanFreq(s) - Frequency signal weighted average
    System.out.print(Util.meanFreq(fft_y, stats_y.getValues()) + ",");

    //******************* Z axis *******************//

    //mean(s) - Arithmetic mean
    System.out.print(stats_z.getMean() + ",");

    //std(s) - Standard deviation
    System.out.print(stats_z.getStandardDeviation() + ",");

    //mad(s) - Median absolute deviation
    //

    //max(s) - Largest values in array
    System.out.print(stats_z.getMax() + ",");

    //min(s) - Smallest value in array
    System.out.print(stats_z.getMin() + ",");

    //skewness(s) - Frequency signal Skewness
    System.out.print(stats_z.getSkewness() + ",");

    //kurtosis(s) - Frequency signal Kurtosis
    System.out.print(stats_z.getKurtosis() + ",");

    //energy(s) - Average sum of the squares
    System.out.print(stats_z.getSumsq() / stats_z.getN() + ",");

    //entropy(s) - Signal Entropy
    System.out.print(Util.calculateShannonEntropy(fft_z) + ",");

    //iqr (s) Interquartile range
    System.out.print(stats_z.getPercentile(75) - stats_z.getPercentile(25) + ",");

    try {
        //autoregression (s) - 4th order Burg Autoregression coefficients
        AR_4 = AutoRegression.calculateARCoefficients(stats_z.getValues(), 4, true);
        System.out.print(AR_4[0] + ",");
        System.out.print(AR_4[1] + ",");
        System.out.print(AR_4[2] + ",");
        System.out.print(AR_4[3] + ",");
    } catch (Exception ex) {
        Logger.getLogger(PreprocesoTS.class.getName()).log(Level.SEVERE, null, ex);
    }

    //meanFreq(s) - Frequency signal weighted average
    System.out.print(Util.meanFreq(fft_z, stats_z.getValues()) + ",");

    //******************* Combined features *******************/

    //sma(s1; s2; s3) - Signal magnitude area
    System.out.print(Util.sma(stats_x.getValues(), stats_y.getValues(), stats_z.getValues()) + ",");

    //correlation(s1; s2) - Pearson Correlation coefficient
    System.out.print(new PearsonsCorrelation().correlation(stats_x.getValues(), stats_y.getValues()) + ",");
    System.out.print(new PearsonsCorrelation().correlation(stats_x.getValues(), stats_z.getValues()) + ",");
    System.out.print(new PearsonsCorrelation().correlation(stats_y.getValues(), stats_z.getValues()) + ",");

    //******************* Activity *******************/
    System.out.print(activity);
    System.out.print("\n");
}
From source file:mase.spec.HybridStat.java
@Override
public void postPreBreedingExchangeStatistics(EvolutionState state) {
    super.postPreBreedingExchangeStatistics(state);
    AbstractHybridExchanger exc = (AbstractHybridExchanger) state.exchanger;

    // generation, evaluations, and number of metapops
    state.output.print(state.generation + " "
            + ((MaseProblem) state.evaluator.p_problem).getTotalEvaluations() + " " + exc.metaPops.size(), log);

    DescriptiveStatistics ds = new DescriptiveStatistics();
    for (MetaPopulation mp : exc.metaPops) {
        ds.addValue(mp.agents.size());
    }
    // metapop size (min, mean, max)
    state.output.print(" " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);

    // metapop mean and max age
    ds.clear();
    for (MetaPopulation mp : exc.metaPops) {
        ds.addValue(mp.age);
    }
    state.output.print(" " + ds.getMean() + " " + ds.getMax(), log);

    // number of splits and merges in this generation + total number of splits and merges
    totalMerges += exc.merges;
    totalSplits += exc.splits;
    state.output.print(" " + exc.merges + " " + exc.splits + " " + totalMerges + " " + totalSplits, log);

    if (exc instanceof StochasticHybridExchanger) {
        StochasticHybridExchanger she = (StochasticHybridExchanger) exc;

        // metapop difference to others
        ds.clear();
        for (int i = 0; i < she.distanceMatrix.length; i++) {
            for (int j = i + 1; j < she.distanceMatrix.length; j++) {
                if (!Double.isInfinite(she.distanceMatrix[i][j]) && !Double.isNaN(she.distanceMatrix[i][j])) {
                    ds.addValue(she.distanceMatrix[i][j]);
                }
            }
        }
        if (ds.getN() > 0) {
            state.output.print(" " + ds.getN() + " " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);
        } else {
            state.output.print(" 0 0 0 0", log);
        }
        //printMatrix(she.distanceMatrix, state);
    }
    state.output.println("", log);

    /*for(MetaPopulation mp : exc.metaPops) {
        StringBuilder sb = new StringBuilder();
        sb.append(String.format("%3d", mp.age)).append(" - ").append(mp.toString());
        if(!mp.foreigns.isEmpty()) {
            sb.append(" - Foreigns:");
        }
        for(Foreign f : mp.foreigns) {
            sb.append(" ").append(f.origin).append("(").append(f.age).append(")");
        }
        state.output.message(sb.toString());
    }*/

    /*for(MetaPopulation mp : exc.metaPops) {
        state.output.message(mp.age + "/" + mp.lockDown);
    }*/
}
From source file:com.joliciel.talismane.extensions.corpus.CorpusStatistics.java
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration, Writer writer) {
    sentenceCount++;
    sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());

    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;

        Token token = posTaggedToken.getToken();

        String word = token.getOriginalText();
        words.add(word);
        if (referenceWords != null) {
            if (!referenceWords.contains(word))
                unknownTokenCount++;
        }
        if (alphanumeric.matcher(token.getOriginalText()).find()) {
            String lowercase = word.toLowerCase(talismaneSession.getLocale());
            lowerCaseWords.add(lowercase);
            alphanumericCount++;
            if (referenceLowercaseWords != null) {
                if (!referenceLowercaseWords.contains(lowercase))
                    unknownAlphanumericCount++;
            }
        }

        tokenCount++;

        Integer countObj = posTagCounts.get(posTaggedToken.getTag().getCode());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        posTagCounts.put(posTaggedToken.getTag().getCode(), count);
    }

    int maxDepth = 0;
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();

    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        Integer countObj = depLabelCounts.get(arc.getLabel());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        depLabelCounts.put(arc.getLabel(), count);

        totalDepCount++;

        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            // do nothing for unattached stuff (e.g. punctuation)
        } else if (arc.getLabel().equals("ponct")) {
            // do nothing for punctuation
        } else {
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth)
                maxDepth = depth;
            syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);

            int distance = Math
                    .abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            syntaxDistanceStats.addValue(distance);
        }

        maxSyntaxDepthStats.addValue(maxDepth);
        if (avgSyntaxDepthForSentenceStats.getN() > 0)
            avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
    }

    // we cheat a little bit by only allowing each arc to count once
    // there could be a situation where there are two independent non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0))
            continue;
        if (nonProjectiveArcs.contains(arc))
            continue;

        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = headIndex < depIndex ? headIndex : depIndex;
        int endIndex = headIndex >= depIndex ? headIndex : depIndex;

        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getDependencies()) {
            j++;
            if (j <= i)
                continue;
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                    && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0))
                continue;
            if (nonProjectiveArcs.contains(otherArc))
                continue;

            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = headIndex2 < depIndex2 ? headIndex2 : depIndex2;
            int endIndex2 = headIndex2 >= depIndex2 ? headIndex2 : depIndex2;

            boolean nonProjective = false;
            if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) {
                nonProjective = true;
            } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) {
                nonProjective = true;
            }
            if (nonProjective) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}
From source file:mase.spec.SpecialisationStats.java
@Override
public void postPreBreedingExchangeStatistics(EvolutionState state) {
    super.postPreBreedingExchangeStatistics(state);
    SpecialisationExchanger exc = (SpecialisationExchanger) state.exchanger;
    state.output.print(state.generation + " " + exc.metaPops.size(), log);

    // metapop size (min, mean, max)
    DescriptiveStatistics ds = new DescriptiveStatistics();
    for (MetaPopulation mp : exc.metaPops) {
        ds.addValue(mp.populations.size());
    }
    state.output.print(" " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);

    // metapop dispersion (min, mean, max)
    ds.clear();
    for (MetaPopulation mp : exc.metaPops) {
        double dispersion = 0;
        for (Integer i : mp.populations) {
            for (Integer j : mp.populations) {
                dispersion += exc.distanceMatrix[i][j];
            }
        }
        ds.addValue(dispersion / (mp.populations.size() * mp.populations.size()));
    }
    state.output.print(" " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);

    // total number of merges and splits
    int count = 0;
    for (MetaPopulation mp : exc.metaPops) {
        count += mp.waitingIndividuals.size();
    }
    state.output.print(" " + count + " " + exc.splits, log);

    for (int i = 0; i < exc.prototypeSubs.length; i++) {
        // MetaPop to which they belong
        MetaPopulation pop = null;
        for (int m = 0; m < exc.metaPops.size(); m++) {
            if (exc.metaPops.get(m).populations.contains(i)) {
                pop = exc.metaPops.get(m);
                state.output.print(" " + m, log);
            }
        }

        // Population dispersion
        state.output.print(" " + exc.originalMatrix[i][i], log);

        // Normalised distance to internal pops -- include itself -- 1
        ds.clear();
        for (Integer p : pop.populations) {
            ds.addValue(exc.distanceMatrix[i][p]);
        }
        state.output.print(" " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);

        // Normalised distance to external pops
        ds.clear();
        for (MetaPopulation mp : exc.metaPops) {
            if (mp != pop) {
                for (Integer p : mp.populations) {
                    ds.addValue(exc.distanceMatrix[i][p]);
                }
            }
        }
        if (ds.getN() == 0) {
            ds.addValue(1);
        }
        state.output.print(" " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);
    }

    String str = "";
    for (MetaPopulation mp : exc.metaPops) {
        str += mp + " ; ";
    }
    state.output.message(str);

    /*for(double[] m : exc.distanceMatrix) {
        state.output.message(Arrays.toString(m));
    }*/

    // representatives
    /*MetaEvaluator me = (MetaEvaluator) state.evaluator;
    MultiPopCoevolutionaryEvaluator2 baseEval = (MultiPopCoevolutionaryEvaluator2) me.getBaseEvaluator();
    Individual[][] elites = baseEval.getEliteIndividuals();
    ds.clear();
    for(MetaPopulation mp : exc.metaPops) {
        HashSet<Individual> inds = new HashSet<Individual>();
        for(Integer p : mp.populations) {
            inds.add(elites[p][0]);
        }
        ds.addValue(inds.size() / (double) mp.populations.size());
    }
    state.output.print(" " + ds.getMin() + " " + ds.getMean() + " " + ds.getMax(), log);*/

    state.output.println("", log);
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step6GraphTransitivityCleaner.java
public GraphCleaningResults processSingleFile(File file, File outputDir, String prefix,
        Boolean collectGeneratedArgumentPairs) throws Exception {
    GraphCleaningResults result = new GraphCleaningResults();

    File outFileTable = new File(outputDir, prefix + file.getName() + "_table.csv");
    File outFileInfo = new File(outputDir, prefix + file.getName() + "_info.txt");

    PrintStream psTable = new PrintStream(new FileOutputStream(outFileTable));
    PrintStream psInfo = new PrintStream(new FileOutputStream(outFileInfo));

    // load one topic/side
    List<AnnotatedArgumentPair> pairs = new ArrayList<>(
            (List<AnnotatedArgumentPair>) XStreamTools.getXStream().fromXML(file));

    int fullDataSize = pairs.size();

    // filter out missing gold data
    Iterator<AnnotatedArgumentPair> iterator = pairs.iterator();
    while (iterator.hasNext()) {
        AnnotatedArgumentPair pair = iterator.next();
        if (pair.getGoldLabel() == null) {
            iterator.remove();
        }
        // or we want to completely remove equal edges in advance!
        else if (this.removeEqualEdgesParam && "equal".equals(pair.getGoldLabel())) {
            iterator.remove();
        }
    }

    // sort pairs by their weight
    this.argumentPairListSorter.sortArgumentPairs(pairs);

    int preFilteredDataSize = pairs.size();

    // compute correlation between score threshold and number of removed edges
    double[] correlationEdgeWeights = new double[pairs.size()];
    double[] correlationRemovedEdges = new double[pairs.size()];

    // only cycles of length 0 to 5 are interesting (5+ are too big)
    Range<Integer> range = Range.between(0, 5);

    psTable.print(
            "EdgeWeightThreshold\tPairs\tignoredEdgesCount\tIsDAG\tTransitivityScoreMean\tTransitivityScoreMax\tTransitivityScoreSamples\tEdges\tNodes\t");
    for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
        psTable.print("Cycles_" + j + "\t");
    }
    psTable.println();

    // store the indices of all pairs (edges) that have been successfully added without
    // generating cycles
    TreeSet<Integer> addedPairsIndices = new TreeSet<>();

    // number of edges ignored as they generated cycles
    int ignoredEdgesCount = 0;

    Graph lastGraph = null;

    // flag that the first cycle was already processed
    boolean firstCycleAlreadyHit = false;

    for (int i = 1; i < pairs.size(); i++) {
        // now filter the finalArgumentPairList and add only pairs that have not generated cycles
        List<AnnotatedArgumentPair> subList = new ArrayList<>();

        for (Integer index : addedPairsIndices) {
            subList.add(pairs.get(index));
        }

        // and add the current at the end
        subList.add(pairs.get(i));

        // what is the current lowest value of a pair weight?
        double weakestEdgeWeight = computeEdgeWeight(subList.get(subList.size() - 1), LAMBDA_PENALTY);

        // Graph graph = buildGraphFromArgumentPairs(finalArgumentPairList);
        int numberOfLoops;

        // map for storing cycles by their length
        TreeMap<Integer, TreeSet<String>> lengthCyclesMap = new TreeMap<>();

        Graph graph = buildGraphFromArgumentPairs(subList);

        lastGraph = graph;

        List<List<Object>> cyclesInGraph = findCyclesInGraph(graph);

        DescriptiveStatistics transitivityScore = new DescriptiveStatistics();

        if (cyclesInGraph.isEmpty()) {
            // we have DAG
            transitivityScore = computeTransitivityScores(graph);

            // update results
            result.maxTransitivityScore = (int) transitivityScore.getMax();
            result.avgTransitivityScore = transitivityScore.getMean();
        }

        numberOfLoops = cyclesInGraph.size();

        // initialize map
        for (int r = range.getMinimum(); r <= range.getMaximum(); r++) {
            lengthCyclesMap.put(r, new TreeSet<String>());
        }

        // we hit a loop
        if (numberOfLoops > 0) {
            // let's update the result
            if (!firstCycleAlreadyHit) {
                result.graphSizeEdgesBeforeFirstCycle = graph.getEdgeCount();
                result.graphSizeNodesBeforeFirstCycle = graph.getNodeCount();

                // find the shortest cycle
                int shortestCycleLength = Integer.MAX_VALUE;
                for (List<Object> cycle : cyclesInGraph) {
                    shortestCycleLength = Math.min(shortestCycleLength, cycle.size());
                }
                result.lengthOfFirstCircle = shortestCycleLength;

                result.pairsBeforeFirstCycle = i;

                firstCycleAlreadyHit = true;
            }

            // ignore this edge further
            ignoredEdgesCount++;

            // update counts of different cycles lengths
            for (List<Object> cycle : cyclesInGraph) {
                int currentSize = cycle.size();

                // convert to sorted set of nodes
                List<String> cycleAsSortedIDs = new ArrayList<>();
                for (Object o : cycle) {
                    cycleAsSortedIDs.add(o.toString());
                }
                Collections.sort(cycleAsSortedIDs);

                if (range.contains(currentSize)) {
                    lengthCyclesMap.get(currentSize).add(cycleAsSortedIDs.toString());
                }
            }
        } else {
            addedPairsIndices.add(i);
        }

        // we hit the first cycle

        // collect loop sizes
        StringBuilder loopsAsString = new StringBuilder();
        for (int j = range.getMinimum(); j <= range.getMaximum(); j++) {
            // loopsAsString.append(j).append(":");
            loopsAsString.append(lengthCyclesMap.get(j).size());
            loopsAsString.append("\t");
        }

        psTable.printf(Locale.ENGLISH, "%.4f\t%d\t%d\t%b\t%.2f\t%d\t%d\t%d\t%d\t%s%n", weakestEdgeWeight, i,
                ignoredEdgesCount, numberOfLoops == 0,
                Double.isNaN(transitivityScore.getMean()) ? 0d : transitivityScore.getMean(),
                (int) transitivityScore.getMax(), transitivityScore.getN(), graph.getEdgeCount(),
                graph.getNodeCount(), loopsAsString.toString().trim());

        // update result
        result.finalGraphSizeEdges = graph.getEdgeCount();
        result.finalGraphSizeNodes = graph.getNodeCount();
        result.ignoredEdgesThatBrokeDAG = ignoredEdgesCount;

        // update stats for correlation
        correlationEdgeWeights[i] = weakestEdgeWeight;
        // correlationRemovedEdges[i] = (double) ignoredEdgesCount;
        // let's try: if we keep = 0, if we remove = 1
        correlationRemovedEdges[i] = numberOfLoops == 0 ? 0.0 : 1.0;
    }

    psInfo.println("Original: " + fullDataSize + ", removed by MACE: " + (fullDataSize - preFilteredDataSize)
            + ", final: " + (preFilteredDataSize - ignoredEdgesCount) + " (removed: " + ignoredEdgesCount + ")");

    double[][] matrix = new double[correlationEdgeWeights.length][];
    for (int i = 0; i < correlationEdgeWeights.length; i++) {
        matrix[i] = new double[2];
        matrix[i][0] = correlationEdgeWeights[i];
        matrix[i][1] = correlationRemovedEdges[i];
    }

    PearsonsCorrelation pearsonsCorrelation = new PearsonsCorrelation(matrix);

    double pValue = pearsonsCorrelation.getCorrelationPValues().getEntry(0, 1);
    double correlation = pearsonsCorrelation.getCorrelationMatrix().getEntry(0, 1);

    psInfo.printf(Locale.ENGLISH, "Correlation: %.3f, p-Value: %.4f%n", correlation, pValue);

    if (lastGraph == null) {
        throw new IllegalStateException("Graph is null");
    }

    // close
    psInfo.close();
    psTable.close();

    // save filtered final gold data
    List<AnnotatedArgumentPair> finalArgumentPairList = new ArrayList<>();
    for (Integer index : addedPairsIndices) {
        finalArgumentPairList.add(pairs.get(index));
    }
    XStreamTools.toXML(finalArgumentPairList, new File(outputDir, prefix + file.getName()));

    // TODO: here, we can add newly generated edges from graph transitivity
    if (collectGeneratedArgumentPairs) {
        Set<GeneratedArgumentPair> generatedArgumentPairs = new HashSet<>();

        // collect all arguments
        Map<String, Argument> allArguments = new HashMap<>();
        for (ArgumentPair argumentPair : pairs) {
            allArguments.put(argumentPair.getArg1().getId(), argumentPair.getArg1());
            allArguments.put(argumentPair.getArg2().getId(), argumentPair.getArg2());
        }

        Graph finalGraph = buildGraphFromArgumentPairs(finalArgumentPairList);
        for (Edge e : finalGraph.getEdgeSet()) {
            e.setAttribute(WEIGHT, 1.0);
        }

        for (Node j : finalGraph) {
            for (Node k : finalGraph) {
                if (j != k) {
                    // is there a path between?
                    BellmanFord bfShortest = new BellmanFord(WEIGHT, j.getId());
                    bfShortest.init(finalGraph);
                    bfShortest.compute();

                    Path shortestPath = bfShortest.getShortestPath(k);
                    if (shortestPath.size() > 0) {
                        // we have a path
                        GeneratedArgumentPair ap = new GeneratedArgumentPair();
                        Argument arg1 = allArguments.get(j.getId());

                        if (arg1 == null) {
                            throw new IllegalStateException("Cannot find argument " + j.getId());
                        }
                        ap.setArg1(arg1);

                        Argument arg2 = allArguments.get(k.getId());

                        if (arg2 == null) {
                            throw new IllegalStateException("Cannot find argument " + k.getId());
                        }
                        ap.setArg2(arg2);

                        ap.setGoldLabel("a1");
                        generatedArgumentPairs.add(ap);
                    }
                }
            }
        }

        // and now add the reverse ones
        Set<GeneratedArgumentPair> generatedReversePairs = new HashSet<>();
        for (GeneratedArgumentPair pair : generatedArgumentPairs) {
            GeneratedArgumentPair ap = new GeneratedArgumentPair();
            ap.setArg1(pair.getArg2());
            ap.setArg2(pair.getArg1());
            ap.setGoldLabel("a2");
            generatedReversePairs.add(ap);
        }
        generatedArgumentPairs.addAll(generatedReversePairs);

        // and save it
        XStreamTools.toXML(generatedArgumentPairs, new File(outputDir, "generated_" + prefix + file.getName()));
    }

    result.fullPairsSize = fullDataSize;
    result.removedApriori = (fullDataSize - preFilteredDataSize);
    result.finalPairsRetained = finalArgumentPairList.size();

    // save the final graph
    Graph outGraph = cleanCopyGraph(lastGraph);
    FileSinkDGS dgs1 = new FileSinkDGS();
    File outFile = new File(outputDir, prefix + file.getName() + ".dgs");

    System.out.println("Saved to " + outFile);
    FileWriter w1 = new FileWriter(outFile);
    dgs1.writeAll(outGraph, w1);
    w1.close();

    return result;
}