List of usage examples for weka.core Utils variance
public staticdouble variance(double[] vector)
From source file:PrincipalComponents.java
License:Open Source License
private void fillCovariance() throws Exception { // first store the means m_means = new double[m_trainInstances.numAttributes()]; m_stdDevs = new double[m_trainInstances.numAttributes()]; for (int i = 0; i < m_trainInstances.numAttributes(); i++) { m_means[i] = m_trainInstances.meanOrMode(i); m_stdDevs[i] = Math.sqrt(Utils.variance(m_trainInstances.attributeToDoubleArray(i))); }/*w w w. j a v a 2 s . c o m*/ // just center the data or standardize it? if (m_center) { m_centerFilter = new Center(); m_centerFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_centerFilter); } else { m_standardizeFilter = new Standardize(); m_standardizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter); } // now compute the covariance matrix m_correlation = new UpperSymmDenseMatrix(m_numAttribs); for (int i = 0; i < m_numAttribs; i++) { for (int j = i; j < m_numAttribs; j++) { double cov = 0; for (Instance inst : m_trainInstances) { cov += inst.value(i) * inst.value(j); } cov /= m_trainInstances.numInstances() - 1; m_correlation.set(i, j, cov); } } }
From source file:AtomUtils.java
License:Apache License
static public double[] getStandardDeviation(ArrayList<ArrayList<Double>> lists) { int numAtts = lists.size(); assert (numAtts > 0); int numInsts = lists.get(0).size(); assert (numInsts > 0); double[] att1 = new double[numInsts]; double[] std = new double[numAtts]; for (int i = 0; i < numAtts; i++) { for (int j = 0; j < numInsts; j++) { att1[j] = lists.get(i).get(j); }//from w w w. ja v a2 s.c o m std[i] = Math.sqrt(Utils.variance(att1)); } return std; }
From source file:PCADetector.java
License:Apache License
public double[] getNormalSTD() { int numInsts = m_normalPts.size(); assert (numInsts > 0); double[] att1 = new double[numInsts]; double[] std = new double[m_nDims]; for (int i = 0; i < m_nDims; i++) { for (int j = 0; j < numInsts; j++) { att1[j] = m_oriDataMatrix.get(i).get(m_normalPts.get(j)); }//from w ww .j a v a2 s. c o m std[i] = Math.sqrt(Utils.variance(att1)); } return std; }
From source file:PCADetector.java
License:Apache License
public double[] getStandardDeviation(Instances Matrix) { int numAtts = Matrix.numAttributes(); int numInsts = Matrix.numInstances(); double[] att1 = new double[numInsts]; double[] std = new double[numAtts]; for (int i = 0; i < numAtts; i++) { for (int j = 0; j < numInsts; j++) { att1[j] = Matrix.instance(j).value(i); }//from ww w . jav a2 s . c o m std[i] = Math.sqrt(Utils.variance(att1)); } return std; }
From source file:com.github.polarisation.kea.main.KEAKeyphraseExtractor.java
License:Open Source License
/** * Builds the model from the files// www.j a v a 2s. c o m */ public void extractKeyphrases(Hashtable stems) throws Exception { Vector stats = new Vector(); // Check whether there is actually any data // = if there any files in the directory if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } m_KEAFilter.setNumPhrases(m_numPhrases); m_KEAFilter.setVocabulary(m_vocabulary); m_KEAFilter.setVocabularyFormat(m_vocabularyFormat); m_KEAFilter.setDocumentLanguage(getDocumentLanguage()); m_KEAFilter.setStemmer(m_Stemmer); m_KEAFilter.setStopwords(m_Stopwords); if (getVocabulary().equals("none")) { m_KEAFilter.m_NODEfeature = false; } else { m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords); } FastVector atts = new FastVector(3); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); atts.addElement(new Attribute("filename", (String) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); if (m_KEAFilter.m_Dictionary == null) { buildGlobalDictionaries(stems); } System.err.println("-- Extracting Keyphrases... "); // Extract keyphrases Enumeration elem = stems.keys(); // Enumeration over all files in the directory (now in the hash): while (elem.hasMoreElements()) { String str = (String) elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new InputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't read document " + str + ".txt"); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(key), m_encoding); } else { is = new InputStreamReader(new FileInputStream(key)); } StringBuffer keyStr = new StringBuffer(); int c; // keyStr = keyphrases in the str.key file // Kea assumes, that these keyphrases were assigned by the author // and evaluates extracted keyphrases againse these while ((c = is.read()) != -1) { keyStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("No existing keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); m_KEAFilter.input(data.instance(0)); data = data.stringFreeStructure(); if (m_debug) { System.err.println("-- Document: " + str); } Instance[] topRankedInstances = new Instance[m_numPhrases]; Instance inst; // Iterating over all extracted keyphrases (inst) while ((inst = m_KEAFilter.output()) != null) { int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1; if (index < m_numPhrases) { topRankedInstances[index] = inst; } } if (m_debug) { System.err.println("-- Keyphrases and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; File key = new File(m_dirName + "/" + str + ".key"); if (!key.exists()) { out = new FileOutputStream(m_dirName + "/" + str + ".key"); if (!m_encoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, m_encoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < m_numPhrases; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) { numCorrect += 1.0; } if (printer != null) { printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex())); if (m_AdditionalInfo) { printer.print("\t"); printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4)); } printer.println(); } if (m_debug) { System.err.println(topRankedInstances[i]); } } } if (numExtracted > 0) { if (m_debug) { System.err.println("-- " + numCorrect + " correct"); } stats.addElement(new Double(numCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } double[] st = new double[stats.size()]; for (int i = 0; i < stats.size(); i++) { st[i] = ((Double) stats.elementAt(i)).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); System.err.println("Avg. number of matching keyphrases compared to existing ones : " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); System.err.println("Based on " + stats.size() + " documents"); // m_KEAFilter.batchFinished(); }
From source file:com.openkm.kea.modelcreator.KEAKeyphraseExtractor.java
License:Open Source License
/** * Builds the model from the files// w w w . j a v a2 s . co m */ public void extractKeyphrases(Hashtable<String, Double> stems) throws Exception { Vector<Double> stats = new Vector<Double>(); // Check whether there is actually any data // = if there any files in the directory if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } m_KEAFilter.setNumPhrases(m_numPhrases); m_KEAFilter.setVocabulary(m_vocabulary); m_KEAFilter.setVocabularyFormat(m_vocabularyFormat); m_KEAFilter.setDocumentLanguage(getDocumentLanguage()); m_KEAFilter.setStemmer(m_Stemmer); m_KEAFilter.setStopwords(m_Stopwords); if (getVocabulary().equals("none")) { m_KEAFilter.m_NODEfeature = false; } else { m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords); } FastVector atts = new FastVector(3); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); atts.addElement(new Attribute("filename", (String) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); if (m_KEAFilter.m_Dictionary == null) { buildGlobalDictionaries(stems); } log.info("-- Extracting Keyphrases... "); // Extract keyphrases Enumeration<String> elem = stems.keys(); // Enumeration over all files in the directory (now in the hash): while (elem.hasMoreElements()) { String str = elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new InputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { log.debug("Can't read document " + str + ".txt"); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(key), m_encoding); } else { is = new InputStreamReader(new FileInputStream(key)); } StringBuffer keyStr = new StringBuffer(); int c; // keyStr = keyphrases in the str.key file // Kea assumes, that these keyphrases were assigned by the // author // and evaluates extracted keyphrases againse these while ((c = is.read()) != -1) { keyStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { log.debug("No existing keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); m_KEAFilter.input(data.instance(0)); data = data.stringFreeStructure(); if (m_debug) { log.debug("-- Document: " + str); } Instance[] topRankedInstances = new Instance[m_numPhrases]; Instance inst; // Iterating over all extracted keyphrases (inst) while ((inst = m_KEAFilter.output()) != null) { int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1; if (index < m_numPhrases) { topRankedInstances[index] = inst; } } if (m_debug) { log.debug("-- Keyphrases and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; File key = new File(m_dirName + "/" + str + ".key"); if (!key.exists()) { out = new FileOutputStream(m_dirName + "/" + str + ".key"); if (!m_encoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, m_encoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < m_numPhrases; i++) { if (topRankedInstances[i] != null) { // My addition: to exclude low ranking phrases double rank = topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()); if (rank >= 0.00) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) { numCorrect += 1.0; } if (printer != null) { printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex())); if (m_AdditionalInfo) { printer.print("\t"); printer.print( topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4)); } printer.println(); } if (m_debug) { log.debug("" + topRankedInstances[i]); } } } } if (numExtracted > 0) { if (m_debug) { log.debug("-- " + numCorrect + " correct"); } stats.addElement(new Double(numCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } double[] st = new double[stats.size()]; for (int i = 0; i < stats.size(); i++) { st[i] = ((Double) stats.elementAt(i)).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); log.info("Avg. number of matching keyphrases compared to existing ones : " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); log.info("Based on " + stats.size() + " documents"); // m_KEAFilter.batchFinished(); }
From source file:data.generation.target.utils.PrincipalComponents.java
License:Open Source License
/** * Fill the correlation matrix/* w w w. j av a 2s . c om*/ */ private void fillCorrelation() throws Exception { m_correlation = new double[m_numAttribs][m_numAttribs]; double[] att1 = new double[m_numInstances]; double[] att2 = new double[m_numInstances]; double corr; for (int i = 0; i < m_numAttribs; i++) { for (int j = 0; j < m_numAttribs; j++) { for (int k = 0; k < m_numInstances; k++) { att1[k] = m_trainInstances.instance(k).value(i); att2[k] = m_trainInstances.instance(k).value(j); } if (i == j) { m_correlation[i][j] = 1.0; // store the standard deviation m_stdDevs[i] = Math.sqrt(Utils.variance(att1)); } else { corr = Utils.correlation(att1, att2, m_numInstances); m_correlation[i][j] = corr; m_correlation[j][i] = corr; } } } // now standardize the input data m_standardizeFilter = new Standardize(); m_standardizeFilter.setInputFormat(m_trainInstances); m_trainInstances = Filter.useFilter(m_trainInstances, m_standardizeFilter); }
From source file:data.statistics.MLStatistics.java
License:Open Source License
/** * Computes the variance of any IR vector. * /*w w w .ja v a 2s .com*/ * @param IR * An IR vector previously computed. * @return double. */ public double varianceIR(double[] IR) { return Utils.variance(IR); }
From source file:kea.KEAKeyphraseExtractor.java
License:Open Source License
/** * Builds the model from the files/*from ww w. j av a 2 s. co m*/ */ public void extractKeyphrases(Hashtable stems) throws Exception { Vector stats = new Vector(); // Check whether there is actually any data if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } FastVector atts = new FastVector(2); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); // Extract keyphrases Enumeration elem = stems.keys(); while (elem.hasMoreElements()) { String str = (String) elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); Reader is; if (!m_encoding.equals("default")) { is = new BomStrippingInputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new BomStrippingInputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't read document " + str + ".txt"); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); Reader is; if (!m_encoding.equals("default")) { is = new BomStrippingInputStreamReader(new FileInputStream(key), m_encoding); } else { is = new BomStrippingInputStreamReader(new FileInputStream(key)); } StringBuffer keyStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { keyStr.append((char) c); } newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("No keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); m_KEAFilter.input(data.instance(0)); data = data.stringFreeStructure(); if (m_debug) { System.err.println("-- Document: " + str); } Instance[] topRankedInstances = new Instance[m_numPhrases]; Instance inst; while ((inst = m_KEAFilter.output()) != null) { int index = (int) inst.value(m_KEAFilter.getRankIndex()) - 1; if (index < m_numPhrases) { topRankedInstances[index] = inst; } } if (m_debug) { System.err.println("-- Keyphrases and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; File key = new File(m_dirName + "/" + str + ".key"); if (!key.exists()) { out = new FileOutputStream(m_dirName + "/" + str + ".key"); if (!m_encoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, m_encoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < m_numPhrases; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i] .value(topRankedInstances[i].numAttributes() - 1) == topRankedInstances[i] .attribute(topRankedInstances[i].numAttributes() - 1).indexOfValue("True")) { numCorrect += 1.0; } if (printer != null) { printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getUnstemmedPhraseIndex())); if (m_AdditionalInfo) { printer.print("\t"); printer.print(topRankedInstances[i].stringValue(m_KEAFilter.getStemmedPhraseIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(m_KEAFilter.getProbabilityIndex()), 4)); } printer.println(); } if (m_debug) { System.err.println(topRankedInstances[i]); } } } if (numExtracted > 0) { if (m_debug) { System.err.println("-- " + numCorrect + " correct"); } stats.addElement(new Double(numCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } double[] st = new double[stats.size()]; for (int i = 0; i < stats.size(); i++) { st[i] = ((Double) stats.elementAt(i)).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); System.err.println("Avg. number of correct keyphrases: " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); System.err.println("Based on " + stats.size() + " documents"); m_KEAFilter.batchFinished(); }
From source file:kea.main.KEAKeyphraseExtractor.java
License:Open Source License
/** * Builds the model from the files//from ww w. j a va 2 s . co m */ public synchronized void extractKeyphrases(Hashtable stems) throws Exception { Vector stats = new Vector(); // Check whether there is actually any data // = if there any files in the directory if (stems.size() == 0) { throw new Exception("Couldn't find any data!"); } this.m_KEAFilter.setNumPhrases(m_numPhrases); this.m_KEAFilter.setVocabulary(m_vocabulary); this.m_KEAFilter.setVocabularyFormat(m_vocabularyFormat); this.m_KEAFilter.setDocumentLanguage(getDocumentLanguage()); this.m_KEAFilter.setStemmer(m_Stemmer); this.m_KEAFilter.setStopwords(m_Stopwords); if (getVocabulary().equals("none")) { this.m_KEAFilter.m_NODEfeature = false; } else { // Know thesaurus is loaded in the constructor //m_KEAFilter.loadThesaurus(m_Stemmer, m_Stopwords, vocabularyDir, manager); } FastVector atts = new FastVector(3); atts.addElement(new Attribute("doc", (FastVector) null)); atts.addElement(new Attribute("keyphrases", (FastVector) null)); atts.addElement(new Attribute("filename", (String) null)); Instances data = new Instances("keyphrase_training_data", atts, 0); if (this.m_KEAFilter.m_Dictionary == null) { buildGlobalDictionaries(stems); } System.out.println("-- Extracting Keyphrases... "); // Extract keyphrases Enumeration elem = stems.keys(); // Enumeration over all files in the directory (now in the hash): while (elem.hasMoreElements()) { String str = (String) elem.nextElement(); double[] newInst = new double[2]; try { File txt = new File(m_dirName + "/" + str + ".txt"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(txt), m_encoding); } else { is = new InputStreamReader(new FileInputStream(txt)); } StringBuffer txtStr = new StringBuffer(); int c; while ((c = is.read()) != -1) { txtStr.append((char) c); } is.close(); newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("Can't read document " + str + ".txt"); } newInst[0] = Instance.missingValue(); } try { File key = new File(m_dirName + "/" + str + ".key"); InputStreamReader is; if (!m_encoding.equals("default")) { is = new InputStreamReader(new FileInputStream(key), m_encoding); } else { is = new InputStreamReader(new FileInputStream(key)); } StringBuffer keyStr = new StringBuffer(); int c; // keyStr = keyphrases in the str.key file // Kea assumes, that these keyphrases were assigned by the // author // and evaluates extracted keyphrases againse these while ((c = is.read()) != -1) { keyStr.append((char) c); } is.close(); newInst[1] = (double) data.attribute(1).addStringValue(keyStr.toString()); } catch (Exception e) { if (m_debug) { System.err.println("No existing keyphrases for stem " + str + "."); } newInst[1] = Instance.missingValue(); } data.add(new Instance(1.0, newInst)); this.m_KEAFilter.input(data.instance(0), vocabulary); data = data.stringFreeStructure(); if (m_debug) { System.err.println("-- Document: " + str); } Instance[] topRankedInstances = new Instance[m_numPhrases]; Instance inst; // Iterating over all extracted keyphrases (inst) while ((inst = this.m_KEAFilter.output()) != null) { int index = (int) inst.value(this.m_KEAFilter.getRankIndex()) - 1; if (index < m_numPhrases) { topRankedInstances[index] = inst; } } if (m_debug) { System.err.println("-- Keyphrases and feature values:"); } FileOutputStream out = null; PrintWriter printer = null; File key = new File(m_dirName + "/" + str + ".key"); if (!key.exists()) { out = new FileOutputStream(m_dirName + "/" + str + ".key"); if (!m_encoding.equals("default")) { printer = new PrintWriter(new OutputStreamWriter(out, m_encoding)); } else { printer = new PrintWriter(out); } } double numExtracted = 0, numCorrect = 0; for (int i = 0; i < m_numPhrases; i++) { if (topRankedInstances[i] != null) { if (!topRankedInstances[i].isMissing(topRankedInstances[i].numAttributes() - 1)) { numExtracted += 1.0; } if ((int) topRankedInstances[i].value(topRankedInstances[i].numAttributes() - 1) == 1) { numCorrect += 1.0; } if (printer != null) { printer.print( topRankedInstances[i].stringValue(this.m_KEAFilter.getUnstemmedPhraseIndex())); if (m_AdditionalInfo) { printer.print("\t"); printer.print( topRankedInstances[i].stringValue(this.m_KEAFilter.getStemmedPhraseIndex())); printer.print("\t"); printer.print(Utils.doubleToString( topRankedInstances[i].value(this.m_KEAFilter.getProbabilityIndex()), 4)); } printer.println(); } if (m_debug) { System.err.println(topRankedInstances[i]); } } } if (numExtracted > 0) { if (m_debug) { System.err.println("-- " + numCorrect + " correct"); } stats.addElement(new Double(numCorrect)); } if (printer != null) { printer.flush(); printer.close(); out.close(); } } double[] st = new double[stats.size()]; for (int i = 0; i < stats.size(); i++) { st[i] = ((Double) stats.elementAt(i)).doubleValue(); } double avg = Utils.mean(st); double stdDev = Math.sqrt(Utils.variance(st)); System.out.println("Avg. number of matching keyphrases compared to existing ones : " + Utils.doubleToString(avg, 2) + " +/- " + Utils.doubleToString(stdDev, 2)); System.out.println("Based on " + stats.size() + " documents"); // m_KEAFilter.batchFinished(); }