List of usage examples for org.apache.commons.math3.stat.descriptive.SummaryStatistics#getMean()
public double getMean()
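Returns the arithmetic mean of the values that have been added, or Double.NaN if no values have been added.

Before the project-specific examples below, here is a minimal, self-contained sketch (not taken from any of the source files that follow; the sample values are made up) showing the call pattern they all share: create a SummaryStatistics, stream values in with addValue, then read the running mean back with getMean().

import org.apache.commons.math3.stat.descriptive.SummaryStatistics;

public class GetMeanExample {
    public static void main(String[] args) {
        SummaryStatistics stats = new SummaryStatistics();
        // SummaryStatistics is a streaming accumulator: each addValue() call
        // updates running moments, so the individual values are not stored.
        for (double v : new double[] { 1.0, 2.0, 3.0, 4.0 }) {
            stats.addValue(v);
        }
        System.out.println("mean = " + stats.getMean()); // 2.5
        System.out.println("n    = " + stats.getN());    // 4
    }
}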
From source file:gov.llnl.lc.infiniband.opensm.plugin.data.OSM_FabricDeltaAnalyzer.java
public double getNodeUtilization(IB_Guid guid, PFM_Port.PortCounterName pcn) {
    // find all the ports for this node, and average up their utilization numbers
    IB_Vertex v = getIB_Vertex(guid);
    if (v == null)
        return 0.0;

    SummaryStatistics nodeStats = new SummaryStatistics();

    // loop through all the ports in this vertex
    int num_ports = v.getNode().sbnNode.num_ports;
    for (int pn = 1; pn <= num_ports; pn++) {
        nodeStats.addValue(getPortUtilization(guid, pn, pcn));
    }
    return nodeStats.getMean();
}
From source file:gdsc.smlm.ij.plugins.TraceMolecules.java
private void runOptimiser(TraceManager manager) {
    // Get an estimate of the number of molecules without blinking
    SummaryStatistics stats = new SummaryStatistics();
    final double nmPerPixel = this.results.getNmPerPixel();
    final double gain = this.results.getGain();
    final boolean emCCD = this.results.isEMCCD();
    for (PeakResult result : this.results.getResults())
        stats.addValue(result.getPrecision(nmPerPixel, gain, emCCD));

    // Use twice the precision to get the initial distance threshold
    // Use 2.5x sigma as per the PC-PALM protocol in Sengupta, et al (2013) Nature Protocols 8, 345
    double dEstimate = stats.getMean() * 2.5 / nmPerPixel;
    int n = manager.traceMolecules(dEstimate, 1);

    //for (double d : new double[] { 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4 })
    //    System.out.printf("d=%.2f, estimate=%d\n", d,
    //            manager.traceMolecules(stats.getMean() * d / this.results.getNmPerPixel(), 1));

    if (!getParameters(n, dEstimate))
        return;

    // TODO - Convert the distance threshold to use nm instead of pixels?
    List<double[]> results = runTracing(manager, settings.minDistanceThreshold, settings.maxDistanceThreshold,
            timeInFrames(settings.minTimeThreshold), timeInFrames(settings.maxTimeThreshold),
            settings.optimiserSteps);

    // Compute fractional difference from the true value:
    // Use blinking rate directly or the estimated number of molecules
    double nReference;
    int statistic;
    if (optimiseBlinkingRate) {
        nReference = settings.blinkingRate;
        statistic = 3;
        IJ.log(String.format("Estimating blinking rate: %.2f", nReference));
    } else {
        nReference = n / settings.blinkingRate;
        statistic = 2;
        IJ.log(String.format("Estimating number of molecules: %d / %.2f = %.2f", n, settings.blinkingRate,
                nReference));
    }

    for (double[] result : results) {
        //System.out.printf("%g %g = %g\n", result[0], result[1], result[2]);
        if (optimiseBlinkingRate)
            result[2] = (nReference - result[statistic]) / nReference;
        else
            result[2] = (result[statistic] - nReference) / nReference;
    }

    // Locate the optimal parameters with a fit of the zero contour
    boolean found = findOptimalParameters(results);

    createPlotResults(results);

    if (!found) {
        // Make fractional difference absolute so that lowest is best
        for (double[] result : results)
            result[2] = Math.abs(result[2]);

        // Set the optimal thresholds using the lowest value
        double[] best = new double[] { 0, 0, Double.MAX_VALUE };
        for (double[] result : results)
            if (best[2] > result[2])
                best = result;

        settings.distanceThreshold = best[0];
        settings.timeThreshold = best[1];
    }

    // The optimiser works using frames so convert back to seconds
    settings.timeThreshold *= exposureTime;

    IJ.log(String.format("Optimal fractional difference @ D-threshold=%g, T-threshold=%f (%d frames)",
            settings.distanceThreshold, settings.timeThreshold, timeInFrames(settings.timeThreshold)));

    SettingsManager.saveSettings(globalSettings);
}
From source file:fr.inria.eventcloud.benchmarks.radix10_conversion.Radix10ConversionBenchmark.java
private final void test(Run run) {
    System.out.println("Testing precision with NB_QUADS_TO_READ=" + run.getNbQuadsToRead() + " and PRECISION="
            + run.getPrecision());

    FileInputStream fis = null;
    Iterator<Quad> it = null;

    try {
        fis = new FileInputStream(this.trigResource);
        it = RiotReader.createIteratorQuads(fis, Lang.TRIG, null);

        SummaryStatistics statsWithoutPrefixRemoval = new SummaryStatistics();
        SummaryStatistics statsWithPrefixRemoval = new SummaryStatistics();

        Stopwatch stopwatch = Stopwatch.createUnstarted();

        int i = 1;
        while (it.hasNext()) {
            if (i >= run.getNbQuadsToRead()) {
                break;
            }

            Quad quad = it.next();

            if (this.enableRdfStats) {
                // compute stats without applying doping function
                statsWithoutPrefixRemoval.addValue(size(quad.getGraph()));
                statsWithoutPrefixRemoval.addValue(size(quad.getSubject()));
                statsWithoutPrefixRemoval.addValue(size(quad.getPredicate()));
                statsWithoutPrefixRemoval.addValue(size(quad.getObject()));
            }

            String g = SemanticCoordinate.applyDopingFunction(quad.getGraph());
            String s = SemanticCoordinate.applyDopingFunction(quad.getSubject());
            String p = SemanticCoordinate.applyDopingFunction(quad.getPredicate());
            String o = SemanticCoordinate.applyDopingFunction(quad.getObject());

            if (this.enableRdfStats) {
                // compute stats by applying doping function
                statsWithPrefixRemoval.addValue(g.length());
                statsWithPrefixRemoval.addValue(s.length());
                statsWithPrefixRemoval.addValue(p.length());
                statsWithPrefixRemoval.addValue(o.length());
            }

            long precision = run.getPrecision();

            stopwatch.start();
            ApfloatUtils.toFloatRadix10(g, precision);
            ApfloatUtils.toFloatRadix10(s, precision);
            ApfloatUtils.toFloatRadix10(p, precision);
            ApfloatUtils.toFloatRadix10(o, precision);
            stopwatch.stop();

            i++;
        }

        if (this.enableRdfStats) {
            System.out.println(" RDF term min size before prefix removal is " + statsWithoutPrefixRemoval.getMin());
            System.out.println(" RDF term max size before prefix removal is " + statsWithoutPrefixRemoval.getMax());
            System.out.println(" RDF term average size before prefix removal is " + statsWithoutPrefixRemoval.getMean());
            System.out.println(" RDF term min size after prefix removal is " + statsWithPrefixRemoval.getMin());
            System.out.println(" RDF term max size after prefix removal is " + statsWithPrefixRemoval.getMax());
            System.out.println(" RDF term average size after prefix removal is " + statsWithPrefixRemoval.getMean());
        }

        System.out.println("Time to perform radix 10 conversion for " + i + " with precision set to "
                + run.getPrecision() + " is " + stopwatch.toString() + " --> "
                + stopwatch.elapsed(TimeUnit.MILLISECONDS) + " ms");
        System.out.println();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } finally {
        if (fis != null) {
            try {
                fis.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:iDynoOptimizer.MOEAFramework26.src.org.moeaframework.analysis.sensitivity.SimpleStatistics.java
@Override
public void run(CommandLine commandLine) throws Exception {
    String mode = null;
    PrintStream out = null;
    List<double[][]> entries = new ArrayList<double[][]>();
    SummaryStatistics statistics = new SummaryStatistics();
    OptionCompleter completer = new OptionCompleter("minimum", "maximum", "average", "stdev", "count");

    //load data from all input files
    for (String filename : commandLine.getArgs()) {
        entries.add(load(new File(filename)));
    }

    //validate the inputs
    if (entries.isEmpty()) {
        throw new IllegalArgumentException("requires at least one file");
    }

    int numberOfRows = -1;
    int numberOfColumns = -1;

    for (int i = 0; i < entries.size(); i++) {
        if (numberOfRows == -1) {
            numberOfRows = entries.get(i).length;

            if (numberOfRows == 0) {
                throw new IllegalArgumentException("empty file: " + commandLine.getArgs()[i]);
            }
        } else if (numberOfRows != entries.get(i).length) {
            throw new IllegalArgumentException("unbalanced rows: " + commandLine.getArgs()[i]);
        }

        if (numberOfColumns == -1) {
            numberOfColumns = entries.get(i)[0].length;
        } else if (numberOfColumns != entries.get(i)[0].length) {
            throw new IllegalArgumentException("unbalanced columns: " + commandLine.getArgs()[i]);
        }
    }

    //setup the mode
    if (commandLine.hasOption("mode")) {
        mode = completer.lookup(commandLine.getOptionValue("mode"));

        if (mode == null) {
            throw new IllegalArgumentException("invalid mode");
        }
    } else {
        mode = "average";
    }

    try {
        //instantiate the writer
        if (commandLine.hasOption("output")) {
            out = new PrintStream(commandLine.getOptionValue("output"));
        } else {
            out = System.out;
        }

        //compute the statistics
        for (int i = 0; i < numberOfRows; i++) {
            for (int j = 0; j < numberOfColumns; j++) {
                statistics.clear();

                for (int k = 0; k < entries.size(); k++) {
                    double value = entries.get(k)[i][j];

                    if (Double.isInfinite(value) && commandLine.hasOption("maximum")) {
                        value = Double.parseDouble(commandLine.getOptionValue("maximum"));
                    }

                    if ((Double.isInfinite(value) || Double.isNaN(value)) && commandLine.hasOption("ignore")) {
                        // ignore infinity or NaN values
                    } else {
                        statistics.addValue(value);
                    }
                }

                if (j > 0) {
                    out.print(' ');
                }

                if (mode.equals("minimum")) {
                    out.print(statistics.getMin());
                } else if (mode.equals("maximum")) {
                    out.print(statistics.getMax());
                } else if (mode.equals("average")) {
                    out.print(statistics.getMean());
                } else if (mode.equals("stdev")) {
                    out.print(statistics.getStandardDeviation());
                } else if (mode.equals("count")) {
                    out.print(statistics.getN());
                } else {
                    throw new IllegalArgumentException("unknown mode: " + mode);
                }
            }

            out.println();
        }
    } finally {
        if ((out != null) && (out != System.out)) {
            out.close();
        }
    }
}
From source file:model.experiments.stickyprices.StickyPricesCSVPrinter.java
private static void woodMonopolistSweep(final BigDecimal minimumP, final BigDecimal maximumP,
        final BigDecimal minimumI, final BigDecimal maximumI, final BigDecimal increment,
        final int runsPerParameterCombination) throws IOException {
    CSVWriter writer = new CSVWriter(new FileWriter(Paths.get("runs", "rawdata", "monoSweep.csv").toFile()));
    writer.writeNext(new String[] { "P", "I", "distance", "variance", "success" });

    BigDecimal currentP = minimumP;
    while (currentP.compareTo(maximumP) <= 0) {
        BigDecimal currentI = minimumI;

        while (currentI.compareTo(maximumI) <= 0) {
            SummaryStatistics averageSquaredDistance = new SummaryStatistics();
            SummaryStatistics averageVariance = new SummaryStatistics();
            int successes = 0;

            for (int run = 0; run < runsPerParameterCombination; run++) {
                //create the run
                MacroII macroII = new MacroII(run);
                MonopolistScenario scenario = new MonopolistScenario(macroII);
                macroII.setScenario(scenario);

                //set the demand
                scenario.setDemandIntercept(102);
                scenario.setDemandSlope(2);
                scenario.setDailyWageSlope(1);
                scenario.setDailyWageIntercept(0);
                scenario.setAskPricingStrategy(SimpleFlowSellerPID.class);
                scenario.setWorkersToBeRehiredEveryDay(true);
                scenario.setControlType(
                        MonopolistScenario.MonopolistScenarioIntegratedControlEnum.MARGINAL_PLANT_CONTROL);
                scenario.setBuyerDelay(0);

                //start it and have one step
                macroII.start();
                macroII.schedule.step(macroII);

                //now set the right parameters
                final SalesDepartment salesDepartment = scenario.getMonopolist()
                        .getSalesDepartment(UndifferentiatedGoodType.GENERIC);
                final SimpleFlowSellerPID strategy = new SimpleFlowSellerPID(salesDepartment,
                        currentP.floatValue(), currentI.floatValue(), 0f, 0, salesDepartment.getMarket(),
                        salesDepartment.getRandom().nextInt(100), salesDepartment.getFirm().getModel());
                //strategy.setInitialPrice(102);
                //start them all at the same price, otherwise you advantage the slow by being so slow initially
                //that they end up being right later
                salesDepartment.setAskPricingStrategy(strategy);

                //and make it learned!
                salesDepartment.setPredictorStrategy(new FixedDecreaseSalesPredictor(2));
                final HumanResources hr = scenario.getMonopolist().getHRs().iterator().next();
                hr.setPredictor(new FixedIncreasePurchasesPredictor(1));

                float totalDistance = 0;
                SummaryStatistics prices = new SummaryStatistics();

                //run the model
                double price = 0;
                double quantity = 0;
                for (int i = 0; i < 1000; i++) {
                    macroII.schedule.step(macroII);
                    price = strategy.getTargetPrice();
                    quantity = salesDepartment.getTodayInflow();
                    totalDistance += Math
                            .pow(Math.min(price - (102 - 2 * quantity), price - (102 - 2 * quantity - 1)), 2);
                    prices.addValue(price);
                }

                //Model over, now compute statistics
                averageSquaredDistance.addValue(Math.sqrt(totalDistance));
                averageVariance.addValue(prices.getVariance());
                if (price <= 68 && price >= 67)
                    successes++;

                //System.out.println(salesDepartment.getLatestObservation(SalesDataType.LAST_ASKED_PRICE));
                macroII.finish();
            }

            String[] csvLine = new String[5];
            csvLine[0] = currentP.toString();
            csvLine[1] = currentI.toString();
            csvLine[2] = String.valueOf(averageSquaredDistance.getMean());
            csvLine[3] = String.valueOf(averageVariance.getMean());
            csvLine[4] = String.valueOf(successes);
            writer.writeNext(csvLine);
            writer.flush();
            System.out.println(Arrays.toString(csvLine));

            currentI = currentI.add(increment).setScale(2);
            System.out.println();
        }

        currentP = currentP.add(increment).setScale(2);
    }
}
From source file:model.experiments.stickyprices.StickyPricesCSVPrinter.java
public static double[] beefMonopolistOneRun(long seed, float divideMonopolistGainsByThis, int monopolistSpeed,
        final boolean beefLearned, final boolean foodLearned, int maximizationSpeed, File csvFileToWrite) {
    SummaryStatistics distance = new SummaryStatistics();
    SummaryStatistics last1000Distance = new SummaryStatistics();

    final MacroII macroII = new MacroII(seed);
    final OneLinkSupplyChainScenarioWithCheatingBuyingPrice scenario1 = new OneLinkSupplyChainScenarioWithCheatingBuyingPrice(
            macroII) {

        @Override
        protected void buildBeefSalesPredictor(SalesDepartment dept) {
            if (beefLearned) {
                FixedDecreaseSalesPredictor predictor = SalesPredictor.Factory
                        .newSalesPredictor(FixedDecreaseSalesPredictor.class, dept);
                predictor.setDecrementDelta(2);
                dept.setPredictorStrategy(predictor);
            } else {
                assert dept.getPredictorStrategy() instanceof RecursiveSalePredictor;
                //assuming here nothing has been changed and we are still dealing with recursive sale predictors
                dept.setPredictorStrategy(new RecursiveSalePredictor(model, dept, 500));
            }
        }

        @Override
        public void buildFoodPurchasesPredictor(PurchasesDepartment department) {
            if (foodLearned)
                department.setPredictor(new FixedIncreasePurchasesPredictor(0));
        }

        @Override
        protected SalesDepartment createSalesDepartment(Firm firm, Market goodmarket) {
            SalesDepartment department = super.createSalesDepartment(firm, goodmarket);
            if (goodmarket.getGoodType().equals(OneLinkSupplyChainScenario.OUTPUT_GOOD)) {
                if (foodLearned)
                    department.setPredictorStrategy(new FixedDecreaseSalesPredictor(0));
            }
            return department;
        }

        @Override
        protected HumanResources createPlant(Blueprint blueprint, Firm firm, Market laborMarket) {
            HumanResources hr = super.createPlant(blueprint, firm, laborMarket);
            if (blueprint.getOutputs().containsKey(OneLinkSupplyChainScenario.INPUT_GOOD)) {
                if (beefLearned) {
                    hr.setPredictor(new FixedIncreasePurchasesPredictor(1));
                }
            }
            if (blueprint.getOutputs().containsKey(OneLinkSupplyChainScenario.OUTPUT_GOOD)) {
                if (foodLearned)
                    hr.setPredictor(new FixedIncreasePurchasesPredictor(0));
            }
            return hr;
        }
    };

    scenario1.setControlType(MarginalMaximizer.class);
    scenario1.setSalesDepartmentType(SalesDepartmentOneAtATime.class);
    scenario1.setBeefPriceFilterer(null);

    //competition!
    scenario1.setNumberOfBeefProducers(1);
    scenario1.setBeefTargetInventory(100);
    scenario1.setNumberOfFoodProducers(5);

    scenario1.setDivideProportionalGainByThis(divideMonopolistGainsByThis);
    scenario1.setDivideIntegrativeGainByThis(divideMonopolistGainsByThis);

    //no delay
    scenario1.setBeefPricingSpeed(monopolistSpeed);

    //add csv writer if needed
    if (csvFileToWrite != null)
        DailyStatCollector.addDailyStatCollectorToModel(csvFileToWrite, macroII);

    macroII.setScenario(scenario1);
    macroII.start();
    macroII.schedule.step(macroII);

    Preconditions.checkState(scenario1.getMaximizers().size() == 6, scenario1.getMaximizers().size()); // 1 monopolist, 5 competitors
    for (WorkforceMaximizer control : scenario1.getMaximizers())
        ((PeriodicMaximizer) control).setHowManyDaysBeforeEachCheck(maximizationSpeed);

    while (macroII.schedule.getTime() < 5000) {
        macroII.schedule.step(macroII);
        printProgressBar(14001, (int) macroII.schedule.getSteps(), 100);
        long price = macroII.getMarket(OneLinkSupplyChainScenario.INPUT_GOOD).getLastPrice();
        if (price < 0)
            price = 0;
        distance.addValue(Math.pow(68 - price, 2));
    }

    SummaryStatistics averageFoodPrice = new SummaryStatistics();
    SummaryStatistics averageBeefProduced = new SummaryStatistics();
    SummaryStatistics averageBeefPrice = new SummaryStatistics();
    for (int j = 0; j < 1000; j++) {
        //make the model run one more day:
        macroII.schedule.step(macroII);
        averageFoodPrice.addValue(macroII.getMarket(OneLinkSupplyChainScenario.OUTPUT_GOOD)
                .getLatestObservation(MarketDataType.AVERAGE_CLOSING_PRICE));
        averageBeefProduced
                .addValue(macroII.getMarket(OneLinkSupplyChainScenario.INPUT_GOOD).getYesterdayVolume());
        averageBeefPrice.addValue(macroII.getMarket(OneLinkSupplyChainScenario.INPUT_GOOD)
                .getLatestObservation(MarketDataType.AVERAGE_CLOSING_PRICE));
        long price = macroII.getMarket(OneLinkSupplyChainScenario.INPUT_GOOD).getLastPrice();
        if (price < 0)
            price = 0;
        distance.addValue(Math.pow(68 - price, 2));
        last1000Distance.addValue(Math.pow(68 - price, 2));
    }

    return new double[] { distance.getMean(), last1000Distance.getMean() };
}
From source file:com.civprod.writerstoolbox.OpenNLP.training.TokenizerTrainer.java
private void cmdTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainActionPerformed
    final TokenizerTrainer tempThis = this;
    new Thread(() -> {
        textTestResults.setText("");
        Charset charset = Charset.forName("UTF-8");

        //create TokenizerFactory part of the training context
        String alphaNumericRegex = txtAlphaNumericPattern.getText();
        alphaNumericRegex = alphaNumericRegex.trim();
        if (alphaNumericRegex.isEmpty()) {
            alphaNumericRegex = "^[A-Za-z0-9]+$";
        }
        Pattern alphaNumericPattern = Pattern.compile(alphaNumericRegex);
        TokenizerFactory myTokenizerFactory = new TokenizerFactory("EN", mAbbreviationDictionary,
                this.cbUseAlphaNumericOptimization.isSelected(), alphaNumericPattern);

        Tokenizer stdTokenizer = null;
        try {
            stdTokenizer = OpenNLPUtils.createTokenizer();
        } catch (IOException ex) {
            Logger.getLogger(TokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex);
        }

        List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel);
        File trainingFile = new File("en-token.train");
        File testFile = new File("en-token.test");

        SummaryStatistics curFStats = new SummaryStatistics();
        SummaryStatistics curRecallStats = new SummaryStatistics();
        SummaryStatistics curPrecisionStats = new SummaryStatistics();
        SummaryStatistics stdFStats = new SummaryStatistics();
        SummaryStatistics stdRecallStats = new SummaryStatistics();
        SummaryStatistics stdPrecisionStats = new SummaryStatistics();

        java.io.BufferedOutputStream trainingFileWriter = null;
        for (FileSplit curFileSplit : FileSplits) {
            try {
                //create training file
                trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile));
                for (File curTrainingFile : curFileSplit.getTrainingFiles()) {
                    java.io.BufferedInputStream curTrainingFileReader = null;
                    try {
                        curTrainingFileReader = new java.io.BufferedInputStream(
                                new java.io.FileInputStream(curTrainingFile));
                        while (curTrainingFileReader.available() > 0) {
                            trainingFileWriter.write(curTrainingFileReader.read());
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                trainingFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingFileWriter != null) {
                    try {
                        trainingFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create test file
            java.io.BufferedOutputStream testFileWriter = null;
            try {
                testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile));
                for (File curTrainingFile : curFileSplit.getTestFiles()) {
                    String testingFileName = curTrainingFile.getCanonicalPath();
                    textTestResults.setText(textTestResults.getText() + "testing with " + testingFileName + "\n");
                    java.io.BufferedInputStream curTrainingFileReader = null;
                    try {
                        curTrainingFileReader = new java.io.BufferedInputStream(
                                new java.io.FileInputStream(curTrainingFile));
                        while (curTrainingFileReader.available() > 0) {
                            int read = curTrainingFileReader.read();
                            testFileWriter.write(read);
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                testFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (testFileWriter != null) {
                    try {
                        testFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create and train model
            ObjectStream<String> trainingLineStream = null;
            TokenizerModel train = null;
            try {
                trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset);
                ObjectStream<TokenSample> sampleStream = null;
                try {
                    sampleStream = new TokenSampleStream(trainingLineStream);
                    train = TokenizerME.train(sampleStream, myTokenizerFactory, TrainingParameters.defaultParams());
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (sampleStream != null) {
                        try {
                            sampleStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            } catch (FileNotFoundException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingLineStream != null) {
                    try {
                        trainingLineStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            if (train != null) {
                ObjectStream<String> testingLineStream = null;
                try {
                    testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset);
                    ObjectStream<TokenSample> sampleStream = null;
                    try {
                        sampleStream = new TokenSampleStream(testingLineStream);
                        TokenizerME testDetector = new TokenizerME(train);
                        TokenizerEvaluator evaluator = new TokenizerEvaluator(testDetector);
                        evaluator.evaluate(sampleStream);
                        FMeasure testFMeasure = evaluator.getFMeasure();
                        curFStats.addValue(testFMeasure.getFMeasure());
                        curRecallStats.addValue(testFMeasure.getRecallScore());
                        curPrecisionStats.addValue(testFMeasure.getPrecisionScore());
                        textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " "
                                + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n");
                        if (stdTokenizer != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset);
                            sampleStream = new TokenSampleStream(testingLineStream);
                            TokenizerEvaluator stdEvaluator = new TokenizerEvaluator(stdTokenizer);
                            stdEvaluator.evaluate(sampleStream);
                            FMeasure stdFMeasure = stdEvaluator.getFMeasure();
                            stdFStats.addValue(stdFMeasure.getFMeasure());
                            stdRecallStats.addValue(stdFMeasure.getRecallScore());
                            stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " "
                                    + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n");
                        }
                        textTestResults.setText(textTestResults.getText() + "\n");
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (sampleStream != null) {
                            try {
                                sampleStream.close();
                            } catch (IOException ex) {
                                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                            }
                        }
                    }
                } catch (FileNotFoundException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (testingLineStream != null) {
                        try {
                            testingLineStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "\n");
        textTestResults.setText(textTestResults.getText() + "test model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev "
                + curFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev "
                + curRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean()
                + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "std model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev "
                + stdFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev "
                + stdRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean()
                + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n");

        //create combined training file
        trainingFileWriter = null;
        try {
            trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile));
            for (File curTrainingFile : mFileCollectionListModel) {
                java.io.BufferedInputStream curTrainingFileReader = null;
                try {
                    curTrainingFileReader = new java.io.BufferedInputStream(
                            new java.io.FileInputStream(curTrainingFile));
                    while (curTrainingFileReader.available() > 0) {
                        trainingFileWriter.write(curTrainingFileReader.read());
                    }
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (curTrainingFileReader != null) {
                        curTrainingFileReader.close();
                    }
                }
            }
            trainingFileWriter.write('\n');
        } catch (IOException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (trainingFileWriter != null) {
                try {
                    trainingFileWriter.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        //create and train model
        ObjectStream<String> lineStream = null;
        this.createdObject = null;
        try {
            lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset);
            ObjectStream<TokenSample> sampleStream = null;
            try {
                sampleStream = new TokenSampleStream(lineStream);
                this.createdObject = TokenizerME.train(sampleStream, myTokenizerFactory,
                        TrainingParameters.defaultParams());
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (sampleStream != null) {
                    try {
                        sampleStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        } catch (FileNotFoundException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (lineStream != null) {
                try {
                    lineStream.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        if (createdObject != null) {
            OutputStream modelOut = null;
            File modelFile = new File("en-fiction-token.bin");
            try {
                modelOut = new BufferedOutputStream(new FileOutputStream(modelFile));
                createdObject.serialize(modelOut);
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (modelOut != null) {
                    try {
                        modelOut.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "done");
    }).start();
}
From source file:com.civprod.writerstoolbox.OpenNLP.training.ThoughtAndSpeechTrainer.java
private void cmdTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainActionPerformed
    final ThoughtAndSpeechTrainer tempThis = this;
    new Thread(() -> {
        textTestResults.setText("");
        Charset charset = Charset.forName("UTF-8");

        //create TokenizerFactory part of the training context
        ThoughtAndSpeechParserFactory myTokenizerFactory = new ThoughtAndSpeechParserFactory("EN",
                this.saidWordsDictionary, this.thoughtWordsDictionary);
        /*ThoughtAndSpeechParser stdTokenizer = null;
        try {
            stdTokenizer = OpenNLPUtils.createTokenizer();
        } catch (IOException ex) {
            Logger.getLogger(TokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex);
        }*/

        List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel);
        File trainingFile = new File("en-ThoughtAndSpeech.train");
        File testFile = new File("en-ThoughtAndSpeech.test");

        SummaryStatistics curFStats = new SummaryStatistics();
        SummaryStatistics curRecallStats = new SummaryStatistics();
        SummaryStatistics curPrecisionStats = new SummaryStatistics();
        SummaryStatistics stdFStats = new SummaryStatistics();
        SummaryStatistics stdRecallStats = new SummaryStatistics();
        SummaryStatistics stdPrecisionStats = new SummaryStatistics();

        java.io.BufferedOutputStream trainingFileWriter = null;
        for (FileSplit curFileSplit : FileSplits) {
            try {
                //create training file
                trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile));
                for (File curTrainingFile : curFileSplit.getTrainingFiles()) {
                    java.io.BufferedInputStream curTrainingFileReader = null;
                    try {
                        curTrainingFileReader = new java.io.BufferedInputStream(
                                new java.io.FileInputStream(curTrainingFile));
                        while (curTrainingFileReader.available() > 0) {
                            trainingFileWriter.write(curTrainingFileReader.read());
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                trainingFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingFileWriter != null) {
                    try {
                        trainingFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create test file
            java.io.BufferedOutputStream testFileWriter = null;
            try {
                testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile));
                for (File curTrainingFile : curFileSplit.getTestFiles()) {
                    String testingFileName = curTrainingFile.getCanonicalPath();
                    textTestResults.setText(textTestResults.getText() + "testing with " + testingFileName + "\n");
                    java.io.BufferedInputStream curTrainingFileReader = null;
                    try {
                        curTrainingFileReader = new java.io.BufferedInputStream(
                                new java.io.FileInputStream(curTrainingFile));
                        while (curTrainingFileReader.available() > 0) {
                            int read = curTrainingFileReader.read();
                            testFileWriter.write(read);
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                testFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (testFileWriter != null) {
                    try {
                        testFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create and train model
            ObjectStream<String> trainingLineStream = null;
            ThoughtAndSpeechModel train = null;
            try {
                trainingLineStream = new PlainTextByLineStream(new MarkableFileInputStreamFactory(trainingFile),
                        charset);
                ObjectStream<ThoughtAndSpeechSample> sampleStream = null;
                try {
                    sampleStream = new ThoughtAndSpeechSampleStream(trainingLineStream);
                    train = ThoughtAndSpeechParserME.train("en", sampleStream, myTokenizerFactory,
                            TrainingParameters.defaultParams());
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (sampleStream != null) {
                        try {
                            sampleStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingLineStream != null) {
                    try {
                        trainingLineStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            if (train != null) {
                ObjectStream<String> testingLineStream = null;
                try {
                    testingLineStream = new PlainTextByLineStream(new MarkableFileInputStreamFactory(testFile),
                            charset);
                    ObjectStream<ThoughtAndSpeechSample> sampleStream = null;
                    try {
                        sampleStream = new ThoughtAndSpeechSampleStream(testingLineStream);
                        ThoughtAndSpeechParserME testDetector = new ThoughtAndSpeechParserME(train);
                        ThoughtAndSpeechEvaluator evaluator = new ThoughtAndSpeechEvaluator(testDetector);
                        evaluator.evaluate(sampleStream);
                        FMeasure testFMeasure = evaluator.getFMeasure();
                        curFStats.addValue(testFMeasure.getFMeasure());
                        curRecallStats.addValue(testFMeasure.getRecallScore());
                        curPrecisionStats.addValue(testFMeasure.getPrecisionScore());
                        textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " "
                                + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n");
                        /*if (stdTokenizer != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset);
                            sampleStream = new TokenSampleStream(testingLineStream);
                            TokenizerEvaluator stdEvaluator = new TokenizerEvaluator(stdTokenizer);
                            stdEvaluator.evaluate(sampleStream);
                            FMeasure stdFMeasure = stdEvaluator.getFMeasure();
                            stdFStats.addValue(stdFMeasure.getFMeasure());
                            stdRecallStats.addValue(stdFMeasure.getRecallScore());
                            stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " "
                                    + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n");
                        }*/
                        textTestResults.setText(textTestResults.getText() + "\n");
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (sampleStream != null) {
                            try {
                                sampleStream.close();
                            } catch (IOException ex) {
                                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                            }
                        }
                    }
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (testingLineStream != null) {
                        try {
                            testingLineStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "\n");
        textTestResults.setText(textTestResults.getText() + "test model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev "
                + curFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev "
                + curRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean()
                + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "std model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev "
                + stdFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev "
                + stdRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean()
                + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n");

        //create combined training file
        trainingFileWriter = null;
        try {
            trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile));
            for (File curTrainingFile : mFileCollectionListModel) {
                java.io.BufferedInputStream curTrainingFileReader = null;
                try {
                    curTrainingFileReader = new java.io.BufferedInputStream(
                            new java.io.FileInputStream(curTrainingFile));
                    while (curTrainingFileReader.available() > 0) {
                        trainingFileWriter.write(curTrainingFileReader.read());
                    }
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (curTrainingFileReader != null) {
                        curTrainingFileReader.close();
                    }
                }
            }
            trainingFileWriter.write('\n');
        } catch (IOException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (trainingFileWriter != null) {
                try {
                    trainingFileWriter.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        //create and train model
        ObjectStream<String> lineStream = null;
        this.createdObject = null;
        try {
            lineStream = new PlainTextByLineStream(new MarkableFileInputStreamFactory(trainingFile), charset);
            ObjectStream<ThoughtAndSpeechSample> sampleStream = null;
            try {
                sampleStream = new ThoughtAndSpeechSampleStream(lineStream);
                this.createdObject = ThoughtAndSpeechParserME.train("en", sampleStream, myTokenizerFactory,
                        TrainingParameters.defaultParams());
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (sampleStream != null) {
                    try {
                        sampleStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        } catch (IOException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (lineStream != null) {
                try {
                    lineStream.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        if (createdObject != null) {
            OutputStream modelOut = null;
            File modelFile = new File("en-ThoughtAndSpeech-token.bin");
            try {
                modelOut = new BufferedOutputStream(new FileOutputStream(modelFile));
                createdObject.serialize(modelOut);
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (modelOut != null) {
                    try {
                        modelOut.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "done");
    }).start();
}
From source file:com.civprod.writerstoolbox.OpenNLP.training.SentenceDetectorTrainer.java
private void cmdTrainSentenceDetectorActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainSentenceDetectorActionPerformed
    final SentenceDetectorTrainer tempThis = this;
    new Thread(() -> {
        textTestResults.setText("");
        Charset charset = Charset.forName("UTF-8");

        //read other models
        SentenceDetector stdDetector = null;
        try {
            stdDetector = OpenNLPUtils.createSentenceDetector();
        } catch (IOException ex) {
        }

        List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel);
        File trainingFile = new File("en-sent.train");
        File testFile = new File("en-sent.test");

        SummaryStatistics curFStats = new SummaryStatistics();
        SummaryStatistics curRecallStats = new SummaryStatistics();
        SummaryStatistics curPrecisionStats = new SummaryStatistics();
        SummaryStatistics stdFStats = new SummaryStatistics();
        SummaryStatistics stdRecallStats = new SummaryStatistics();
        SummaryStatistics stdPrecisionStats = new SummaryStatistics();

        java.io.BufferedOutputStream trainingFileWriter = null;
        for (FileSplit curFileSplit : FileSplits) {
            try {
                //create training file
                trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile));
                for (File curTrainingFile : curFileSplit.getTrainingFiles()) {
                    java.io.BufferedInputStream curTrainingFileReader = null;
                    try {
                        curTrainingFileReader = new java.io.BufferedInputStream(
                                new java.io.FileInputStream(curTrainingFile));
                        while (curTrainingFileReader.available() > 0) {
                            trainingFileWriter.write(curTrainingFileReader.read());
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                trainingFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingFileWriter != null) {
                    try {
                        trainingFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create test file
            java.io.BufferedOutputStream testFileWriter = null;
            try {
                testFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(testFile));
                for (File curTrainingFile : curFileSplit.getTestFiles()) {
                    String testingFileName = curTrainingFile.getCanonicalPath();
                    textTestResults.setText(textTestResults.getText() + "testing with " + testingFileName + "\n");
                    java.io.BufferedInputStream curTrainingFileReader = null;
                    try {
                        curTrainingFileReader = new java.io.BufferedInputStream(
                                new java.io.FileInputStream(curTrainingFile));
                        while (curTrainingFileReader.available() > 0) {
                            int read = curTrainingFileReader.read();
                            testFileWriter.write(read);
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                testFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (testFileWriter != null) {
                    try {
                        testFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create SentenceDetectorFactory part of the training context
            SentenceDetectorFactory mySentenceDetectorFactory = new SentenceDetectorFactory("EN",
                    cbUseTokenEnd.isSelected(), mAbbreviationDictionary, txtEosChars.getText().toCharArray());

            ObjectStream<String> trainingLineStream = null;
            SentenceModel train = null;
            try {
                trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset);
                ObjectStream<SentenceSample> sampleStream = null;
                try {
                    sampleStream = new SentenceSampleStream(trainingLineStream);
                    train = SentenceDetectorME.train("EN", sampleStream, mySentenceDetectorFactory,
                            TrainingParameters.defaultParams());
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (sampleStream != null) {
                        try {
                            sampleStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            } catch (FileNotFoundException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingLineStream != null) {
                    try {
                        trainingLineStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
            trainingLineStream = null;

            if (train != null) {
                ObjectStream<String> testingLineStream = null;
                try {
                    testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset);
                    ObjectStream<SentenceSample> sampleStream = null;
                    try {
                        sampleStream = new SentenceSampleStream(testingLineStream);
                        SentenceDetectorME testDetector = new SentenceDetectorME(train);
                        SentenceDetectorEvaluator evaluator = new SentenceDetectorEvaluator(testDetector);
                        evaluator.evaluate(sampleStream);
                        FMeasure testFMeasure = evaluator.getFMeasure();
                        curFStats.addValue(testFMeasure.getFMeasure());
                        curRecallStats.addValue(testFMeasure.getRecallScore());
                        curPrecisionStats.addValue(testFMeasure.getPrecisionScore());
                        textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " "
                                + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n");
                        if (stdDetector != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), charset);
                            sampleStream = new SentenceSampleStream(testingLineStream);
                            SentenceDetectorEvaluator stdEvaluator = new SentenceDetectorEvaluator(stdDetector);
                            stdEvaluator.evaluate(sampleStream);
                            FMeasure stdFMeasure = stdEvaluator.getFMeasure();
                            stdFStats.addValue(stdFMeasure.getFMeasure());
                            stdRecallStats.addValue(stdFMeasure.getRecallScore());
                            stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " "
                                    + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n");
                        }
                        textTestResults.setText(textTestResults.getText() + "\n");
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (sampleStream != null) {
                            try {
                                sampleStream.close();
                            } catch (IOException ex) {
                                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                            }
                        }
                    }
                } catch (FileNotFoundException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (testingLineStream != null) {
                        try {
                            testingLineStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "\n");
        textTestResults.setText(textTestResults.getText() + "test model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev "
                + curFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev "
                + curRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean()
                + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "std model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev "
                + stdFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev "
                + stdRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean()
                + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n");

        //create combined training file
        trainingFileWriter = null;
        try {
            trainingFileWriter = new java.io.BufferedOutputStream(new java.io.FileOutputStream(trainingFile));
            for (File curTrainingFile : mFileCollectionListModel) {
                java.io.BufferedInputStream curTrainingFileReader = null;
                try {
                    curTrainingFileReader = new java.io.BufferedInputStream(
                            new java.io.FileInputStream(curTrainingFile));
                    while (curTrainingFileReader.available() > 0) {
                        trainingFileWriter.write(curTrainingFileReader.read());
                    }
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (curTrainingFileReader != null) {
                        curTrainingFileReader.close();
                    }
                }
            }
            trainingFileWriter.write('\n');
        } catch (IOException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (trainingFileWriter != null) {
                try {
                    trainingFileWriter.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        //create SentenceDetectorFactory part of the training context
        SentenceDetectorFactory mySentenceDetectorFactory = new SentenceDetectorFactory("EN",
                cbUseTokenEnd.isSelected(), mAbbreviationDictionary, txtEosChars.getText().toCharArray());

        //create and train model
        ObjectStream<String> lineStream = null;
        this.createdObject = null;
        try {
            lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), charset);
            ObjectStream<SentenceSample> sampleStream = null;
            try {
                sampleStream = new SentenceSampleStream(lineStream);
                this.createdObject = SentenceDetectorME.train("EN", sampleStream, mySentenceDetectorFactory,
                        TrainingParameters.defaultParams());
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (sampleStream != null) {
                    try {
                        sampleStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        } catch (FileNotFoundException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (lineStream != null) {
                try {
                    lineStream.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        if (createdObject != null) {
            OutputStream modelOut = null;
            File modelFile = new File("en-fiction-sent.bin");
            try {
                modelOut = new BufferedOutputStream(new FileOutputStream(modelFile));
                createdObject.serialize(modelOut);
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (modelOut != null) {
                    try {
                        modelOut.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "done");
    }).start();
}
From source file:com.civprod.writerstoolbox.OpenNLP.training.WordSplitingTokenizerTrainer.java
private void cmdTrainActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_cmdTrainActionPerformed
    final WordSplitingTokenizerTrainer tempThis = this;
    final Charset utf8 = Charset.forName("UTF-8");
    new Thread(() -> {
        textTestResults.setText("");

        //create TokenizerFactory part of the training context
        WordSplittingTokenizerFactory myTokenizerFactory = new WordSplittingTokenizerFactory("EN",
                mAbbreviationDictionary, false, null, mSpellingDictionary,
                (TimeComplexity) comboTimeComplexity.getSelectedItem());

        Tokenizer stdTokenizer = null;
        try {
            stdTokenizer = OpenNLPUtils.createTokenizer();
        } catch (IOException ex) {
            Logger.getLogger(WordSplitingTokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex);
        }
        Tokenizer myNonSplitingTokenizer = null;
        try {
            myNonSplitingTokenizer = OpenNLPUtils.createTokenizer(OpenNLPUtils.readTokenizerModel(
                    OpenNLPUtils.buildModelFileStream(".\\data\\OpenNLP\\en-fiction-token.bin")));
        } catch (IOException ex) {
            Logger.getLogger(WordSplitingTokenizerTrainer.class.getName()).log(Level.SEVERE, null, ex);
        }

        List<FileSplit> FileSplits = FileSplit.generateFileSplitsLOO(mFileCollectionListModel);
        File trainingFile = new File("en-token.train");
        File testFile = new File("en-token.test");

        SummaryStatistics curFStats = new SummaryStatistics();
        SummaryStatistics curRecallStats = new SummaryStatistics();
        SummaryStatistics curPrecisionStats = new SummaryStatistics();
        SummaryStatistics stdFStats = new SummaryStatistics();
        SummaryStatistics stdRecallStats = new SummaryStatistics();
        SummaryStatistics stdPrecisionStats = new SummaryStatistics();
        SummaryStatistics myNonSplitFStats = new SummaryStatistics();
        SummaryStatistics myNonSplitRecallStats = new SummaryStatistics();
        SummaryStatistics myNonSplitPrecisionStats = new SummaryStatistics();

        java.io.BufferedWriter trainingFileWriter = null;
        for (FileSplit curFileSplit : FileSplits) {
            try {
                //create training file
                trainingFileWriter = new java.io.BufferedWriter(
                        new java.io.OutputStreamWriter(new java.io.FileOutputStream(trainingFile), utf8));
                for (File curTrainingFile : curFileSplit.getTrainingFiles()) {
                    java.io.BufferedReader curTrainingFileReader = null;
                    try {
                        Charset fileCharset = FileUtils.determineCharset(curTrainingFile);
                        if (fileCharset == null) {
                            fileCharset = utf8;
                        }
                        curTrainingFileReader = new java.io.BufferedReader(new java.io.InputStreamReader(
                                new java.io.FileInputStream(curTrainingFile), fileCharset));
                        while (curTrainingFileReader.ready()) {
                            String curLine = curTrainingFileReader.readLine();
                            trainingFileWriter.append(curLine).append("\n");
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                trainingFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingFileWriter != null) {
                    try {
                        trainingFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create test file
            java.io.BufferedWriter testFileWriter = null;
            try {
                testFileWriter = new java.io.BufferedWriter(
                        new java.io.OutputStreamWriter(new java.io.FileOutputStream(testFile), utf8));
                for (File curTrainingFile : curFileSplit.getTestFiles()) {
                    String testingFileName = curTrainingFile.getCanonicalPath();
                    textTestResults.setText(textTestResults.getText() + "testing with " + testingFileName + "\n");
                    java.io.BufferedReader curTrainingFileReader = null;
                    try {
                        Charset fileCharset = FileUtils.determineCharset(curTrainingFile);
                        if (fileCharset == null) {
                            fileCharset = utf8;
                        }
                        curTrainingFileReader = new java.io.BufferedReader(new java.io.InputStreamReader(
                                new java.io.FileInputStream(curTrainingFile), fileCharset));
                        while (curTrainingFileReader.ready()) {
                            String curLine = curTrainingFileReader.readLine();
                            testFileWriter.append(curLine).append("\n");
                        }
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (curTrainingFileReader != null) {
                            curTrainingFileReader.close();
                        }
                    }
                }
                testFileWriter.write('\n');
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (testFileWriter != null) {
                    try {
                        testFileWriter.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            //create and train model
            ObjectStream<String> trainingLineStream = null;
            TokenizerModel train = null;
            try {
                trainingLineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), utf8);
                ObjectStream<TokenSample> sampleStream = null;
                try {
                    sampleStream = new TokenSampleStream(trainingLineStream);
                    train = TokenizerME.train(sampleStream, myTokenizerFactory, TrainingParameters.defaultParams());
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (sampleStream != null) {
                        try {
                            sampleStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            } catch (FileNotFoundException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (trainingLineStream != null) {
                    try {
                        trainingLineStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }

            if (train != null) {
                ObjectStream<String> testingLineStream = null;
                try {
                    testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), utf8);
                    ObjectStream<TokenSample> sampleStream = null;
                    try {
                        sampleStream = new TokenSampleStream(testingLineStream);
                        TokenizerME testDetector = new TokenizerME(train);
                        TokenizerEvaluator evaluator = new TokenizerEvaluator(testDetector);
                        evaluator.evaluate(sampleStream);
                        FMeasure testFMeasure = evaluator.getFMeasure();
                        curFStats.addValue(testFMeasure.getFMeasure());
                        curRecallStats.addValue(testFMeasure.getRecallScore());
                        curPrecisionStats.addValue(testFMeasure.getPrecisionScore());
                        textTestResults.setText(textTestResults.getText() + testFMeasure.getFMeasure() + " "
                                + testFMeasure.getPrecisionScore() + " " + testFMeasure.getRecallScore() + "\n");
                        if (stdTokenizer != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), utf8);
                            sampleStream = new TokenSampleStream(testingLineStream);
                            TokenizerEvaluator stdEvaluator = new TokenizerEvaluator(stdTokenizer);
                            stdEvaluator.evaluate(sampleStream);
                            FMeasure stdFMeasure = stdEvaluator.getFMeasure();
                            stdFStats.addValue(stdFMeasure.getFMeasure());
                            stdRecallStats.addValue(stdFMeasure.getRecallScore());
                            stdPrecisionStats.addValue(stdFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " " + stdFMeasure.getFMeasure() + " "
                                    + stdFMeasure.getPrecisionScore() + " " + stdFMeasure.getRecallScore() + "\n");
                        }
                        if (myNonSplitingTokenizer != null) {
                            testingLineStream = new PlainTextByLineStream(new FileInputStream(testFile), utf8);
                            sampleStream = new TokenSampleStream(testingLineStream);
                            TokenizerEvaluator myNonSplitingEvaluator = new TokenizerEvaluator(myNonSplitingTokenizer);
                            myNonSplitingEvaluator.evaluate(sampleStream);
                            FMeasure myNonSplitFMeasure = myNonSplitingEvaluator.getFMeasure();
                            myNonSplitFStats.addValue(myNonSplitFMeasure.getFMeasure());
                            myNonSplitRecallStats.addValue(myNonSplitFMeasure.getRecallScore());
                            myNonSplitPrecisionStats.addValue(myNonSplitFMeasure.getPrecisionScore());
                            textTestResults.setText(textTestResults.getText() + " " + myNonSplitFMeasure.getFMeasure()
                                    + " " + myNonSplitFMeasure.getPrecisionScore() + " "
                                    + myNonSplitFMeasure.getRecallScore() + "\n");
                        }
                        textTestResults.setText(textTestResults.getText() + "\n");
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    } finally {
                        if (sampleStream != null) {
                            try {
                                sampleStream.close();
                            } catch (IOException ex) {
                                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                            }
                        }
                    }
                } catch (FileNotFoundException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (testingLineStream != null) {
                        try {
                            testingLineStream.close();
                        } catch (IOException ex) {
                            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                        }
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "\n");
        textTestResults.setText(textTestResults.getText() + "test model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + curFStats.getMean() + " stdDev "
                + curFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + curRecallStats.getMean() + " stdDev "
                + curRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + curPrecisionStats.getMean()
                + " stdDev " + curPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "std model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + stdFStats.getMean() + " stdDev "
                + stdFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + stdRecallStats.getMean() + " stdDev "
                + stdRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean " + stdPrecisionStats.getMean()
                + " stdDev " + stdPrecisionStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "my non spliting model\n");
        textTestResults.setText(textTestResults.getText() + "f score mean " + myNonSplitFStats.getMean()
                + " stdDev " + myNonSplitFStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "recall mean " + myNonSplitRecallStats.getMean()
                + " stdDev " + myNonSplitRecallStats.getStandardDeviation() + "\n");
        textTestResults.setText(textTestResults.getText() + "precision score mean "
                + myNonSplitPrecisionStats.getMean() + " stdDev "
                + myNonSplitPrecisionStats.getStandardDeviation() + "\n");

        //create combined training file
        trainingFileWriter = null;
        try {
            trainingFileWriter = new java.io.BufferedWriter(
                    new java.io.OutputStreamWriter(new java.io.FileOutputStream(trainingFile), utf8));
            for (File curTrainingFile : mFileCollectionListModel) {
                java.io.BufferedReader curTrainingFileReader = null;
                try {
                    Charset fileCharset = FileUtils.determineCharset(curTrainingFile);
                    if (fileCharset == null) {
                        fileCharset = utf8;
                    }
                    curTrainingFileReader = new java.io.BufferedReader(new java.io.InputStreamReader(
                            new java.io.FileInputStream(curTrainingFile), fileCharset));
                    while (curTrainingFileReader.ready()) {
                        String curLine = curTrainingFileReader.readLine();
                        trainingFileWriter.append(curLine).append("\n");
                    }
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                } finally {
                    if (curTrainingFileReader != null) {
                        curTrainingFileReader.close();
                    }
                }
            }
            trainingFileWriter.write('\n');
        } catch (IOException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (trainingFileWriter != null) {
                try {
                    trainingFileWriter.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        //create and train model
        ObjectStream<String> lineStream = null;
        this.createdObject = null;
        try {
            lineStream = new PlainTextByLineStream(new FileInputStream(trainingFile), utf8);
            ObjectStream<TokenSample> sampleStream = null;
            try {
                sampleStream = new TokenSampleStream(lineStream);
                this.createdObject = TokenizerME.train(sampleStream, myTokenizerFactory,
                        TrainingParameters.defaultParams());
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (sampleStream != null) {
                    try {
                        sampleStream.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        } catch (FileNotFoundException ex) {
            Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
        } finally {
            if (lineStream != null) {
                try {
                    lineStream.close();
                } catch (IOException ex) {
                    Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                }
            }
        }

        if (createdObject != null) {
            OutputStream modelOut = null;
            File modelFile = new File("en-fiction-token.bin");
            try {
                modelOut = new BufferedOutputStream(new FileOutputStream(modelFile));
                createdObject.serialize(modelOut);
            } catch (IOException ex) {
                Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
            } finally {
                if (modelOut != null) {
                    try {
                        modelOut.close();
                    } catch (IOException ex) {
                        Logger.getLogger(SentenceDetectorTrainer.class.getName()).log(Level.SEVERE, null, ex);
                    }
                }
            }
        }

        textTestResults.setText(textTestResults.getText() + "done");
    }).start();
}