List of usage examples for the org.apache.commons.math3.stat.descriptive.SynchronizedDescriptiveStatistics no-argument constructor
public SynchronizedDescriptiveStatistics()
From source file:com.weibo.motan.demo.client.DemoRpcClient.java
public static void main(String[] args) throws Exception { final DescriptiveStatistics stats = new SynchronizedDescriptiveStatistics(); int threads = Integer.parseInt(args[0]); DubboBenchmark.BenchmarkMessage msg = prepareArgs(); final byte[] msgBytes = msg.toByteArray(); int n = 1000000; final CountDownLatch latch = new CountDownLatch(n); ExecutorService es = Executors.newFixedThreadPool(threads); final AtomicInteger trans = new AtomicInteger(0); final AtomicInteger transOK = new AtomicInteger(0); ApplicationContext ctx = new ClassPathXmlApplicationContext( new String[] { "classpath:motan_demo_client.xml" }); MotanDemoService service = (MotanDemoService) ctx.getBean("motanDemoReferer"); long start = System.currentTimeMillis(); for (int i = 0; i < n; i++) { es.submit(() -> {// w w w . java2 s. co m try { long t = System.currentTimeMillis(); DubboBenchmark.BenchmarkMessage m = testSay(service, msgBytes); t = System.currentTimeMillis() - t; stats.addValue(t); trans.incrementAndGet(); if (m != null && m.getField1().equals("OK")) { transOK.incrementAndGet(); } } finally { latch.countDown(); } }); } latch.await(); start = System.currentTimeMillis() - start; System.out.printf("sent requests : %d\n", n); System.out.printf("received requests : %d\n", trans.get()); System.out.printf("received requests_OK : %d\n", transOK.get()); System.out.printf("throughput (TPS) : %d\n", n * 1000 / start); System.out.printf("mean: %f\n", stats.getMean()); System.out.printf("median: %f\n", stats.getPercentile(50)); System.out.printf("max: %f\n", stats.getMax()); System.out.printf("min: %f\n", stats.getMin()); System.out.printf("99P: %f\n", stats.getPercentile(90)); }
From source file:com.alibaba.dubbo.demo.consumer.DemoAction.java
public void start() throws Exception { int threads = 100; final DescriptiveStatistics stats = new SynchronizedDescriptiveStatistics(); DubboBenchmark.BenchmarkMessage msg = prepareArgs(); final byte[] msgBytes = msg.toByteArray(); int n = 1000000; final CountDownLatch latch = new CountDownLatch(n); ExecutorService es = Executors.newFixedThreadPool(threads); final AtomicInteger trans = new AtomicInteger(0); final AtomicInteger transOK = new AtomicInteger(0); long start = System.currentTimeMillis(); for (int i = 0; i < n; i++) { es.submit(() -> {//w ww.j a v a 2s. co m try { long t = System.currentTimeMillis(); DubboBenchmark.BenchmarkMessage m = testSay(msgBytes); t = System.currentTimeMillis() - t; stats.addValue(t); trans.incrementAndGet(); if (m != null && m.getField1().equals("OK")) { transOK.incrementAndGet(); } } catch (InterruptedException e) { e.printStackTrace(); } finally { latch.countDown(); } }); } latch.await(); start = System.currentTimeMillis() - start; System.out.printf("sent requests : %d\n", n); System.out.printf("received requests : %d\n", trans.get()); System.out.printf("received requests_OK : %d\n", transOK.get()); System.out.printf("throughput (TPS) : %d\n", n * 1000 / start); System.out.printf("mean: %f\n", stats.getMean()); System.out.printf("median: %f\n", stats.getPercentile(50)); System.out.printf("max: %f\n", stats.getMax()); System.out.printf("min: %f\n", stats.getMin()); System.out.printf("99P: %f\n", stats.getPercentile(90)); }
From source file:org.bml.util.rt.telemetry.track.AtomicIntegerTrack.java
private void initTrack(TRACK_TYPE trackType) { switch (trackType) { case DESCRIPTIVE: dStats = new SynchronizedDescriptiveStatistics(); break;//from ww w . j a va 2 s . c o m case SUMMARY: sStats = new SummaryStatistics(); break; } frequency = new Frequency(); }
From source file:org.dllearner.algorithms.qtl.experiments.PRConvergenceExperiment.java
/**
 * Runs the QTL PR-convergence benchmark end to end.
 *
 * <p>For every combination of heuristic, heuristic measure, number of positive
 * examples and noise level, this method learns query trees for each benchmark
 * SPARQL query on a fixed-size thread pool, records baseline / best-returned /
 * best-covering solution scores (precision, recall, F-measure, predictive
 * accuracy, Matthews correlation) into shared {@code SynchronizedDescriptiveStatistics}
 * collectors, and writes per-configuration .log/.stats files plus an
 * examples-vs-noise TSV; results are optionally persisted to a database.
 *
 * @param maxNrOfProcessedQueries maximum number of queries to process; -1 means all
 * @param maxTreeDepth maximum query-tree depth used by the tree factory
 * @param exampleInterval numbers of positive examples to iterate over (null keeps the field's default)
 * @param noiseInterval noise levels to iterate over (null keeps the field's default)
 * @param measures heuristic measures to iterate over (null keeps the field's default)
 * @throws Exception if benchmark setup or thread-pool termination fails
 *
 * NOTE(review): this block is scraped listing text — line breaks fall inside
 * string literals (e.g. "precomputing pos. and neg. / examples finished."), so
 * the code below is not compilable as stored; it is left byte-identical.
 * NOTE(review): the "queries with < {} pos. examples" log statement passes
 * emptyQueries.size() rather than lowNrOfExamplesQueries.size() — looks like a
 * copy/paste slip; confirm against the upstream repository before fixing.
 */
public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval, HeuristicType[] measures) throws Exception { this.maxTreeDepth = maxTreeDepth; queryTreeFactory.setMaxDepth(maxTreeDepth); if (exampleInterval != null) { nrOfExamplesIntervals = exampleInterval; }/* w ww .j a v a 2 s .c o m*/ if (noiseInterval != null) { this.noiseIntervals = noiseInterval; } if (measures != null) { this.measures = measures; } boolean noiseEnabled = noiseIntervals.length > 1 || noiseInterval[0] > 0; boolean posOnly = noiseEnabled ? false : true; logger.info("Started QTL evaluation..."); long t1 = System.currentTimeMillis(); List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString()) .collect(Collectors.toList()); logger.info("#loaded queries: " + queries.size()); // filter for debugging purposes queries = queries.stream().filter(q -> queriesToProcessTokens.stream().noneMatch(t -> !q.contains(t))) .collect(Collectors.toList()); queries = queries.stream().filter(q -> queriesToOmitTokens.stream().noneMatch(t -> q.contains(t))) .collect(Collectors.toList()); if (maxNrOfProcessedQueries == -1) { maxNrOfProcessedQueries = queries.size(); } // queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth)); // queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries)); logger.info("#queries to process: " + queries.size()); // generate examples for each query logger.info("precomputing pos. and neg. examples..."); for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue; query2Examples.put(query, generateExamples(query, posOnly, noiseEnabled)); } logger.info("precomputing pos. and neg. 
examples finished."); // check for queries that do not return any result (should not happen, but we never know) Set<String> emptyQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} empty queries.", emptyQueries.size()); queries.removeAll(emptyQueries); // min. pos examples int min = 3; Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.size() < min).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} queries with < {} pos. examples.", emptyQueries.size(), min); queries.removeAll(lowNrOfExamplesQueries); queries = queries.subList(0, Math.min(80, queries.size())); final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length * noiseIntervals.length * queries.size(); logger.info("#QTL runs: " + totalNrOfQTLRuns); final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0); // loop over heuristics for (final QueryTreeHeuristic heuristic : heuristics) { final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName(); // loop over heuristics measures for (HeuristicType measure : this.measures) { final String measureName = measure.toString(); heuristic.setHeuristicType(measure); double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length]; // loop over number of positive examples for (int i = 0; i < nrOfExamplesIntervals.length; i++) { final int nrOfExamples = nrOfExamplesIntervals[i]; // loop over noise value for (int j = 0; j < noiseIntervals.length; j++) { final double noise = noiseIntervals[j]; // check if not already processed File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".log"); File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + 
heuristicName + "-" + measureName + ".stats"); if (!override && logFile.exists() && statsFile.exists()) { logger.info( "Eval config already processed. For re-running please remove corresponding output files."); continue; } FileAppender appender = null; try { appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false); Logger.getRootLogger().addAppender(appender); } catch (IOException e) { e.printStackTrace(); } logger.info("#examples: " + nrOfExamples + " noise: " + noise); final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics 
bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics(); MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset(); MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset(); ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads); // indicates if the execution for some of the queries failed final AtomicBoolean failed = new AtomicBoolean(false); Set<String> queriesToProcess = new TreeSet<>(queries); queriesToProcess.retainAll(query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.size() >= nrOfExamples) .map(e -> e.getKey()).collect(Collectors.toSet())); // loop over SPARQL queries for (final String sparqlQuery : queriesToProcess) { CBDStructureTree cbdStructure = cbdStructureTree != null ? cbdStructureTree : QueryUtils.getOptimalCBDStructure(QueryFactory.create(sparqlQuery)); tp.submit(() -> { logger.info("CBD tree:" + cbdStructure.toStringVerbose()); // update max tree depth this.maxTreeDepth = QueryTreeUtils.getDepth(cbdStructure); logger.info("##############################################################"); logger.info("Processing query\n" + sparqlQuery); // we repeat it n times with different permutations of examples int nrOfPermutations = 1; if (nrOfExamples >= query2Examples.get(sparqlQuery).correctPosExampleCandidates .size()) { nrOfPermutations = 1; } for (int perm = 1; perm <= nrOfPermutations; perm++) { logger.info("Run {}/{}", perm, nrOfPermutations); try { ExamplesWrapper examples = getExamples(sparqlQuery, nrOfExamples, nrOfExamples, noise, cbdStructure); logger.info("pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples)); logger.info("neg. 
examples:\n" + Joiner.on("\n").join(examples.correctNegExamples)); // write examples to disk File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery)); dir.mkdirs(); Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir, "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tp"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir, "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tn"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir, "examples" + perm + "_" + nrOfExamples + "_" + noise + ".fp"), Charsets.UTF_8); // compute baseline RDFResourceTree baselineSolution = applyBaseLine(examples, Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES); logger.info("Evaluating baseline..."); Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise); logger.info("Baseline score:\n" + baselineScore); String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution, dataset.getBaseIRI(), dataset.getPrefixMapping()); baselinePrecisionStats.addValue(baselineScore.precision); baselineRecallStats.addValue(baselineScore.recall); baselineFMeasureStats.addValue(baselineScore.fmeasure); baselinePredAccStats.addValue(baselineScore.predAcc); baselineMathCorrStats.addValue(baselineScore.mathCorr); // run QTL PosNegLPStandard lp = new PosNegLPStandard(); lp.setPositiveExamples(examples.posExamplesMapping.keySet()); lp.setNegativeExamples(examples.negExamplesMapping.keySet()); // QTL2Disjunctive la = new QTL2Disjunctive(lp, qef); QTL2DisjunctiveMultiThreaded la = new QTL2DisjunctiveMultiThreaded(lp, qef); la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer()); la.setReasoner(dataset.getReasoner()); la.setEntailment(Entailment.SIMPLE); la.setTreeFactory(queryTreeFactory); la.setPositiveExampleTrees(examples.posExamplesMapping); la.setNegativeExampleTrees(examples.negExamplesMapping); la.setNoise(noise); la.setHeuristic(heuristic); 
la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds); la.init(); la.start(); List<EvaluatedRDFResourceTree> solutions = new ArrayList<>( la.getSolutions()); // List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic); nrOfReturnedSolutionsStats.addValue(solutions.size()); // the best returned solution by QTL EvaluatedRDFResourceTree bestSolution = solutions.get(0); logger.info("Got " + solutions.size() + " query trees."); // logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription())); logger.info("QTL Score:\n" + bestSolution.getTreeScore()); long runtimeBestSolution = la.getTimeBestSolutionFound(); bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution); // convert to SPARQL query RDFResourceTree tree = bestSolution.getTree(); tree = filter.apply(tree); String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree, dataset.getBaseIRI(), dataset.getPrefixMapping()); // compute score Score score = computeScore(sparqlQuery, tree, noise); bestReturnedSolutionPrecisionStats.addValue(score.precision); bestReturnedSolutionRecallStats.addValue(score.recall); bestReturnedSolutionFMeasureStats.addValue(score.fmeasure); bestReturnedSolutionPredAccStats.addValue(score.predAcc); bestReturnedSolutionMathCorrStats.addValue(score.mathCorr); logger.info(score.toString()); // find the extensionally best matching tree in the list Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast( solutions, sparqlQuery, noise, examples); EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore .getFirst(); Score bestMatchingScore = bestMatchingTreeWithScore.getSecond(); // position of best tree in list of solutions int positionBestScore = solutions.indexOf(bestMatchingTree); bestSolutionPositionStats.addValue(positionBestScore); Score bestScore = score; if (positionBestScore > 0) { logger.info( 
"Position of best covering tree in list: " + positionBestScore); logger.info("Best covering solution:\n" + render(bestMatchingTree.asEvaluatedDescription())); logger.info("Tree score: " + bestMatchingTree.getTreeScore()); bestScore = bestMatchingScore; logger.info(bestMatchingScore.toString()); } else { logger.info( "Best returned solution was also the best covering solution."); } bestSolutionRecallStats.addValue(bestScore.recall); bestSolutionPrecisionStats.addValue(bestScore.precision); bestSolutionFMeasureStats.addValue(bestScore.fmeasure); bestSolutionPredAccStats.addValue(bestScore.predAcc); bestSolutionMathCorrStats.addValue(bestScore.mathCorr); for (RDFResourceTree negTree : examples.negExamplesMapping.values()) { if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) { Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"), Charsets.UTF_8); break; } } String bestQuery = QueryFactory .create(QueryTreeUtils.toSPARQLQueryString( filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(), dataset.getPrefixMapping())) .toString(); if (write2DB) { write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery, baselineScore, heuristicName, measureName, QueryFactory.create(learnedSPARQLQuery).toString(), score, runtimeBestSolution, bestQuery, positionBestScore, bestScore); } } catch (Exception e) { failed.set(true); logger.error("Error occured for query\n" + sparqlQuery, e); try { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); e.printStackTrace(pw); Files.append(sparqlQuery + "\n" + sw.toString(), new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".txt"), Charsets.UTF_8); } catch (IOException e1) { e1.printStackTrace(); } } finally { int cnt = currentNrOfFinishedRuns.incrementAndGet(); logger.info("***********Evaluation Progress:" + NumberFormat.getPercentInstance() .format((double) cnt / totalNrOfQTLRuns) + "(" + cnt + "/" + totalNrOfQTLRuns 
+ ")" + "***********"); } } }); } tp.shutdown(); tp.awaitTermination(12, TimeUnit.HOURS); Logger.getRootLogger().removeAppender(appender); if (!failed.get()) { String result = ""; result += "\nBaseline Precision:\n" + baselinePrecisionStats; result += "\nBaseline Recall:\n" + baselineRecallStats; result += "\nBaseline F-measure:\n" + baselineFMeasureStats; result += "\nBaseline PredAcc:\n" + baselinePredAccStats; result += "\nBaseline MathCorr:\n" + baselineMathCorrStats; result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats; result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats; result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats; result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats; result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats; result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats; result += "\nTime until best returned solution found:\n" + bestReturnedSolutionRuntimeStats; result += "\nPositions of best solution:\n" + Arrays.toString(bestSolutionPositionStats.getValues()); result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats; result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats; result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats; result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats; result += "\nCBD generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal() + "\n"; result += "CBD generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg() + "\n"; result += "Tree generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal() + "\n"; result += "Tree generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg() + "\n"; result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n"; 
logger.info(result); try { Files.write(result, statsFile, Charsets.UTF_8); } catch (IOException e) { e.printStackTrace(); } data[i][j] = bestReturnedSolutionFMeasureStats.getMean(); if (write2DB) { write2DB(heuristicName, measureName, nrOfExamples, noise, bestReturnedSolutionFMeasureStats.getMean(), bestReturnedSolutionPrecisionStats.getMean(), bestReturnedSolutionRecallStats.getMean(), bestReturnedSolutionPredAccStats.getMean(), bestReturnedSolutionMathCorrStats.getMean(), bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(), bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(), bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(), baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(), baselineRecallStats.getMean(), baselinePredAccStats.getMean(), baselineMathCorrStats.getMean(), bestReturnedSolutionRuntimeStats.getMean()); } } } } String content = "###"; String separator = "\t"; for (double noiseInterval1 : noiseIntervals) { content += separator + noiseInterval1; } content += "\n"; for (int i = 0; i < nrOfExamplesIntervals.length; i++) { content += nrOfExamplesIntervals[i]; for (int j = 0; j < noiseIntervals.length; j++) { content += separator + data[i][j]; } content += "\n"; } File examplesVsNoise = new File(benchmarkDirectory, "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv"); try { Files.write(content, examplesVsNoise, Charsets.UTF_8); } catch (IOException e) { logger.error("failed to write stats to file", e); } } } if (write2DB) { conn.close(); } if (useEmailNotification) { sendFinishedMail(); } long t2 = System.currentTimeMillis(); long duration = t2 - t1; logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms."); }
From source file:org.dllearner.algorithms.qtl.experiments.QTLEvaluation.java
public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval, HeuristicType[] measures) throws Exception { this.maxTreeDepth = maxTreeDepth; queryTreeFactory.setMaxDepth(maxTreeDepth); if (exampleInterval != null) { nrOfExamplesIntervals = exampleInterval; }/*www . ja v a 2 s . co m*/ if (noiseInterval != null) { this.noiseIntervals = noiseInterval; } if (measures != null) { this.measures = measures; } logger.info("Started QTL evaluation..."); long t1 = System.currentTimeMillis(); List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString()) .collect(Collectors.toList()); logger.info("#loaded queries: " + queries.size()); // filter for debugging purposes queries = queries.stream().filter(q -> tokens.stream().noneMatch(t -> !q.contains(t))) .collect(Collectors.toList()); if (maxNrOfProcessedQueries == -1) { maxNrOfProcessedQueries = queries.size(); } // queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth)); // queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries)); logger.info("#queries to process: " + queries.size()); // generate examples for each query logger.info("precomputing pos. and neg. examples..."); final Map<String, ExampleCandidates> query2Examples = new HashMap<>(); for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue; query2Examples.put(query, generateExamples(query)); } logger.info("precomputing pos. and neg. examples finished."); // check for queries that do not return any result (should not happen, but we never know) Set<String> emptyQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} empty queries.", emptyQueries.size()); queries.removeAll(emptyQueries); // min. 
pos examples Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.size() < 2).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} queries with < 2 pos. examples.", emptyQueries.size()); queries.removeAll(lowNrOfExamplesQueries); final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length * noiseIntervals.length * queries.size(); logger.info("#QTL runs: " + totalNrOfQTLRuns); final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0); // loop over heuristics for (final QueryTreeHeuristic heuristic : heuristics) { final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName(); // loop over heuristics measures for (HeuristicType measure : this.measures) { final String measureName = measure.toString(); heuristic.setHeuristicType(measure); double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length]; // loop over number of positive examples for (int i = 0; i < nrOfExamplesIntervals.length; i++) { final int nrOfExamples = nrOfExamplesIntervals[i]; // loop over noise value for (int j = 0; j < noiseIntervals.length; j++) { final double noise = noiseIntervals[j]; // check if not already processed File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".log"); File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".stats"); if (!override && logFile.exists() && statsFile.exists()) { logger.info( "Eval config already processed. 
For re-running please remove corresponding output files."); continue; } FileAppender appender = null; try { appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false); Logger.getRootLogger().addAppender(appender); } catch (IOException e) { e.printStackTrace(); } logger.info("#examples: " + nrOfExamples + " noise: " + noise); final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionMathCorrStats = new 
SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics(); MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset(); MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset(); ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads); // indicates if the execution for some of the queries failed final AtomicBoolean failed = new AtomicBoolean(false); // loop over SPARQL queries for (final String sparqlQuery : queries) { tp.submit(() -> { logger.info("##############################################################"); logger.info("Processing query\n" + sparqlQuery); try { ExamplesWrapper examples = query2Examples.get(sparqlQuery).get(nrOfExamples, nrOfExamples, noise); logger.info( "pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples)); logger.info( "neg. examples:\n" + Joiner.on("\n").join(examples.correctNegExamples)); // write examples to disk File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery)); dir.mkdirs(); Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tp"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tn"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir, "examples_" + nrOfExamples + "_" + noise + ".fp"), Charsets.UTF_8); // compute baseline logger.info("Computing baseline..."); RDFResourceTree baselineSolution = applyBaseLine(examples, Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES); logger.info("Baseline solution:\n" + owlRenderer .render(QueryTreeUtils.toOWLClassExpression(baselineSolution))); logger.info("Evaluating baseline..."); Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise); logger.info("Baseline score:\n" + baselineScore); String baseLineQuery = 
QueryTreeUtils.toSPARQLQueryString(baselineSolution, dataset.getBaseIRI(), dataset.getPrefixMapping());

                                // accumulate the baseline solution's quality metrics for this run
                                baselinePrecisionStats.addValue(baselineScore.precision);
                                baselineRecallStats.addValue(baselineScore.recall);
                                baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                baselinePredAccStats.addValue(baselineScore.predAcc);
                                baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                // run QTL: build a pos/neg learning problem from the example mappings
                                PosNegLPStandard lp = new PosNegLPStandard();
                                lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                lp.setNegativeExamples(examples.negExamplesMapping.keySet());

                                // configure the disjunctive QTL learner for this (noise, heuristic) cell
                                QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                la.setReasoner(dataset.getReasoner());
                                la.setEntailment(Entailment.SIMPLE);
                                la.setTreeFactory(queryTreeFactory);
                                la.setPositiveExampleTrees(examples.posExamplesMapping);
                                la.setNegativeExampleTrees(examples.negExamplesMapping);
                                la.setNoise(noise);
                                la.setHeuristic(heuristic);
                                // overall and per-tree budgets deliberately share the same timeout value
                                la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                la.init();
                                la.start();

                                List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(la.getSolutions());
                                // List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic);
                                nrOfReturnedSolutionsStats.addValue(solutions.size());

                                // the best returned solution by QTL
                                // NOTE(review): assumes the learner returned at least one solution;
                                // solutions.get(0) throws IndexOutOfBoundsException otherwise (caught below
                                // by the generic catch, which marks the whole evaluation as failed)
                                EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                logger.info("Got " + solutions.size() + " query trees.");
                                logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription()));
                                logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                long runtimeBestSolution = la.getTimeBestSolutionFound();
                                bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                // convert to SPARQL query
                                RDFResourceTree tree = bestSolution.getTree();
                                // filter.filter(tree);
                                String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree, dataset.getBaseIRI(), dataset.getPrefixMapping());

                                // compute score of the best returned solution w.r.t. the target query
                                Score score = computeScore(sparqlQuery, tree, noise);
                                bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                bestReturnedSolutionRecallStats.addValue(score.recall);
                                bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                logger.info(score.toString());

                                // find the extensionally best matching tree in the list
                                Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(solutions, sparqlQuery, noise, examples);
                                EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore.getFirst();
                                Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                // position of best tree in list of solutions (0 = the returned best itself)
                                int positionBestScore = solutions.indexOf(bestMatchingTree);
                                bestSolutionPositionStats.addValue(positionBestScore);

                                Score bestScore = score;
                                if (positionBestScore > 0) {
                                    // a later tree in the list covered the target better than the top-ranked one
                                    logger.info("Position of best covering tree in list: " + positionBestScore);
                                    logger.info("Best covering solution:\n" + render(bestMatchingTree.asEvaluatedDescription()));
                                    logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                    bestScore = bestMatchingScore;
                                    logger.info(bestMatchingScore.toString());
                                } else {
                                    logger.info("Best returned solution was also the best covering solution.");
                                }
                                bestSolutionRecallStats.addValue(bestScore.recall);
                                bestSolutionPrecisionStats.addValue(bestScore.precision);
                                bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                // record queries whose best covering tree still subsumes a negative example
                                for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                    if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                        Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"), Charsets.UTF_8);
                                        break;
                                    }
                                }

                                // canonical SPARQL rendering of the (filtered) best covering tree
                                String bestQuery = QueryFactory.create(QueryTreeUtils.toSPARQLQueryString(filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(), dataset.getPrefixMapping())).toString();

                                // optionally persist the per-run result row
                                if (write2DB) {
                                    write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery, baselineScore, heuristicName, measureName, QueryFactory.create(learnedSPARQLQuery).toString(), score, runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                }
                            } catch (Exception e) {
                                // any failure in a single run marks the whole evaluation as failed
                                failed.set(true);
                                logger.error("Error occured for query\n" + sparqlQuery, e);
                                try {
                                    // dump the query plus the full stack trace into a
                                    // per-configuration failure file for later inspection
                                    StringWriter sw = new StringWriter();
                                    PrintWriter pw = new PrintWriter(sw);
                                    e.printStackTrace(pw);
                                    Files.append(sparqlQuery + "\n" + sw.toString(), new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".txt"), Charsets.UTF_8);
                                } catch (IOException e1) {
                                    e1.printStackTrace();
                                }
                            } finally {
                                // progress reporting runs regardless of success or failure
                                int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                logger.info("***********Evaluation Progress:" + NumberFormat.getPercentInstance().format((double) cnt / totalNrOfQTLRuns) + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                            }
                        });
                    }

                    // wait for all submitted runs of this configuration to finish
                    // (hard 12h ceiling; presumably generous for the benchmark -- TODO confirm)
                    tp.shutdown();
                    tp.awaitTermination(12, TimeUnit.HOURS);

                    Logger.getRootLogger().removeAppender(appender);

                    if (!failed.get()) {
                        // build a human-readable summary of all aggregated statistics
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;
                        // NOTE(review): missing leading "\n" -- this header is appended directly
                        // onto the previous stats block in the output
                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;
                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;
                        result += "\nTime until best returned solution found:\n" + bestReturnedSolutionRuntimeStats;
                        result += "\nPositions of best solution:\n" + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;
                        // timing monitors collected elsewhere during CBD retrieval / tree generation
                        result += "\nCBD generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal() + "\n";
                        result += "CBD generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg() + "\n";
                        result += "Tree generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal() + "\n";
                        result += "Tree generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg() + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";
                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                        // F-measure mean for the (examples, noise) cell of the final TSV matrix
                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        // optionally persist the aggregated means for this configuration
                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise, bestReturnedSolutionFMeasureStats.getMean(), bestReturnedSolutionPrecisionStats.getMean(), bestReturnedSolutionRecallStats.getMean(), bestReturnedSolutionPredAccStats.getMean(), bestReturnedSolutionMathCorrStats.getMean(), bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(), bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(), bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(), baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(), baselineRecallStats.getMean(), baselinePredAccStats.getMean(), baselineMathCorrStats.getMean(), bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            // write the examples-vs-noise F-measure matrix as a TSV file:
            // header row of noise intervals, then one row per example-count interval
            String content = "###";
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content += separator + noiseInterval1;
            }
            content += "\n";
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content += nrOfExamplesIntervals[i];
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content += separator + data[i][j];
                }
                content += "\n";
            }
            File examplesVsNoise = new File(benchmarkDirectory, "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content, examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    // NOTE(review): conn.close() is not in a try/finally, so the DB connection
    // leaks if an exception escapes the loops above -- consider hardening
    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }

    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    // NOTE(review): the value is formatted as H:m:s.S, yet the message appends "ms."
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms.");
}