List of usage examples for org.apache.commons.math3.stat.descriptive.DescriptiveStatistics#addValue
public void addValue(double v)
From source file:org.commoncrawl.mapred.pipelineV3.domainmeta.fuzzydedupe.CrossDomainDupesReducer.java
@Override public void reduce(TextBytes key, Iterator<TextBytes> values, OutputCollector<TextBytes, TextBytes> output, Reporter reporter) throws IOException { filter.clear();//from w w w .jav a 2s . c o m double crossDomainDupesCount = 0; double totalHitsCount = 0; double uniqueRootDomainsCount = 0; double uniqueIPs = 0; double validDupePatternMatches = 0; URLFPV2 rootFP = URLUtils.getURLFPV2FromHost(key.toString()); URLFPV2 fp = new URLFPV2(); int sampleCount = 0; ArrayList<Integer> ipAddresses = new ArrayList<Integer>(); JsonArray thisHostsDupes = new JsonArray(); DescriptiveStatistics lengthStats = new DescriptiveStatistics(); while (values.hasNext()) { JsonArray jsonArray = parser.parse(values.next().toString()).getAsJsonArray(); for (JsonElement elem : jsonArray) { totalHitsCount++; fp.setRootDomainHash(elem.getAsJsonObject().get("dh").getAsLong()); if (fp.getRootDomainHash() != rootFP.getRootDomainHash()) { crossDomainDupesCount++; fp.setDomainHash(fp.getRootDomainHash()); fp.setUrlHash(fp.getRootDomainHash()); // track length average .... 
lengthStats.addValue(elem.getAsJsonObject().get("length").getAsInt()); if (!filter.isPresent(fp)) { uniqueRootDomainsCount++; filter.add(fp); if (sampleCount < samples.length) { String url = elem.getAsJsonObject().get("url").getAsString(); GoogleURL urlObject = new GoogleURL(url); if (knownValidDupesPatterns.matcher(urlObject.getCanonicalURL()).find()) { validDupePatternMatches++; } samples[sampleCount++] = url; } } } else { thisHostsDupes.add(elem); } int ipAddress = elem.getAsJsonObject().get("ip").getAsInt(); fp.setRootDomainHash(ipAddress); fp.setDomainHash(ipAddress); fp.setUrlHash(ipAddress); if (!filter.isPresent(fp)) { uniqueIPs++; filter.add(fp); ipAddresses.add(ipAddress); } } } if (totalHitsCount > 15 && crossDomainDupesCount >= 2) { double otherDomainToLocalScore = otherDomainToLocalDomainScore(totalHitsCount, crossDomainDupesCount); double spamIPScore = spamHostScore(totalHitsCount, crossDomainDupesCount, uniqueIPs); if (otherDomainToLocalScore >= .50 || spamIPScore > .50) { JsonObject objectOut = new JsonObject(); objectOut.addProperty("ratio", (crossDomainDupesCount / totalHitsCount)); objectOut.addProperty("totalHits", totalHitsCount); objectOut.addProperty("crossDomainDupes", crossDomainDupesCount); objectOut.addProperty("uniqueRootDomains", uniqueRootDomainsCount); objectOut.addProperty("otherDomainToLocalScore", otherDomainToLocalScore); objectOut.addProperty("spamIPScore", spamIPScore); objectOut.addProperty("validDupeMatches", validDupePatternMatches); objectOut.addProperty("content-len-mean", lengthStats.getMean()); objectOut.addProperty("content-len-geo-mean", lengthStats.getGeometricMean()); for (int i = 0; i < sampleCount; ++i) { objectOut.addProperty("sample-" + i, samples[i]); } // compute path edit distance ... 
if (sampleCount > 1) { int sampleEditDistanceSize = Math.min(sampleCount, 5); DescriptiveStatistics stats = new DescriptiveStatistics(); for (int j = 0; j < sampleEditDistanceSize; ++j) { for (int k = 0; k < sampleEditDistanceSize; ++k) { if (k != j) { GoogleURL urlObjectA = new GoogleURL(samples[j]); GoogleURL urlObjectB = new GoogleURL(samples[k]); if (urlObjectA.getPath().length() < 100 && urlObjectB.getPath().length() < 100) { stats.addValue(StringUtils.getLevenshteinDistance(urlObjectA.getPath(), urlObjectB.getPath())); } } } } if (stats.getMean() != 0.0) { objectOut.addProperty("lev-distance-mean", stats.getMean()); objectOut.addProperty("lev-distance-geomean", stats.getGeometricMean()); } } JsonArray ipAddressArray = new JsonArray(); for (int j = 0; j < Math.min(1000, ipAddresses.size()); ++j) { ipAddressArray.add(new JsonPrimitive(ipAddresses.get(j))); } if (ipAddresses.size() != 0) { objectOut.add("ipList", ipAddressArray); } objectOut.add("thisHostDupes", thisHostsDupes); output.collect(key, new TextBytes(objectOut.toString())); } } }
From source file:org.cse.visiri.app.algoevaluation.DistributionEval.java
public void EvaluateDistribution(QueryDistribution dist, String algoname) { Map<String, Integer> allInfo = new TreeMap<String, Integer>(); Map<String, Integer> nodeInfo = new TreeMap<String, Integer>(); Map<String, Double> nodeCosts = new TreeMap<String, Double>(); for (Query q : dist.getQueryAllocation().keySet()) { String node = dist.getQueryAllocation().get(q); if (!allInfo.containsKey(node)) { allInfo.put(node, 0);/* w w w .j av a2 s . c om*/ } int val = allInfo.get(node); allInfo.put(node, val + 1); if (node.startsWith(NODE_PREFIX)) { if (!nodeInfo.containsKey(node)) { nodeInfo.put(node, 0); nodeCosts.put(node, 0.0); } val = nodeInfo.get(node); nodeInfo.put(node, val + 1); nodeCosts.put(node, nodeCosts.get(node) + q.getCost()); } } DescriptiveStatistics stat = new DescriptiveStatistics(); DescriptiveStatistics costStat = new DescriptiveStatistics(); System.out.println("Query counts : "); for (String node : nodeInfo.keySet()) { System.out.println(node + " : " + allInfo.get(node)); stat.addValue(nodeInfo.get(node)); costStat.addValue(nodeCosts.get(node)); } System.out.println(); double mean = stat.getMean(); double stdDev = Math.sqrt(stat.getPopulationVariance()); double varCoef = stdDev / mean; System.out.println("mean : " + mean); System.out.println("stdDev : " + stdDev); System.out.println("Coefficient of var : " + varCoef); System.out.println("\nCosts :"); mean = costStat.getMean(); stdDev = Math.sqrt(costStat.getPopulationVariance()); varCoef = stdDev / mean; System.out.println("mean : " + mean); System.out.println("stdDev : " + stdDev); System.out.println("Coefficient of var : " + varCoef); //calculate event duplication Map<String, Set<String>> eventMap = new TreeMap<String, Set<String>>(); for (Query q : dist.getQueryAllocation().keySet()) { String targetNode = dist.getQueryAllocation().get(q); for (StreamDefinition def : q.getInputStreamDefinitionsList()) { if (!eventMap.containsKey(def.getStreamId())) { eventMap.put(def.getStreamId(), new 
HashSet<String>()); } eventMap.get(def.getStreamId()).add(targetNode); } } stat = new DescriptiveStatistics(); for (Set<String> nodes : eventMap.values()) { stat.addValue(nodes.size()); } double avg = stat.getMean(); System.out.println(); System.out.println("Avg. event duplication " + avg); try { PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_algoeval.txt", true))); out.println(stdDev); out.close(); } catch (IOException e) { e.printStackTrace(); } try { PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_eventDup.txt", true))); out.println(avg); out.close(); } catch (IOException e) { e.printStackTrace(); } }
From source file:org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction.java
/** * Adds all values from the distribution to the given descriptive statistics object * @param statistics/*w ww . j av a 2 s .co m*/ * @param distribution * @param type * @param offset will be added to values */ protected <T> void addAll(DescriptiveStatistics statistics, Distribution distribution, DataTypeWithRatioScale<T> type, double offset) { Iterator<Double> it = DistributionIterator.createIteratorDouble(distribution, dictionary, type); while (it.hasNext()) { Double value = it.next(); value = value == null ? (ignoreMissingData ? null : 0d) : value; if (value != null) { statistics.addValue(value + offset); } } }
From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGenerator.java
private DescriptiveStatistics determineDefaultCBDSizes(Query query, List<String> resources) { DescriptiveStatistics stats = new DescriptiveStatistics(); NumberFormat df = DecimalFormat.getPercentInstance(); AtomicInteger idx = new AtomicInteger(1); CBDStructureTree cbdStructure = getDefaultCBDStructureTree(); System.out.println(cbdStructure.toStringVerbose()); ProgressBar progressBar = new ProgressBar(); resources.forEach(r -> {/* ww w .ja va 2s . c om*/ long cnt = -1; if (useConstruct) { Model cbd = null; try { // cbd = cbdGen.getConciseBoundedDescription(r, cbdStructure); // cnt = cbd.size(); // System.out.println(r + ":" + cnt); } catch (Exception e) { LOGGER.error(e.getMessage(), e.getCause()); } } else { ParameterizedSparqlString template = SPARQLUtils.CBD_TEMPLATE_DEPTH3.copy(); template.setIri("uri", r); try (QueryExecution qe = qef.createQueryExecution(template.toString())) { ResultSet rs = qe.execSelect(); cnt = rs.next().getLiteral("cnt").getInt(); } catch (Exception e) { LOGGER.error(e.getMessage(), e.getCause()); } } stats.addValue(cnt); progressBar.update(idx.getAndAdd(1), resources.size()); }); return stats; }
From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGenerator.java
private DescriptiveStatistics determineOptimalCBDSizes(Query query, List<String> resources) { DescriptiveStatistics stats = new DescriptiveStatistics(); NumberFormat df = DecimalFormat.getPercentInstance(); AtomicInteger idx = new AtomicInteger(1); CBDStructureTree cbdStructure = QueryUtils.getOptimalCBDStructure(query); System.out.println(cbdStructure.toStringVerbose()); ProgressBar progressBar = new ProgressBar(); resources.forEach(r -> {/*from w w w.j a v a 2 s .com*/ long cnt = -1; if (useConstruct) { Model cbd = null; try { // cbd = cbdGen.getConciseBoundedDescription(r, cbdStructure); // cnt = cbd.size(); } catch (Exception e) { LOGGER.error(e.getMessage(), e.getCause()); } } else { ParameterizedSparqlString template = SPARQLUtils.CBD_TEMPLATE_DEPTH3.copy(); template.setIri("uri", r); try (QueryExecution qe = qef.createQueryExecution(template.toString())) { ResultSet rs = qe.execSelect(); cnt = rs.next().getLiteral("cnt").getInt(); } catch (Exception e) { LOGGER.error(e.getMessage(), e.getCause()); } } stats.addValue(cnt); progressBar.update(idx.getAndAdd(1), resources.size()); }); return stats; }
From source file:org.dllearner.algorithms.qtl.experiments.PRConvergenceExperiment.java
public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval, HeuristicType[] measures) throws Exception { this.maxTreeDepth = maxTreeDepth; queryTreeFactory.setMaxDepth(maxTreeDepth); if (exampleInterval != null) { nrOfExamplesIntervals = exampleInterval; }//from ww w .jav a 2 s.co m if (noiseInterval != null) { this.noiseIntervals = noiseInterval; } if (measures != null) { this.measures = measures; } boolean noiseEnabled = noiseIntervals.length > 1 || noiseInterval[0] > 0; boolean posOnly = noiseEnabled ? false : true; logger.info("Started QTL evaluation..."); long t1 = System.currentTimeMillis(); List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString()) .collect(Collectors.toList()); logger.info("#loaded queries: " + queries.size()); // filter for debugging purposes queries = queries.stream().filter(q -> queriesToProcessTokens.stream().noneMatch(t -> !q.contains(t))) .collect(Collectors.toList()); queries = queries.stream().filter(q -> queriesToOmitTokens.stream().noneMatch(t -> q.contains(t))) .collect(Collectors.toList()); if (maxNrOfProcessedQueries == -1) { maxNrOfProcessedQueries = queries.size(); } // queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth)); // queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries)); logger.info("#queries to process: " + queries.size()); // generate examples for each query logger.info("precomputing pos. and neg. examples..."); for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue; query2Examples.put(query, generateExamples(query, posOnly, noiseEnabled)); } logger.info("precomputing pos. and neg. 
examples finished."); // check for queries that do not return any result (should not happen, but we never know) Set<String> emptyQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} empty queries.", emptyQueries.size()); queries.removeAll(emptyQueries); // min. pos examples int min = 3; Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.size() < min).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} queries with < {} pos. examples.", emptyQueries.size(), min); queries.removeAll(lowNrOfExamplesQueries); queries = queries.subList(0, Math.min(80, queries.size())); final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length * noiseIntervals.length * queries.size(); logger.info("#QTL runs: " + totalNrOfQTLRuns); final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0); // loop over heuristics for (final QueryTreeHeuristic heuristic : heuristics) { final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName(); // loop over heuristics measures for (HeuristicType measure : this.measures) { final String measureName = measure.toString(); heuristic.setHeuristicType(measure); double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length]; // loop over number of positive examples for (int i = 0; i < nrOfExamplesIntervals.length; i++) { final int nrOfExamples = nrOfExamplesIntervals[i]; // loop over noise value for (int j = 0; j < noiseIntervals.length; j++) { final double noise = noiseIntervals[j]; // check if not already processed File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".log"); File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + 
heuristicName + "-" + measureName + ".stats"); if (!override && logFile.exists() && statsFile.exists()) { logger.info( "Eval config already processed. For re-running please remove corresponding output files."); continue; } FileAppender appender = null; try { appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false); Logger.getRootLogger().addAppender(appender); } catch (IOException e) { e.printStackTrace(); } logger.info("#examples: " + nrOfExamples + " noise: " + noise); final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics 
bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics(); MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset(); MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset(); ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads); // indicates if the execution for some of the queries failed final AtomicBoolean failed = new AtomicBoolean(false); Set<String> queriesToProcess = new TreeSet<>(queries); queriesToProcess.retainAll(query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.size() >= nrOfExamples) .map(e -> e.getKey()).collect(Collectors.toSet())); // loop over SPARQL queries for (final String sparqlQuery : queriesToProcess) { CBDStructureTree cbdStructure = cbdStructureTree != null ? cbdStructureTree : QueryUtils.getOptimalCBDStructure(QueryFactory.create(sparqlQuery)); tp.submit(() -> { logger.info("CBD tree:" + cbdStructure.toStringVerbose()); // update max tree depth this.maxTreeDepth = QueryTreeUtils.getDepth(cbdStructure); logger.info("##############################################################"); logger.info("Processing query\n" + sparqlQuery); // we repeat it n times with different permutations of examples int nrOfPermutations = 1; if (nrOfExamples >= query2Examples.get(sparqlQuery).correctPosExampleCandidates .size()) { nrOfPermutations = 1; } for (int perm = 1; perm <= nrOfPermutations; perm++) { logger.info("Run {}/{}", perm, nrOfPermutations); try { ExamplesWrapper examples = getExamples(sparqlQuery, nrOfExamples, nrOfExamples, noise, cbdStructure); logger.info("pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples)); logger.info("neg. 
examples:\n" + Joiner.on("\n").join(examples.correctNegExamples)); // write examples to disk File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery)); dir.mkdirs(); Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir, "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tp"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir, "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tn"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir, "examples" + perm + "_" + nrOfExamples + "_" + noise + ".fp"), Charsets.UTF_8); // compute baseline RDFResourceTree baselineSolution = applyBaseLine(examples, Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES); logger.info("Evaluating baseline..."); Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise); logger.info("Baseline score:\n" + baselineScore); String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution, dataset.getBaseIRI(), dataset.getPrefixMapping()); baselinePrecisionStats.addValue(baselineScore.precision); baselineRecallStats.addValue(baselineScore.recall); baselineFMeasureStats.addValue(baselineScore.fmeasure); baselinePredAccStats.addValue(baselineScore.predAcc); baselineMathCorrStats.addValue(baselineScore.mathCorr); // run QTL PosNegLPStandard lp = new PosNegLPStandard(); lp.setPositiveExamples(examples.posExamplesMapping.keySet()); lp.setNegativeExamples(examples.negExamplesMapping.keySet()); // QTL2Disjunctive la = new QTL2Disjunctive(lp, qef); QTL2DisjunctiveMultiThreaded la = new QTL2DisjunctiveMultiThreaded(lp, qef); la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer()); la.setReasoner(dataset.getReasoner()); la.setEntailment(Entailment.SIMPLE); la.setTreeFactory(queryTreeFactory); la.setPositiveExampleTrees(examples.posExamplesMapping); la.setNegativeExampleTrees(examples.negExamplesMapping); la.setNoise(noise); la.setHeuristic(heuristic); 
la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds); la.init(); la.start(); List<EvaluatedRDFResourceTree> solutions = new ArrayList<>( la.getSolutions()); // List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic); nrOfReturnedSolutionsStats.addValue(solutions.size()); // the best returned solution by QTL EvaluatedRDFResourceTree bestSolution = solutions.get(0); logger.info("Got " + solutions.size() + " query trees."); // logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription())); logger.info("QTL Score:\n" + bestSolution.getTreeScore()); long runtimeBestSolution = la.getTimeBestSolutionFound(); bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution); // convert to SPARQL query RDFResourceTree tree = bestSolution.getTree(); tree = filter.apply(tree); String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree, dataset.getBaseIRI(), dataset.getPrefixMapping()); // compute score Score score = computeScore(sparqlQuery, tree, noise); bestReturnedSolutionPrecisionStats.addValue(score.precision); bestReturnedSolutionRecallStats.addValue(score.recall); bestReturnedSolutionFMeasureStats.addValue(score.fmeasure); bestReturnedSolutionPredAccStats.addValue(score.predAcc); bestReturnedSolutionMathCorrStats.addValue(score.mathCorr); logger.info(score.toString()); // find the extensionally best matching tree in the list Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast( solutions, sparqlQuery, noise, examples); EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore .getFirst(); Score bestMatchingScore = bestMatchingTreeWithScore.getSecond(); // position of best tree in list of solutions int positionBestScore = solutions.indexOf(bestMatchingTree); bestSolutionPositionStats.addValue(positionBestScore); Score bestScore = score; if (positionBestScore > 0) { logger.info( 
"Position of best covering tree in list: " + positionBestScore); logger.info("Best covering solution:\n" + render(bestMatchingTree.asEvaluatedDescription())); logger.info("Tree score: " + bestMatchingTree.getTreeScore()); bestScore = bestMatchingScore; logger.info(bestMatchingScore.toString()); } else { logger.info( "Best returned solution was also the best covering solution."); } bestSolutionRecallStats.addValue(bestScore.recall); bestSolutionPrecisionStats.addValue(bestScore.precision); bestSolutionFMeasureStats.addValue(bestScore.fmeasure); bestSolutionPredAccStats.addValue(bestScore.predAcc); bestSolutionMathCorrStats.addValue(bestScore.mathCorr); for (RDFResourceTree negTree : examples.negExamplesMapping.values()) { if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) { Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"), Charsets.UTF_8); break; } } String bestQuery = QueryFactory .create(QueryTreeUtils.toSPARQLQueryString( filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(), dataset.getPrefixMapping())) .toString(); if (write2DB) { write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery, baselineScore, heuristicName, measureName, QueryFactory.create(learnedSPARQLQuery).toString(), score, runtimeBestSolution, bestQuery, positionBestScore, bestScore); } } catch (Exception e) { failed.set(true); logger.error("Error occured for query\n" + sparqlQuery, e); try { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); e.printStackTrace(pw); Files.append(sparqlQuery + "\n" + sw.toString(), new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".txt"), Charsets.UTF_8); } catch (IOException e1) { e1.printStackTrace(); } } finally { int cnt = currentNrOfFinishedRuns.incrementAndGet(); logger.info("***********Evaluation Progress:" + NumberFormat.getPercentInstance() .format((double) cnt / totalNrOfQTLRuns) + "(" + cnt + "/" + totalNrOfQTLRuns 
+ ")" + "***********"); } } }); } tp.shutdown(); tp.awaitTermination(12, TimeUnit.HOURS); Logger.getRootLogger().removeAppender(appender); if (!failed.get()) { String result = ""; result += "\nBaseline Precision:\n" + baselinePrecisionStats; result += "\nBaseline Recall:\n" + baselineRecallStats; result += "\nBaseline F-measure:\n" + baselineFMeasureStats; result += "\nBaseline PredAcc:\n" + baselinePredAccStats; result += "\nBaseline MathCorr:\n" + baselineMathCorrStats; result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats; result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats; result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats; result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats; result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats; result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats; result += "\nTime until best returned solution found:\n" + bestReturnedSolutionRuntimeStats; result += "\nPositions of best solution:\n" + Arrays.toString(bestSolutionPositionStats.getValues()); result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats; result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats; result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats; result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats; result += "\nCBD generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal() + "\n"; result += "CBD generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg() + "\n"; result += "Tree generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal() + "\n"; result += "Tree generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg() + "\n"; result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n"; 
logger.info(result); try { Files.write(result, statsFile, Charsets.UTF_8); } catch (IOException e) { e.printStackTrace(); } data[i][j] = bestReturnedSolutionFMeasureStats.getMean(); if (write2DB) { write2DB(heuristicName, measureName, nrOfExamples, noise, bestReturnedSolutionFMeasureStats.getMean(), bestReturnedSolutionPrecisionStats.getMean(), bestReturnedSolutionRecallStats.getMean(), bestReturnedSolutionPredAccStats.getMean(), bestReturnedSolutionMathCorrStats.getMean(), bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(), bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(), bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(), baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(), baselineRecallStats.getMean(), baselinePredAccStats.getMean(), baselineMathCorrStats.getMean(), bestReturnedSolutionRuntimeStats.getMean()); } } } } String content = "###"; String separator = "\t"; for (double noiseInterval1 : noiseIntervals) { content += separator + noiseInterval1; } content += "\n"; for (int i = 0; i < nrOfExamplesIntervals.length; i++) { content += nrOfExamplesIntervals[i]; for (int j = 0; j < noiseIntervals.length; j++) { content += separator + data[i][j]; } content += "\n"; } File examplesVsNoise = new File(benchmarkDirectory, "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv"); try { Files.write(content, examplesVsNoise, Charsets.UTF_8); } catch (IOException e) { logger.error("failed to write stats to file", e); } } } if (write2DB) { conn.close(); } if (useEmailNotification) { sendFinishedMail(); } long t2 = System.currentTimeMillis(); long duration = t2 - t1; logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms."); }
From source file:org.dllearner.algorithms.qtl.experiments.QTLEvaluation.java
public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval, HeuristicType[] measures) throws Exception { this.maxTreeDepth = maxTreeDepth; queryTreeFactory.setMaxDepth(maxTreeDepth); if (exampleInterval != null) { nrOfExamplesIntervals = exampleInterval; }//from ww w. ja v a 2 s .c o m if (noiseInterval != null) { this.noiseIntervals = noiseInterval; } if (measures != null) { this.measures = measures; } logger.info("Started QTL evaluation..."); long t1 = System.currentTimeMillis(); List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString()) .collect(Collectors.toList()); logger.info("#loaded queries: " + queries.size()); // filter for debugging purposes queries = queries.stream().filter(q -> tokens.stream().noneMatch(t -> !q.contains(t))) .collect(Collectors.toList()); if (maxNrOfProcessedQueries == -1) { maxNrOfProcessedQueries = queries.size(); } // queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth)); // queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries)); logger.info("#queries to process: " + queries.size()); // generate examples for each query logger.info("precomputing pos. and neg. examples..."); final Map<String, ExampleCandidates> query2Examples = new HashMap<>(); for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue; query2Examples.put(query, generateExamples(query)); } logger.info("precomputing pos. and neg. examples finished."); // check for queries that do not return any result (should not happen, but we never know) Set<String> emptyQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} empty queries.", emptyQueries.size()); queries.removeAll(emptyQueries); // min. 
pos examples Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream() .filter(e -> e.getValue().correctPosExampleCandidates.size() < 2).map(e -> e.getKey()) .collect(Collectors.toSet()); logger.info("got {} queries with < 2 pos. examples.", emptyQueries.size()); queries.removeAll(lowNrOfExamplesQueries); final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length * noiseIntervals.length * queries.size(); logger.info("#QTL runs: " + totalNrOfQTLRuns); final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0); // loop over heuristics for (final QueryTreeHeuristic heuristic : heuristics) { final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName(); // loop over heuristics measures for (HeuristicType measure : this.measures) { final String measureName = measure.toString(); heuristic.setHeuristicType(measure); double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length]; // loop over number of positive examples for (int i = 0; i < nrOfExamplesIntervals.length; i++) { final int nrOfExamples = nrOfExamplesIntervals[i]; // loop over noise value for (int j = 0; j < noiseIntervals.length; j++) { final double noise = noiseIntervals[j]; // check if not already processed File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".log"); File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".stats"); if (!override && logFile.exists() && statsFile.exists()) { logger.info( "Eval config already processed. 
For re-running please remove corresponding output files."); continue; } FileAppender appender = null; try { appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false); Logger.getRootLogger().addAppender(appender); } catch (IOException e) { e.printStackTrace(); } logger.info("#examples: " + nrOfExamples + " noise: " + noise); final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionMathCorrStats = new 
SynchronizedDescriptiveStatistics(); final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics(); MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset(); MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset(); ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads); // indicates if the execution for some of the queries failed final AtomicBoolean failed = new AtomicBoolean(false); // loop over SPARQL queries for (final String sparqlQuery : queries) { tp.submit(() -> { logger.info("##############################################################"); logger.info("Processing query\n" + sparqlQuery); try { ExamplesWrapper examples = query2Examples.get(sparqlQuery).get(nrOfExamples, nrOfExamples, noise); logger.info( "pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples)); logger.info( "neg. examples:\n" + Joiner.on("\n").join(examples.correctNegExamples)); // write examples to disk File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery)); dir.mkdirs(); Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tp"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tn"), Charsets.UTF_8); Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir, "examples_" + nrOfExamples + "_" + noise + ".fp"), Charsets.UTF_8); // compute baseline logger.info("Computing baseline..."); RDFResourceTree baselineSolution = applyBaseLine(examples, Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES); logger.info("Baseline solution:\n" + owlRenderer .render(QueryTreeUtils.toOWLClassExpression(baselineSolution))); logger.info("Evaluating baseline..."); Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise); logger.info("Baseline score:\n" + baselineScore); String baseLineQuery = 
QueryTreeUtils.toSPARQLQueryString(baselineSolution, dataset.getBaseIRI(), dataset.getPrefixMapping()); baselinePrecisionStats.addValue(baselineScore.precision); baselineRecallStats.addValue(baselineScore.recall); baselineFMeasureStats.addValue(baselineScore.fmeasure); baselinePredAccStats.addValue(baselineScore.predAcc); baselineMathCorrStats.addValue(baselineScore.mathCorr); // run QTL PosNegLPStandard lp = new PosNegLPStandard(); lp.setPositiveExamples(examples.posExamplesMapping.keySet()); lp.setNegativeExamples(examples.negExamplesMapping.keySet()); QTL2Disjunctive la = new QTL2Disjunctive(lp, qef); la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer()); la.setReasoner(dataset.getReasoner()); la.setEntailment(Entailment.SIMPLE); la.setTreeFactory(queryTreeFactory); la.setPositiveExampleTrees(examples.posExamplesMapping); la.setNegativeExampleTrees(examples.negExamplesMapping); la.setNoise(noise); la.setHeuristic(heuristic); la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds); la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds); la.init(); la.start(); List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(la.getSolutions()); // List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic); nrOfReturnedSolutionsStats.addValue(solutions.size()); // the best returned solution by QTL EvaluatedRDFResourceTree bestSolution = solutions.get(0); logger.info("Got " + solutions.size() + " query trees."); logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription())); logger.info("QTL Score:\n" + bestSolution.getTreeScore()); long runtimeBestSolution = la.getTimeBestSolutionFound(); bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution); // convert to SPARQL query RDFResourceTree tree = bestSolution.getTree(); // filter.filter(tree); String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree, dataset.getBaseIRI(), dataset.getPrefixMapping()); // compute score 
Score score = computeScore(sparqlQuery, tree, noise); bestReturnedSolutionPrecisionStats.addValue(score.precision); bestReturnedSolutionRecallStats.addValue(score.recall); bestReturnedSolutionFMeasureStats.addValue(score.fmeasure); bestReturnedSolutionPredAccStats.addValue(score.predAcc); bestReturnedSolutionMathCorrStats.addValue(score.mathCorr); logger.info(score.toString()); // find the extensionally best matching tree in the list Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast( solutions, sparqlQuery, noise, examples); EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore .getFirst(); Score bestMatchingScore = bestMatchingTreeWithScore.getSecond(); // position of best tree in list of solutions int positionBestScore = solutions.indexOf(bestMatchingTree); bestSolutionPositionStats.addValue(positionBestScore); Score bestScore = score; if (positionBestScore > 0) { logger.info("Position of best covering tree in list: " + positionBestScore); logger.info("Best covering solution:\n" + render(bestMatchingTree.asEvaluatedDescription())); logger.info("Tree score: " + bestMatchingTree.getTreeScore()); bestScore = bestMatchingScore; logger.info(bestMatchingScore.toString()); } else { logger.info("Best returned solution was also the best covering solution."); } bestSolutionRecallStats.addValue(bestScore.recall); bestSolutionPrecisionStats.addValue(bestScore.precision); bestSolutionFMeasureStats.addValue(bestScore.fmeasure); bestSolutionPredAccStats.addValue(bestScore.predAcc); bestSolutionMathCorrStats.addValue(bestScore.mathCorr); for (RDFResourceTree negTree : examples.negExamplesMapping.values()) { if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) { Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"), Charsets.UTF_8); break; } } String bestQuery = QueryFactory.create(QueryTreeUtils.toSPARQLQueryString( filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(), 
dataset.getPrefixMapping())).toString(); if (write2DB) { write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery, baselineScore, heuristicName, measureName, QueryFactory.create(learnedSPARQLQuery).toString(), score, runtimeBestSolution, bestQuery, positionBestScore, bestScore); } } catch (Exception e) { failed.set(true); logger.error("Error occured for query\n" + sparqlQuery, e); try { StringWriter sw = new StringWriter(); PrintWriter pw = new PrintWriter(sw); e.printStackTrace(pw); Files.append(sparqlQuery + "\n" + sw.toString(), new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise + "-" + heuristicName + "-" + measureName + ".txt"), Charsets.UTF_8); } catch (IOException e1) { e1.printStackTrace(); } } finally { int cnt = currentNrOfFinishedRuns.incrementAndGet(); logger.info("***********Evaluation Progress:" + NumberFormat.getPercentInstance() .format((double) cnt / totalNrOfQTLRuns) + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********"); } }); } tp.shutdown(); tp.awaitTermination(12, TimeUnit.HOURS); Logger.getRootLogger().removeAppender(appender); if (!failed.get()) { String result = ""; result += "\nBaseline Precision:\n" + baselinePrecisionStats; result += "\nBaseline Recall:\n" + baselineRecallStats; result += "\nBaseline F-measure:\n" + baselineFMeasureStats; result += "\nBaseline PredAcc:\n" + baselinePredAccStats; result += "\nBaseline MathCorr:\n" + baselineMathCorrStats; result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats; result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats; result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats; result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats; result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats; result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats; result += "\nTime until best returned solution found:\n" + bestReturnedSolutionRuntimeStats; result += "\nPositions of best solution:\n" + 
Arrays.toString(bestSolutionPositionStats.getValues()); result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats; result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats; result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats; result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats; result += "\nCBD generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal() + "\n"; result += "CBD generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg() + "\n"; result += "Tree generation time(total):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal() + "\n"; result += "Tree generation time(avg):\t" + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg() + "\n"; result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n"; logger.info(result); try { Files.write(result, statsFile, Charsets.UTF_8); } catch (IOException e) { e.printStackTrace(); } data[i][j] = bestReturnedSolutionFMeasureStats.getMean(); if (write2DB) { write2DB(heuristicName, measureName, nrOfExamples, noise, bestReturnedSolutionFMeasureStats.getMean(), bestReturnedSolutionPrecisionStats.getMean(), bestReturnedSolutionRecallStats.getMean(), bestReturnedSolutionPredAccStats.getMean(), bestReturnedSolutionMathCorrStats.getMean(), bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(), bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(), bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(), baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(), baselineRecallStats.getMean(), baselinePredAccStats.getMean(), baselineMathCorrStats.getMean(), bestReturnedSolutionRuntimeStats.getMean()); } } } } String content = "###"; String separator = "\t"; for (double noiseInterval1 : noiseIntervals) { content += separator + noiseInterval1; 
} content += "\n"; for (int i = 0; i < nrOfExamplesIntervals.length; i++) { content += nrOfExamplesIntervals[i]; for (int j = 0; j < noiseIntervals.length; j++) { content += separator + data[i][j]; } content += "\n"; } File examplesVsNoise = new File(benchmarkDirectory, "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv"); try { Files.write(content, examplesVsNoise, Charsets.UTF_8); } catch (IOException e) { logger.error("failed to write stats to file", e); } } } if (write2DB) { conn.close(); } if (useEmailNotification) { sendFinishedMail(); } long t2 = System.currentTimeMillis(); long duration = t2 - t1; logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms."); }
From source file:org.dllearner.algorithms.qtl.qald.QALDExperiment.java
public void run() { List<String> sparqlQueries = loadSPARQLQueries(); logger.info("Total number of queries: " + sparqlQueries.size()); // parameters int minNrOfExamples = 3; int maxNrOfExamples = 10; int stepSize = 2; double[] noiseIntervals = { 0.0, 0.2, // 0.4, // 0.6 };//ww w . j a v a2s.co m // loop over number of positive examples for (int nrOfExamples = minNrOfExamples; nrOfExamples <= maxNrOfExamples; nrOfExamples = nrOfExamples + stepSize) { // loop over noise value for (double noise : noiseIntervals) { FileAppender appender = null; try { appender = new FileAppender(new SimpleLayout(), "log/qtl/qtl2-" + nrOfExamples + "-" + noise + ".log", false); Logger.getRootLogger().addAppender(appender); } catch (IOException e1) { e1.printStackTrace(); } logger.info("#examples: " + nrOfExamples + " noise: " + noise); DescriptiveStatistics bestReturnedSolutionPrecisionStats = new DescriptiveStatistics(); DescriptiveStatistics bestReturnedSolutionRecallStats = new DescriptiveStatistics(); DescriptiveStatistics bestReturnedSolutionFMeasureStats = new DescriptiveStatistics(); DescriptiveStatistics bestSolutionPrecisionStats = new DescriptiveStatistics(); DescriptiveStatistics bestSolutionRecallStats = new DescriptiveStatistics(); DescriptiveStatistics bestSolutionFMeasureStats = new DescriptiveStatistics(); DescriptiveStatistics bestSolutionPositionStats = new DescriptiveStatistics(); // if(nrOfExamples != 7) continue; // loop over SPARQL queries for (String sparqlQuery : sparqlQueries) { // if(!sparqlQuery.contains("Nobel_Prize_in_Literature"))continue; logger.info("##############################################################"); logger.info("Processing query\n" + sparqlQuery); // some queries can return less examples int possibleNrOfExamples = Math.min(getResultCount(sparqlQuery), nrOfExamples); try { // compute or load cached solutions List<EvaluatedRDFResourceTree> solutions = generateSolutions(sparqlQuery, possibleNrOfExamples, noise); // the best solution by QTL 
EvaluatedRDFResourceTree bestSolution = solutions.get(0); logger.info("Got " + solutions.size() + " query trees."); logger.info("Best computed solution:\n" + bestSolution.asEvaluatedDescription()); logger.info("Score:\n" + bestSolution.getTreeScore()); // convert to SPARQL query String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString( filter.apply(bestSolution.getTree()), kb.baseIRI, kb.prefixMapping); Score score = computeScore(sparqlQuery, learnedSPARQLQuery); // compute precision double precision = score.getPrecision(); bestReturnedSolutionPrecisionStats.addValue(precision); // compute recall double recall = score.getRecall(); bestReturnedSolutionRecallStats.addValue(recall); // compute F1-score double fmeasure = score.getFmeasure(); bestReturnedSolutionFMeasureStats.addValue(fmeasure); logger.info(String.format("P=%f\nR=%f\nF-score=%f", precision, recall, fmeasure)); // find the extensionally best matching tree in the list Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTree( solutions, sparqlQuery); EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore.getFirst(); Score bestMatchingScore = bestMatchingTreeWithScore.getSecond(); // position of best tree in list of solutions int position = solutions.indexOf(bestMatchingTree); bestSolutionPositionStats.addValue(position); if (position > 0) { logger.info("Position of best covering tree in list: " + position); logger.info("Best covering solution:\n" + bestMatchingTree.asEvaluatedDescription()); logger.info("Tree score: " + bestMatchingTree.getTreeScore()); String bestLearnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString( filter.apply(bestMatchingTree.getTree()), kb.baseIRI, kb.prefixMapping); precision = bestMatchingScore.getPrecision(); recall = bestMatchingScore.getRecall(); fmeasure = bestMatchingScore.getFmeasure(); logger.info(String.format("P=%f\nR=%f\nF-score=%f", precision, recall, fmeasure)); } else { logger.info("Best returned solution was also 
the best covering solution."); } bestSolutionRecallStats.addValue(recall); bestSolutionPrecisionStats.addValue(precision); bestSolutionFMeasureStats.addValue(fmeasure); } catch (Exception e) { logger.error("Error occured.", e); System.exit(0); } } Logger.getRootLogger().removeAppender(appender); String result = ""; result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats; result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats; result += "\nOverall FMeasure:\n" + bestReturnedSolutionFMeasureStats; result += "\nPositions of best solution:\n" + Arrays.toString(bestSolutionPositionStats.getValues()); result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats; result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats; result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats; result += "\nOverall FMeasure of best solution:\n" + bestSolutionFMeasureStats; logger.info(result); try { Files.write(result, new File("log/qtl/qtl2-" + nrOfExamples + "-" + noise + ".stats"), Charsets.UTF_8); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:org.jgrasstools.hortonmachine.modules.hydrogeomorphology.lwrecruitment.OmsLW09_AreaToNetpointAssociator.java
/**
 * Associates vegetation statistics with the network points.
 *
 * <p>Basins are extracted with {@link OmsNetNumbering} using all network points. For every
 * raster cell that belongs to a basin and is either below the connectivity threshold or
 * inside the inundated area, the canopy height (DSM/DTM derived) and the stand value are
 * collected per basin. Each input network point is then extended with two attributes:
 * {@code LWFields.VOLUME} (sum of the stand values of its basin) and
 * {@code LWFields.MEDIAN} (median canopy height of its basin); both are 0.0 when no cell was
 * collected for the basin. The result is stored in {@code outNetPoints}.
 *
 * @throws Exception if the net numbering or the raster/feature handling fails
 */
@Execute
public void process() throws Exception {
    RegionMap regionMap = CoverageUtilities.getRegionParamsFromGridCoverage(inFlow);
    int cols = regionMap.getCols();
    int rows = regionMap.getRows();
    GridGeometry2D gridGeometry = inFlow.getGridGeometry();
    GeometryFactory gf = GeometryUtilities.gf();

    /*
     * extract the inundated area from the polygon
     */
    PreparedGeometry preparedFloodingArea = getFloofindArea(inInundationArea);

    /*
     * extract the Canopy Height Model from DTM and DSM
     */
    GridCoverage2D chmGC = getChm(inDsm, inDtm);

    /*
     * extract basins calling netnumbering with in input all the network points
     */
    OmsNetNumbering omsnetnumbering = new OmsNetNumbering();
    omsnetnumbering.inFlow = inFlow;
    omsnetnumbering.inNet = inNet;
    omsnetnumbering.inTca = inTca;
    omsnetnumbering.inPoints = inNetPoints;
    omsnetnumbering.pThres = 0.0;
    omsnetnumbering.pm = pm;
    omsnetnumbering.process();
    outNetnum = omsnetnumbering.outNetnum;
    outBasins = omsnetnumbering.outBasins;

    RandomIter netnumBasinsIter = CoverageUtilities.getRandomIterator(outBasins);
    RandomIter connectivityIter = CoverageUtilities.getRandomIterator(inConnectivity);
    RandomIter chmIter = CoverageUtilities.getRandomIterator(chmGC);
    RandomIter standIter = CoverageUtilities.getRandomIterator(inStand);

    try {
        // per basin id: collected canopy heights and stand values
        HashMap<Integer, DescriptiveStatistics> heightBasin2ValueMap = new HashMap<Integer, DescriptiveStatistics>();
        HashMap<Integer, DescriptiveStatistics> standBasin2ValueMap = new HashMap<Integer, DescriptiveStatistics>();

        pm.beginTask("Calculating vegetation stats.", cols);
        for (int c = 0; c < cols; c++) {
            for (int r = 0; r < rows; r++) {
                double netnumDouble = netnumBasinsIter.getSampleDouble(c, r, 0);
                if (!isNovalue(netnumDouble)) {
                    Integer netNum = (int) netnumDouble;
                    Coordinate coordinate = CoverageUtilities.coordinateFromColRow(c, r, gridGeometry);
                    Point point = gf.createPoint(coordinate);
                    double connectivityDouble = connectivityIter.getSampleDouble(c, r, 0);
                    /*
                     * check if the point is connected to the network, i.e. either
                     *  - the connectivity index is less than the threshold, or
                     *  - the point is inside the inundated area
                     * and fill the hashmaps with the correspondent values.
                     */
                    if (connectivityDouble < pConnectivityThreshold || preparedFloodingArea.intersects(point)) {
                        double chmDouble = chmIter.getSampleDouble(c, r, 0);
                        double standDouble = standIter.getSampleDouble(c, r, 0);

                        // both maps are always filled together, so one null check is enough
                        DescriptiveStatistics summaryHeightStatistics = heightBasin2ValueMap.get(netNum);
                        DescriptiveStatistics summaryStandStatistics = standBasin2ValueMap.get(netNum);
                        if (summaryHeightStatistics == null) {
                            summaryHeightStatistics = new DescriptiveStatistics();
                            summaryStandStatistics = new DescriptiveStatistics();
                            heightBasin2ValueMap.put(netNum, summaryHeightStatistics);
                            standBasin2ValueMap.put(netNum, summaryStandStatistics);
                        }
                        summaryHeightStatistics.addValue(chmDouble);
                        summaryStandStatistics.addValue(standDouble);
                    }
                }
            }
            pm.worked(1);
        }
        pm.done();

        /*
         * create the structure for the output attributes and insert the summary statistics
         * as attributes
         */
        FeatureExtender ext = new FeatureExtender(inNetPoints.getSchema(),
                new String[] { LWFields.VOLUME, LWFields.MEDIAN },
                new Class[] { Double.class, Double.class });

        List<SimpleFeature> inNetworkPointsList = FeatureUtilities.featureCollectionToList(inNetPoints);
        DefaultFeatureCollection finalNetworkPointsFC = new DefaultFeatureCollection();
        // reused holder for the col/row position of each network point
        final java.awt.Point point = new java.awt.Point();
        for (SimpleFeature inPointFeature : inNetworkPointsList) {
            Geometry geometry = (Geometry) inPointFeature.getDefaultGeometry();
            Coordinate coordinate = geometry.getCoordinate();
            CoverageUtilities.colRowFromCoordinate(coordinate, gridGeometry, point);
            int netnum = netnumBasinsIter.getSample(point.x, point.y, 0);

            DescriptiveStatistics summaryHeightStatistics = heightBasin2ValueMap.get(netnum);
            double medianHeight = 0.0;
            if (summaryHeightStatistics != null) {
                medianHeight = summaryHeightStatistics.getPercentile(50);
            }

            DescriptiveStatistics summaryStandStatistics = standBasin2ValueMap.get(netnum);
            double sumStand = 0.0;
            if (summaryStandStatistics != null) {
                sumStand = summaryStandStatistics.getSum();
            }

            // value order matches the attribute order above: VOLUME, MEDIAN
            SimpleFeature newPointFeature = ext.extendFeature(inPointFeature,
                    new Object[] { sumStand, medianHeight });
            finalNetworkPointsFC.add(newPointFeature);
        }
        outNetPoints = finalNetworkPointsFC;
    } finally {
        // let the iterators discard their internal data structures
        netnumBasinsIter.done();
        connectivityIter.done();
        chmIter.done();
        standIter.done();
    }
}
From source file:org.jobscheduler.dashboard.service.SchedulerJobService.java
/** * Get statistics for JobName/*ww w . java2 s .c om*/ * * @param spoolerId * @param jobName * @return */ public SchedulerJobStatsDTO getStatsPerJobName(String spoolerId, String jobName, Pageable pageable) { SchedulerJobStatsDTO dto = new SchedulerJobStatsDTO(); dto.setJobName(jobName); dto.setSpoolerId(spoolerId); // Number of executed jobs Long executedTimes = schedulerHistoryRepository.countBySpoolerIdAndJobName(spoolerId, jobName); dto.setExecutedTimes(executedTimes); // / Retrieve last executed jobs for a job name (max 100) List<SchedulerHistory> schedulerHistories = schedulerHistoryRepository.findBySpoolerIdAndJobName(spoolerId, jobName, pageable); dto.setNbJobsLastExecutedTimes(schedulerHistories.size()); DescriptiveStatistics errorStats = new DescriptiveStatistics(); DescriptiveStatistics workingTimeStats = new DescriptiveStatistics(); SerieDTO workingTimes = new SerieDTO(); SerieDTO meanWorkingTimes = new SerieDTO(); List<PointDTO> points = new ArrayList<PointDTO>(); List<PointDTO> meanWorkingTimePoints = new ArrayList<PointDTO>(); workingTimes.setKey("Working time"); workingTimes.setValues(points); meanWorkingTimes.setKey("Mean working time"); meanWorkingTimes.setValues(meanWorkingTimePoints); DateTime startDateTime = null; DateTime endDateTime = null; for (SchedulerHistory schedulerHistory : schedulerHistories) { if ((schedulerHistory.getEndTime() != null) && (schedulerHistory.getEndTime().getTime() >= 0)) { endDateTime = new DateTime(schedulerHistory.getEndTime().getTime()); startDateTime = new DateTime(schedulerHistory.getStartTime().getTime()); long workingTime = endDateTime.getMillis() - startDateTime.getMillis(); workingTimeStats.addValue(workingTime); points.add(new PointDTO(startDateTime.getMillis(), (long) workingTime)); errorStats.addValue(0); } else { errorStats.addValue(1); } } dto.setLastStartDateTime(startDateTime); dto.setLastEndDateTime(endDateTime); dto.setMeanWorkingTime(workingTimeStats.getMean()); for (SchedulerHistory 
schedulerHistory : schedulerHistories) { startDateTime = new DateTime(schedulerHistory.getStartTime().getTime()); meanWorkingTimePoints.add(new PointDTO(startDateTime.getMillis(), (long) workingTimeStats.getMean())); } List<SerieDTO> seriesDTO = new ArrayList<SerieDTO>(); seriesDTO.add(workingTimes); seriesDTO.add(meanWorkingTimes); dto.setWorkingTime(seriesDTO); schedulerJobRepository.findBySpoolerId(spoolerId); return dto; }