Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics addValue

List of usage examples for org.apache.commons.math3.stat.descriptive DescriptiveStatistics addValue

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.descriptive.DescriptiveStatistics.addValue.

Prototype

public void addValue(double v) 

Source Link

Document

Adds the value to the dataset.

Usage

From source file:org.commoncrawl.mapred.pipelineV3.domainmeta.fuzzydedupe.CrossDomainDupesReducer.java

/**
 * Aggregates all duplicate-hit records received for one key and, when the thresholds below are
 * met, emits a single JSON summary for that key.
 *
 * <p>Each incoming value is parsed as a JSON array of objects carrying at least the fields
 * {@code dh} (root-domain hash), {@code ip}, and - for hits on a different root domain -
 * {@code length} and possibly {@code url}. A summary is collected only if there are more than
 * 15 hits, at least 2 cross-domain hits, and either
 * {@code otherDomainToLocalDomainScore >= 0.5} or {@code spamHostScore > 0.5}.
 *
 * @param key      host whose root-domain fingerprint is derived via
 *                 {@code URLUtils.getURLFPV2FromHost} (presumably a host name - confirm with caller)
 * @param values   JSON-array strings describing the hits for this key
 * @param output   collector that receives {@code (key, summary JSON)} when the thresholds pass
 * @param reporter unused by this method
 * @throws IOException propagated from {@code output.collect}
 */
@Override
public void reduce(TextBytes key, Iterator<TextBytes> values, OutputCollector<TextBytes, TextBytes> output,
        Reporter reporter) throws IOException {

    filter.clear();
    double crossDomainDupesCount = 0;
    double totalHitsCount = 0;
    double uniqueRootDomainsCount = 0;
    double uniqueIPs = 0;
    double validDupePatternMatches = 0;

    URLFPV2 rootFP = URLUtils.getURLFPV2FromHost(key.toString());
    // Scratch fingerprint, reused for every membership test against the filter.
    URLFPV2 fp = new URLFPV2();
    int sampleCount = 0;
    ArrayList<Integer> ipAddresses = new ArrayList<Integer>();
    JsonArray thisHostsDupes = new JsonArray();
    DescriptiveStatistics lengthStats = new DescriptiveStatistics();

    while (values.hasNext()) {
        JsonArray jsonArray = parser.parse(values.next().toString()).getAsJsonArray();
        for (JsonElement elem : jsonArray) {
            JsonObject hit = elem.getAsJsonObject();
            totalHitsCount++;
            fp.setRootDomainHash(hit.get("dh").getAsLong());
            if (fp.getRootDomainHash() != rootFP.getRootDomainHash()) {
                crossDomainDupesCount++;
                fp.setDomainHash(fp.getRootDomainHash());
                fp.setUrlHash(fp.getRootDomainHash());
                // track length average ....
                lengthStats.addValue(hit.get("length").getAsInt());

                if (!filter.isPresent(fp)) {
                    uniqueRootDomainsCount++;
                    filter.add(fp);
                    // Keep a bounded number of sample URLs, one per newly seen root domain.
                    if (sampleCount < samples.length) {
                        String url = hit.get("url").getAsString();
                        GoogleURL urlObject = new GoogleURL(url);
                        if (knownValidDupesPatterns.matcher(urlObject.getCanonicalURL()).find()) {
                            validDupePatternMatches++;
                        }
                        samples[sampleCount++] = url;
                    }
                }
            } else {
                thisHostsDupes.add(elem);
            }

            int ipAddress = hit.get("ip").getAsInt();

            // The same filter is used for IP uniqueness: the IP is written into all three hash
            // slots of the scratch fingerprint.
            // NOTE(review): root-domain hashes and IPs share one filter - confirm that collisions
            // between the two key spaces are acceptable.
            fp.setRootDomainHash(ipAddress);
            fp.setDomainHash(ipAddress);
            fp.setUrlHash(ipAddress);

            if (!filter.isPresent(fp)) {
                uniqueIPs++;
                filter.add(fp);
                ipAddresses.add(ipAddress);
            }
        }
    }

    if (totalHitsCount > 15 && crossDomainDupesCount >= 2) {

        double otherDomainToLocalScore = otherDomainToLocalDomainScore(totalHitsCount, crossDomainDupesCount);
        double spamIPScore = spamHostScore(totalHitsCount, crossDomainDupesCount, uniqueIPs);

        if (otherDomainToLocalScore >= .50 || spamIPScore > .50) {
            JsonObject objectOut = new JsonObject();

            objectOut.addProperty("ratio", (crossDomainDupesCount / totalHitsCount));
            objectOut.addProperty("totalHits", totalHitsCount);
            objectOut.addProperty("crossDomainDupes", crossDomainDupesCount);
            objectOut.addProperty("uniqueRootDomains", uniqueRootDomainsCount);
            objectOut.addProperty("otherDomainToLocalScore", otherDomainToLocalScore);
            objectOut.addProperty("spamIPScore", spamIPScore);
            objectOut.addProperty("validDupeMatches", validDupePatternMatches);
            objectOut.addProperty("content-len-mean", lengthStats.getMean());
            objectOut.addProperty("content-len-geo-mean", lengthStats.getGeometricMean());

            for (int i = 0; i < sampleCount; ++i) {
                objectOut.addProperty("sample-" + i, samples[i]);
            }
            // compute path edit distance ...
            if (sampleCount > 1) {
                int sampleEditDistanceSize = Math.min(sampleCount, 5);

                // Parse each sample once instead of once per ordered pair.
                GoogleURL[] sampleUrls = new GoogleURL[sampleEditDistanceSize];
                for (int j = 0; j < sampleEditDistanceSize; ++j) {
                    sampleUrls[j] = new GoogleURL(samples[j]);
                }

                DescriptiveStatistics stats = new DescriptiveStatistics();
                for (int j = 0; j < sampleEditDistanceSize; ++j) {
                    for (int k = 0; k < sampleEditDistanceSize; ++k) {
                        if (k != j) {
                            GoogleURL urlObjectA = sampleUrls[j];
                            GoogleURL urlObjectB = sampleUrls[k];

                            // Only paths shorter than 100 characters take part in the comparison.
                            if (urlObjectA.getPath().length() < 100 && urlObjectB.getPath().length() < 100) {
                                stats.addValue(StringUtils.getLevenshteinDistance(urlObjectA.getPath(),
                                        urlObjectB.getPath()));
                            }
                        }
                    }
                }
                // An empty statistics object reports NaN for its mean, and NaN != 0.0 is true;
                // require at least one recorded distance so NaN is never written to the output.
                if (stats.getN() > 0 && stats.getMean() != 0.0) {
                    objectOut.addProperty("lev-distance-mean", stats.getMean());
                    objectOut.addProperty("lev-distance-geomean", stats.getGeometricMean());
                }
            }

            // Emit at most the first 1000 distinct IPs.
            JsonArray ipAddressArray = new JsonArray();
            int ipLimit = Math.min(1000, ipAddresses.size());
            for (int j = 0; j < ipLimit; ++j) {
                ipAddressArray.add(new JsonPrimitive(ipAddresses.get(j)));
            }
            if (!ipAddresses.isEmpty()) {
                objectOut.add("ipList", ipAddressArray);
            }
            objectOut.add("thisHostDupes", thisHostsDupes);

            output.collect(key, new TextBytes(objectOut.toString()));
        }
    }

}

From source file:org.cse.visiri.app.algoevaluation.DistributionEval.java

/**
 * Prints balance statistics for a query distribution and appends two figures to result files.
 *
 * <p>Printed: the number of queries per node whose name starts with {@code NODE_PREFIX}, the
 * mean / population standard deviation / coefficient of variation of those counts and of the
 * accumulated query costs per node, and the average number of distinct targets each input
 * stream is sent to. The cost standard deviation is appended to {@code VISIRI_algoeval.txt}
 * and the average event duplication to {@code VISIRI_eventDup.txt}.
 *
 * @param dist     the query-to-target allocation to evaluate
 * @param algoname not used by this method
 */
public void EvaluateDistribution(QueryDistribution dist, String algoname) {
    Map<String, Integer> queriesPerTarget = new TreeMap<String, Integer>();
    Map<String, Integer> queriesPerNode = new TreeMap<String, Integer>();
    Map<String, Double> costPerNode = new TreeMap<String, Double>();

    for (Query query : dist.getQueryAllocation().keySet()) {
        String target = dist.getQueryAllocation().get(query);

        // Count every target, whatever its name looks like.
        Integer targetQueries = queriesPerTarget.get(target);
        queriesPerTarget.put(target, targetQueries == null ? 1 : targetQueries + 1);

        if (!target.startsWith(NODE_PREFIX)) {
            continue;
        }

        // Count and accumulate cost only for targets carrying the node prefix.
        Integer nodeQueries = queriesPerNode.get(target);
        Double nodeCost = costPerNode.get(target);
        queriesPerNode.put(target, nodeQueries == null ? 1 : nodeQueries + 1);
        costPerNode.put(target, (nodeCost == null ? 0.0 : nodeCost) + query.getCost());
    }

    DescriptiveStatistics countStats = new DescriptiveStatistics();
    DescriptiveStatistics costStats = new DescriptiveStatistics();
    System.out.println("Query counts : ");
    for (Map.Entry<String, Integer> entry : queriesPerNode.entrySet()) {
        String nodeName = entry.getKey();
        System.out.println(nodeName + " : " + queriesPerTarget.get(nodeName));
        countStats.addValue(entry.getValue());
        costStats.addValue(costPerNode.get(nodeName));
    }

    System.out.println();
    double countMean = countStats.getMean();
    double countStdDev = Math.sqrt(countStats.getPopulationVariance());
    System.out.println("mean : " + countMean);
    System.out.println("stdDev : " + countStdDev);
    System.out.println("Coefficient of var : " + (countStdDev / countMean));

    System.out.println("\nCosts :");
    double costMean = costStats.getMean();
    double costStdDev = Math.sqrt(costStats.getPopulationVariance());
    System.out.println("mean : " + costMean);
    System.out.println("stdDev : " + costStdDev);
    System.out.println("Coefficient of var : " + (costStdDev / costMean));

    // Event duplication: for each input stream, the set of distinct targets that consume it.
    Map<String, Set<String>> targetsByStream = new TreeMap<String, Set<String>>();
    for (Query query : dist.getQueryAllocation().keySet()) {
        String target = dist.getQueryAllocation().get(query);

        for (StreamDefinition definition : query.getInputStreamDefinitionsList()) {
            String streamId = definition.getStreamId();
            Set<String> targets = targetsByStream.get(streamId);
            if (targets == null) {
                targets = new HashSet<String>();
                targetsByStream.put(streamId, targets);
            }
            targets.add(target);
        }
    }

    DescriptiveStatistics duplicationStats = new DescriptiveStatistics();
    for (Set<String> targets : targetsByStream.values()) {
        duplicationStats.addValue(targets.size());
    }
    double avgDuplication = duplicationStats.getMean();

    System.out.println();
    System.out.println("Avg. event duplication " + avgDuplication);

    // The value written here is the standard deviation of the per-node costs.
    try {
        PrintWriter evalOut = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_algoeval.txt", true)));
        evalOut.println(costStdDev);
        evalOut.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    try {
        PrintWriter dupOut = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_eventDup.txt", true)));
        dupOut.println(avgDuplication);
        dupOut.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction.java

/**
 * Adds all values from the distribution to the given descriptive statistics object
 * @param statistics/*w ww  . j av  a  2 s .co  m*/
 * @param distribution
 * @param type
 * @param offset will be added to values
 */
protected <T> void addAll(DescriptiveStatistics statistics, Distribution distribution,
        DataTypeWithRatioScale<T> type, double offset) {
    Iterator<Double> it = DistributionIterator.createIteratorDouble(distribution, dictionary, type);
    while (it.hasNext()) {
        Double value = it.next();
        value = value == null ? (ignoreMissingData ? null : 0d) : value;
        if (value != null) {
            statistics.addValue(value + offset);
        }
    }
}

From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGenerator.java

/**
 * Collects one size value per resource and returns them as descriptive statistics.
 *
 * <p>When {@code useConstruct} is false, the size is the {@code cnt} literal of the first row
 * returned by the {@code SPARQLUtils.CBD_TEMPLATE_DEPTH3} query with {@code uri} bound to the
 * resource. When {@code useConstruct} is true, or the query fails, the sentinel {@code -1} is
 * recorded, so the returned statistics always hold exactly one value per resource.
 *
 * @param query     not used by this method
 * @param resources IRIs of the resources to measure
 * @return statistics over the recorded sizes (including {@code -1} sentinels)
 */
private DescriptiveStatistics determineDefaultCBDSizes(Query query, List<String> resources) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    AtomicInteger idx = new AtomicInteger(1);

    CBDStructureTree cbdStructure = getDefaultCBDStructureTree();
    System.out.println(cbdStructure.toStringVerbose());

    ProgressBar progressBar = new ProgressBar();

    resources.forEach(r -> {
        // -1 means "size unknown" and is still recorded below.
        long cnt = -1;
        if (!useConstruct) {
            ParameterizedSparqlString template = SPARQLUtils.CBD_TEMPLATE_DEPTH3.copy();
            template.setIri("uri", r);
            try (QueryExecution qe = qef.createQueryExecution(template.toString())) {
                ResultSet rs = qe.execSelect();
                cnt = rs.next().getLiteral("cnt").getInt();
            } catch (Exception e) {
                // Log the exception itself so its type and stack trace are kept; logging only
                // e.getCause() loses both whenever the cause is null.
                LOGGER.error(e.getMessage(), e);
            }
        }
        // NOTE(review): the CONSTRUCT-based path
        // (cbdGen.getConciseBoundedDescription(r, cbdStructure) / cbd.size()) is disabled,
        // so with useConstruct == true every resource contributes -1.
        stats.addValue(cnt);
        progressBar.update(idx.getAndIncrement(), resources.size());
    });

    return stats;
}

From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGenerator.java

/**
 * Collects one size value per resource and returns them as descriptive statistics, after
 * printing the CBD structure that {@code QueryUtils.getOptimalCBDStructure} derives from the
 * query.
 *
 * <p>When {@code useConstruct} is false, the size is the {@code cnt} literal of the first row
 * returned by the {@code SPARQLUtils.CBD_TEMPLATE_DEPTH3} query with {@code uri} bound to the
 * resource. When {@code useConstruct} is true, or the query fails, the sentinel {@code -1} is
 * recorded, so the returned statistics always hold exactly one value per resource.
 *
 * @param query     query from which the printed CBD structure is derived
 * @param resources IRIs of the resources to measure
 * @return statistics over the recorded sizes (including {@code -1} sentinels)
 */
private DescriptiveStatistics determineOptimalCBDSizes(Query query, List<String> resources) {
    DescriptiveStatistics stats = new DescriptiveStatistics();
    AtomicInteger idx = new AtomicInteger(1);

    CBDStructureTree cbdStructure = QueryUtils.getOptimalCBDStructure(query);
    System.out.println(cbdStructure.toStringVerbose());

    ProgressBar progressBar = new ProgressBar();

    resources.forEach(r -> {
        // -1 means "size unknown" and is still recorded below.
        long cnt = -1;
        if (!useConstruct) {
            // NOTE(review): this path uses the fixed depth-3 template and never consults
            // cbdStructure - confirm that is intended for the "optimal" variant.
            ParameterizedSparqlString template = SPARQLUtils.CBD_TEMPLATE_DEPTH3.copy();
            template.setIri("uri", r);
            try (QueryExecution qe = qef.createQueryExecution(template.toString())) {
                ResultSet rs = qe.execSelect();
                cnt = rs.next().getLiteral("cnt").getInt();
            } catch (Exception e) {
                // Log the exception itself so its type and stack trace are kept; logging only
                // e.getCause() loses both whenever the cause is null.
                LOGGER.error(e.getMessage(), e);
            }
        }
        // NOTE(review): the CONSTRUCT-based path
        // (cbdGen.getConciseBoundedDescription(r, cbdStructure) / cbd.size()) is disabled,
        // so with useConstruct == true every resource contributes -1.
        stats.addValue(cnt);
        progressBar.update(idx.getAndIncrement(), resources.size());
    });

    return stats;
}

From source file:org.dllearner.algorithms.qtl.experiments.PRConvergenceExperiment.java

public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval,
        HeuristicType[] measures) throws Exception {
    this.maxTreeDepth = maxTreeDepth;
    queryTreeFactory.setMaxDepth(maxTreeDepth);

    if (exampleInterval != null) {
        nrOfExamplesIntervals = exampleInterval;
    }//from ww w  .jav a 2  s.co m
    if (noiseInterval != null) {
        this.noiseIntervals = noiseInterval;
    }
    if (measures != null) {
        this.measures = measures;
    }

    boolean noiseEnabled = noiseIntervals.length > 1 || noiseInterval[0] > 0;
    boolean posOnly = noiseEnabled ? false : true;

    logger.info("Started QTL evaluation...");
    long t1 = System.currentTimeMillis();

    List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString())
            .collect(Collectors.toList());
    logger.info("#loaded queries: " + queries.size());

    // filter for debugging purposes
    queries = queries.stream().filter(q -> queriesToProcessTokens.stream().noneMatch(t -> !q.contains(t)))
            .collect(Collectors.toList());
    queries = queries.stream().filter(q -> queriesToOmitTokens.stream().noneMatch(t -> q.contains(t)))
            .collect(Collectors.toList());

    if (maxNrOfProcessedQueries == -1) {
        maxNrOfProcessedQueries = queries.size();
    }

    //      queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth));
    //      queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries));
    logger.info("#queries to process: " + queries.size());

    // generate examples for each query
    logger.info("precomputing pos. and neg. examples...");
    for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue;
        query2Examples.put(query, generateExamples(query, posOnly, noiseEnabled));
    }
    logger.info("precomputing pos. and neg. examples finished.");

    // check for queries that do not return any result (should not happen, but we never know)
    Set<String> emptyQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} empty queries.", emptyQueries.size());
    queries.removeAll(emptyQueries);

    // min. pos examples
    int min = 3;
    Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.size() < min).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} queries with < {} pos. examples.", emptyQueries.size(), min);
    queries.removeAll(lowNrOfExamplesQueries);
    queries = queries.subList(0, Math.min(80, queries.size()));

    final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length
            * noiseIntervals.length * queries.size();
    logger.info("#QTL runs: " + totalNrOfQTLRuns);

    final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0);

    // loop over heuristics
    for (final QueryTreeHeuristic heuristic : heuristics) {
        final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName();

        // loop over heuristics measures
        for (HeuristicType measure : this.measures) {
            final String measureName = measure.toString();
            heuristic.setHeuristicType(measure);

            double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

            // loop over number of positive examples
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                final int nrOfExamples = nrOfExamplesIntervals[i];

                // loop over noise value
                for (int j = 0; j < noiseIntervals.length; j++) {
                    final double noise = noiseIntervals[j];

                    // check if not already processed
                    File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".log");
                    File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".stats");

                    if (!override && logFile.exists() && statsFile.exists()) {
                        logger.info(
                                "Eval config already processed. For re-running please remove corresponding output files.");
                        continue;
                    }

                    FileAppender appender = null;
                    try {
                        appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false);
                        Logger.getRootLogger().addAppender(appender);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    logger.info("#examples: " + nrOfExamples + " noise: " + noise);

                    final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics();

                    MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset();
                    MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset();

                    ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads);

                    // indicates if the execution for some of the queries failed
                    final AtomicBoolean failed = new AtomicBoolean(false);

                    Set<String> queriesToProcess = new TreeSet<>(queries);
                    queriesToProcess.retainAll(query2Examples.entrySet().stream()
                            .filter(e -> e.getValue().correctPosExampleCandidates.size() >= nrOfExamples)
                            .map(e -> e.getKey()).collect(Collectors.toSet()));

                    // loop over SPARQL queries
                    for (final String sparqlQuery : queriesToProcess) {
                        CBDStructureTree cbdStructure = cbdStructureTree != null ? cbdStructureTree
                                : QueryUtils.getOptimalCBDStructure(QueryFactory.create(sparqlQuery));

                        tp.submit(() -> {
                            logger.info("CBD tree:" + cbdStructure.toStringVerbose());

                            // update max tree depth
                            this.maxTreeDepth = QueryTreeUtils.getDepth(cbdStructure);
                            logger.info("##############################################################");
                            logger.info("Processing query\n" + sparqlQuery);

                            // we repeat it n times with different permutations of examples
                            int nrOfPermutations = 1;

                            if (nrOfExamples >= query2Examples.get(sparqlQuery).correctPosExampleCandidates
                                    .size()) {
                                nrOfPermutations = 1;
                            }
                            for (int perm = 1; perm <= nrOfPermutations; perm++) {
                                logger.info("Run {}/{}", perm, nrOfPermutations);
                                try {
                                    ExamplesWrapper examples = getExamples(sparqlQuery, nrOfExamples,
                                            nrOfExamples, noise, cbdStructure);
                                    logger.info("pos. examples:\n"
                                            + Joiner.on("\n").join(examples.correctPosExamples));
                                    logger.info("neg. examples:\n"
                                            + Joiner.on("\n").join(examples.correctNegExamples));

                                    // write examples to disk
                                    File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery));
                                    dir.mkdirs();
                                    Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tp"),
                                            Charsets.UTF_8);
                                    Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tn"),
                                            Charsets.UTF_8);
                                    Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".fp"),
                                            Charsets.UTF_8);

                                    // compute baseline
                                    RDFResourceTree baselineSolution = applyBaseLine(examples,
                                            Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES);
                                    logger.info("Evaluating baseline...");
                                    Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise);
                                    logger.info("Baseline score:\n" + baselineScore);
                                    String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution,
                                            dataset.getBaseIRI(), dataset.getPrefixMapping());
                                    baselinePrecisionStats.addValue(baselineScore.precision);
                                    baselineRecallStats.addValue(baselineScore.recall);
                                    baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                    baselinePredAccStats.addValue(baselineScore.predAcc);
                                    baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                    // run QTL
                                    PosNegLPStandard lp = new PosNegLPStandard();
                                    lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                    lp.setNegativeExamples(examples.negExamplesMapping.keySet());
                                    //                                 QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                    QTL2DisjunctiveMultiThreaded la = new QTL2DisjunctiveMultiThreaded(lp, qef);
                                    la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                    la.setReasoner(dataset.getReasoner());
                                    la.setEntailment(Entailment.SIMPLE);
                                    la.setTreeFactory(queryTreeFactory);
                                    la.setPositiveExampleTrees(examples.posExamplesMapping);
                                    la.setNegativeExampleTrees(examples.negExamplesMapping);
                                    la.setNoise(noise);
                                    la.setHeuristic(heuristic);
                                    la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                    la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                    la.init();
                                    la.start();
                                    List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(
                                            la.getSolutions());

                                    //                              List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic);
                                    nrOfReturnedSolutionsStats.addValue(solutions.size());

                                    // the best returned solution by QTL
                                    EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                    logger.info("Got " + solutions.size() + " query trees.");
                                    //                                 logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription()));
                                    logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                    long runtimeBestSolution = la.getTimeBestSolutionFound();
                                    bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                    // convert to SPARQL query
                                    RDFResourceTree tree = bestSolution.getTree();
                                    tree = filter.apply(tree);
                                    String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree,
                                            dataset.getBaseIRI(), dataset.getPrefixMapping());

                                    // compute score
                                    Score score = computeScore(sparqlQuery, tree, noise);
                                    bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                    bestReturnedSolutionRecallStats.addValue(score.recall);
                                    bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                    bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                    bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                    logger.info(score.toString());

                                    // find the extensionally best matching tree in the list
                                    Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(
                                            solutions, sparqlQuery, noise, examples);
                                    EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore
                                            .getFirst();
                                    Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                    // position of best tree in list of solutions
                                    int positionBestScore = solutions.indexOf(bestMatchingTree);
                                    bestSolutionPositionStats.addValue(positionBestScore);

                                    Score bestScore = score;
                                    if (positionBestScore > 0) {
                                        logger.info(
                                                "Position of best covering tree in list: " + positionBestScore);
                                        logger.info("Best covering solution:\n"
                                                + render(bestMatchingTree.asEvaluatedDescription()));
                                        logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                        bestScore = bestMatchingScore;
                                        logger.info(bestMatchingScore.toString());
                                    } else {
                                        logger.info(
                                                "Best returned solution was also the best covering solution.");
                                    }
                                    bestSolutionRecallStats.addValue(bestScore.recall);
                                    bestSolutionPrecisionStats.addValue(bestScore.precision);
                                    bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                    bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                    bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                    for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                        if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                            Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"),
                                                    Charsets.UTF_8);
                                            break;
                                        }
                                    }

                                    String bestQuery = QueryFactory
                                            .create(QueryTreeUtils.toSPARQLQueryString(
                                                    filter.apply(bestMatchingTree.getTree()),
                                                    dataset.getBaseIRI(), dataset.getPrefixMapping()))
                                            .toString();

                                    if (write2DB) {
                                        write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery,
                                                baselineScore, heuristicName, measureName,
                                                QueryFactory.create(learnedSPARQLQuery).toString(), score,
                                                runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                    }

                                } catch (Exception e) {
                                    failed.set(true);
                                    logger.error("Error occured for query\n" + sparqlQuery, e);
                                    try {
                                        StringWriter sw = new StringWriter();
                                        PrintWriter pw = new PrintWriter(sw);
                                        e.printStackTrace(pw);
                                        Files.append(sparqlQuery + "\n" + sw.toString(),
                                                new File(benchmarkDirectory,
                                                        "failed-" + nrOfExamples + "-" + noise + "-"
                                                                + heuristicName + "-" + measureName + ".txt"),
                                                Charsets.UTF_8);
                                    } catch (IOException e1) {
                                        e1.printStackTrace();
                                    }
                                } finally {
                                    int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                    logger.info("***********Evaluation Progress:"
                                            + NumberFormat.getPercentInstance()
                                                    .format((double) cnt / totalNrOfQTLRuns)
                                            + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                                }
                            }
                        });
                    }

                    tp.shutdown();
                    tp.awaitTermination(12, TimeUnit.HOURS);

                    Logger.getRootLogger().removeAppender(appender);

                    if (!failed.get()) {
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;

                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;

                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;

                        result += "\nTime until best returned solution found:\n"
                                + bestReturnedSolutionRuntimeStats;

                        result += "\nPositions of best solution:\n"
                                + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;

                        result += "\nCBD generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal()
                                + "\n";
                        result += "CBD generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg()
                                + "\n";
                        result += "Tree generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal()
                                + "\n";
                        result += "Tree generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg()
                                + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";

                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise,
                                    bestReturnedSolutionFMeasureStats.getMean(),
                                    bestReturnedSolutionPrecisionStats.getMean(),
                                    bestReturnedSolutionRecallStats.getMean(),
                                    bestReturnedSolutionPredAccStats.getMean(),
                                    bestReturnedSolutionMathCorrStats.getMean(),
                                    bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(),
                                    bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(),
                                    bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(),
                                    baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(),
                                    baselineRecallStats.getMean(), baselinePredAccStats.getMean(),
                                    baselineMathCorrStats.getMean(),
                                    bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            String content = "###";
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content += separator + noiseInterval1;
            }
            content += "\n";
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content += nrOfExamplesIntervals[i];
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content += separator + data[i][j];
                }
                content += "\n";
            }

            File examplesVsNoise = new File(benchmarkDirectory,
                    "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content, examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }
    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms.");
}

From source file:org.dllearner.algorithms.qtl.experiments.QTLEvaluation.java

/**
 * Runs the QTL evaluation over every combination of heuristic, heuristic measure,
 * number of examples and noise value.
 *
 * <p>Steps, as implemented below:
 * <ol>
 *   <li>Applies the given tree depth and (if non-null) the example/noise intervals and measures.</li>
 *   <li>Loads the SPARQL queries of the dataset, keeps only those containing all debug
 *       {@code tokens}, precomputes example candidates per query and drops queries with fewer
 *       than two correct positive example candidates.</li>
 *   <li>For each configuration that has not been processed yet (or always, if {@code override}
 *       is set), processes all queries on a fixed-size thread pool: a baseline solution and a
 *       {@link QTL2Disjunctive} run are scored and the scores are collected in synchronized
 *       statistics objects.</li>
 *   <li>If no query failed, writes the aggregated statistics to the {@code .stats} file and,
 *       when enabled, to the database.</li>
 *   <li>Writes one {@code examplesVsNoise-*.tsv} matrix (mean F-measure) per heuristic/measure.</li>
 * </ol>
 *
 * @param maxNrOfProcessedQueries upper bound for the number of queries; {@code -1} is replaced
 *        by the number of loaded queries. NOTE(review): the bound is not applied anywhere in
 *        this method - confirm whether that is intended.
 * @param maxTreeDepth max. depth used by the query tree factory
 * @param exampleInterval numbers of examples to evaluate, or {@code null} to keep the current setting
 * @param noiseInterval noise values to evaluate, or {@code null} to keep the current setting
 * @param measures heuristic measures to evaluate, or {@code null} to keep the current setting
 * @throws Exception if a step that is not handled locally fails, e.g. the thread is interrupted
 *         while waiting for the worker pool
 */
public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval,
        HeuristicType[] measures) throws Exception {
    this.maxTreeDepth = maxTreeDepth;
    queryTreeFactory.setMaxDepth(maxTreeDepth);

    if (exampleInterval != null) {
        nrOfExamplesIntervals = exampleInterval;
    }
    if (noiseInterval != null) {
        this.noiseIntervals = noiseInterval;
    }
    if (measures != null) {
        this.measures = measures;
    }

    logger.info("Started QTL evaluation...");
    long t1 = System.currentTimeMillis();

    List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString())
            .collect(Collectors.toList());
    logger.info("#loaded queries: " + queries.size());

    // filter for debugging purposes: keep only queries that contain every token
    queries = queries.stream().filter(q -> tokens.stream().allMatch(t -> q.contains(t)))
            .collect(Collectors.toList());

    if (maxNrOfProcessedQueries == -1) {
        maxNrOfProcessedQueries = queries.size();
    }

    logger.info("#queries to process: " + queries.size());

    // generate examples for each query
    logger.info("precomputing pos. and neg. examples...");
    final Map<String, ExampleCandidates> query2Examples = new HashMap<>();
    for (String query : queries) {
        query2Examples.put(query, generateExamples(query));
    }
    logger.info("precomputing pos. and neg. examples finished.");

    // check for queries that do not return any result (should not happen, but we never know)
    Set<String> emptyQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} empty queries.", emptyQueries.size());
    queries.removeAll(emptyQueries);

    // min. pos examples
    Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.size() < 2).map(e -> e.getKey())
            .collect(Collectors.toSet());
    // report the size of the set that is actually removed (previously logged emptyQueries.size())
    logger.info("got {} queries with < 2 pos. examples.", lowNrOfExamplesQueries.size());
    queries.removeAll(lowNrOfExamplesQueries);

    final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length
            * noiseIntervals.length * queries.size();
    logger.info("#QTL runs: " + totalNrOfQTLRuns);

    // shared progress counter, incremented by every worker task
    final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0);

    // loop over heuristics
    for (final QueryTreeHeuristic heuristic : heuristics) {
        final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName();

        // loop over heuristics measures
        for (HeuristicType measure : this.measures) {
            final String measureName = measure.toString();
            heuristic.setHeuristicType(measure);

            // mean F-measure per (#examples, noise); cells of skipped or failed configs stay 0.0
            double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

            // loop over number of positive examples
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                final int nrOfExamples = nrOfExamplesIntervals[i];

                // loop over noise value
                for (int j = 0; j < noiseIntervals.length; j++) {
                    final double noise = noiseIntervals[j];

                    // check if not already processed
                    File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".log");
                    File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".stats");

                    if (!override && logFile.exists() && statsFile.exists()) {
                        logger.info(
                                "Eval config already processed. For re-running please remove corresponding output files.");
                        continue;
                    }

                    // route all log output of this configuration into its own log file
                    FileAppender appender = null;
                    try {
                        appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false);
                        Logger.getRootLogger().addAppender(appender);
                    } catch (IOException e) {
                        // best effort: the evaluation continues without the per-config log file
                        logger.error("Failed to create log file appender for " + logFile, e);
                    }

                    logger.info("#examples: " + nrOfExamples + " noise: " + noise);

                    // synchronized variants because values are added concurrently by the worker tasks
                    final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics();

                    MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset();
                    MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset();

                    ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads);

                    // indicates if the execution for some of the queries failed
                    final AtomicBoolean failed = new AtomicBoolean(false);

                    // loop over SPARQL queries
                    for (final String sparqlQuery : queries) {

                        tp.submit(() -> {

                            logger.info("##############################################################");
                            logger.info("Processing query\n" + sparqlQuery);

                            try {
                                ExamplesWrapper examples = query2Examples.get(sparqlQuery).get(nrOfExamples,
                                        nrOfExamples, noise);
                                logger.info(
                                        "pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples));
                                logger.info(
                                        "neg. examples:\n" + Joiner.on("\n").join(examples.correctNegExamples));

                                // write examples to disk
                                File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery));
                                dir.mkdirs();
                                Files.write(Joiner.on("\n").join(examples.correctPosExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tp"),
                                        Charsets.UTF_8);
                                Files.write(Joiner.on("\n").join(examples.correctNegExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tn"),
                                        Charsets.UTF_8);
                                Files.write(Joiner.on("\n").join(examples.falsePosExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".fp"),
                                        Charsets.UTF_8);

                                // compute baseline
                                logger.info("Computing baseline...");
                                RDFResourceTree baselineSolution = applyBaseLine(examples,
                                        Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES);
                                logger.info("Baseline solution:\n" + owlRenderer
                                        .render(QueryTreeUtils.toOWLClassExpression(baselineSolution)));
                                logger.info("Evaluating baseline...");
                                Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise);
                                logger.info("Baseline score:\n" + baselineScore);
                                String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution,
                                        dataset.getBaseIRI(), dataset.getPrefixMapping());
                                baselinePrecisionStats.addValue(baselineScore.precision);
                                baselineRecallStats.addValue(baselineScore.recall);
                                baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                baselinePredAccStats.addValue(baselineScore.predAcc);
                                baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                // run QTL
                                PosNegLPStandard lp = new PosNegLPStandard();
                                lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                lp.setNegativeExamples(examples.negExamplesMapping.keySet());
                                QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                la.setReasoner(dataset.getReasoner());
                                la.setEntailment(Entailment.SIMPLE);
                                la.setTreeFactory(queryTreeFactory);
                                la.setPositiveExampleTrees(examples.posExamplesMapping);
                                la.setNegativeExampleTrees(examples.negExamplesMapping);
                                la.setNoise(noise);
                                la.setHeuristic(heuristic);
                                la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                la.init();
                                la.start();
                                List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(la.getSolutions());

                                nrOfReturnedSolutionsStats.addValue(solutions.size());

                                // the best returned solution by QTL; an empty solution list ends up
                                // in the catch block below and marks this configuration as failed
                                EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                logger.info("Got " + solutions.size() + " query trees.");
                                logger.info("Best computed solution:\n"
                                        + render(bestSolution.asEvaluatedDescription()));
                                logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                long runtimeBestSolution = la.getTimeBestSolutionFound();
                                bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                // convert to SPARQL query
                                RDFResourceTree tree = bestSolution.getTree();
                                String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree,
                                        dataset.getBaseIRI(), dataset.getPrefixMapping());

                                // compute score
                                Score score = computeScore(sparqlQuery, tree, noise);
                                bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                bestReturnedSolutionRecallStats.addValue(score.recall);
                                bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                logger.info(score.toString());

                                // find the extensionally best matching tree in the list
                                Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(
                                        solutions, sparqlQuery, noise, examples);
                                EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore
                                        .getFirst();
                                Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                // position of best tree in list of solutions
                                int positionBestScore = solutions.indexOf(bestMatchingTree);
                                bestSolutionPositionStats.addValue(positionBestScore);

                                // fall back to the score of the first solution unless a later one matches better
                                Score bestScore = score;
                                if (positionBestScore > 0) {
                                    logger.info("Position of best covering tree in list: " + positionBestScore);
                                    logger.info("Best covering solution:\n"
                                            + render(bestMatchingTree.asEvaluatedDescription()));
                                    logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                    bestScore = bestMatchingScore;
                                    logger.info(bestMatchingScore.toString());
                                } else {
                                    logger.info("Best returned solution was also the best covering solution.");
                                }
                                bestSolutionRecallStats.addValue(bestScore.recall);
                                bestSolutionPrecisionStats.addValue(bestScore.precision);
                                bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                // record queries whose best tree also covers at least one negative example
                                for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                    if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                        Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"),
                                                Charsets.UTF_8);
                                        break;
                                    }
                                }

                                String bestQuery = QueryFactory.create(QueryTreeUtils.toSPARQLQueryString(
                                        filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(),
                                        dataset.getPrefixMapping())).toString();

                                if (write2DB) {
                                    write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery,
                                            baselineScore, heuristicName, measureName,
                                            QueryFactory.create(learnedSPARQLQuery).toString(), score,
                                            runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                }

                            } catch (Exception e) {
                                // a single failing query invalidates the statistics of this configuration
                                failed.set(true);
                                logger.error("Error occured for query\n" + sparqlQuery, e);
                                try {
                                    StringWriter sw = new StringWriter();
                                    PrintWriter pw = new PrintWriter(sw);
                                    e.printStackTrace(pw);
                                    Files.append(sparqlQuery + "\n" + sw.toString(),
                                            new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise
                                                    + "-" + heuristicName + "-" + measureName + ".txt"),
                                            Charsets.UTF_8);
                                } catch (IOException e1) {
                                    logger.error("Failed to write failure report for query\n" + sparqlQuery, e1);
                                }
                            } finally {
                                int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                logger.info("***********Evaluation Progress:"
                                        + NumberFormat.getPercentInstance()
                                                .format((double) cnt / totalNrOfQTLRuns)
                                        + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                            }
                        });

                    }

                    tp.shutdown();
                    if (!tp.awaitTermination(12, TimeUnit.HOURS)) {
                        // tasks may still be running, so the statistics below can be based on partial data
                        logger.warn("Worker pool did not terminate within 12 hours for #examples: " + nrOfExamples
                                + " noise: " + noise + "; statistics may be incomplete.");
                    }

                    // detach the per-config log file and release its file handle
                    if (appender != null) {
                        Logger.getRootLogger().removeAppender(appender);
                        appender.close();
                    }

                    if (!failed.get()) {
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;

                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;

                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;

                        result += "\nTime until best returned solution found:\n"
                                + bestReturnedSolutionRuntimeStats;

                        result += "\nPositions of best solution:\n"
                                + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;

                        result += "\nCBD generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal()
                                + "\n";
                        result += "CBD generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg()
                                + "\n";
                        result += "Tree generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal()
                                + "\n";
                        result += "Tree generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg()
                                + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";

                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            logger.error("Failed to write stats file " + statsFile, e);
                        }

                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise,
                                    bestReturnedSolutionFMeasureStats.getMean(),
                                    bestReturnedSolutionPrecisionStats.getMean(),
                                    bestReturnedSolutionRecallStats.getMean(),
                                    bestReturnedSolutionPredAccStats.getMean(),
                                    bestReturnedSolutionMathCorrStats.getMean(),
                                    bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(),
                                    bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(),
                                    bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(),
                                    baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(),
                                    baselineRecallStats.getMean(), baselinePredAccStats.getMean(),
                                    baselineMathCorrStats.getMean(),
                                    bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            // render the (#examples x noise) matrix as TSV: header row of noise values,
            // then one row per number of examples
            StringBuilder content = new StringBuilder("###");
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content.append(separator).append(noiseInterval1);
            }
            content.append("\n");
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content.append(nrOfExamplesIntervals[i]);
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content.append(separator).append(data[i][j]);
                }
                content.append("\n");
            }

            File examplesVsNoise = new File(benchmarkDirectory,
                    "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content.toString(), examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }
    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    // formatDurationHMS already yields an H:mm:ss.SSS string, so no unit suffix is appended
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + ".");
}

From source file:org.dllearner.algorithms.qtl.qald.QALDExperiment.java

/**
 * Runs the QALD evaluation over all loaded SPARQL queries.
 * <p>
 * For every combination of number of positive examples (3, 5, 7, 9) and noise value, the
 * method generates the solutions for each query, scores the best returned solution and the
 * best matching solution against the reference query, and collects precision, recall and
 * F-measure. Each combination gets its own log file
 * ({@code log/qtl/qtl2-<examples>-<noise>.log}) and statistics file
 * ({@code log/qtl/qtl2-<examples>-<noise>.stats}).
 * <p>
 * Any exception raised while a query is processed is logged and terminates the JVM.
 */
public void run() {

    List<String> sparqlQueries = loadSPARQLQueries();
    logger.info("Total number of queries: " + sparqlQueries.size());

    // parameters
    int minNrOfExamples = 3;
    int maxNrOfExamples = 10;
    int stepSize = 2;

    // noise values 0.4 and 0.6 are currently disabled
    double[] noiseIntervals = { 0.0, 0.2 };

    // loop over number of positive examples
    for (int nrOfExamples = minNrOfExamples; nrOfExamples <= maxNrOfExamples; nrOfExamples += stepSize) {

        // loop over noise value
        for (double noise : noiseIntervals) {

            // common prefix of the per-run log file and statistics file
            String runFilePrefix = "log/qtl/qtl2-" + nrOfExamples + "-" + noise;

            FileAppender appender = null;
            try {
                appender = new FileAppender(new SimpleLayout(), runFilePrefix + ".log", false);
                Logger.getRootLogger().addAppender(appender);
            } catch (IOException e1) {
                // the run continues without its dedicated log file
                logger.error("failed to create log file " + runFilePrefix + ".log", e1);
            }

            DescriptiveStatistics bestReturnedSolutionPrecisionStats = new DescriptiveStatistics();
            DescriptiveStatistics bestReturnedSolutionRecallStats = new DescriptiveStatistics();
            DescriptiveStatistics bestReturnedSolutionFMeasureStats = new DescriptiveStatistics();

            DescriptiveStatistics bestSolutionPrecisionStats = new DescriptiveStatistics();
            DescriptiveStatistics bestSolutionRecallStats = new DescriptiveStatistics();
            DescriptiveStatistics bestSolutionFMeasureStats = new DescriptiveStatistics();

            DescriptiveStatistics bestSolutionPositionStats = new DescriptiveStatistics();

            try {
                // loop over SPARQL queries
                for (String sparqlQuery : sparqlQueries) {
                    logger.info("##############################################################");
                    logger.info("Processing query\n" + sparqlQuery);
                    // some queries can return less examples
                    int possibleNrOfExamples = Math.min(getResultCount(sparqlQuery), nrOfExamples);

                    try {
                        // compute or load cached solutions
                        List<EvaluatedRDFResourceTree> solutions = generateSolutions(sparqlQuery,
                                possibleNrOfExamples, noise);

                        // the best solution by QTL
                        EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                        logger.info("Got " + solutions.size() + " query trees.");
                        logger.info("Best computed solution:\n" + bestSolution.asEvaluatedDescription());
                        logger.info("Score:\n" + bestSolution.getTreeScore());

                        // convert to SPARQL query
                        String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(
                                filter.apply(bestSolution.getTree()), kb.baseIRI, kb.prefixMapping);

                        Score score = computeScore(sparqlQuery, learnedSPARQLQuery);

                        // compute precision
                        double precision = score.getPrecision();
                        bestReturnedSolutionPrecisionStats.addValue(precision);

                        // compute recall
                        double recall = score.getRecall();
                        bestReturnedSolutionRecallStats.addValue(recall);

                        // compute F1-score
                        double fmeasure = score.getFmeasure();
                        bestReturnedSolutionFMeasureStats.addValue(fmeasure);

                        logger.info(String.format("P=%f\nR=%f\nF-score=%f", precision, recall, fmeasure));

                        // find the extensionally best matching tree in the list
                        Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTree(
                                solutions, sparqlQuery);
                        EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore.getFirst();
                        Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                        // position of best tree in list of solutions
                        int position = solutions.indexOf(bestMatchingTree);
                        bestSolutionPositionStats.addValue(position);

                        if (position > 0) {
                            logger.info("Position of best covering tree in list: " + position);
                            logger.info("Best covering solution:\n" + bestMatchingTree.asEvaluatedDescription());
                            logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                            // NOTE(review): the converted query is never used; the call is kept
                            // because it is unclear from here whether filter.apply() has side
                            // effects - confirm and drop if not.
                            String bestLearnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(
                                    filter.apply(bestMatchingTree.getTree()), kb.baseIRI, kb.prefixMapping);
                            // from here on the metrics describe the best covering tree
                            precision = bestMatchingScore.getPrecision();
                            recall = bestMatchingScore.getRecall();
                            fmeasure = bestMatchingScore.getFmeasure();
                            logger.info(String.format("P=%f\nR=%f\nF-score=%f", precision, recall, fmeasure));
                        } else {
                            logger.info("Best returned solution was also the best covering solution.");
                        }
                        bestSolutionRecallStats.addValue(recall);
                        bestSolutionPrecisionStats.addValue(precision);
                        bestSolutionFMeasureStats.addValue(fmeasure);

                    } catch (Exception e) {
                        logger.error("Error occured.", e);
                        // NOTE(review): exit status 0 signals success to the caller although an
                        // error occurred - confirm whether a non-zero status is wanted here.
                        System.exit(0);
                    }
                }
            } finally {
                // detach the per-run appender and release its file handle,
                // even if a query failed with an unexpected runtime exception
                Logger.getRootLogger().removeAppender(appender);
                if (appender != null) {
                    appender.close();
                }
            }

            StringBuilder resultBuilder = new StringBuilder();
            resultBuilder.append("\nOverall Precision:\n").append(bestReturnedSolutionPrecisionStats);
            resultBuilder.append("\nOverall Recall:\n").append(bestReturnedSolutionRecallStats);
            resultBuilder.append("\nOverall FMeasure:\n").append(bestReturnedSolutionFMeasureStats);
            resultBuilder.append("\nPositions of best solution:\n")
                    .append(Arrays.toString(bestSolutionPositionStats.getValues()));
            resultBuilder.append("\nPosition of best solution stats:\n").append(bestSolutionPositionStats);

            resultBuilder.append("\nOverall Precision of best solution:\n").append(bestSolutionPrecisionStats);
            resultBuilder.append("\nOverall Recall of best solution:\n").append(bestSolutionRecallStats);
            resultBuilder.append("\nOverall FMeasure of best solution:\n").append(bestSolutionFMeasureStats);
            String result = resultBuilder.toString();

            logger.info(result);

            try {
                Files.write(result, new File(runFilePrefix + ".stats"), Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }
}

From source file:org.jgrasstools.hortonmachine.modules.hydrogeomorphology.lwrecruitment.OmsLW09_AreaToNetpointAssociator.java

/**
 * Computes per-basin vegetation statistics and attaches them to the network points.
 * <p>
 * The basins are extracted with {@link OmsNetNumbering} using all network points. For each
 * basin, the canopy height and stand values of the cells that are either below the
 * connectivity threshold or inside the inundation area are collected. Every network point
 * then gets the sum of the stand values ({@code LWFields.VOLUME}) and the median canopy
 * height ({@code LWFields.MEDIAN}) of the basin it lies in; points whose basin collected no
 * cells get 0.0 for both.
 *
 * @throws Exception if any of the called processing steps fails
 */
@Execute
public void process() throws Exception {

    RegionMap regionMap = CoverageUtilities.getRegionParamsFromGridCoverage(inFlow);
    int cols = regionMap.getCols();
    int rows = regionMap.getRows();
    GridGeometry2D gridGeometry = inFlow.getGridGeometry();
    GeometryFactory gf = GeometryUtilities.gf();

    /*
     * extract the inundated area from the polygon
     */
    PreparedGeometry preparedFloodingArea = getFloofindArea(inInundationArea);

    /*
     * extract the Canopy Height Model from DTM and DSM
     */
    GridCoverage2D chmGC = getChm(inDsm, inDtm);

    /*
     * extract basins calling netnumbering with in input all the network points
     */
    OmsNetNumbering omsnetnumbering = new OmsNetNumbering();
    omsnetnumbering.inFlow = inFlow;
    omsnetnumbering.inNet = inNet;
    omsnetnumbering.inTca = inTca;
    omsnetnumbering.inPoints = inNetPoints;
    omsnetnumbering.pThres = 0.0;
    omsnetnumbering.pm = pm;
    omsnetnumbering.process();
    outNetnum = omsnetnumbering.outNetnum;
    outBasins = omsnetnumbering.outBasins;

    RandomIter netnumBasinsIter = CoverageUtilities.getRandomIterator(outBasins);
    RandomIter connectivityIter = CoverageUtilities.getRandomIterator(inConnectivity);
    RandomIter chmIter = CoverageUtilities.getRandomIterator(chmGC);
    RandomIter standIter = CoverageUtilities.getRandomIterator(inStand);

    try {
        // basin number -> collected canopy height / stand values of the selected cells
        HashMap<Integer, DescriptiveStatistics> heightBasin2ValueMap = new HashMap<Integer, DescriptiveStatistics>();
        HashMap<Integer, DescriptiveStatistics> standBasin2ValueMap = new HashMap<Integer, DescriptiveStatistics>();

        pm.beginTask("Calculating vegetation stats.", cols);
        for (int c = 0; c < cols; c++) {
            for (int r = 0; r < rows; r++) {
                double netnumDouble = netnumBasinsIter.getSampleDouble(c, r, 0);
                if (!isNovalue(netnumDouble)) {
                    Integer netNum = (int) netnumDouble;
                    Coordinate coordinate = CoverageUtilities.coordinateFromColRow(c, r, gridGeometry);
                    Point point = gf.createPoint(coordinate);
                    double connectivityDouble = connectivityIter.getSampleDouble(c, r, 0);
                    /*
                     * a cell is taken into account if at least one of these holds:
                     * - its connectivity index is less than the threshold
                     * - it lies inside the inundated area
                     * in that case its values are added to the statistics of its basin.
                     */
                    if (connectivityDouble < pConnectivityThreshold || preparedFloodingArea.intersects(point)) {
                        double chmDouble = chmIter.getSampleDouble(c, r, 0);
                        double standDouble = standIter.getSampleDouble(c, r, 0);
                        DescriptiveStatistics summaryHeightStatistics = heightBasin2ValueMap.get(netNum);
                        DescriptiveStatistics summaryStandStatistics = standBasin2ValueMap.get(netNum);
                        // both maps are always filled together, so checking one is enough
                        if (summaryHeightStatistics == null) {
                            summaryHeightStatistics = new DescriptiveStatistics();
                            summaryStandStatistics = new DescriptiveStatistics();
                            heightBasin2ValueMap.put(netNum, summaryHeightStatistics);
                            standBasin2ValueMap.put(netNum, summaryStandStatistics);
                        }
                        summaryHeightStatistics.addValue(chmDouble);
                        summaryStandStatistics.addValue(standDouble);
                    }
                }

            }
            pm.worked(1);
        }
        pm.done();

        /*
         * create the structure for the output attributes and insert the summary statistics
         * as attributes
         */
        FeatureExtender ext = new FeatureExtender(inNetPoints.getSchema(),
                new String[] { LWFields.VOLUME, LWFields.MEDIAN }, new Class[] { Double.class, Double.class });
        List<SimpleFeature> inNetworkPointsList = FeatureUtilities.featureCollectionToList(inNetPoints);
        DefaultFeatureCollection finalNetworkPointsFC = new DefaultFeatureCollection();
        // reused holder for the grid column/row of each network point
        final java.awt.Point colRow = new java.awt.Point();
        for (SimpleFeature inPointFeature : inNetworkPointsList) {
            Geometry geometry = (Geometry) inPointFeature.getDefaultGeometry();
            Coordinate coordinate = geometry.getCoordinate();
            CoverageUtilities.colRowFromCoordinate(coordinate, gridGeometry, colRow);
            int netnum = netnumBasinsIter.getSample(colRow.x, colRow.y, 0);

            DescriptiveStatistics summaryHeightStatistics = heightBasin2ValueMap.get(netnum);
            double medianHeight = 0.0;
            if (summaryHeightStatistics != null) {
                medianHeight = summaryHeightStatistics.getPercentile(50);
            }

            DescriptiveStatistics summaryStandStatistics = standBasin2ValueMap.get(netnum);
            double sumStand = 0.0;
            if (summaryStandStatistics != null) {
                sumStand = summaryStandStatistics.getSum();
            }

            // value order matches the attribute order given to the extender: VOLUME, MEDIAN
            SimpleFeature newPointFeature = ext.extendFeature(inPointFeature,
                    new Object[] { sumStand, medianHeight });
            finalNetworkPointsFC.add(newPointFeature);
        }
        outNetPoints = finalNetworkPointsFC;
    } finally {
        // the iterators are not used past this point; let them discard their internal data
        netnumBasinsIter.done();
        connectivityIter.done();
        chmIter.done();
        standIter.done();
    }
}

From source file:org.jobscheduler.dashboard.service.SchedulerJobService.java

/**
 * Get statistics for JobName/*ww w .  java2 s  .c  om*/
 * 
 * @param spoolerId
 * @param jobName
 * @return
 */
public SchedulerJobStatsDTO getStatsPerJobName(String spoolerId, String jobName, Pageable pageable) {
    SchedulerJobStatsDTO dto = new SchedulerJobStatsDTO();
    dto.setJobName(jobName);
    dto.setSpoolerId(spoolerId);

    // Number of executed jobs
    Long executedTimes = schedulerHistoryRepository.countBySpoolerIdAndJobName(spoolerId, jobName);
    dto.setExecutedTimes(executedTimes);

    // / Retrieve last executed jobs for a job name (max 100)
    List<SchedulerHistory> schedulerHistories = schedulerHistoryRepository.findBySpoolerIdAndJobName(spoolerId,
            jobName, pageable);
    dto.setNbJobsLastExecutedTimes(schedulerHistories.size());

    DescriptiveStatistics errorStats = new DescriptiveStatistics();
    DescriptiveStatistics workingTimeStats = new DescriptiveStatistics();
    SerieDTO workingTimes = new SerieDTO();
    SerieDTO meanWorkingTimes = new SerieDTO();
    List<PointDTO> points = new ArrayList<PointDTO>();
    List<PointDTO> meanWorkingTimePoints = new ArrayList<PointDTO>();
    workingTimes.setKey("Working time");
    workingTimes.setValues(points);
    meanWorkingTimes.setKey("Mean working time");
    meanWorkingTimes.setValues(meanWorkingTimePoints);

    DateTime startDateTime = null;
    DateTime endDateTime = null;
    for (SchedulerHistory schedulerHistory : schedulerHistories) {
        if ((schedulerHistory.getEndTime() != null) && (schedulerHistory.getEndTime().getTime() >= 0)) {
            endDateTime = new DateTime(schedulerHistory.getEndTime().getTime());
            startDateTime = new DateTime(schedulerHistory.getStartTime().getTime());
            long workingTime = endDateTime.getMillis() - startDateTime.getMillis();
            workingTimeStats.addValue(workingTime);
            points.add(new PointDTO(startDateTime.getMillis(), (long) workingTime));
            errorStats.addValue(0);
        } else {
            errorStats.addValue(1);
        }
    }
    dto.setLastStartDateTime(startDateTime);
    dto.setLastEndDateTime(endDateTime);
    dto.setMeanWorkingTime(workingTimeStats.getMean());

    for (SchedulerHistory schedulerHistory : schedulerHistories) {
        startDateTime = new DateTime(schedulerHistory.getStartTime().getTime());
        meanWorkingTimePoints.add(new PointDTO(startDateTime.getMillis(), (long) workingTimeStats.getMean()));
    }

    List<SerieDTO> seriesDTO = new ArrayList<SerieDTO>();
    seriesDTO.add(workingTimes);
    seriesDTO.add(meanWorkingTimes);
    dto.setWorkingTime(seriesDTO);

    schedulerJobRepository.findBySpoolerId(spoolerId);
    return dto;
}