Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getMean

Introduction

In this page you can find the example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getMean.

Prototype

public double getMean()

Source Link

Document

Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm"> arithmetic mean </a> of the available values

Usage

From source file:org.cirdles.ludwig.isoplot3.Means.java

/**
 * Ludwig's WeightedAverage and assumes ConstExtErr = true since all
 * possible values are returned and caller can decide
 *
 * @param inValues as double[] with length nPts
 * @param inErrors as double[] with length nPts
 * @param canReject// w  w w .  j a  v a2  s .co  m
 * @param canTukeys
 * @return double[7][]{mean, sigmaMean, err68, err95, MSWD, probability, externalFlag}, {values
 * with rejected as 0.0}.  externalFlag = 1.0 for external uncertainty, 0.0 for internal
 */
public static double[][] weightedAverage(double[] inValues, double[] inErrors, boolean canReject,
        boolean canTukeys) {

    double[] values = inValues.clone();
    double[] errors = inErrors.clone();

    double[][] retVal = new double[][] { { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, {} };

    // check precondition of same size values and errors and at least 3 points
    int nPts = values.length;
    int nN = nPts;
    int count = 0;

    // where does this come from??
    boolean hardRej = false;

    if ((nPts == errors.length) && nPts > 2) {
        // proceed
        double[] inverseVar = new double[nPts];
        double[] wtdResid = new double[nPts];
        double[] yy;
        double[] iVarY;
        double[] tbX = new double[nPts];

        double[][] wRejected = new double[nPts][2];

        for (int i = 0; i < nPts; i++) {
            inverseVar[i] = 1.0 / Math.pow(errors[i], 2);
        }

        double intMean = 0.0;
        double MSWD = 0.0;
        double intSigmaMean = 0.0;
        double probability = 0.0;
        double intMeanErr95 = 0.0;
        double intErr68 = 0.0;

        double extMean = 0.0;
        double extMeanErr95 = 0.0;
        double extMeanErr68 = 0.0;
        double extSigma = 0.0;

        double biWtMean = 0.0;
        double biWtSigma = 0.0;

        boolean reCalc;

        // entry point for RECALC goto - consider another private method?
        do {
            reCalc = false;

            extSigma = 0.0;
            double weight = 0.0;
            double sumWtdRatios = 0.0;
            double q = 0.0;
            count++;

            for (int i = 0; i < nPts; i++) {
                if (values[i] * errors[i] != 0.0) {
                    weight += inverseVar[i];
                    sumWtdRatios += inverseVar[i] * values[i];
                    q += inverseVar[i] * Math.pow(values[i], 2);
                }
            }

            int nU = nN - 1;// ' Deg. freedom
            TDistribution studentsT = new TDistribution(nU);
            // see https://stackoverflow.com/questions/21730285/calculating-t-inverse
            // for explanation of cutting the tail mass in two to get agreement with Excel two-tail
            double t68 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.6826) / 2.0));
            double t95 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.95) / 2));

            intMean = sumWtdRatios / weight;//  ' "Internal" error of wtd average

            double sums = 0.0;
            for (int i = 0; i < nPts; i++) {
                if (values[i] * errors[i] != 0.0) {
                    double resid = values[i] - intMean;//  ' Simple residual
                    wtdResid[i] = resid / errors[i];// ' Wtd residual
                    double wtdR2 = Math.pow(wtdResid[i], 2);//' Square of wtd residual
                    sums += wtdR2;
                }
            }
            sums = Math.max(sums, 0.0);

            MSWD = sums / nU;//  ' Mean square of weighted deviates
            intSigmaMean = Math.sqrt(1.0 / weight);

            // http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/distribution/FDistribution.html
            FDistribution fdist = new FDistribution(nU, 1E9);
            probability = 1.0 - fdist.cumulativeProbability(MSWD);//     ChiSquare(.MSWD, (nU))
            intMeanErr95 = intSigmaMean * (double) (probability >= 0.3 ? 1.96 : t95 * Math.sqrt(MSWD));
            intErr68 = intSigmaMean * (double) (probability >= 0.3 ? 0.9998 : t68 * Math.sqrt(MSWD));

            extMean = 0.0;
            extMeanErr95 = 0.0;
            extMeanErr68 = 0.0;

            // need to find external uncertainty
            List<Double> yyList = new ArrayList<>();
            List<Double> iVarYList = new ArrayList<>();
            if ((probability < SQUID_MINIMUM_PROBABILITY) && (MSWD > 1.0)) {
                // Find the MLE constant external variance
                nN = 0;
                for (int i = 0; i < nPts; i++) {
                    if (values[i] != 0.0) {
                        yyList.add(values[i]);
                        iVarYList.add(errors[i] * errors[i]);
                        nN++;
                    }
                }
                // resize arrays
                yy = yyList.stream().mapToDouble(Double::doubleValue).toArray();
                iVarY = iVarYList.stream().mapToDouble(Double::doubleValue).toArray();

                // call secant method
                double[] wtdExtRtsec = wtdExtRTSEC(0, 10.0 * intSigmaMean * intSigmaMean, yy, iVarY);

                // check for failure
                if (wtdExtRtsec[3] == 0.0) {
                    extMean = wtdExtRtsec[1];
                    extSigma = Math.sqrt(wtdExtRtsec[0]);

                    studentsT = new TDistribution(2 * nN - 2);
                    extMeanErr95 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.95) / 2.0))
                            * wtdExtRtsec[2];

                } else if (MSWD > 4.0) { //Failure of RTSEC algorithm because of extremely high MSWD
                    DescriptiveStatistics stats = new DescriptiveStatistics(yy);
                    extSigma = stats.getStandardDeviation();
                    extMean = stats.getMean();
                    extMeanErr95 = t95 * extSigma / Math.sqrt(nN);
                } else {
                    extSigma = 0.0;
                    extMean = 0.0;
                    extMeanErr95 = 0.0;
                }

                extMeanErr68 = t68 / t95 * extMeanErr95;
            }

            if (canReject && (probability < SQUID_MINIMUM_PROBABILITY)) {
                // GOSUB REJECT
                double wtdAvg = 0.0;
                if (extSigma != 0.0) {
                    wtdAvg = extMean;
                } else {
                    wtdAvg = intMean;
                }

                //  reject outliers
                int n0 = nN;

                for (int i = 0; i < nPts; i++) {
                    if ((values[i] != 0.0) && (nN > 0.85 * nPts)) { //  Reject no more than 30% of ratios
                        // Start rej. tolerance at 2-sigma, increase slightly each pass.
                        double pointError = 2.0 * Math.sqrt(errors[i] * errors[i] + extSigma * extSigma);
                        // 2-sigma error of point being tested
                        double totalError = Math
                                .sqrt(pointError * pointError + (4.0 * extMeanErr68 * extMeanErr68));
                        // 1st-pass tolerance is 2-sigma; 2nd is 2.25-sigma; 3rd is 2.5-sigma.
                        double tolerance = (1.0 + (double) (count - 1.0) / 4.0) * totalError;
                        if (hardRej) {
                            tolerance = tolerance * 1.25;
                        }
                        // 1st-pass tolerance is 2-sigma; 2nd is 2.5-sigma; 3rd is 3-sigma...
                        q = values[i] - wtdAvg;

                        if ((Math.abs(q) > tolerance) && nN > 2) {
                            nN--;
                            wRejected[i][0] = values[i];
                            values[i] = 0.0;
                            wRejected[i][1] = errors[i];
                            errors[i] = 0.0;
                        } // check tolerance

                    } //  Reject no more than 30% of ratios
                } // nPts loop               

                reCalc = (nN < n0);
            } // canReject test
        } while (reCalc);

        if (canTukeys) { // March 2018 not finished as not sure where used
            System.arraycopy(values, 0, tbX, 0, nPts);

            double[] tukey = SquidMathUtils.tukeysBiweight(tbX, 6);
            biWtMean = tukey[0];
            biWtSigma = tukey[1];
            DescriptiveStatistics stats = new DescriptiveStatistics(tbX);
            double biWtErr95Median = stats.getPercentile(50);

            double median = median(tbX);
            double medianConf = medianConfLevel(nPts);
            double medianPlusErr = medianUpperLim(tbX) - median;
            double medianMinusErr = median - medianLowerLim(tbX);
        }

        // determine whether to return internal or external
        if (extMean != 0.0) {
            retVal = new double[][] { { extMean, extSigma, extMeanErr68, extMeanErr95, MSWD, probability, 1.0 },
                    // contains zeroes for each reject
                    values };
        } else {
            retVal = new double[][] { { intMean, intSigmaMean, intErr68, intMeanErr95, MSWD, probability, 0.0 },
                    // contains zeroes for each reject
                    values };
        }

    }

    return retVal;
}

From source file:org.commoncrawl.mapred.ec2.postprocess.deduper.DeduperUtils.java

/** 
 * /* w  w w  .j  a v  a2 s. c  om*/
 * @param args
 */
public static void main(String[] args) throws IOException {

    URLFPBloomFilter filter = new URLFPBloomFilter(JSONSetBuilder.NUM_ELEMENTS,
            JSONSetBuilder.NUM_HASH_FUNCTIONS, JSONSetBuilder.NUM_BITS);
    DescriptiveStatistics filterClearStats = new DescriptiveStatistics();
    for (int i = 0; i < 1000; ++i) {
        long timeStart = System.nanoTime();
        filter.clear();
        long timeEnd = System.nanoTime();
        filterClearStats.addValue(timeEnd - timeStart);
    }
    System.out.println("Mean Clear Time:" + filterClearStats.getMean());

    System.out.println("size:" + BINOMIAL_COFF);
    for (int j = 0; j < BINOMIAL_COFF; ++j) {
        int value = patternArray[j];
        System.out.print("value:" + value + " ");
        for (int i = 5; i >= 0; --i) {
            System.out.print(((value & (1 << i)) != 0) ? '1' : '0');
        }
        System.out.print("  Key MSBLen:" + Integer.toString(patternKeyMSBits[j]) + "\n");
    }
    validateGenerator();

    long key1 = new BitBuilder().on(10).off(1).on(53).bits();
    long key2 = new BitBuilder().on(10).off(4).on(50).bits();
    long key3 = new BitBuilder().on(10).off(4).on(47).off(3).bits();
    long key4 = new BitBuilder().off(10).on(4).off(47).on(3).bits();
    long key5 = new BitBuilder().off(10).on(4).off(47).on(1).off(2).bits();

    Assert.assertTrue(SimHash.hammingDistance(key1, key2) == 3);
    Assert.assertTrue(SimHash.hammingDistance(key1, key3) != 3);
    Assert.assertTrue(SimHash.hammingDistance(key2, key3) == 3);
    Assert.assertTrue(SimHash.hammingDistance(key1, key4) > 3);
    Assert.assertTrue(SimHash.hammingDistance(key2, key4) > 3);
    Assert.assertTrue(SimHash.hammingDistance(key3, key4) > 3);
    Assert.assertTrue(SimHash.hammingDistance(key4, key5) <= 3);

    ImmutableList<DeduperValue> values = new ImmutableList.Builder<DeduperValue>()

            .add(new DeduperValue(key1, 1000, 2000, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.1"), 1000,
                    new TextBytes("http://adomain.com/")))
            .add(new DeduperValue(key2, 1001, 2001, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://bdomain.com/")))
            .add(new DeduperValue(key3, 1002, 2002, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.3"), 1000,
                    new TextBytes("http://cdomain.com/")))
            .add(new DeduperValue(key4, 1003, 2003, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.4"), 1000,
                    new TextBytes("http://ddomain.com/")))
            .add(new DeduperValue(key5, 1004, 2004, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.5"), 1000,
                    new TextBytes("http://edomain.com/")))
            .build();

    SimhashMatcher unionFinder = new SimhashMatcher();

    final Multimap<String, Long> rootDomainToDupes = TreeMultimap.create();
    // collect all json set representations ... 
    final ArrayList<TextBytes> jsonSets = new ArrayList<TextBytes>();

    unionFinder.emitMatches(3, values.iterator(), new OutputCollector<TextBytes, TextBytes>() {

        @Override
        public void collect(TextBytes key, TextBytes value) throws IOException {
            System.out.println("Root:" + key + " JSON: " + value.toString());

            populateTestJSONSetData(rootDomainToDupes, key, value);
            // collect all json sets for later disjoint-set join 
            jsonSets.add(value);
        }
    }, null);

    ImmutableList<Long> hashSuperSet1 = ImmutableList.of(2000L, 2001L, 2002L);
    ImmutableList<Long> hashSuperSet2 = ImmutableList.of(2003L, 2004L);

    Assert.assertTrue(rootDomainToDupes.get("adomain.com").containsAll(hashSuperSet1));
    Assert.assertTrue(rootDomainToDupes.get("bdomain.com").containsAll(hashSuperSet1));
    Assert.assertTrue(rootDomainToDupes.get("cdomain.com").containsAll(hashSuperSet1));

    Assert.assertTrue(rootDomainToDupes.get("ddomain.com").containsAll(hashSuperSet2));
    Assert.assertTrue(rootDomainToDupes.get("edomain.com").containsAll(hashSuperSet2));

    ImmutableList<DeduperValue> secondSetValues = new ImmutableList.Builder<DeduperValue>()

            .add(new DeduperValue(key1, 1000, 2000, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://adomain.com/")))
            .add(new DeduperValue(key1, 1007, 2007, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://z1domain.com/")))
            .add(new DeduperValue(key2, 1008, 2008, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://z2domain.com/")))
            .add(new DeduperValue(key3, 1009, 2009, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://z3domain.com/")))
            .build();

    unionFinder.emitMatches(3, secondSetValues.iterator(), new OutputCollector<TextBytes, TextBytes>() {

        @Override
        public void collect(TextBytes key, TextBytes value) throws IOException {
            System.out.println("Root:" + key + " JSON: " + value.toString());
            // collect all json sets for later disjoint-set join 
            jsonSets.add(value);
        }
    }, null);

    SetUnionFinder unionFinder2 = new SetUnionFinder();

    // union all json sets ... 
    unionFinder2.union(jsonSets.iterator());

    // ok emit union of sets ... 
    unionFinder2.emit(new TextBytes("test"), new OutputCollector<TextBytes, TextBytes>() {

        @Override
        public void collect(TextBytes key, TextBytes value) throws IOException {
            System.out.println("Root:" + key + " JSON: " + value.toString());
        }
    }, null);

}

From source file:org.commoncrawl.mapred.pipelineV3.domainmeta.fuzzydedupe.CrossDomainDupesReducer.java

@Override
public void reduce(TextBytes key, Iterator<TextBytes> values, OutputCollector<TextBytes, TextBytes> output,
        Reporter reporter) throws IOException {

    filter.clear();/*from ww w.  j a va 2s  . c o m*/
    double crossDomainDupesCount = 0;
    double totalHitsCount = 0;
    double uniqueRootDomainsCount = 0;
    double uniqueIPs = 0;
    double validDupePatternMatches = 0;

    URLFPV2 rootFP = URLUtils.getURLFPV2FromHost(key.toString());
    URLFPV2 fp = new URLFPV2();
    int sampleCount = 0;
    ArrayList<Integer> ipAddresses = new ArrayList<Integer>();
    JsonArray thisHostsDupes = new JsonArray();
    DescriptiveStatistics lengthStats = new DescriptiveStatistics();

    while (values.hasNext()) {
        JsonArray jsonArray = parser.parse(values.next().toString()).getAsJsonArray();
        for (JsonElement elem : jsonArray) {
            totalHitsCount++;
            fp.setRootDomainHash(elem.getAsJsonObject().get("dh").getAsLong());
            if (fp.getRootDomainHash() != rootFP.getRootDomainHash()) {
                crossDomainDupesCount++;
                fp.setDomainHash(fp.getRootDomainHash());
                fp.setUrlHash(fp.getRootDomainHash());
                // track length average ....
                lengthStats.addValue(elem.getAsJsonObject().get("length").getAsInt());

                if (!filter.isPresent(fp)) {
                    uniqueRootDomainsCount++;
                    filter.add(fp);
                    if (sampleCount < samples.length) {
                        String url = elem.getAsJsonObject().get("url").getAsString();
                        GoogleURL urlObject = new GoogleURL(url);
                        if (knownValidDupesPatterns.matcher(urlObject.getCanonicalURL()).find()) {
                            validDupePatternMatches++;
                        }
                        samples[sampleCount++] = url;
                    }
                }
            } else {
                thisHostsDupes.add(elem);
            }

            int ipAddress = elem.getAsJsonObject().get("ip").getAsInt();

            fp.setRootDomainHash(ipAddress);
            fp.setDomainHash(ipAddress);
            fp.setUrlHash(ipAddress);

            if (!filter.isPresent(fp)) {
                uniqueIPs++;
                filter.add(fp);
                ipAddresses.add(ipAddress);
            }
        }
    }

    if (totalHitsCount > 15 && crossDomainDupesCount >= 2) {

        double otherDomainToLocalScore = otherDomainToLocalDomainScore(totalHitsCount, crossDomainDupesCount);
        double spamIPScore = spamHostScore(totalHitsCount, crossDomainDupesCount, uniqueIPs);

        if (otherDomainToLocalScore >= .50 || spamIPScore > .50) {
            JsonObject objectOut = new JsonObject();

            objectOut.addProperty("ratio", (crossDomainDupesCount / totalHitsCount));
            objectOut.addProperty("totalHits", totalHitsCount);
            objectOut.addProperty("crossDomainDupes", crossDomainDupesCount);
            objectOut.addProperty("uniqueRootDomains", uniqueRootDomainsCount);
            objectOut.addProperty("otherDomainToLocalScore", otherDomainToLocalScore);
            objectOut.addProperty("spamIPScore", spamIPScore);
            objectOut.addProperty("validDupeMatches", validDupePatternMatches);
            objectOut.addProperty("content-len-mean", lengthStats.getMean());
            objectOut.addProperty("content-len-geo-mean", lengthStats.getGeometricMean());

            for (int i = 0; i < sampleCount; ++i) {
                objectOut.addProperty("sample-" + i, samples[i]);
            }
            // compute path edit distance ...
            if (sampleCount > 1) {
                int sampleEditDistanceSize = Math.min(sampleCount, 5);
                DescriptiveStatistics stats = new DescriptiveStatistics();
                for (int j = 0; j < sampleEditDistanceSize; ++j) {
                    for (int k = 0; k < sampleEditDistanceSize; ++k) {
                        if (k != j) {
                            GoogleURL urlObjectA = new GoogleURL(samples[j]);
                            GoogleURL urlObjectB = new GoogleURL(samples[k]);

                            if (urlObjectA.getPath().length() < 100 && urlObjectB.getPath().length() < 100) {
                                stats.addValue(StringUtils.getLevenshteinDistance(urlObjectA.getPath(),
                                        urlObjectB.getPath()));
                            }
                        }
                    }
                }
                if (stats.getMean() != 0.0) {
                    objectOut.addProperty("lev-distance-mean", stats.getMean());
                    objectOut.addProperty("lev-distance-geomean", stats.getGeometricMean());
                }
            }

            JsonArray ipAddressArray = new JsonArray();
            for (int j = 0; j < Math.min(1000, ipAddresses.size()); ++j) {
                ipAddressArray.add(new JsonPrimitive(ipAddresses.get(j)));
            }
            if (ipAddresses.size() != 0) {
                objectOut.add("ipList", ipAddressArray);
            }
            objectOut.add("thisHostDupes", thisHostsDupes);

            output.collect(key, new TextBytes(objectOut.toString()));
        }
    }

}

From source file:org.cse.visiri.app.algoevaluation.DistributionEval.java

public void EvaluateDistribution(QueryDistribution dist, String algoname) {
    Map<String, Integer> allInfo = new TreeMap<String, Integer>();
    Map<String, Integer> nodeInfo = new TreeMap<String, Integer>();
    Map<String, Double> nodeCosts = new TreeMap<String, Double>();
    for (Query q : dist.getQueryAllocation().keySet()) {
        String node = dist.getQueryAllocation().get(q);
        if (!allInfo.containsKey(node)) {
            allInfo.put(node, 0);/*ww w  . j  av a 2 s.c  om*/
        }
        int val = allInfo.get(node);
        allInfo.put(node, val + 1);
        if (node.startsWith(NODE_PREFIX)) {
            if (!nodeInfo.containsKey(node)) {
                nodeInfo.put(node, 0);
                nodeCosts.put(node, 0.0);
            }
            val = nodeInfo.get(node);
            nodeInfo.put(node, val + 1);
            nodeCosts.put(node, nodeCosts.get(node) + q.getCost());
        }
    }

    DescriptiveStatistics stat = new DescriptiveStatistics();
    DescriptiveStatistics costStat = new DescriptiveStatistics();
    System.out.println("Query counts : ");
    for (String node : nodeInfo.keySet()) {
        System.out.println(node + " : " + allInfo.get(node));
        stat.addValue(nodeInfo.get(node));
        costStat.addValue(nodeCosts.get(node));
    }

    System.out.println();
    double mean = stat.getMean();
    double stdDev = Math.sqrt(stat.getPopulationVariance());
    double varCoef = stdDev / mean;
    System.out.println("mean : " + mean);
    System.out.println("stdDev : " + stdDev);
    System.out.println("Coefficient of var : " + varCoef);

    System.out.println("\nCosts :");
    mean = costStat.getMean();
    stdDev = Math.sqrt(costStat.getPopulationVariance());
    varCoef = stdDev / mean;
    System.out.println("mean : " + mean);
    System.out.println("stdDev : " + stdDev);
    System.out.println("Coefficient of var : " + varCoef);

    //calculate event duplication
    Map<String, Set<String>> eventMap = new TreeMap<String, Set<String>>();
    for (Query q : dist.getQueryAllocation().keySet()) {
        String targetNode = dist.getQueryAllocation().get(q);

        for (StreamDefinition def : q.getInputStreamDefinitionsList()) {
            if (!eventMap.containsKey(def.getStreamId())) {
                eventMap.put(def.getStreamId(), new HashSet<String>());
            }
            eventMap.get(def.getStreamId()).add(targetNode);
        }
    }

    stat = new DescriptiveStatistics();
    for (Set<String> nodes : eventMap.values()) {
        stat.addValue(nodes.size());
    }

    double avg = stat.getMean();

    System.out.println();
    System.out.println("Avg. event duplication " + avg);

    try {
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_algoeval.txt", true)));
        out.println(stdDev);
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    try {
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_eventDup.txt", true)));
        out.println(avg);
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:org.deidentifier.arx.aggregates.StatisticsBuilder.java

/**
 * Returns summary statistics for all attributes.
 * /*from www .  j a  v  a  2 s  .c  om*/
 * @param listwiseDeletion A flag enabling list-wise deletion
 * @return
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <T> Map<String, StatisticsSummary<?>> getSummaryStatistics(boolean listwiseDeletion) {

    // Reset stop flag
    interrupt.value = false;

    Map<String, DescriptiveStatistics> statistics = new HashMap<String, DescriptiveStatistics>();
    Map<String, StatisticsSummaryOrdinal> ordinal = new HashMap<String, StatisticsSummaryOrdinal>();
    Map<String, DataScale> scales = new HashMap<String, DataScale>();
    Map<String, GeometricMean> geomean = new HashMap<String, GeometricMean>();

    // Detect scales
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Meta
        String attribute = handle.getAttributeName(col);
        DataType<?> type = handle.getDataType(attribute);

        // Scale
        DataScale scale = type.getDescription().getScale();

        // Try to replace nominal scale with ordinal scale based on base data type
        if (scale == DataScale.NOMINAL && handle.getGeneralization(attribute) != 0) {
            if (!(handle.getBaseDataType(attribute) instanceof ARXString) && getHierarchy(col, true) != null) {
                scale = DataScale.ORDINAL;
            }
        }

        // Store
        scales.put(attribute, scale);
        statistics.put(attribute, new DescriptiveStatistics());
        geomean.put(attribute, new GeometricMean());
        ordinal.put(attribute, getSummaryStatisticsOrdinal(handle.getGeneralization(attribute),
                handle.getDataType(attribute), handle.getBaseDataType(attribute), getHierarchy(col, true)));
    }

    // Compute summary statistics
    for (int row = 0; row < handle.getNumRows(); row++) {

        // Check, if we should include this row
        boolean include = true;
        if (listwiseDeletion) {
            for (int col = 0; col < handle.getNumColumns(); col++) {
                if (handle.isOutlier(row) || DataType.isNull(handle.getValue(row, col))) {
                    include = false;
                    break;
                }
            }
        }

        // Check
        checkInterrupt();

        // If yes, add
        if (include) {

            // For each column
            for (int col = 0; col < handle.getNumColumns(); col++) {

                // Meta
                String value = handle.getValue(row, col);
                String attribute = handle.getAttributeName(col);
                DataType<?> type = handle.getDataType(attribute);

                // Analyze
                if (!DataType.isAny(value) && !DataType.isNull(value)) {
                    ordinal.get(attribute).addValue(value);
                    if (type instanceof DataTypeWithRatioScale) {
                        double doubleValue = ((DataTypeWithRatioScale) type).toDouble(type.parse(value));
                        statistics.get(attribute).addValue(doubleValue);
                        geomean.get(attribute).increment(doubleValue + 1d);
                    }
                }
            }
        }
    }

    // Convert
    Map<String, StatisticsSummary<?>> result = new HashMap<String, StatisticsSummary<?>>();
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Check
        checkInterrupt();

        // Depending on scale
        String attribute = handle.getAttributeName(col);
        DataScale scale = scales.get(attribute);
        DataType<T> type = (DataType<T>) handle.getDataType(attribute);
        ordinal.get(attribute).analyze();
        if (scale == DataScale.NOMINAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute, new StatisticsSummary<T>(DataScale.NOMINAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode())));
        } else if (scale == DataScale.ORDINAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute,
                    new StatisticsSummary<T>(DataScale.ORDINAL, stats.getNumberOfMeasures(), stats.getMode(),
                            type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()),
                            stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                            type.parse(stats.getMax())));
        } else if (scale == DataScale.INTERVAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            boolean isPeriod = type.getDescription().getWrappedClass() == Date.class;

            // TODO: Something is wrong with commons math's kurtosis
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            result.put(attribute, new StatisticsSummary<T>(DataScale.INTERVAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), isPeriod, true), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), isPeriod, true),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, isPeriod, false), toValue(type, stddev), stddev,
                    toString(type, range, isPeriod, false), toValue(type, range),
                    stats2.getMax() - stats2.getMin(), toString(type, kurtosis, isPeriod, false),
                    toValue(type, kurtosis), kurtosis));
        } else if (scale == DataScale.RATIO) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            GeometricMean geo = geomean.get(attribute);

            // TODO: Something is wrong with commons math's kurtosis
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            result.put(attribute, new StatisticsSummary<T>(DataScale.RATIO, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), false, false), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), false, false),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, false, false), toValue(type, stddev), stddev,
                    toString(type, range, false, false), toValue(type, range), range,
                    toString(type, kurtosis, false, false), toValue(type, kurtosis), kurtosis,
                    toString(type, geo.getResult() - 1d, false, false), toValue(type, geo.getResult() - 1d),
                    stats2.getGeometricMean()));
        }
    }

    return result;
}

From source file:org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGeneratorHTML.java

@Override
protected void addRow(QueryData queryData) {
    sb.append("<tr>\n");

    // column: ID
    sb.append("<td>" + queryData.id + "</td>\n");

    // column: SPARQL query
    sb.append("<td><pre>" + queryData.query.toString().replace("<", "&lt;").replace(">", "&gt;")
            + "</pre></td>\n");

    // column: SPARQL query type
    sb.append("<td>" + queryData.queryType + "</td>\n");

    // query graph
    //      QueryToGraphExporter.exportYedGraph(queryData.query, new File(""));
    //      sb.append("<td><img src=\"" + graphFile.getPath() + "\" alt=\"query graph\"></td>\n");

    // column: depth
    sb.append("<td class='number'>" + queryData.maxTreeDepth + "</td>\n");

    // column: #instances
    sb.append("<td class='number'>" + queryData.nrOfInstances + "</td>\n");

    // columns: optimal CBD sizes (min, max, avg)
    DescriptiveStatistics optimalCBDSizeStats = queryData.optimalCBDSizeStats;
    sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMin() + "</td>\n");
    sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMax() + "</td>\n");
    sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMean() + "</td>\n");

    // columns: generic CBD sizes (min, max, avg)
    DescriptiveStatistics genericCBDSizeStats = queryData.defaultCBDSizesStats;
    sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMin() + "</td>\n");
    sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMax() + "</td>\n");
    sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMean() + "</td>\n");

    sb.append("</tr>\n");
}

From source file:org.dllearner.algorithms.qtl.experiments.EvaluationDataset.java

public void analyze() {
    ConciseBoundedDescriptionGenerator cbdGen = new SymmetricConciseBoundedDescriptionGeneratorImpl(
            ks.getQueryExecutionFactory());

    String separator = "\t";
    String tsv = sparqlQueries.entrySet().stream().map(entry -> {
        StringBuilder sb = new StringBuilder();

        // ID//from w w  w.  j a  v a  2 s .c  o m
        String id = entry.getKey();
        sb.append(id).append(separator);

        // query
        Query q = entry.getValue();
        sb.append(q.toString().replace("\n", " "));
        try {
            // get query result
            List<String> result = SPARQLUtils.getResult(ks.getQueryExecutionFactory(), q);
            sb.append(separator).append(result.size());

            // query type
            SPARQLUtils.QueryType queryType = SPARQLUtils.getQueryType(q);
            sb.append(separator).append(queryType.name());

            // check CBD sizes and time
            Monitor mon = MonitorFactory.getTimeMonitor("CBD");
            mon.reset();
            DescriptiveStatistics sizeStats = new DescriptiveStatistics();
            result.stream().map(r -> {
                System.out.println(r);
                mon.start();
                Model cbd = cbdGen.getConciseBoundedDescription(r, 2);
                mon.stop();
                return cbd;
            }).map(Model::size).forEach(sizeStats::addValue);

            // show min., max. and avg. size
            sb.append(separator).append(sizeStats.getMin());
            sb.append(separator).append(sizeStats.getMax());
            sb.append(separator).append(sizeStats.getMean());

            // show min., max. and avg. CBD time
            sb.append(separator).append(mon.getTotal());
            sb.append(separator).append(mon.getMin());
            sb.append(separator).append(mon.getMax());
            sb.append(separator).append(mon.getAvg());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return sb;
    }).collect(Collectors.joining("\n"));

    System.out.println(tsv);
}

From source file:org.dllearner.algorithms.qtl.experiments.PRConvergenceExperiment.java

public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval,
        HeuristicType[] measures) throws Exception {
    this.maxTreeDepth = maxTreeDepth;
    queryTreeFactory.setMaxDepth(maxTreeDepth);

    if (exampleInterval != null) {
        nrOfExamplesIntervals = exampleInterval;
    }/* w ww. ja  v  a2 s.  c om*/
    if (noiseInterval != null) {
        this.noiseIntervals = noiseInterval;
    }
    if (measures != null) {
        this.measures = measures;
    }

    boolean noiseEnabled = noiseIntervals.length > 1 || noiseInterval[0] > 0;
    boolean posOnly = noiseEnabled ? false : true;

    logger.info("Started QTL evaluation...");
    long t1 = System.currentTimeMillis();

    List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString())
            .collect(Collectors.toList());
    logger.info("#loaded queries: " + queries.size());

    // filter for debugging purposes
    queries = queries.stream().filter(q -> queriesToProcessTokens.stream().noneMatch(t -> !q.contains(t)))
            .collect(Collectors.toList());
    queries = queries.stream().filter(q -> queriesToOmitTokens.stream().noneMatch(t -> q.contains(t)))
            .collect(Collectors.toList());

    if (maxNrOfProcessedQueries == -1) {
        maxNrOfProcessedQueries = queries.size();
    }

    //      queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth));
    //      queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries));
    logger.info("#queries to process: " + queries.size());

    // generate examples for each query
    logger.info("precomputing pos. and neg. examples...");
    for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue;
        query2Examples.put(query, generateExamples(query, posOnly, noiseEnabled));
    }
    logger.info("precomputing pos. and neg. examples finished.");

    // check for queries that do not return any result (should not happen, but we never know)
    Set<String> emptyQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} empty queries.", emptyQueries.size());
    queries.removeAll(emptyQueries);

    // min. pos examples
    int min = 3;
    Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.size() < min).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} queries with < {} pos. examples.", emptyQueries.size(), min);
    queries.removeAll(lowNrOfExamplesQueries);
    queries = queries.subList(0, Math.min(80, queries.size()));

    final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length
            * noiseIntervals.length * queries.size();
    logger.info("#QTL runs: " + totalNrOfQTLRuns);

    final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0);

    // loop over heuristics
    for (final QueryTreeHeuristic heuristic : heuristics) {
        final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName();

        // loop over heuristics measures
        for (HeuristicType measure : this.measures) {
            final String measureName = measure.toString();
            heuristic.setHeuristicType(measure);

            double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

            // loop over number of positive examples
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                final int nrOfExamples = nrOfExamplesIntervals[i];

                // loop over noise value
                for (int j = 0; j < noiseIntervals.length; j++) {
                    final double noise = noiseIntervals[j];

                    // check if not already processed
                    File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".log");
                    File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".stats");

                    if (!override && logFile.exists() && statsFile.exists()) {
                        logger.info(
                                "Eval config already processed. For re-running please remove corresponding output files.");
                        continue;
                    }

                    FileAppender appender = null;
                    try {
                        appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false);
                        Logger.getRootLogger().addAppender(appender);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    logger.info("#examples: " + nrOfExamples + " noise: " + noise);

                    final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics();

                    MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset();
                    MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset();

                    ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads);

                    // indicates if the execution for some of the queries failed
                    final AtomicBoolean failed = new AtomicBoolean(false);

                    Set<String> queriesToProcess = new TreeSet<>(queries);
                    queriesToProcess.retainAll(query2Examples.entrySet().stream()
                            .filter(e -> e.getValue().correctPosExampleCandidates.size() >= nrOfExamples)
                            .map(e -> e.getKey()).collect(Collectors.toSet()));

                    // loop over SPARQL queries
                    for (final String sparqlQuery : queriesToProcess) {
                        CBDStructureTree cbdStructure = cbdStructureTree != null ? cbdStructureTree
                                : QueryUtils.getOptimalCBDStructure(QueryFactory.create(sparqlQuery));

                        tp.submit(() -> {
                            logger.info("CBD tree:" + cbdStructure.toStringVerbose());

                            // update max tree depth
                            this.maxTreeDepth = QueryTreeUtils.getDepth(cbdStructure);
                            logger.info("##############################################################");
                            logger.info("Processing query\n" + sparqlQuery);

                            // we repeat it n times with different permutations of examples
                            int nrOfPermutations = 1;

                            if (nrOfExamples >= query2Examples.get(sparqlQuery).correctPosExampleCandidates
                                    .size()) {
                                nrOfPermutations = 1;
                            }
                            for (int perm = 1; perm <= nrOfPermutations; perm++) {
                                logger.info("Run {}/{}", perm, nrOfPermutations);
                                try {
                                    ExamplesWrapper examples = getExamples(sparqlQuery, nrOfExamples,
                                            nrOfExamples, noise, cbdStructure);
                                    logger.info("pos. examples:\n"
                                            + Joiner.on("\n").join(examples.correctPosExamples));
                                    logger.info("neg. examples:\n"
                                            + Joiner.on("\n").join(examples.correctNegExamples));

                                    // write examples to disk
                                    File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery));
                                    dir.mkdirs();
                                    Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tp"),
                                            Charsets.UTF_8);
                                    Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tn"),
                                            Charsets.UTF_8);
                                    Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".fp"),
                                            Charsets.UTF_8);

                                    // compute baseline
                                    RDFResourceTree baselineSolution = applyBaseLine(examples,
                                            Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES);
                                    logger.info("Evaluating baseline...");
                                    Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise);
                                    logger.info("Baseline score:\n" + baselineScore);
                                    String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution,
                                            dataset.getBaseIRI(), dataset.getPrefixMapping());
                                    baselinePrecisionStats.addValue(baselineScore.precision);
                                    baselineRecallStats.addValue(baselineScore.recall);
                                    baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                    baselinePredAccStats.addValue(baselineScore.predAcc);
                                    baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                    // run QTL
                                    PosNegLPStandard lp = new PosNegLPStandard();
                                    lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                    lp.setNegativeExamples(examples.negExamplesMapping.keySet());
                                    //                                 QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                    QTL2DisjunctiveMultiThreaded la = new QTL2DisjunctiveMultiThreaded(lp, qef);
                                    la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                    la.setReasoner(dataset.getReasoner());
                                    la.setEntailment(Entailment.SIMPLE);
                                    la.setTreeFactory(queryTreeFactory);
                                    la.setPositiveExampleTrees(examples.posExamplesMapping);
                                    la.setNegativeExampleTrees(examples.negExamplesMapping);
                                    la.setNoise(noise);
                                    la.setHeuristic(heuristic);
                                    la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                    la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                    la.init();
                                    la.start();
                                    List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(
                                            la.getSolutions());

                                    //                              List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic);
                                    nrOfReturnedSolutionsStats.addValue(solutions.size());

                                    // the best returned solution by QTL
                                    EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                    logger.info("Got " + solutions.size() + " query trees.");
                                    //                                 logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription()));
                                    logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                    long runtimeBestSolution = la.getTimeBestSolutionFound();
                                    bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                    // convert to SPARQL query
                                    RDFResourceTree tree = bestSolution.getTree();
                                    tree = filter.apply(tree);
                                    String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree,
                                            dataset.getBaseIRI(), dataset.getPrefixMapping());

                                    // compute score
                                    Score score = computeScore(sparqlQuery, tree, noise);
                                    bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                    bestReturnedSolutionRecallStats.addValue(score.recall);
                                    bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                    bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                    bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                    logger.info(score.toString());

                                    // find the extensionally best matching tree in the list
                                    Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(
                                            solutions, sparqlQuery, noise, examples);
                                    EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore
                                            .getFirst();
                                    Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                    // position of best tree in list of solutions
                                    int positionBestScore = solutions.indexOf(bestMatchingTree);
                                    bestSolutionPositionStats.addValue(positionBestScore);

                                    Score bestScore = score;
                                    if (positionBestScore > 0) {
                                        logger.info(
                                                "Position of best covering tree in list: " + positionBestScore);
                                        logger.info("Best covering solution:\n"
                                                + render(bestMatchingTree.asEvaluatedDescription()));
                                        logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                        bestScore = bestMatchingScore;
                                        logger.info(bestMatchingScore.toString());
                                    } else {
                                        logger.info(
                                                "Best returned solution was also the best covering solution.");
                                    }
                                    bestSolutionRecallStats.addValue(bestScore.recall);
                                    bestSolutionPrecisionStats.addValue(bestScore.precision);
                                    bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                    bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                    bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                    for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                        if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                            Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"),
                                                    Charsets.UTF_8);
                                            break;
                                        }
                                    }

                                    String bestQuery = QueryFactory
                                            .create(QueryTreeUtils.toSPARQLQueryString(
                                                    filter.apply(bestMatchingTree.getTree()),
                                                    dataset.getBaseIRI(), dataset.getPrefixMapping()))
                                            .toString();

                                    if (write2DB) {
                                        write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery,
                                                baselineScore, heuristicName, measureName,
                                                QueryFactory.create(learnedSPARQLQuery).toString(), score,
                                                runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                    }

                                } catch (Exception e) {
                                    failed.set(true);
                                    logger.error("Error occured for query\n" + sparqlQuery, e);
                                    try {
                                        StringWriter sw = new StringWriter();
                                        PrintWriter pw = new PrintWriter(sw);
                                        e.printStackTrace(pw);
                                        Files.append(sparqlQuery + "\n" + sw.toString(),
                                                new File(benchmarkDirectory,
                                                        "failed-" + nrOfExamples + "-" + noise + "-"
                                                                + heuristicName + "-" + measureName + ".txt"),
                                                Charsets.UTF_8);
                                    } catch (IOException e1) {
                                        e1.printStackTrace();
                                    }
                                } finally {
                                    int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                    logger.info("***********Evaluation Progress:"
                                            + NumberFormat.getPercentInstance()
                                                    .format((double) cnt / totalNrOfQTLRuns)
                                            + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                                }
                            }
                        });
                    }

                    tp.shutdown();
                    tp.awaitTermination(12, TimeUnit.HOURS);

                    Logger.getRootLogger().removeAppender(appender);

                    if (!failed.get()) {
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;

                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;

                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;

                        result += "\nTime until best returned solution found:\n"
                                + bestReturnedSolutionRuntimeStats;

                        result += "\nPositions of best solution:\n"
                                + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;

                        result += "\nCBD generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal()
                                + "\n";
                        result += "CBD generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg()
                                + "\n";
                        result += "Tree generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal()
                                + "\n";
                        result += "Tree generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg()
                                + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";

                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise,
                                    bestReturnedSolutionFMeasureStats.getMean(),
                                    bestReturnedSolutionPrecisionStats.getMean(),
                                    bestReturnedSolutionRecallStats.getMean(),
                                    bestReturnedSolutionPredAccStats.getMean(),
                                    bestReturnedSolutionMathCorrStats.getMean(),
                                    bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(),
                                    bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(),
                                    bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(),
                                    baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(),
                                    baselineRecallStats.getMean(), baselinePredAccStats.getMean(),
                                    baselineMathCorrStats.getMean(),
                                    bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            String content = "###";
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content += separator + noiseInterval1;
            }
            content += "\n";
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content += nrOfExamplesIntervals[i];
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content += separator + data[i][j];
                }
                content += "\n";
            }

            File examplesVsNoise = new File(benchmarkDirectory,
                    "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content, examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }
    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms.");
}

From source file:org.dllearner.algorithms.qtl.experiments.QTLEvaluation.java

public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval,
        HeuristicType[] measures) throws Exception {
    this.maxTreeDepth = maxTreeDepth;
    queryTreeFactory.setMaxDepth(maxTreeDepth);

    if (exampleInterval != null) {
        nrOfExamplesIntervals = exampleInterval;
    }//from  w ww. j  ava2 s  . c  o  m
    if (noiseInterval != null) {
        this.noiseIntervals = noiseInterval;
    }
    if (measures != null) {
        this.measures = measures;
    }

    logger.info("Started QTL evaluation...");
    long t1 = System.currentTimeMillis();

    List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString())
            .collect(Collectors.toList());
    logger.info("#loaded queries: " + queries.size());

    // filter for debugging purposes
    queries = queries.stream().filter(q -> tokens.stream().noneMatch(t -> !q.contains(t)))
            .collect(Collectors.toList());

    if (maxNrOfProcessedQueries == -1) {
        maxNrOfProcessedQueries = queries.size();
    }

    //      queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth));
    //      queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries));
    logger.info("#queries to process: " + queries.size());

    // generate examples for each query
    logger.info("precomputing pos. and neg. examples...");
    final Map<String, ExampleCandidates> query2Examples = new HashMap<>();
    for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue;
        query2Examples.put(query, generateExamples(query));
    }
    logger.info("precomputing pos. and neg. examples finished.");

    // check for queries that do not return any result (should not happen, but we never know)
    Set<String> emptyQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} empty queries.", emptyQueries.size());
    queries.removeAll(emptyQueries);

    // min. pos examples
    Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.size() < 2).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} queries with < 2 pos. examples.", emptyQueries.size());
    queries.removeAll(lowNrOfExamplesQueries);

    final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length
            * noiseIntervals.length * queries.size();
    logger.info("#QTL runs: " + totalNrOfQTLRuns);

    final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0);

    // loop over heuristics
    for (final QueryTreeHeuristic heuristic : heuristics) {
        final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName();

        // loop over heuristics measures
        for (HeuristicType measure : this.measures) {
            final String measureName = measure.toString();
            heuristic.setHeuristicType(measure);

            double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

            // loop over number of positive examples
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                final int nrOfExamples = nrOfExamplesIntervals[i];

                // loop over noise value
                for (int j = 0; j < noiseIntervals.length; j++) {
                    final double noise = noiseIntervals[j];

                    // check if not already processed
                    File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".log");
                    File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".stats");

                    if (!override && logFile.exists() && statsFile.exists()) {
                        logger.info(
                                "Eval config already processed. For re-running please remove corresponding output files.");
                        continue;
                    }

                    FileAppender appender = null;
                    try {
                        appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false);
                        Logger.getRootLogger().addAppender(appender);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    logger.info("#examples: " + nrOfExamples + " noise: " + noise);

                    final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics();

                    MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset();
                    MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset();

                    ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads);

                    // indicates if the execution for some of the queries failed
                    final AtomicBoolean failed = new AtomicBoolean(false);

                    // loop over SPARQL queries
                    for (final String sparqlQuery : queries) {

                        tp.submit(() -> {

                            logger.info("##############################################################");
                            logger.info("Processing query\n" + sparqlQuery);

                            try {
                                ExamplesWrapper examples = query2Examples.get(sparqlQuery).get(nrOfExamples,
                                        nrOfExamples, noise);
                                logger.info(
                                        "pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples));
                                logger.info(
                                        "neg. examples:\n" + Joiner.on("\n").join(examples.correctNegExamples));

                                // write examples to disk
                                File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery));
                                dir.mkdirs();
                                Files.write(Joiner.on("\n").join(examples.correctPosExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tp"),
                                        Charsets.UTF_8);
                                Files.write(Joiner.on("\n").join(examples.correctNegExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tn"),
                                        Charsets.UTF_8);
                                Files.write(Joiner.on("\n").join(examples.falsePosExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".fp"),
                                        Charsets.UTF_8);

                                // compute baseline
                                logger.info("Computing baseline...");
                                RDFResourceTree baselineSolution = applyBaseLine(examples,
                                        Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES);
                                logger.info("Baseline solution:\n" + owlRenderer
                                        .render(QueryTreeUtils.toOWLClassExpression(baselineSolution)));
                                logger.info("Evaluating baseline...");
                                Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise);
                                logger.info("Baseline score:\n" + baselineScore);
                                String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution,
                                        dataset.getBaseIRI(), dataset.getPrefixMapping());
                                baselinePrecisionStats.addValue(baselineScore.precision);
                                baselineRecallStats.addValue(baselineScore.recall);
                                baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                baselinePredAccStats.addValue(baselineScore.predAcc);
                                baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                // run QTL
                                PosNegLPStandard lp = new PosNegLPStandard();
                                lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                lp.setNegativeExamples(examples.negExamplesMapping.keySet());
                                QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                la.setReasoner(dataset.getReasoner());
                                la.setEntailment(Entailment.SIMPLE);
                                la.setTreeFactory(queryTreeFactory);
                                la.setPositiveExampleTrees(examples.posExamplesMapping);
                                la.setNegativeExampleTrees(examples.negExamplesMapping);
                                la.setNoise(noise);
                                la.setHeuristic(heuristic);
                                la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                la.init();
                                la.start();
                                List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(la.getSolutions());

                                //                              List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic);
                                nrOfReturnedSolutionsStats.addValue(solutions.size());

                                // the best returned solution by QTL
                                EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                logger.info("Got " + solutions.size() + " query trees.");
                                logger.info("Best computed solution:\n"
                                        + render(bestSolution.asEvaluatedDescription()));
                                logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                long runtimeBestSolution = la.getTimeBestSolutionFound();
                                bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                // convert to SPARQL query
                                RDFResourceTree tree = bestSolution.getTree();
                                //                  filter.filter(tree);
                                String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree,
                                        dataset.getBaseIRI(), dataset.getPrefixMapping());

                                // compute score
                                Score score = computeScore(sparqlQuery, tree, noise);
                                bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                bestReturnedSolutionRecallStats.addValue(score.recall);
                                bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                logger.info(score.toString());

                                // find the extensionally best matching tree in the list
                                Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(
                                        solutions, sparqlQuery, noise, examples);
                                EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore
                                        .getFirst();
                                Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                // position of best tree in list of solutions
                                int positionBestScore = solutions.indexOf(bestMatchingTree);
                                bestSolutionPositionStats.addValue(positionBestScore);

                                Score bestScore = score;
                                if (positionBestScore > 0) {
                                    logger.info("Position of best covering tree in list: " + positionBestScore);
                                    logger.info("Best covering solution:\n"
                                            + render(bestMatchingTree.asEvaluatedDescription()));
                                    logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                    bestScore = bestMatchingScore;
                                    logger.info(bestMatchingScore.toString());
                                } else {
                                    logger.info("Best returned solution was also the best covering solution.");
                                }
                                bestSolutionRecallStats.addValue(bestScore.recall);
                                bestSolutionPrecisionStats.addValue(bestScore.precision);
                                bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                    if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                        Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"),
                                                Charsets.UTF_8);
                                        break;
                                    }
                                }

                                String bestQuery = QueryFactory.create(QueryTreeUtils.toSPARQLQueryString(
                                        filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(),
                                        dataset.getPrefixMapping())).toString();

                                if (write2DB) {
                                    write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery,
                                            baselineScore, heuristicName, measureName,
                                            QueryFactory.create(learnedSPARQLQuery).toString(), score,
                                            runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                }

                            } catch (Exception e) {
                                failed.set(true);
                                logger.error("Error occured for query\n" + sparqlQuery, e);
                                try {
                                    StringWriter sw = new StringWriter();
                                    PrintWriter pw = new PrintWriter(sw);
                                    e.printStackTrace(pw);
                                    Files.append(sparqlQuery + "\n" + sw.toString(),
                                            new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise
                                                    + "-" + heuristicName + "-" + measureName + ".txt"),
                                            Charsets.UTF_8);
                                } catch (IOException e1) {
                                    e1.printStackTrace();
                                }
                            } finally {
                                int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                logger.info("***********Evaluation Progress:"
                                        + NumberFormat.getPercentInstance()
                                                .format((double) cnt / totalNrOfQTLRuns)
                                        + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                            }
                        });

                    }

                    tp.shutdown();
                    tp.awaitTermination(12, TimeUnit.HOURS);

                    Logger.getRootLogger().removeAppender(appender);

                    if (!failed.get()) {
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;

                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;

                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;

                        result += "\nTime until best returned solution found:\n"
                                + bestReturnedSolutionRuntimeStats;

                        result += "\nPositions of best solution:\n"
                                + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;

                        result += "\nCBD generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal()
                                + "\n";
                        result += "CBD generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg()
                                + "\n";
                        result += "Tree generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal()
                                + "\n";
                        result += "Tree generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg()
                                + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";

                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise,
                                    bestReturnedSolutionFMeasureStats.getMean(),
                                    bestReturnedSolutionPrecisionStats.getMean(),
                                    bestReturnedSolutionRecallStats.getMean(),
                                    bestReturnedSolutionPredAccStats.getMean(),
                                    bestReturnedSolutionMathCorrStats.getMean(),
                                    bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(),
                                    bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(),
                                    bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(),
                                    baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(),
                                    baselineRecallStats.getMean(), baselinePredAccStats.getMean(),
                                    baselineMathCorrStats.getMean(),
                                    bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            String content = "###";
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content += separator + noiseInterval1;
            }
            content += "\n";
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content += nrOfExamplesIntervals[i];
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content += separator + data[i][j];
                }
                content += "\n";
            }

            File examplesVsNoise = new File(benchmarkDirectory,
                    "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content, examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }
    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms.");
}

From source file:org.hawkular.client.test.metrics.openshift.CollectionRateDetailTest.java

private void getData(String metricID, String testID, long start, long end, Duration timeBucket) {
    Reporter.log("Fetching large data set... may take a couple minutes", true);
    List<DataPoint<Double>> rawData = client().metrics().gauge()
            .findGaugeDataWithId(metricID, String.valueOf(start), String.valueOf(end), null, null, null)
            .getEntity();// w  w  w. j  a v  a  2 s.  c o  m

    Assert.assertNotNull(rawData, testID);
    Reporter.log("raw datapoints: " + rawData.size(), true);

    List<Long> zeroList = findZeroValues(rawData);

    Assert.assertTrue(zeroList == null || zeroList.size() == 0, testID);

    Map<Long, Integer> hist = OpenshiftBaseTest.makeHistogram(rawData, timeBucket);

    Double[] result = hist.entrySet().stream().map(x -> new Double(x.getValue()))
            .toArray(size -> new Double[size]);

    double[] d = ArrayUtils.toPrimitive(result);

    // drop the first and last as they are usually outliers
    double[] samples = Arrays.copyOfRange(d, 1, d.length - 1);
    DescriptiveStatistics stats = new DescriptiveStatistics(samples);

    Reporter.log(hist.toString(), true);
    Reporter.log("size: " + stats.getN(), true);
    Reporter.log("min/max: " + stats.getMin() + "/" + stats.getMax(), true);
    Reporter.log("mean: " + stats.getMean(), true);
    Reporter.log("variance: " + stats.getVariance(), true);
    Reporter.log("stddev: " + stats.getStandardDeviation(), true);
}