Example usage for org.apache.commons.math3.stat.descriptive DescriptiveStatistics getMean


Introduction

On this page you can find example usages of the org.apache.commons.math3.stat.descriptive.DescriptiveStatistics method getMean().

Prototype

public double getMean() 

Document

Returns the arithmetic mean of the available values (see http://www.xycoon.com/arithmetic_mean.htm).
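
Before the project examples, a minimal self-contained sketch of the call (class name and input values are illustrative):

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class GetMeanExample {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        for (double v : new double[] { 1.0, 2.0, 3.0, 4.0 }) {
            stats.addValue(v);
        }
        // arithmetic mean of the four values: 2.5
        System.out.println(stats.getMean());
    }
}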

Usage

From source file: org.cirdles.ludwig.isoplot3.Means.java

/**
 * Implements Ludwig's WeightedAverage and assumes ConstExtErr = true, since all
 * possible values are returned and the caller can decide.
 *
 * @param inValues as double[] with length nPts
 * @param inErrors as double[] with length nPts
 * @param canReject whether outlier rejection is allowed
 * @param canTukeys whether Tukey's biweight statistics are computed
 * @return double[7][]{mean, sigmaMean, err68, err95, MSWD, probability, externalFlag}, {values
 * with rejected as 0.0}.  externalFlag = 1.0 for external uncertainty, 0.0 for internal
 */
public static double[][] weightedAverage(double[] inValues, double[] inErrors, boolean canReject,
        boolean canTukeys) {

    double[] values = inValues.clone();
    double[] errors = inErrors.clone();

    double[][] retVal = new double[][] { { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }, {} };

    // check precondition of same size values and errors and at least 3 points
    int nPts = values.length;
    int nN = nPts;
    int count = 0;

    // where does this come from??
    boolean hardRej = false;

    if ((nPts == errors.length) && nPts > 2) {
        // proceed
        double[] inverseVar = new double[nPts];
        double[] wtdResid = new double[nPts];
        double[] yy;
        double[] iVarY;
        double[] tbX = new double[nPts];

        double[][] wRejected = new double[nPts][2];

        for (int i = 0; i < nPts; i++) {
            inverseVar[i] = 1.0 / Math.pow(errors[i], 2);
        }

        double intMean = 0.0;
        double MSWD = 0.0;
        double intSigmaMean = 0.0;
        double probability = 0.0;
        double intMeanErr95 = 0.0;
        double intErr68 = 0.0;

        double extMean = 0.0;
        double extMeanErr95 = 0.0;
        double extMeanErr68 = 0.0;
        double extSigma = 0.0;

        double biWtMean = 0.0;
        double biWtSigma = 0.0;

        boolean reCalc;

        // entry point for RECALC goto - consider another private method?
        do {
            reCalc = false;

            extSigma = 0.0;
            double weight = 0.0;
            double sumWtdRatios = 0.0;
            double q = 0.0;
            count++;

            for (int i = 0; i < nPts; i++) {
                if (values[i] * errors[i] != 0.0) {
                    weight += inverseVar[i];
                    sumWtdRatios += inverseVar[i] * values[i];
                    q += inverseVar[i] * Math.pow(values[i], 2);
                }
            }

            int nU = nN - 1; // degrees of freedom
            TDistribution studentsT = new TDistribution(nU);
            // see https://stackoverflow.com/questions/21730285/calculating-t-inverse
            // for explanation of cutting the tail mass in two to get agreement with Excel two-tail
            double t68 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.6826) / 2.0));
            double t95 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.95) / 2));

            intMean = sumWtdRatios / weight; // weighted mean ("internal" weighted average)

            double sums = 0.0;
            for (int i = 0; i < nPts; i++) {
                if (values[i] * errors[i] != 0.0) {
                    double resid = values[i] - intMean; // simple residual
                    wtdResid[i] = resid / errors[i]; // weighted residual
                    double wtdR2 = Math.pow(wtdResid[i], 2); // square of weighted residual
                    sums += wtdR2;
                }
            }
            sums = Math.max(sums, 0.0);

            MSWD = sums / nU; // mean square of weighted deviates
            intSigmaMean = Math.sqrt(1.0 / weight);

            // http://commons.apache.org/proper/commons-math/apidocs/org/apache/commons/math3/distribution/FDistribution.html
            FDistribution fdist = new FDistribution(nU, 1E9);
            probability = 1.0 - fdist.cumulativeProbability(MSWD); // equivalent to ChiSquare(MSWD, nU) in the original
            intMeanErr95 = intSigmaMean * (probability >= 0.3 ? 1.96 : t95 * Math.sqrt(MSWD));
            intErr68 = intSigmaMean * (probability >= 0.3 ? 0.9998 : t68 * Math.sqrt(MSWD));

            extMean = 0.0;
            extMeanErr95 = 0.0;
            extMeanErr68 = 0.0;

            // need to find external uncertainty
            List<Double> yyList = new ArrayList<>();
            List<Double> iVarYList = new ArrayList<>();
            if ((probability < SQUID_MINIMUM_PROBABILITY) && (MSWD > 1.0)) {
                // Find the MLE constant external variance
                nN = 0;
                for (int i = 0; i < nPts; i++) {
                    if (values[i] != 0.0) {
                        yyList.add(values[i]);
                        iVarYList.add(errors[i] * errors[i]);
                        nN++;
                    }
                }
                // resize arrays
                yy = yyList.stream().mapToDouble(Double::doubleValue).toArray();
                iVarY = iVarYList.stream().mapToDouble(Double::doubleValue).toArray();

                // call secant method
                double[] wtdExtRtsec = wtdExtRTSEC(0, 10.0 * intSigmaMean * intSigmaMean, yy, iVarY);

                // check for failure
                if (wtdExtRtsec[3] == 0.0) {
                    extMean = wtdExtRtsec[1];
                    extSigma = Math.sqrt(wtdExtRtsec[0]);

                    studentsT = new TDistribution(2 * nN - 2);
                    extMeanErr95 = Math.abs(studentsT.inverseCumulativeProbability((1.0 - 0.95) / 2.0))
                            * wtdExtRtsec[2];

                } else if (MSWD > 4.0) { //Failure of RTSEC algorithm because of extremely high MSWD
                    DescriptiveStatistics stats = new DescriptiveStatistics(yy);
                    extSigma = stats.getStandardDeviation();
                    extMean = stats.getMean();
                    extMeanErr95 = t95 * extSigma / Math.sqrt(nN);
                } else {
                    extSigma = 0.0;
                    extMean = 0.0;
                    extMeanErr95 = 0.0;
                }

                extMeanErr68 = t68 / t95 * extMeanErr95;
            }

            if (canReject && (probability < SQUID_MINIMUM_PROBABILITY)) {
                // GOSUB REJECT
                double wtdAvg = 0.0;
                if (extSigma != 0.0) {
                    wtdAvg = extMean;
                } else {
                    wtdAvg = intMean;
                }

                //  reject outliers
                int n0 = nN;

                for (int i = 0; i < nPts; i++) {
                    if ((values[i] != 0.0) && (nN > 0.85 * nPts)) { // keep at least 85% of points (reject no more than 15%)
                        // Start rej. tolerance at 2-sigma, increase slightly each pass.
                        double pointError = 2.0 * Math.sqrt(errors[i] * errors[i] + extSigma * extSigma);
                        // 2-sigma error of point being tested
                        double totalError = Math
                                .sqrt(pointError * pointError + (4.0 * extMeanErr68 * extMeanErr68));
                        // tolerance starts at 2-sigma and grows each pass:
                        // 1st pass 2-sigma, 2nd 2.5-sigma, 3rd 3-sigma, ...
                        double tolerance = (1.0 + (count - 1.0) / 4.0) * totalError;
                        if (hardRej) {
                            tolerance = tolerance * 1.25;
                        }
                        q = values[i] - wtdAvg;

                        if ((Math.abs(q) > tolerance) && nN > 2) {
                            nN--;
                            wRejected[i][0] = values[i];
                            values[i] = 0.0;
                            wRejected[i][1] = errors[i];
                            errors[i] = 0.0;
                        } // check tolerance

                    } // end 85%-retention check
                } // nPts loop               

                reCalc = (nN < n0);
            } // canReject test
        } while (reCalc);

        if (canTukeys) { // March 2018: not finished, as it is unclear where this is used
            System.arraycopy(values, 0, tbX, 0, nPts);

            double[] tukey = SquidMathUtils.tukeysBiweight(tbX, 6);
            biWtMean = tukey[0];
            biWtSigma = tukey[1];
            DescriptiveStatistics stats = new DescriptiveStatistics(tbX);
            double biWtErr95Median = stats.getPercentile(50);

            double median = median(tbX);
            double medianConf = medianConfLevel(nPts);
            double medianPlusErr = medianUpperLim(tbX) - median;
            double medianMinusErr = median - medianLowerLim(tbX);
        }

        // determine whether to return internal or external
        if (extMean != 0.0) {
            retVal = new double[][] { { extMean, extSigma, extMeanErr68, extMeanErr95, MSWD, probability, 1.0 },
                    // contains zeroes for each reject
                    values };
        } else {
            retVal = new double[][] { { intMean, intSigmaMean, intErr68, intMeanErr95, MSWD, probability, 0.0 },
                    // contains zeroes for each reject
                    values };
        }

    }

    return retVal;
}
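
For orientation, here is a hypothetical call to weightedAverage (the values and errors are invented) showing how the returned array unpacks, per the javadoc above:

double[] values = { 10.1, 10.3, 9.9, 10.0 };
double[] errors = { 0.2, 0.2, 0.3, 0.2 };
double[][] result = Means.weightedAverage(values, errors, true, false);
double mean = result[0][0];              // weighted mean
double sigmaMean = result[0][1];         // 1-sigma uncertainty of the mean
double probability = result[0][5];       // probability of fit
boolean external = result[0][6] == 1.0;  // external (1.0) vs. internal (0.0) uncertainty
double[] retained = result[1];           // input values, with rejected points zeroed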

From source file: org.commoncrawl.mapred.ec2.postprocess.deduper.DeduperUtils.java

/**
 * @param args
 */
public static void main(String[] args) throws IOException {

    URLFPBloomFilter filter = new URLFPBloomFilter(JSONSetBuilder.NUM_ELEMENTS,
            JSONSetBuilder.NUM_HASH_FUNCTIONS, JSONSetBuilder.NUM_BITS);
    DescriptiveStatistics filterClearStats = new DescriptiveStatistics();
    for (int i = 0; i < 1000; ++i) {
        long timeStart = System.nanoTime();
        filter.clear();
        long timeEnd = System.nanoTime();
        filterClearStats.addValue(timeEnd - timeStart);
    }
    System.out.println("Mean Clear Time:" + filterClearStats.getMean());

    System.out.println("size:" + BINOMIAL_COFF);
    for (int j = 0; j < BINOMIAL_COFF; ++j) {
        int value = patternArray[j];
        System.out.print("value:" + value + " ");
        for (int i = 5; i >= 0; --i) {
            System.out.print(((value & (1 << i)) != 0) ? '1' : '0');
        }
        System.out.print("  Key MSBLen:" + Integer.toString(patternKeyMSBits[j]) + "\n");
    }
    validateGenerator();

    long key1 = new BitBuilder().on(10).off(1).on(53).bits();
    long key2 = new BitBuilder().on(10).off(4).on(50).bits();
    long key3 = new BitBuilder().on(10).off(4).on(47).off(3).bits();
    long key4 = new BitBuilder().off(10).on(4).off(47).on(3).bits();
    long key5 = new BitBuilder().off(10).on(4).off(47).on(1).off(2).bits();

    Assert.assertTrue(SimHash.hammingDistance(key1, key2) == 3);
    Assert.assertTrue(SimHash.hammingDistance(key1, key3) != 3);
    Assert.assertTrue(SimHash.hammingDistance(key2, key3) == 3);
    Assert.assertTrue(SimHash.hammingDistance(key1, key4) > 3);
    Assert.assertTrue(SimHash.hammingDistance(key2, key4) > 3);
    Assert.assertTrue(SimHash.hammingDistance(key3, key4) > 3);
    Assert.assertTrue(SimHash.hammingDistance(key4, key5) <= 3);

    ImmutableList<DeduperValue> values = new ImmutableList.Builder<DeduperValue>()

            .add(new DeduperValue(key1, 1000, 2000, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.1"), 1000,
                    new TextBytes("http://adomain.com/")))
            .add(new DeduperValue(key2, 1001, 2001, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://bdomain.com/")))
            .add(new DeduperValue(key3, 1002, 2002, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.3"), 1000,
                    new TextBytes("http://cdomain.com/")))
            .add(new DeduperValue(key4, 1003, 2003, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.4"), 1000,
                    new TextBytes("http://ddomain.com/")))
            .add(new DeduperValue(key5, 1004, 2004, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.5"), 1000,
                    new TextBytes("http://edomain.com/")))
            .build();

    SimhashMatcher unionFinder = new SimhashMatcher();

    final Multimap<String, Long> rootDomainToDupes = TreeMultimap.create();
    // collect all json set representations ... 
    final ArrayList<TextBytes> jsonSets = new ArrayList<TextBytes>();

    unionFinder.emitMatches(3, values.iterator(), new OutputCollector<TextBytes, TextBytes>() {

        @Override
        public void collect(TextBytes key, TextBytes value) throws IOException {
            System.out.println("Root:" + key + " JSON: " + value.toString());

            populateTestJSONSetData(rootDomainToDupes, key, value);
            // collect all json sets for later disjoint-set join 
            jsonSets.add(value);
        }
    }, null);

    ImmutableList<Long> hashSuperSet1 = ImmutableList.of(2000L, 2001L, 2002L);
    ImmutableList<Long> hashSuperSet2 = ImmutableList.of(2003L, 2004L);

    Assert.assertTrue(rootDomainToDupes.get("adomain.com").containsAll(hashSuperSet1));
    Assert.assertTrue(rootDomainToDupes.get("bdomain.com").containsAll(hashSuperSet1));
    Assert.assertTrue(rootDomainToDupes.get("cdomain.com").containsAll(hashSuperSet1));

    Assert.assertTrue(rootDomainToDupes.get("ddomain.com").containsAll(hashSuperSet2));
    Assert.assertTrue(rootDomainToDupes.get("edomain.com").containsAll(hashSuperSet2));

    ImmutableList<DeduperValue> secondSetValues = new ImmutableList.Builder<DeduperValue>()

            .add(new DeduperValue(key1, 1000, 2000, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://adomain.com/")))
            .add(new DeduperValue(key1, 1007, 2007, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://z1domain.com/")))
            .add(new DeduperValue(key2, 1008, 2008, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://z2domain.com/")))
            .add(new DeduperValue(key3, 1009, 2009, IPAddressUtils.IPV4AddressStrToInteger("10.0.0.2"), 1000,
                    new TextBytes("http://z3domain.com/")))
            .build();

    unionFinder.emitMatches(3, secondSetValues.iterator(), new OutputCollector<TextBytes, TextBytes>() {

        @Override
        public void collect(TextBytes key, TextBytes value) throws IOException {
            System.out.println("Root:" + key + " JSON: " + value.toString());
            // collect all json sets for later disjoint-set join 
            jsonSets.add(value);
        }
    }, null);

    SetUnionFinder unionFinder2 = new SetUnionFinder();

    // union all json sets ... 
    unionFinder2.union(jsonSets.iterator());

    // ok emit union of sets ... 
    unionFinder2.emit(new TextBytes("test"), new OutputCollector<TextBytes, TextBytes>() {

        @Override
        public void collect(TextBytes key, TextBytes value) throws IOException {
            System.out.println("Root:" + key + " JSON: " + value.toString());
        }
    }, null);

}
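
The timing loop at the top of this main() is a reusable micro-benchmarking pattern: accumulate per-iteration nanosecond durations in a DescriptiveStatistics, then read off summary values. A trimmed sketch (the operation under test is a placeholder):

DescriptiveStatistics timings = new DescriptiveStatistics();
for (int i = 0; i < 1000; ++i) {
    long t0 = System.nanoTime();
    // ... operation under test ...
    timings.addValue(System.nanoTime() - t0);
}
System.out.println("mean ns: " + timings.getMean());
System.out.println("p99 ns:  " + timings.getPercentile(99.0));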

From source file: org.commoncrawl.mapred.pipelineV3.domainmeta.fuzzydedupe.CrossDomainDupesReducer.java

@Override
public void reduce(TextBytes key, Iterator<TextBytes> values, OutputCollector<TextBytes, TextBytes> output,
        Reporter reporter) throws IOException {

    filter.clear();
    double crossDomainDupesCount = 0;
    double totalHitsCount = 0;
    double uniqueRootDomainsCount = 0;
    double uniqueIPs = 0;
    double validDupePatternMatches = 0;

    URLFPV2 rootFP = URLUtils.getURLFPV2FromHost(key.toString());
    URLFPV2 fp = new URLFPV2();
    int sampleCount = 0;
    ArrayList<Integer> ipAddresses = new ArrayList<Integer>();
    JsonArray thisHostsDupes = new JsonArray();
    DescriptiveStatistics lengthStats = new DescriptiveStatistics();

    while (values.hasNext()) {
        JsonArray jsonArray = parser.parse(values.next().toString()).getAsJsonArray();
        for (JsonElement elem : jsonArray) {
            totalHitsCount++;
            fp.setRootDomainHash(elem.getAsJsonObject().get("dh").getAsLong());
            if (fp.getRootDomainHash() != rootFP.getRootDomainHash()) {
                crossDomainDupesCount++;
                fp.setDomainHash(fp.getRootDomainHash());
                fp.setUrlHash(fp.getRootDomainHash());
                // track length average ....
                lengthStats.addValue(elem.getAsJsonObject().get("length").getAsInt());

                if (!filter.isPresent(fp)) {
                    uniqueRootDomainsCount++;
                    filter.add(fp);
                    if (sampleCount < samples.length) {
                        String url = elem.getAsJsonObject().get("url").getAsString();
                        GoogleURL urlObject = new GoogleURL(url);
                        if (knownValidDupesPatterns.matcher(urlObject.getCanonicalURL()).find()) {
                            validDupePatternMatches++;
                        }
                        samples[sampleCount++] = url;
                    }
                }
            } else {
                thisHostsDupes.add(elem);
            }

            int ipAddress = elem.getAsJsonObject().get("ip").getAsInt();

            fp.setRootDomainHash(ipAddress);
            fp.setDomainHash(ipAddress);
            fp.setUrlHash(ipAddress);

            if (!filter.isPresent(fp)) {
                uniqueIPs++;
                filter.add(fp);
                ipAddresses.add(ipAddress);
            }
        }
    }

    if (totalHitsCount > 15 && crossDomainDupesCount >= 2) {

        double otherDomainToLocalScore = otherDomainToLocalDomainScore(totalHitsCount, crossDomainDupesCount);
        double spamIPScore = spamHostScore(totalHitsCount, crossDomainDupesCount, uniqueIPs);

        if (otherDomainToLocalScore >= .50 || spamIPScore > .50) {
            JsonObject objectOut = new JsonObject();

            objectOut.addProperty("ratio", (crossDomainDupesCount / totalHitsCount));
            objectOut.addProperty("totalHits", totalHitsCount);
            objectOut.addProperty("crossDomainDupes", crossDomainDupesCount);
            objectOut.addProperty("uniqueRootDomains", uniqueRootDomainsCount);
            objectOut.addProperty("otherDomainToLocalScore", otherDomainToLocalScore);
            objectOut.addProperty("spamIPScore", spamIPScore);
            objectOut.addProperty("validDupeMatches", validDupePatternMatches);
            objectOut.addProperty("content-len-mean", lengthStats.getMean());
            objectOut.addProperty("content-len-geo-mean", lengthStats.getGeometricMean());

            for (int i = 0; i < sampleCount; ++i) {
                objectOut.addProperty("sample-" + i, samples[i]);
            }
            // compute path edit distance ...
            if (sampleCount > 1) {
                int sampleEditDistanceSize = Math.min(sampleCount, 5);
                DescriptiveStatistics stats = new DescriptiveStatistics();
                for (int j = 0; j < sampleEditDistanceSize; ++j) {
                    for (int k = 0; k < sampleEditDistanceSize; ++k) {
                        if (k != j) {
                            GoogleURL urlObjectA = new GoogleURL(samples[j]);
                            GoogleURL urlObjectB = new GoogleURL(samples[k]);

                            if (urlObjectA.getPath().length() < 100 && urlObjectB.getPath().length() < 100) {
                                stats.addValue(StringUtils.getLevenshteinDistance(urlObjectA.getPath(),
                                        urlObjectB.getPath()));
                            }
                        }
                    }
                }
                if (stats.getMean() != 0.0) {
                    objectOut.addProperty("lev-distance-mean", stats.getMean());
                    objectOut.addProperty("lev-distance-geomean", stats.getGeometricMean());
                }
            }

            JsonArray ipAddressArray = new JsonArray();
            for (int j = 0; j < Math.min(1000, ipAddresses.size()); ++j) {
                ipAddressArray.add(new JsonPrimitive(ipAddresses.get(j)));
            }
            if (ipAddresses.size() != 0) {
                objectOut.add("ipList", ipAddressArray);
            }
            objectOut.add("thisHostDupes", thisHostsDupes);

            output.collect(key, new TextBytes(objectOut.toString()));
        }
    }

}
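
The reducer emits both content-len-mean and content-len-geo-mean; getMean() is the arithmetic mean, while getGeometricMean() is exp(mean(ln x)) and is only meaningful for strictly positive values. For example:

DescriptiveStatistics len = new DescriptiveStatistics(new double[] { 10, 100, 1000 });
System.out.println(len.getMean());          // 370.0 (arithmetic mean)
System.out.println(len.getGeometricMean()); // 100.0 (cube root of 10 * 100 * 1000)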

From source file: org.cse.visiri.app.algoevaluation.DistributionEval.java

public void EvaluateDistribution(QueryDistribution dist, String algoname) {
    Map<String, Integer> allInfo = new TreeMap<String, Integer>();
    Map<String, Integer> nodeInfo = new TreeMap<String, Integer>();
    Map<String, Double> nodeCosts = new TreeMap<String, Double>();
    for (Query q : dist.getQueryAllocation().keySet()) {
        String node = dist.getQueryAllocation().get(q);
        if (!allInfo.containsKey(node)) {
            allInfo.put(node, 0);
        }
        int val = allInfo.get(node);
        allInfo.put(node, val + 1);
        if (node.startsWith(NODE_PREFIX)) {
            if (!nodeInfo.containsKey(node)) {
                nodeInfo.put(node, 0);
                nodeCosts.put(node, 0.0);
            }
            val = nodeInfo.get(node);
            nodeInfo.put(node, val + 1);
            nodeCosts.put(node, nodeCosts.get(node) + q.getCost());
        }
    }

    DescriptiveStatistics stat = new DescriptiveStatistics();
    DescriptiveStatistics costStat = new DescriptiveStatistics();
    System.out.println("Query counts : ");
    for (String node : nodeInfo.keySet()) {
        System.out.println(node + " : " + allInfo.get(node));
        stat.addValue(nodeInfo.get(node));
        costStat.addValue(nodeCosts.get(node));
    }

    System.out.println();
    double mean = stat.getMean();
    double stdDev = Math.sqrt(stat.getPopulationVariance());
    double varCoef = stdDev / mean;
    System.out.println("mean : " + mean);
    System.out.println("stdDev : " + stdDev);
    System.out.println("Coefficient of var : " + varCoef);

    System.out.println("\nCosts :");
    mean = costStat.getMean();
    stdDev = Math.sqrt(costStat.getPopulationVariance());
    varCoef = stdDev / mean;
    System.out.println("mean : " + mean);
    System.out.println("stdDev : " + stdDev);
    System.out.println("Coefficient of var : " + varCoef);

    //calculate event duplication
    Map<String, Set<String>> eventMap = new TreeMap<String, Set<String>>();
    for (Query q : dist.getQueryAllocation().keySet()) {
        String targetNode = dist.getQueryAllocation().get(q);

        for (StreamDefinition def : q.getInputStreamDefinitionsList()) {
            if (!eventMap.containsKey(def.getStreamId())) {
                eventMap.put(def.getStreamId(), new HashSet<String>());
            }
            eventMap.get(def.getStreamId()).add(targetNode);
        }
    }

    stat = new DescriptiveStatistics();
    for (Set<String> nodes : eventMap.values()) {
        stat.addValue(nodes.size());
    }

    double avg = stat.getMean();

    System.out.println();
    System.out.println("Avg. event duplication " + avg);

    try {
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_algoeval.txt", true)));
        out.println(stdDev);
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    try {
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter("VISIRI_eventDup.txt", true)));
        out.println(avg);
        out.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
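
Note that this example derives the standard deviation from getPopulationVariance() (division by n); getStandardDeviation() would instead return the square root of the bias-corrected sample variance (division by n - 1). The two differ noticeably for small node counts:

DescriptiveStatistics s = new DescriptiveStatistics(new double[] { 1, 2, 3, 4 });
double sampleSd = s.getStandardDeviation();                 // sqrt of variance over n - 1
double populationSd = Math.sqrt(s.getPopulationVariance()); // sqrt of variance over n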

From source file: org.deidentifier.arx.aggregates.StatisticsBuilder.java

/**
 * Returns summary statistics for all attributes.
 *
 * @param listwiseDeletion A flag enabling list-wise deletion
 * @return a map from attribute name to its summary statistics
 */
@SuppressWarnings({ "unchecked", "rawtypes" })
public <T> Map<String, StatisticsSummary<?>> getSummaryStatistics(boolean listwiseDeletion) {

    // Reset stop flag
    interrupt.value = false;

    Map<String, DescriptiveStatistics> statistics = new HashMap<String, DescriptiveStatistics>();
    Map<String, StatisticsSummaryOrdinal> ordinal = new HashMap<String, StatisticsSummaryOrdinal>();
    Map<String, DataScale> scales = new HashMap<String, DataScale>();
    Map<String, GeometricMean> geomean = new HashMap<String, GeometricMean>();

    // Detect scales
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Meta
        String attribute = handle.getAttributeName(col);
        DataType<?> type = handle.getDataType(attribute);

        // Scale
        DataScale scale = type.getDescription().getScale();

        // Try to replace nominal scale with ordinal scale based on base data type
        if (scale == DataScale.NOMINAL && handle.getGeneralization(attribute) != 0) {
            if (!(handle.getBaseDataType(attribute) instanceof ARXString) && getHierarchy(col, true) != null) {
                scale = DataScale.ORDINAL;
            }
        }

        // Store
        scales.put(attribute, scale);
        statistics.put(attribute, new DescriptiveStatistics());
        geomean.put(attribute, new GeometricMean());
        ordinal.put(attribute, getSummaryStatisticsOrdinal(handle.getGeneralization(attribute),
                handle.getDataType(attribute), handle.getBaseDataType(attribute), getHierarchy(col, true)));
    }

    // Compute summary statistics
    for (int row = 0; row < handle.getNumRows(); row++) {

        // Check, if we should include this row
        boolean include = true;
        if (listwiseDeletion) {
            for (int col = 0; col < handle.getNumColumns(); col++) {
                if (handle.isOutlier(row) || DataType.isNull(handle.getValue(row, col))) {
                    include = false;
                    break;
                }
            }
        }

        // Check
        checkInterrupt();

        // If yes, add
        if (include) {

            // For each column
            for (int col = 0; col < handle.getNumColumns(); col++) {

                // Meta
                String value = handle.getValue(row, col);
                String attribute = handle.getAttributeName(col);
                DataType<?> type = handle.getDataType(attribute);

                // Analyze
                if (!DataType.isAny(value) && !DataType.isNull(value)) {
                    ordinal.get(attribute).addValue(value);
                    if (type instanceof DataTypeWithRatioScale) {
                        double doubleValue = ((DataTypeWithRatioScale) type).toDouble(type.parse(value));
                        statistics.get(attribute).addValue(doubleValue);
                        geomean.get(attribute).increment(doubleValue + 1d);
                    }
                }
            }
        }
    }

    // Convert
    Map<String, StatisticsSummary<?>> result = new HashMap<String, StatisticsSummary<?>>();
    for (int col = 0; col < handle.getNumColumns(); col++) {

        // Check
        checkInterrupt();

        // Depending on scale
        String attribute = handle.getAttributeName(col);
        DataScale scale = scales.get(attribute);
        DataType<T> type = (DataType<T>) handle.getDataType(attribute);
        ordinal.get(attribute).analyze();
        if (scale == DataScale.NOMINAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute, new StatisticsSummary<T>(DataScale.NOMINAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode())));
        } else if (scale == DataScale.ORDINAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            result.put(attribute,
                    new StatisticsSummary<T>(DataScale.ORDINAL, stats.getNumberOfMeasures(), stats.getMode(),
                            type.parse(stats.getMode()), stats.getMedian(), type.parse(stats.getMedian()),
                            stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                            type.parse(stats.getMax())));
        } else if (scale == DataScale.INTERVAL) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            boolean isPeriod = type.getDescription().getWrappedClass() == Date.class;

            // TODO: Something is wrong with commons math's kurtosis
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            result.put(attribute, new StatisticsSummary<T>(DataScale.INTERVAL, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), isPeriod, true), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), isPeriod, true),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, isPeriod, false), toValue(type, stddev), stddev,
                    toString(type, range, isPeriod, false), toValue(type, range),
                    stats2.getMax() - stats2.getMin(), toString(type, kurtosis, isPeriod, false),
                    toValue(type, kurtosis), kurtosis));
        } else if (scale == DataScale.RATIO) {
            StatisticsSummaryOrdinal stats = ordinal.get(attribute);
            DescriptiveStatistics stats2 = statistics.get(attribute);
            GeometricMean geo = geomean.get(attribute);

            // TODO: Something is wrong with commons math's kurtosis
            double kurtosis = stats2.getKurtosis();
            kurtosis = kurtosis < 0d ? Double.NaN : kurtosis;
            double range = stats2.getMax() - stats2.getMin();
            double stddev = Math.sqrt(stats2.getVariance());

            result.put(attribute, new StatisticsSummary<T>(DataScale.RATIO, stats.getNumberOfMeasures(),
                    stats.getMode(), type.parse(stats.getMode()), stats.getMedian(),
                    type.parse(stats.getMedian()), stats.getMin(), type.parse(stats.getMin()), stats.getMax(),
                    type.parse(stats.getMax()), toString(type, stats2.getMean(), false, false),
                    toValue(type, stats2.getMean()), stats2.getMean(),
                    toString(type, stats2.getVariance(), false, false), toValue(type, stats2.getVariance()),
                    stats2.getVariance(), toString(type, stats2.getPopulationVariance(), false, false),
                    toValue(type, stats2.getPopulationVariance()), stats2.getPopulationVariance(),
                    toString(type, stddev, false, false), toValue(type, stddev), stddev,
                    toString(type, range, false, false), toValue(type, range), range,
                    toString(type, kurtosis, false, false), toValue(type, kurtosis), kurtosis,
                    toString(type, geo.getResult() - 1d, false, false), toValue(type, geo.getResult() - 1d),
                    stats2.getGeometricMean()));
        }
    }

    return result;
}
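
On the kurtosis TODO above: getKurtosis() returns the bias-corrected sample excess kurtosis, which requires at least four values (NaN otherwise) and is legitimately negative for flat (platykurtic) data, so clamping negative values to NaN, as done here, discards valid results. A quick check:

// excess kurtosis of a flat sample is negative, not an error;
// getKurtosis() needs at least 4 values and returns NaN below that
DescriptiveStatistics d = new DescriptiveStatistics(new double[] { 1, 2, 3, 4, 5 });
System.out.println(d.getKurtosis()); // prints a negative value (about -1.2)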

From source file: org.dllearner.algorithms.qtl.experiments.BenchmarkDescriptionGeneratorHTML.java

@Override
protected void addRow(QueryData queryData) {
    sb.append("<tr>\n");

    // column: ID
    sb.append("<td>" + queryData.id + "</td>\n");

    // column: SPARQL query
    sb.append("<td><pre>" + queryData.query.toString().replace("<", "&lt;").replace(">", "&gt;")
            + "</pre></td>\n");

    // column: SPARQL query type
    sb.append("<td>" + queryData.queryType + "</td>\n");

    // query graph
    //      QueryToGraphExporter.exportYedGraph(queryData.query, new File(""));
    //      sb.append("<td><img src=\"" + graphFile.getPath() + "\" alt=\"query graph\"></td>\n");

    // column: depth
    sb.append("<td class='number'>" + queryData.maxTreeDepth + "</td>\n");

    // column: #instances
    sb.append("<td class='number'>" + queryData.nrOfInstances + "</td>\n");

    // columns: optimal CBD sizes (min, max, avg)
    DescriptiveStatistics optimalCBDSizeStats = queryData.optimalCBDSizeStats;
    sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMin() + "</td>\n");
    sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMax() + "</td>\n");
    sb.append("<td class='number'>" + (int) optimalCBDSizeStats.getMean() + "</td>\n");

    // columns: generic CBD sizes (min, max, avg)
    DescriptiveStatistics genericCBDSizeStats = queryData.defaultCBDSizesStats;
    sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMin() + "</td>\n");
    sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMax() + "</td>\n");
    sb.append("<td class='number'>" + (int) genericCBDSizeStats.getMean() + "</td>\n");

    sb.append("</tr>\n");
}

From source file: org.dllearner.algorithms.qtl.experiments.EvaluationDataset.java

public void analyze() {
    ConciseBoundedDescriptionGenerator cbdGen = new SymmetricConciseBoundedDescriptionGeneratorImpl(
            ks.getQueryExecutionFactory());

    String separator = "\t";
    String tsv = sparqlQueries.entrySet().stream().map(entry -> {
        StringBuilder sb = new StringBuilder();

        // ID
        String id = entry.getKey();
        sb.append(id).append(separator);

        // query
        Query q = entry.getValue();
        sb.append(q.toString().replace("\n", " "));
        try {
            // get query result
            List<String> result = SPARQLUtils.getResult(ks.getQueryExecutionFactory(), q);
            sb.append(separator).append(result.size());

            // query type
            SPARQLUtils.QueryType queryType = SPARQLUtils.getQueryType(q);
            sb.append(separator).append(queryType.name());

            // check CBD sizes and time
            Monitor mon = MonitorFactory.getTimeMonitor("CBD");
            mon.reset();
            DescriptiveStatistics sizeStats = new DescriptiveStatistics();
            result.stream().map(r -> {
                System.out.println(r);
                mon.start();
                Model cbd = cbdGen.getConciseBoundedDescription(r, 2);
                mon.stop();
                return cbd;
            }).map(Model::size).forEach(sizeStats::addValue);

            // show min., max. and avg. size
            sb.append(separator).append(sizeStats.getMin());
            sb.append(separator).append(sizeStats.getMax());
            sb.append(separator).append(sizeStats.getMean());

            // show min., max. and avg. CBD time
            sb.append(separator).append(mon.getTotal());
            sb.append(separator).append(mon.getMin());
            sb.append(separator).append(mon.getMax());
            sb.append(separator).append(mon.getAvg());
        } catch (Exception e) {
            e.printStackTrace();
        }
        return sb;
    }).collect(Collectors.joining("\n"));

    System.out.println(tsv);
}

From source file: org.dllearner.algorithms.qtl.experiments.PRConvergenceExperiment.java

public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval,
        HeuristicType[] measures) throws Exception {
    this.maxTreeDepth = maxTreeDepth;
    queryTreeFactory.setMaxDepth(maxTreeDepth);

    if (exampleInterval != null) {
        nrOfExamplesIntervals = exampleInterval;
    }
    if (noiseInterval != null) {
        this.noiseIntervals = noiseInterval;
    }
    if (measures != null) {
        this.measures = measures;
    }

    boolean noiseEnabled = noiseIntervals.length > 1 || noiseIntervals[0] > 0;
    boolean posOnly = !noiseEnabled;

    logger.info("Started QTL evaluation...");
    long t1 = System.currentTimeMillis();

    List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString())
            .collect(Collectors.toList());
    logger.info("#loaded queries: " + queries.size());

    // filter for debugging purposes
    queries = queries.stream().filter(q -> queriesToProcessTokens.stream().noneMatch(t -> !q.contains(t)))
            .collect(Collectors.toList());
    queries = queries.stream().filter(q -> queriesToOmitTokens.stream().noneMatch(t -> q.contains(t)))
            .collect(Collectors.toList());

    if (maxNrOfProcessedQueries == -1) {
        maxNrOfProcessedQueries = queries.size();
    }

    //      queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth));
    //      queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries));
    logger.info("#queries to process: " + queries.size());

    // generate examples for each query
    logger.info("precomputing pos. and neg. examples...");
    for (String query : queries) {//if(!(query.contains("Borough_(New_York_City)")))continue;
        query2Examples.put(query, generateExamples(query, posOnly, noiseEnabled));
    }
    logger.info("precomputing pos. and neg. examples finished.");

    // check for queries that do not return any result (should not happen, but we never know)
    Set<String> emptyQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} empty queries.", emptyQueries.size());
    queries.removeAll(emptyQueries);

    // min. pos examples
    int min = 3;
    Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.size() < min).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} queries with < {} pos. examples.", emptyQueries.size(), min);
    queries.removeAll(lowNrOfExamplesQueries);
    queries = queries.subList(0, Math.min(80, queries.size()));

    final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length
            * noiseIntervals.length * queries.size();
    logger.info("#QTL runs: " + totalNrOfQTLRuns);

    final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0);

    // loop over heuristics
    for (final QueryTreeHeuristic heuristic : heuristics) {
        final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName();

        // loop over heuristics measures
        for (HeuristicType measure : this.measures) {
            final String measureName = measure.toString();
            heuristic.setHeuristicType(measure);

            double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

            // loop over number of positive examples
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                final int nrOfExamples = nrOfExamplesIntervals[i];

                // loop over noise value
                for (int j = 0; j < noiseIntervals.length; j++) {
                    final double noise = noiseIntervals[j];

                    // check if not already processed
                    File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".log");
                    File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".stats");

                    if (!override && logFile.exists() && statsFile.exists()) {
                        logger.info(
                                "Eval config already processed. For re-running please remove corresponding output files.");
                        continue;
                    }

                    FileAppender appender = null;
                    try {
                        appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false);
                        Logger.getRootLogger().addAppender(appender);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    logger.info("#examples: " + nrOfExamples + " noise: " + noise);

                    final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics();

                    MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset();
                    MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset();

                    ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads);

                    // indicates if the execution for some of the queries failed
                    final AtomicBoolean failed = new AtomicBoolean(false);

                    Set<String> queriesToProcess = new TreeSet<>(queries);
                    queriesToProcess.retainAll(query2Examples.entrySet().stream()
                            .filter(e -> e.getValue().correctPosExampleCandidates.size() >= nrOfExamples)
                            .map(e -> e.getKey()).collect(Collectors.toSet()));

                    // loop over SPARQL queries
                    for (final String sparqlQuery : queriesToProcess) {
                        CBDStructureTree cbdStructure = cbdStructureTree != null ? cbdStructureTree
                                : QueryUtils.getOptimalCBDStructure(QueryFactory.create(sparqlQuery));

                        tp.submit(() -> {
                            logger.info("CBD tree:" + cbdStructure.toStringVerbose());

                            // update max tree depth
                            this.maxTreeDepth = QueryTreeUtils.getDepth(cbdStructure);
                            logger.info("##############################################################");
                            logger.info("Processing query\n" + sparqlQuery);

                            // we repeat it n times with different permutations of examples
                            int nrOfPermutations = 1;

                            if (nrOfExamples >= query2Examples.get(sparqlQuery).correctPosExampleCandidates
                                    .size()) {
                                nrOfPermutations = 1;
                            }
                            for (int perm = 1; perm <= nrOfPermutations; perm++) {
                                logger.info("Run {}/{}", perm, nrOfPermutations);
                                try {
                                    ExamplesWrapper examples = getExamples(sparqlQuery, nrOfExamples,
                                            nrOfExamples, noise, cbdStructure);
                                    logger.info("pos. examples:\n"
                                            + Joiner.on("\n").join(examples.correctPosExamples));
                                    logger.info("neg. examples:\n"
                                            + Joiner.on("\n").join(examples.correctNegExamples));

                                    // write examples to disk
                                    File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery));
                                    dir.mkdirs();
                                    Files.write(Joiner.on("\n").join(examples.correctPosExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tp"),
                                            Charsets.UTF_8);
                                    Files.write(Joiner.on("\n").join(examples.correctNegExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".tn"),
                                            Charsets.UTF_8);
                                    Files.write(Joiner.on("\n").join(examples.falsePosExamples), new File(dir,
                                            "examples" + perm + "_" + nrOfExamples + "_" + noise + ".fp"),
                                            Charsets.UTF_8);

                                    // compute baseline
                                    RDFResourceTree baselineSolution = applyBaseLine(examples,
                                            Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES);
                                    logger.info("Evaluating baseline...");
                                    Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise);
                                    logger.info("Baseline score:\n" + baselineScore);
                                    String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution,
                                            dataset.getBaseIRI(), dataset.getPrefixMapping());
                                    baselinePrecisionStats.addValue(baselineScore.precision);
                                    baselineRecallStats.addValue(baselineScore.recall);
                                    baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                    baselinePredAccStats.addValue(baselineScore.predAcc);
                                    baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                    // run QTL
                                    PosNegLPStandard lp = new PosNegLPStandard();
                                    lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                    lp.setNegativeExamples(examples.negExamplesMapping.keySet());
                                    //                                 QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                    QTL2DisjunctiveMultiThreaded la = new QTL2DisjunctiveMultiThreaded(lp, qef);
                                    la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                    la.setReasoner(dataset.getReasoner());
                                    la.setEntailment(Entailment.SIMPLE);
                                    la.setTreeFactory(queryTreeFactory);
                                    la.setPositiveExampleTrees(examples.posExamplesMapping);
                                    la.setNegativeExampleTrees(examples.negExamplesMapping);
                                    la.setNoise(noise);
                                    la.setHeuristic(heuristic);
                                    la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                    la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                    la.init();
                                    la.start();
                                    List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(
                                            la.getSolutions());

                                    //                              List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic);
                                    nrOfReturnedSolutionsStats.addValue(solutions.size());

                                    // the best returned solution by QTL
                                    EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                    logger.info("Got " + solutions.size() + " query trees.");
                                    //                                 logger.info("Best computed solution:\n" + render(bestSolution.asEvaluatedDescription()));
                                    logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                    long runtimeBestSolution = la.getTimeBestSolutionFound();
                                    bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                    // convert to SPARQL query
                                    RDFResourceTree tree = bestSolution.getTree();
                                    tree = filter.apply(tree);
                                    String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree,
                                            dataset.getBaseIRI(), dataset.getPrefixMapping());

                                    // compute score
                                    Score score = computeScore(sparqlQuery, tree, noise);
                                    bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                    bestReturnedSolutionRecallStats.addValue(score.recall);
                                    bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                    bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                    bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                    logger.info(score.toString());

                                    // find the extensionally best matching tree in the list
                                    Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(
                                            solutions, sparqlQuery, noise, examples);
                                    EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore
                                            .getFirst();
                                    Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                    // position of best tree in list of solutions
                                    int positionBestScore = solutions.indexOf(bestMatchingTree);
                                    bestSolutionPositionStats.addValue(positionBestScore);

                                    Score bestScore = score;
                                    if (positionBestScore > 0) {
                                        logger.info(
                                                "Position of best covering tree in list: " + positionBestScore);
                                        logger.info("Best covering solution:\n"
                                                + render(bestMatchingTree.asEvaluatedDescription()));
                                        logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                        bestScore = bestMatchingScore;
                                        logger.info(bestMatchingScore.toString());
                                    } else {
                                        logger.info(
                                                "Best returned solution was also the best covering solution.");
                                    }
                                    bestSolutionRecallStats.addValue(bestScore.recall);
                                    bestSolutionPrecisionStats.addValue(bestScore.precision);
                                    bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                    bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                    bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                    for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                        if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                            Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"),
                                                    Charsets.UTF_8);
                                            break;
                                        }
                                    }

                                    String bestQuery = QueryFactory
                                            .create(QueryTreeUtils.toSPARQLQueryString(
                                                    filter.apply(bestMatchingTree.getTree()),
                                                    dataset.getBaseIRI(), dataset.getPrefixMapping()))
                                            .toString();

                                    if (write2DB) {
                                        write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery,
                                                baselineScore, heuristicName, measureName,
                                                QueryFactory.create(learnedSPARQLQuery).toString(), score,
                                                runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                    }

                                } catch (Exception e) {
                                    failed.set(true);
                                    logger.error("Error occured for query\n" + sparqlQuery, e);
                                    try {
                                        StringWriter sw = new StringWriter();
                                        PrintWriter pw = new PrintWriter(sw);
                                        e.printStackTrace(pw);
                                        Files.append(sparqlQuery + "\n" + sw.toString(),
                                                new File(benchmarkDirectory,
                                                        "failed-" + nrOfExamples + "-" + noise + "-"
                                                                + heuristicName + "-" + measureName + ".txt"),
                                                Charsets.UTF_8);
                                    } catch (IOException e1) {
                                        e1.printStackTrace();
                                    }
                                } finally {
                                    int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                    logger.info("***********Evaluation Progress:"
                                            + NumberFormat.getPercentInstance()
                                                    .format((double) cnt / totalNrOfQTLRuns)
                                            + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                                }
                            }
                        });
                    }

                    tp.shutdown();
                    tp.awaitTermination(12, TimeUnit.HOURS);

                    Logger.getRootLogger().removeAppender(appender);

                    if (!failed.get()) {
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;

                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;

                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;

                        result += "\nTime until best returned solution found:\n"
                                + bestReturnedSolutionRuntimeStats;

                        result += "\nPositions of best solution:\n"
                                + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;

                        result += "\nCBD generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal()
                                + "\n";
                        result += "CBD generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg()
                                + "\n";
                        result += "Tree generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal()
                                + "\n";
                        result += "Tree generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg()
                                + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";

                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise,
                                    bestReturnedSolutionFMeasureStats.getMean(),
                                    bestReturnedSolutionPrecisionStats.getMean(),
                                    bestReturnedSolutionRecallStats.getMean(),
                                    bestReturnedSolutionPredAccStats.getMean(),
                                    bestReturnedSolutionMathCorrStats.getMean(),
                                    bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(),
                                    bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(),
                                    bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(),
                                    baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(),
                                    baselineRecallStats.getMean(), baselinePredAccStats.getMean(),
                                    baselineMathCorrStats.getMean(),
                                    bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            String content = "###";
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content += separator + noiseInterval1;
            }
            content += "\n";
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content += nrOfExamplesIntervals[i];
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content += separator + data[i][j];
                }
                content += "\n";
            }

            File examplesVsNoise = new File(benchmarkDirectory,
                    "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content, examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }
    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms.");
}
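
The pattern worth noting in the example above: every per-run metric is collected in a SynchronizedDescriptiveStatistics (the thread-safe DescriptiveStatistics subclass), values are added from pooled worker threads, and getMean() is only read after the pool has terminated. Below is a minimal, self-contained sketch of that pattern; the class and variable names are illustrative, not taken from the example.

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
import org.apache.commons.math3.stat.descriptive.SynchronizedDescriptiveStatistics;

public class ThreadSafeMeanSketch {
    public static void main(String[] args) throws InterruptedException {
        // thread-safe subclass: addValue() may be called from several workers
        DescriptiveStatistics fMeasureStats = new SynchronizedDescriptiveStatistics();

        ExecutorService tp = Executors.newFixedThreadPool(4);
        for (int run = 0; run < 100; run++) {
            tp.submit(() -> {
                // stand-in for one evaluation run producing a score
                double fmeasure = ThreadLocalRandom.current().nextDouble();
                fMeasureStats.addValue(fmeasure);
            });
        }
        tp.shutdown();
        tp.awaitTermination(1, TimeUnit.MINUTES);

        // safe now: all workers have finished adding values
        System.out.println("mean F-measure: " + fMeasureStats.getMean());
    }
}

Reading getMean() before awaitTermination() returns would race with the workers; like the example, the sketch shuts the pool down first.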

From source file:org.dllearner.algorithms.qtl.experiments.QTLEvaluation.java

public void run(int maxNrOfProcessedQueries, int maxTreeDepth, int[] exampleInterval, double[] noiseInterval,
        HeuristicType[] measures) throws Exception {
    this.maxTreeDepth = maxTreeDepth;
    queryTreeFactory.setMaxDepth(maxTreeDepth);

    if (exampleInterval != null) {
        nrOfExamplesIntervals = exampleInterval;
    }
    if (noiseInterval != null) {
        this.noiseIntervals = noiseInterval;
    }
    if (measures != null) {
        this.measures = measures;
    }

    logger.info("Started QTL evaluation...");
    long t1 = System.currentTimeMillis();

    List<String> queries = dataset.getSparqlQueries().values().stream().map(q -> q.toString())
            .collect(Collectors.toList());
    logger.info("#loaded queries: " + queries.size());

    // debugging filter: keep only queries that contain all of the given tokens
    queries = queries.stream().filter(q -> tokens.stream().allMatch(q::contains))
            .collect(Collectors.toList());

    if (maxNrOfProcessedQueries == -1) {
        maxNrOfProcessedQueries = queries.size();
    }

    //      queries = filter(queries, (int) Math.ceil((double) maxNrOfProcessedQueries / maxTreeDepth));
    //      queries = queries.subList(0, Math.min(queries.size(), maxNrOfProcessedQueries));
    logger.info("#queries to process: " + queries.size());

    // generate examples for each query
    logger.info("precomputing pos. and neg. examples...");
    final Map<String, ExampleCandidates> query2Examples = new HashMap<>();
    for (String query : queries) {
        // if (!(query.contains("Borough_(New_York_City)"))) continue;
        query2Examples.put(query, generateExamples(query));
    }
    logger.info("precomputing pos. and neg. examples finished.");

    // check for queries that do not return any result (should not happen, but we never know)
    Set<String> emptyQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.isEmpty()).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} empty queries.", emptyQueries.size());
    queries.removeAll(emptyQueries);

    // min. pos examples
    Set<String> lowNrOfExamplesQueries = query2Examples.entrySet().stream()
            .filter(e -> e.getValue().correctPosExampleCandidates.size() < 2).map(e -> e.getKey())
            .collect(Collectors.toSet());
    logger.info("got {} queries with < 2 pos. examples.", emptyQueries.size());
    queries.removeAll(lowNrOfExamplesQueries);

    final int totalNrOfQTLRuns = heuristics.length * this.measures.length * nrOfExamplesIntervals.length
            * noiseIntervals.length * queries.size();
    logger.info("#QTL runs: " + totalNrOfQTLRuns);

    final AtomicInteger currentNrOfFinishedRuns = new AtomicInteger(0);

    // loop over heuristics
    for (final QueryTreeHeuristic heuristic : heuristics) {
        final String heuristicName = heuristic.getClass().getAnnotation(ComponentAnn.class).shortName();

        // loop over heuristics measures
        for (HeuristicType measure : this.measures) {
            final String measureName = measure.toString();
            heuristic.setHeuristicType(measure);

            double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

            // loop over number of positive examples
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                final int nrOfExamples = nrOfExamplesIntervals[i];

                // loop over noise value
                for (int j = 0; j < noiseIntervals.length; j++) {
                    final double noise = noiseIntervals[j];

                    // check if not already processed
                    File logFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".log");
                    File statsFile = new File(benchmarkDirectory, "qtl2-" + nrOfExamples + "-" + noise + "-"
                            + heuristicName + "-" + measureName + ".stats");

                    if (!override && logFile.exists() && statsFile.exists()) {
                        logger.info(
                                "Eval config already processed. For re-running please remove corresponding output files.");
                        continue;
                    }

                    FileAppender appender = null;
                    try {
                        appender = new FileAppender(new SimpleLayout(), logFile.getPath(), false);
                        Logger.getRootLogger().addAppender(appender);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }

                    logger.info("#examples: " + nrOfExamples + " noise: " + noise);

                    final DescriptiveStatistics nrOfReturnedSolutionsStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics baselinePrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselinePredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics baselineMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestReturnedSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestReturnedSolutionRuntimeStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPrecisionStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionRecallStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionFMeasureStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionPredAccStats = new SynchronizedDescriptiveStatistics();
                    final DescriptiveStatistics bestSolutionMathCorrStats = new SynchronizedDescriptiveStatistics();

                    final DescriptiveStatistics bestSolutionPositionStats = new SynchronizedDescriptiveStatistics();

                    MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).reset();
                    MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).reset();

                    ExecutorService tp = Executors.newFixedThreadPool(nrOfThreads);

                    // indicates if the execution for some of the queries failed
                    final AtomicBoolean failed = new AtomicBoolean(false);

                    // loop over SPARQL queries
                    for (final String sparqlQuery : queries) {

                        tp.submit(() -> {

                            logger.info("##############################################################");
                            logger.info("Processing query\n" + sparqlQuery);

                            try {
                                ExamplesWrapper examples = query2Examples.get(sparqlQuery).get(nrOfExamples,
                                        nrOfExamples, noise);
                                logger.info(
                                        "pos. examples:\n" + Joiner.on("\n").join(examples.correctPosExamples));
                                logger.info(
                                        "neg. examples:\n" + Joiner.on("\n").join(examples.correctNegExamples));

                                // write examples to disk
                                File dir = new File(benchmarkDirectory, "data/" + hash(sparqlQuery));
                                dir.mkdirs();
                                Files.write(Joiner.on("\n").join(examples.correctPosExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tp"),
                                        Charsets.UTF_8);
                                Files.write(Joiner.on("\n").join(examples.correctNegExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".tn"),
                                        Charsets.UTF_8);
                                Files.write(Joiner.on("\n").join(examples.falsePosExamples),
                                        new File(dir, "examples_" + nrOfExamples + "_" + noise + ".fp"),
                                        Charsets.UTF_8);

                                // compute baseline
                                logger.info("Computing baseline...");
                                RDFResourceTree baselineSolution = applyBaseLine(examples,
                                        Baseline.MOST_INFORMATIVE_EDGE_IN_EXAMPLES);
                                logger.info("Baseline solution:\n" + owlRenderer
                                        .render(QueryTreeUtils.toOWLClassExpression(baselineSolution)));
                                logger.info("Evaluating baseline...");
                                Score baselineScore = computeScore(sparqlQuery, baselineSolution, noise);
                                logger.info("Baseline score:\n" + baselineScore);
                                String baseLineQuery = QueryTreeUtils.toSPARQLQueryString(baselineSolution,
                                        dataset.getBaseIRI(), dataset.getPrefixMapping());
                                baselinePrecisionStats.addValue(baselineScore.precision);
                                baselineRecallStats.addValue(baselineScore.recall);
                                baselineFMeasureStats.addValue(baselineScore.fmeasure);
                                baselinePredAccStats.addValue(baselineScore.predAcc);
                                baselineMathCorrStats.addValue(baselineScore.mathCorr);

                                // run QTL
                                PosNegLPStandard lp = new PosNegLPStandard();
                                lp.setPositiveExamples(examples.posExamplesMapping.keySet());
                                lp.setNegativeExamples(examples.negExamplesMapping.keySet());
                                QTL2Disjunctive la = new QTL2Disjunctive(lp, qef);
                                la.setRenderer(new org.dllearner.utilities.owl.DLSyntaxObjectRenderer());
                                la.setReasoner(dataset.getReasoner());
                                la.setEntailment(Entailment.SIMPLE);
                                la.setTreeFactory(queryTreeFactory);
                                la.setPositiveExampleTrees(examples.posExamplesMapping);
                                la.setNegativeExampleTrees(examples.negExamplesMapping);
                                la.setNoise(noise);
                                la.setHeuristic(heuristic);
                                la.setMaxExecutionTimeInSeconds(maxExecutionTimeInSeconds);
                                la.setMaxTreeComputationTimeInSeconds(maxExecutionTimeInSeconds);
                                la.init();
                                la.start();
                                List<EvaluatedRDFResourceTree> solutions = new ArrayList<>(la.getSolutions());

                                //                              List<EvaluatedRDFResourceTree> solutions = generateSolutions(examples, noise, heuristic);
                                nrOfReturnedSolutionsStats.addValue(solutions.size());

                                // the best returned solution by QTL
                                EvaluatedRDFResourceTree bestSolution = solutions.get(0);
                                logger.info("Got " + solutions.size() + " query trees.");
                                logger.info("Best computed solution:\n"
                                        + render(bestSolution.asEvaluatedDescription()));
                                logger.info("QTL Score:\n" + bestSolution.getTreeScore());
                                long runtimeBestSolution = la.getTimeBestSolutionFound();
                                bestReturnedSolutionRuntimeStats.addValue(runtimeBestSolution);

                                // convert to SPARQL query
                                RDFResourceTree tree = bestSolution.getTree();
                                //                  filter.filter(tree);
                                String learnedSPARQLQuery = QueryTreeUtils.toSPARQLQueryString(tree,
                                        dataset.getBaseIRI(), dataset.getPrefixMapping());

                                // compute score
                                Score score = computeScore(sparqlQuery, tree, noise);
                                bestReturnedSolutionPrecisionStats.addValue(score.precision);
                                bestReturnedSolutionRecallStats.addValue(score.recall);
                                bestReturnedSolutionFMeasureStats.addValue(score.fmeasure);
                                bestReturnedSolutionPredAccStats.addValue(score.predAcc);
                                bestReturnedSolutionMathCorrStats.addValue(score.mathCorr);
                                logger.info(score.toString());

                                // find the extensionally best matching tree in the list
                                Pair<EvaluatedRDFResourceTree, Score> bestMatchingTreeWithScore = findBestMatchingTreeFast(
                                        solutions, sparqlQuery, noise, examples);
                                EvaluatedRDFResourceTree bestMatchingTree = bestMatchingTreeWithScore
                                        .getFirst();
                                Score bestMatchingScore = bestMatchingTreeWithScore.getSecond();

                                // position of best tree in list of solutions
                                int positionBestScore = solutions.indexOf(bestMatchingTree);
                                bestSolutionPositionStats.addValue(positionBestScore);

                                Score bestScore = score;
                                if (positionBestScore > 0) {
                                    logger.info("Position of best covering tree in list: " + positionBestScore);
                                    logger.info("Best covering solution:\n"
                                            + render(bestMatchingTree.asEvaluatedDescription()));
                                    logger.info("Tree score: " + bestMatchingTree.getTreeScore());
                                    bestScore = bestMatchingScore;
                                    logger.info(bestMatchingScore.toString());
                                } else {
                                    logger.info("Best returned solution was also the best covering solution.");
                                }
                                bestSolutionRecallStats.addValue(bestScore.recall);
                                bestSolutionPrecisionStats.addValue(bestScore.precision);
                                bestSolutionFMeasureStats.addValue(bestScore.fmeasure);
                                bestSolutionPredAccStats.addValue(bestScore.predAcc);
                                bestSolutionMathCorrStats.addValue(bestScore.mathCorr);

                                for (RDFResourceTree negTree : examples.negExamplesMapping.values()) {
                                    if (QueryTreeUtils.isSubsumedBy(negTree, bestMatchingTree.getTree())) {
                                        Files.append(sparqlQuery + "\n", new File("/tmp/negCovered.txt"),
                                                Charsets.UTF_8);
                                        break;
                                    }
                                }

                                String bestQuery = QueryFactory.create(QueryTreeUtils.toSPARQLQueryString(
                                        filter.apply(bestMatchingTree.getTree()), dataset.getBaseIRI(),
                                        dataset.getPrefixMapping())).toString();

                                if (write2DB) {
                                    write2DB(sparqlQuery, nrOfExamples, examples, noise, baseLineQuery,
                                            baselineScore, heuristicName, measureName,
                                            QueryFactory.create(learnedSPARQLQuery).toString(), score,
                                            runtimeBestSolution, bestQuery, positionBestScore, bestScore);
                                }

                            } catch (Exception e) {
                                failed.set(true);
                                logger.error("Error occured for query\n" + sparqlQuery, e);
                                try {
                                    StringWriter sw = new StringWriter();
                                    PrintWriter pw = new PrintWriter(sw);
                                    e.printStackTrace(pw);
                                    Files.append(sparqlQuery + "\n" + sw.toString(),
                                            new File(benchmarkDirectory, "failed-" + nrOfExamples + "-" + noise
                                                    + "-" + heuristicName + "-" + measureName + ".txt"),
                                            Charsets.UTF_8);
                                } catch (IOException e1) {
                                    e1.printStackTrace();
                                }
                            } finally {
                                int cnt = currentNrOfFinishedRuns.incrementAndGet();
                                logger.info("***********Evaluation Progress:"
                                        + NumberFormat.getPercentInstance()
                                                .format((double) cnt / totalNrOfQTLRuns)
                                        + "(" + cnt + "/" + totalNrOfQTLRuns + ")" + "***********");
                            }
                        });

                    }

                    tp.shutdown();
                    tp.awaitTermination(12, TimeUnit.HOURS);

                    Logger.getRootLogger().removeAppender(appender);

                    if (!failed.get()) {
                        String result = "";
                        result += "\nBaseline Precision:\n" + baselinePrecisionStats;
                        result += "\nBaseline Recall:\n" + baselineRecallStats;
                        result += "\nBaseline F-measure:\n" + baselineFMeasureStats;
                        result += "\nBaseline PredAcc:\n" + baselinePredAccStats;
                        result += "\nBaseline MathCorr:\n" + baselineMathCorrStats;

                        result += "#Returned solutions:\n" + nrOfReturnedSolutionsStats;

                        result += "\nOverall Precision:\n" + bestReturnedSolutionPrecisionStats;
                        result += "\nOverall Recall:\n" + bestReturnedSolutionRecallStats;
                        result += "\nOverall F-measure:\n" + bestReturnedSolutionFMeasureStats;
                        result += "\nOverall PredAcc:\n" + bestReturnedSolutionPredAccStats;
                        result += "\nOverall MathCorr:\n" + bestReturnedSolutionMathCorrStats;

                        result += "\nTime until best returned solution found:\n"
                                + bestReturnedSolutionRuntimeStats;

                        result += "\nPositions of best solution:\n"
                                + Arrays.toString(bestSolutionPositionStats.getValues());
                        result += "\nPosition of best solution stats:\n" + bestSolutionPositionStats;
                        result += "\nOverall Precision of best solution:\n" + bestSolutionPrecisionStats;
                        result += "\nOverall Recall of best solution:\n" + bestSolutionRecallStats;
                        result += "\nOverall F-measure of best solution:\n" + bestSolutionFMeasureStats;

                        result += "\nCBD generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getTotal()
                                + "\n";
                        result += "CBD generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.CBD_RETRIEVAL.name()).getAvg()
                                + "\n";
                        result += "Tree generation time(total):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getTotal()
                                + "\n";
                        result += "Tree generation time(avg):\t"
                                + MonitorFactory.getTimeMonitor(TimeMonitors.TREE_GENERATION.name()).getAvg()
                                + "\n";
                        result += "Tree size(avg):\t" + treeSizeStats.getMean() + "\n";

                        logger.info(result);

                        try {
                            Files.write(result, statsFile, Charsets.UTF_8);
                        } catch (IOException e) {
                            e.printStackTrace();
                        }

                        data[i][j] = bestReturnedSolutionFMeasureStats.getMean();

                        if (write2DB) {
                            write2DB(heuristicName, measureName, nrOfExamples, noise,
                                    bestReturnedSolutionFMeasureStats.getMean(),
                                    bestReturnedSolutionPrecisionStats.getMean(),
                                    bestReturnedSolutionRecallStats.getMean(),
                                    bestReturnedSolutionPredAccStats.getMean(),
                                    bestReturnedSolutionMathCorrStats.getMean(),
                                    bestSolutionPositionStats.getMean(), bestSolutionFMeasureStats.getMean(),
                                    bestSolutionPrecisionStats.getMean(), bestSolutionRecallStats.getMean(),
                                    bestSolutionPredAccStats.getMean(), bestSolutionMathCorrStats.getMean(),
                                    baselineFMeasureStats.getMean(), baselinePrecisionStats.getMean(),
                                    baselineRecallStats.getMean(), baselinePredAccStats.getMean(),
                                    baselineMathCorrStats.getMean(),
                                    bestReturnedSolutionRuntimeStats.getMean());
                        }
                    }
                }
            }

            String content = "###";
            String separator = "\t";
            for (double noiseInterval1 : noiseIntervals) {
                content += separator + noiseInterval1;
            }
            content += "\n";
            for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
                content += nrOfExamplesIntervals[i];
                for (int j = 0; j < noiseIntervals.length; j++) {
                    content += separator + data[i][j];
                }
                content += "\n";
            }

            File examplesVsNoise = new File(benchmarkDirectory,
                    "examplesVsNoise-" + heuristicName + "-" + measureName + ".tsv");
            try {
                Files.write(content, examplesVsNoise, Charsets.UTF_8);
            } catch (IOException e) {
                logger.error("failed to write stats to file", e);
            }
        }
    }

    if (write2DB) {
        conn.close();
    }

    if (useEmailNotification) {
        sendFinishedMail();
    }
    long t2 = System.currentTimeMillis();
    long duration = t2 - t1;
    logger.info("QTL evaluation finished in " + DurationFormatUtils.formatDurationHMS(duration) + "ms.");
}
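
A second getMean() use in the example above is the grid output at the end: each (examples, noise) cell of the matrix stores the mean F-measure, and the matrix is dumped as a TSV. Below is a reduced sketch of that output step, with assumed interval values and placeholder scores in place of the real evaluation; Guava's Files.write is used as in the example.

import java.io.File;
import java.io.IOException;

import com.google.common.base.Charsets;
import com.google.common.io.Files;
import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class ExamplesVsNoiseSketch {
    public static void main(String[] args) throws IOException {
        int[] nrOfExamplesIntervals = { 5, 10, 20 };   // assumed values
        double[] noiseIntervals = { 0.0, 0.1, 0.2 };   // assumed values
        double[][] data = new double[nrOfExamplesIntervals.length][noiseIntervals.length];

        // one cell = the mean score over all queries for that configuration
        for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
            for (int j = 0; j < noiseIntervals.length; j++) {
                DescriptiveStatistics fMeasureStats = new DescriptiveStatistics();
                fMeasureStats.addValue(0.8);   // stand-in for per-query F-measures
                fMeasureStats.addValue(0.9);
                data[i][j] = fMeasureStats.getMean();
            }
        }

        // header row lists the noise levels; each data row starts with the example count
        StringBuilder content = new StringBuilder("###");
        for (double noise : noiseIntervals) {
            content.append('\t').append(noise);
        }
        content.append('\n');
        for (int i = 0; i < nrOfExamplesIntervals.length; i++) {
            content.append(nrOfExamplesIntervals[i]);
            for (int j = 0; j < noiseIntervals.length; j++) {
                content.append('\t').append(data[i][j]);
            }
            content.append('\n');
        }
        Files.write(content.toString(), new File("examplesVsNoise.tsv"), Charsets.UTF_8);
    }
}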

From source file:org.hawkular.client.test.metrics.openshift.CollectionRateDetailTest.java

private void getData(String metricID, String testID, long start, long end, Duration timeBucket) {
    Reporter.log("Fetching large data set... may take a couple minutes", true);
    List<DataPoint<Double>> rawData = client().metrics().gauge()
            .findGaugeDataWithId(metricID, String.valueOf(start), String.valueOf(end), null, null, null)
            .getEntity();

    Assert.assertNotNull(rawData, testID);
    Reporter.log("raw datapoints: " + rawData.size(), true);

    List<Long> zeroList = findZeroValues(rawData);

    Assert.assertTrue(zeroList == null || zeroList.isEmpty(), testID);

    Map<Long, Integer> hist = OpenshiftBaseTest.makeHistogram(rawData, timeBucket);

    Double[] result = hist.entrySet().stream().map(x -> Double.valueOf(x.getValue()))
            .toArray(Double[]::new);

    double[] d = ArrayUtils.toPrimitive(result);

    // drop the first and last as they are usually outliers
    double[] samples = Arrays.copyOfRange(d, 1, d.length - 1);
    DescriptiveStatistics stats = new DescriptiveStatistics(samples);

    Reporter.log(hist.toString(), true);
    Reporter.log("size: " + stats.getN(), true);
    Reporter.log("min/max: " + stats.getMin() + "/" + stats.getMax(), true);
    Reporter.log("mean: " + stats.getMean(), true);
    Reporter.log("variance: " + stats.getVariance(), true);
    Reporter.log("stddev: " + stats.getStandardDeviation(), true);
}
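
The test above feeds histogram bucket counts into DescriptiveStatistics and reads getMean() alongside the other summary accessors. Below is a minimal sketch of that summary step, with made-up bucket counts standing in for the Hawkular query result.

import java.util.Arrays;

import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;

public class HistogramSummarySketch {
    public static void main(String[] args) {
        // assumed bucket counts; the first and last buckets are partial
        double[] counts = { 3.0, 120.0, 118.0, 121.0, 119.0, 2.0 };

        // drop the first and last value, as the test does for its outliers
        double[] samples = Arrays.copyOfRange(counts, 1, counts.length - 1);
        DescriptiveStatistics stats = new DescriptiveStatistics(samples);

        System.out.println("size:     " + stats.getN());
        System.out.println("min/max:  " + stats.getMin() + "/" + stats.getMax());
        System.out.println("mean:     " + stats.getMean());
        System.out.println("variance: " + stats.getVariance());
        System.out.println("stddev:   " + stats.getStandardDeviation());
    }
}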