Example usage for weka.core DistanceFunction distance

List of usage examples for weka.core DistanceFunction distance

Introduction

This page collects usage examples for the distance method of weka.core.DistanceFunction.

Prototype

public double distance(Instance first, Instance second);

Source Link

Document

Calculates the distance between two instances.

Usage

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.ClusteringMetrics.java

License:Apache License

/**
 * Calculates the distance between clusters.
 * /*w ww  .j  ava 2  s.  c o m*/
 * @param centroids
 * @return
 */
protected void calculateInterClusteringSimilarity(Instances centroids) {
    DistanceFunction euclideanDistance = new EuclideanDistance();
    euclideanDistance.setInstances(centroids);

    double k = (double) centroids.numInstances();
    double sumDistance = 0;
    double sumVariance = 0;

    for (int i = 0; i < k; i++) {
        for (int j = i + 1; j < k; j++) {
            sumDistance += euclideanDistance.distance(centroids.instance(i), centroids.instance(j));
        }
    }

    this.sampleMeanInterCluster = (1 / (k * (k - 1) / 2)) * sumDistance;

    for (int i = 0; i < k; i++) {
        for (int j = i + 1; j < k; j++) {
            sumVariance += Math.pow((euclideanDistance.distance(centroids.instance(i), centroids.instance(j))
                    - this.sampleMeanInterCluster), 2);
        }
    }

    this.sampleVarianceInterCluster = (1 / ((k * (k - 1) / 2) - 1)) * sumVariance;

    this.sampleCoefficientOfVarianceInterCluster = Math.sqrt(this.sampleVarianceInterCluster)
            / this.sampleMeanInterCluster;

}

From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.ClusteringMetrics.java

License:Apache License

/**
 * Calculates the distance within a cluster.
 * /*  w w  w.  ja v  a  2  s .  com*/
 * @param centroids
 * @return
 */
protected void calculateIntraClusteringSimilarity(Instances centroids, Instances instances, int[] assignments) {
    DistanceFunction euclideanDistance = new EuclideanDistance();
    euclideanDistance.setInstances(instances);

    double[] avgIntraClusterSimilarity = new double[centroids.numInstances()];
    double k = (double) centroids.numInstances();
    double sumDistance = 0;
    double counter = 0;
    double sumDistanceAllClusters = 0;
    double sumVariance = 0;

    for (int i = 0; i < k; i++) {
        for (int j = 0; j < instances.numInstances(); j++) {
            if (assignments[j] == i) {
                sumDistance += euclideanDistance.distance(instances.instance(j), centroids.instance(i));
                counter += 1;
            }
        }
        avgIntraClusterSimilarity[i] = (1 / counter) * sumDistance;
        sumDistance = 0;
        counter = 0;
    }

    for (double clusterDistance : avgIntraClusterSimilarity) {
        sumDistanceAllClusters += clusterDistance;
    }

    this.sampleMeanIntraCluster = (1 / k) * sumDistanceAllClusters;

    for (int i = 0; i < k; i++) {
        sumVariance += Math.pow((avgIntraClusterSimilarity[i] - this.sampleMeanIntraCluster), 2);
    }

    this.sampleVarianceIntraCluster = (1 / (k - 1)) * sumVariance;

    this.sampleCoefficientOfVarianceIntraCluster = Math.sqrt(this.sampleVarianceIntraCluster)
            / this.sampleMeanIntraCluster;

}

From source file:org.iobserve.analysis.behavior.clustering.hierarchical.ElbowMethod.java

License:Apache License

/**
 * Calculates the within-cluster sum of squares (WSS) for a given cluster.
 *
 * <p>The centroid is built by copying the cluster's first instance and overwriting each of its
 * attributes with the attribute's mean over all cluster members. The WSS is the sum of the
 * squared distances between that centroid and every member, measured with the distance
 * function of the hierarchical clusterer.
 *
 * @param cluster
 *            Indices (into {@code this.instances}) of the instances that form the cluster.
 * @return WSS of the cluster; {@code 0.0} for an empty cluster
 **/
public double calcWSS(final List<Integer> cluster) {

    // An empty cluster has no members to sum over and no first instance to copy a centroid from.
    if (cluster.isEmpty()) {
        return 0.0;
    }

    final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction();
    final double[] sumAttValues = new double[this.instances.numAttributes()];
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        // Sum up all values of all instances.
        for (int j = 0; j < this.instances.numAttributes(); j++) {
            sumAttValues[j] += instance.value(j);
        }
    }
    // Get average value of each attribute value.
    for (int j = 0; j < sumAttValues.length; j++) {
        sumAttValues[j] /= cluster.size();
    }

    /*
     * Create a centroid of this cluster by setting the average attributes of this cluster as
     * its own. Copying an existing member keeps the centroid attached to the same attribute
     * layout as the data.
     */
    final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy();
    for (int j = 0; j < this.instances.numAttributes(); j++) {
        centroid.setValue(j, sumAttValues[j]);
    }
    // Sum up squared distances of each data point in cluster to centroid to get WSS.
    double clusterWSS = 0.0;
    for (int i = 0; i < cluster.size(); i++) {
        final Instance instance = this.instances.instance(cluster.get(i));
        clusterWSS += Math.pow(distanceFunction.distance(centroid, instance), 2);
    }
    return clusterWSS;
}

From source file:org.iobserve.analysis.behavior.clustering.hierarchical.GapStatisticMethod.java

License:Apache License

/**
 * Calculates the error measure (ESS) of a given cluster.
 *
 * <p>A centroid is derived by copying the first cluster member and replacing each attribute with
 * the attribute's mean over the cluster. The returned value is the sum of the distances between
 * that centroid and every member, divided by the number of members. Note that the distances
 * are added up as returned by the distance function, i.e. they are not squared here.
 *
 * @param cluster
 *            Indices (into {@code this.instances}) of the instances that form the cluster.
 * @return ESS of the cluster; {@code 0.0} if the cluster has fewer than two members
 **/
public double calcESS(final List<Integer> cluster) {

    final int memberCount = cluster.size();
    // Nothing to measure for an empty or single-element cluster.
    if (memberCount <= 1) {
        return 0.0;
    }

    final DistanceFunction metric = this.hierarchicalClusterer.getDistanceFunction();
    final int attributeCount = this.instances.numAttributes();

    // Accumulate per-attribute totals over all members ...
    final double[] attributeMeans = new double[attributeCount];
    for (final Integer memberIndex : cluster) {
        final Instance member = this.instances.instance(memberIndex);
        for (int att = 0; att < attributeCount; att++) {
            attributeMeans[att] += member.value(att);
        }
    }
    // ... and turn the totals into means.
    for (int att = 0; att < attributeCount; att++) {
        attributeMeans[att] /= memberCount;
    }

    // The centroid starts as a copy of the first member and receives the mean attribute values.
    final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy();
    for (int att = 0; att < attributeCount; att++) {
        centroid.setValue(att, attributeMeans[att]);
    }

    // Add up the distance of every member to the centroid.
    double distanceTotal = 0.0;
    for (final Integer memberIndex : cluster) {
        final Instance member = this.instances.instance(memberIndex);
        distanceTotal += metric.distance(centroid, member);
    }
    return distanceTotal / memberCount;
}

From source file:org.iobserve.analysis.behavior.karlsruhe.data.ClusteringMetrics.java

License:Apache License

/**
 * Computes the sum of squared Euclidean distances between every instance and the centroid it
 * is assigned to.
 *
 * <p>The field {@code sumOfSquaredErrors} is reset to zero, accumulated in place and finally
 * returned. Instances are matched to centroids through {@code assignments}, which holds the
 * centroid index for each instance index.
 *
 * @return the accumulated sum of squared errors
 */
private double calculateSumOfSquaredErrors() {

    final DistanceFunction metric = new EuclideanDistance();
    metric.setInstances(this.instances);

    final int centroidCount = this.centroids.numInstances();
    final int instanceCount = this.instances.numInstances();

    this.sumOfSquaredErrors = 0;

    // Walk the clusters in index order; within a cluster, visit its members in instance order.
    for (int centroidIdx = 0; centroidIdx < centroidCount; centroidIdx++) {
        for (int instanceIdx = 0; instanceIdx < instanceCount; instanceIdx++) {
            if (this.assignments[instanceIdx] != centroidIdx) {
                continue;
            }
            final double gap = metric.distance(this.instances.instance(instanceIdx),
                    this.centroids.instance(centroidIdx));
            this.sumOfSquaredErrors += Math.pow(gap, 2);
        }
    }

    return this.sumOfSquaredErrors;
}

From source file:org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java

License:Open Source License

/**
 * Heuristically searches for the best-scoring interval rule over a number of random restarts.
 *
 * <p>Each iteration picks a random bag of {@code miData} whose attribute 2 is non-zero, takes
 * the instance with the largest weight from the bag's relational attribute 1 as the reference
 * instance, and seeds a rule with it. The rule is then extended by the positive instances of
 * {@code flatData} (class value 1), visited in the order produced by
 * {@code PermutationSort.sortPermInPlace}, for as long as the score returned by
 * {@code ruleScore} does not decrease. The best rule over all iterations is returned.
 *
 * @param flatData the flat instance set used for distances, rule extension and scoring
 * @param miData the multi-instance data from which the reference instances are drawn
 * @param iterations the number of random restarts
 * @return the best rule together with its score; if {@code iterations} is not positive the
 *         rule is {@code null} and the score is {@code -Double.MAX_VALUE}
 * @throws IllegalArgumentException if {@code iterations} is positive and {@code miData}
 *             contains no bag with a non-zero attribute 2
 * @throws Exception if one of the invoked helpers fails
 */
private Pair<IntervalRule, Double> createRule(Instances flatData, Instances miData, int iterations)
        throws Exception {
    // Without a positive bag the random selection below could never terminate, so fail fast.
    // The check is skipped when no iteration runs, because miData is not touched in that case.
    if (iterations > 0) {
        boolean positiveBagFound = false;
        for (int b = 0; b < miData.numInstances() && !positiveBagFound; b++) {
            positiveBagFound = miData.get(b).value(2) != 0;
        }
        if (!positiveBagFound) {
            throw new IllegalArgumentException("miData contains no positive bag (attribute 2 is 0 for all bags)");
        }
    }

    // store for the distances between the reference distance and all others
    double[] distances = new double[flatData.numInstances()];
    // the distance function
    DistanceFunction distFunc = new EuclideanDistance(flatData);
    // permutation which sorts the distances
    // NOTE(review): presumably filled by PermutationSort.sortPermInPlace - confirm against that helper
    Integer[] perm = new Integer[flatData.numInstances()];
    // one generator for all iterations instead of a fresh one per restart
    Random r = new Random();

    IntervalRule bestRule = null;
    double bestRuleScore = -Double.MAX_VALUE;

    // retrieve the best rule heuristically for a number of iterations
    for (int ruleIterations = 0; ruleIterations < iterations; ruleIterations++) {

        // randomly select an initial instance, i.e. selecting a positive
        // bag randomly and taking the instance with the largest weight
        int bagIdx;
        do {
            bagIdx = r.nextInt(miData.numInstances());
        } while (miData.get(bagIdx).value(2) == 0);

        // the reference instance for the next rule
        Instance refInstance = miData.get(bagIdx).relationalValue(1).firstInstance();
        for (Instance candidate : miData.get(bagIdx).relationalValue(1)) {
            if (candidate.weight() > refInstance.weight()) {
                refInstance = candidate;
            }
        }

        IntervalRule rule = new IntervalRule();
        rule.updateClassifier(refInstance);

        // calculate the distance from that particular reference instance to
        // all other positive instances (negatives are set to NaN) and sort them
        Arrays.fill(distances, Double.NaN);
        for (int i = 0; i < distances.length; i++) {
            if (flatData.get(i).classValue() == 1) {
                distances[i] = distFunc.distance(refInstance, flatData.get(i));
            }
        }
        PermutationSort.sortPermInPlace(distances, perm);

        double ruleScore = 0;
        double tmpRuleScore = 0;

        // extend the rule successively by the nearest instances till the
        // score doesn't increase anymore; the index bound stops the search
        // once every candidate has been tried without the score dropping
        int instanceIdx = 0;
        while (instanceIdx < perm.length) {
            if (!Double.isNaN(distances[perm[instanceIdx]])) {
                IntervalRule tmpRule = new IntervalRule(rule);
                tmpRule.updateClassifier(flatData.get(perm[instanceIdx]));

                // evaluate rule
                tmpRuleScore = ruleScore(tmpRule, flatData);

                if (tmpRuleScore >= ruleScore) {
                    ruleScore = tmpRuleScore;
                    rule = tmpRule;
                } else {
                    break;
                }
            }
            instanceIdx++;
        }

        if (ruleScore > bestRuleScore) {
            bestRuleScore = ruleScore;
            bestRule = rule;
        }

    } // iterations per rule

    return new ValuePair<IntervalRule, Double>(bestRule, bestRuleScore);
}