List of usage examples for the weka.core DistanceFunction method distance(Instance, Instance)
public double distance(Instance first, Instance second);
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.ClusteringMetrics.java
License:Apache License
/** * Calculates the distance between clusters. * /*w ww .j ava 2 s. c o m*/ * @param centroids * @return */ protected void calculateInterClusteringSimilarity(Instances centroids) { DistanceFunction euclideanDistance = new EuclideanDistance(); euclideanDistance.setInstances(centroids); double k = (double) centroids.numInstances(); double sumDistance = 0; double sumVariance = 0; for (int i = 0; i < k; i++) { for (int j = i + 1; j < k; j++) { sumDistance += euclideanDistance.distance(centroids.instance(i), centroids.instance(j)); } } this.sampleMeanInterCluster = (1 / (k * (k - 1) / 2)) * sumDistance; for (int i = 0; i < k; i++) { for (int j = i + 1; j < k; j++) { sumVariance += Math.pow((euclideanDistance.distance(centroids.instance(i), centroids.instance(j)) - this.sampleMeanInterCluster), 2); } } this.sampleVarianceInterCluster = (1 / ((k * (k - 1) / 2) - 1)) * sumVariance; this.sampleCoefficientOfVarianceInterCluster = Math.sqrt(this.sampleVarianceInterCluster) / this.sampleMeanInterCluster; }
From source file:net.sf.markov4jmeter.behaviormodelextractor.extraction.transformation.clustering.ClusteringMetrics.java
License:Apache License
/** * Calculates the distance within a cluster. * /* w w w. ja v a 2 s . com*/ * @param centroids * @return */ protected void calculateIntraClusteringSimilarity(Instances centroids, Instances instances, int[] assignments) { DistanceFunction euclideanDistance = new EuclideanDistance(); euclideanDistance.setInstances(instances); double[] avgIntraClusterSimilarity = new double[centroids.numInstances()]; double k = (double) centroids.numInstances(); double sumDistance = 0; double counter = 0; double sumDistanceAllClusters = 0; double sumVariance = 0; for (int i = 0; i < k; i++) { for (int j = 0; j < instances.numInstances(); j++) { if (assignments[j] == i) { sumDistance += euclideanDistance.distance(instances.instance(j), centroids.instance(i)); counter += 1; } } avgIntraClusterSimilarity[i] = (1 / counter) * sumDistance; sumDistance = 0; counter = 0; } for (double clusterDistance : avgIntraClusterSimilarity) { sumDistanceAllClusters += clusterDistance; } this.sampleMeanIntraCluster = (1 / k) * sumDistanceAllClusters; for (int i = 0; i < k; i++) { sumVariance += Math.pow((avgIntraClusterSimilarity[i] - this.sampleMeanIntraCluster), 2); } this.sampleVarianceIntraCluster = (1 / (k - 1)) * sumVariance; this.sampleCoefficientOfVarianceIntraCluster = Math.sqrt(this.sampleVarianceIntraCluster) / this.sampleMeanIntraCluster; }
From source file:org.iobserve.analysis.behavior.clustering.hierarchical.ElbowMethod.java
License:Apache License
/** * Calculate within-cluster sum-of-square (WSS) for a given cluster. * * @param cluster//from ww w . ja v a2 s .c o m * Calculate the WSS for this cluster. * @return WSS **/ public double calcWSS(final List<Integer> cluster) { final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction(); final double[] sumAttValues = new double[this.instances.numAttributes()]; for (int i = 0; i < cluster.size(); i++) { final Instance instance = this.instances.instance(cluster.get(i)); // Sum up all values of all instances. for (int j = 0; j < this.instances.numAttributes(); j++) { sumAttValues[j] += instance.value(j); } } // Get average value of each attribute value. for (int j = 0; j < sumAttValues.length; j++) { sumAttValues[j] /= cluster.size(); } /* * Create a centroid of this cluster by setting the average attributes of this cluster as * its own. */ final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy(); for (int j = 0; j < this.instances.numAttributes(); j++) { centroid.setValue(j, sumAttValues[j]); } // Sum up distances of each data point in cluster to centroid to get WSS. double clusterWSS = 0.0; for (int i = 0; i < cluster.size(); i++) { final Instance instance = this.instances.instance(cluster.get(i)); clusterWSS += Math.pow(distanceFunction.distance(centroid, instance), 2); } return clusterWSS; }
From source file:org.iobserve.analysis.behavior.clustering.hierarchical.GapStatisticMethod.java
License:Apache License
/** * Calculated error sum-of-squares (ESS) for a given cluster. * * @param cluster//from w w w .j av a 2s.c om * Calculate the ESS for this cluster * @return ESS **/ public double calcESS(final List<Integer> cluster) { if ((cluster.size() == 0) || (cluster.size() == 1)) { return 0.0; } final DistanceFunction distanceFunction = this.hierarchicalClusterer.getDistanceFunction(); final double[] sumAttValues = new double[this.instances.numAttributes()]; for (int i = 0; i < cluster.size(); i++) { final Instance instance = this.instances.instance(cluster.get(i)); // Sum up all values of all instances. for (int j = 0; j < this.instances.numAttributes(); j++) { sumAttValues[j] += instance.value(j); } } // Get average value of each attribute value. for (int j = 0; j < this.instances.numAttributes(); j++) { sumAttValues[j] /= cluster.size(); } /* * Create a centroid of this cluster by setting the average attributes of this cluster as * its own. */ final Instance centroid = (Instance) this.instances.instance(cluster.get(0)).copy(); for (int j = 0; j < this.instances.numAttributes(); j++) { centroid.setValue(j, sumAttValues[j]); } // Sum up distances of each data point in cluster to centroid to get ESS. double clusterESS = 0.0; for (int i = 0; i < cluster.size(); i++) { final Instance instance = this.instances.instance(cluster.get(i)); clusterESS += distanceFunction.distance(centroid, instance); } return clusterESS / cluster.size(); }
From source file:org.iobserve.analysis.behavior.karlsruhe.data.ClusteringMetrics.java
License:Apache License
private double calculateSumOfSquaredErrors() { final DistanceFunction euclideanDistance = new EuclideanDistance(); euclideanDistance.setInstances(this.instances); final double numberOfCentroids = this.centroids.numInstances(); this.sumOfSquaredErrors = 0; for (int i = 0; i < numberOfCentroids; i++) { for (int j = 0; j < this.instances.numInstances(); j++) { if (this.assignments[j] == i) { this.sumOfSquaredErrors += Math.pow( euclideanDistance.distance(this.instances.instance(j), this.centroids.instance(i)), 2); }// ww w .j ava 2 s. co m } } return this.sumOfSquaredErrors; }
From source file:org.knime.knip.suise.node.boundarymodel.contourdata.IRI.java
License:Open Source License
/**
 * Heuristically searches for the best {@link IntervalRule} over a number of
 * random restarts. Each restart seeds a rule from the heaviest instance of a
 * randomly chosen positive bag, then greedily extends the rule with the
 * nearest positive instances (by Euclidean distance in {@code flatData}) as
 * long as the rule score does not decrease.
 *
 * @param flatData   the flat (single-instance) view of the data; class value 1
 *                   marks positive instances
 * @param miData     the multi-instance view; attribute 2 marks positive bags,
 *                   relational attribute 1 holds each bag's instances
 * @param iterations number of random restarts
 * @return the best rule found, paired with its score
 * @throws Exception propagated from rule updates / scoring
 */
private Pair<IntervalRule, Double> createRule(Instances flatData, Instances miData, int iterations)
        throws Exception {
    // store for the distances between the reference instance and all others
    double[] distances = new double[flatData.numInstances()];
    // the distance function
    DistanceFunction distFunc = new EuclideanDistance(flatData);
    // permutation which sorts the distances
    Integer[] perm = new Integer[flatData.numInstances()];
    IntervalRule bestRule = null;
    double bestRuleScore = -Double.MAX_VALUE;
    // retrieve the best rule heuristically for a number of iterations
    for (int ruleIterations = 0; ruleIterations < iterations; ruleIterations++) {
        // randomly select an initial instance, i.e. selecting a positive
        // bag randomly and taking the instance with the largest weight.
        // NOTE(review): if no bag has value(2) != 0 this loop never
        // terminates — confirm callers guarantee at least one positive bag.
        Random r = new Random();
        int bagIdx;
        while (miData.get(bagIdx = r.nextInt(miData.numInstances())).value(2) == 0)
            ;
        // the reference instance for the next rule: the heaviest instance
        // of the selected bag
        Instance refInstance = miData.get(bagIdx).relationalValue(1).firstInstance();
        for (Instance i : miData.get(bagIdx).relationalValue(1)) {
            if (i.weight() > refInstance.weight()) {
                refInstance = i;
            }
        }
        IntervalRule rule = new IntervalRule();
        rule.updateClassifier(refInstance);
        // calculate the distance from that particular reference instance to
        // all other positive instances (negatives are set to NaN) and sort them
        Arrays.fill(distances, Double.NaN);
        for (int i = 0; i < distances.length; i++) {
            if (flatData.get(i).classValue() == 1) {
                distances[i] = distFunc.distance(refInstance, flatData.get(i));
            }
        }
        PermutationSort.sortPermInPlace(distances, perm);
        double ruleScore = 0;
        double tmpRuleScore = 0;
        // extend the rule successively by the nearest instances till the
        // score doesn't increase anymore.
        // NOTE(review): instanceIdx is not bounded by perm.length; if every
        // candidate keeps improving the score, or all entries are NaN, this
        // can run past the array end — confirm PermutationSort orders NaNs
        // last and that the score eventually decreases.
        int instanceIdx = 0;
        while (true) {
            if (!Double.isNaN(distances[perm[instanceIdx]])) {
                IntervalRule tmpRule = new IntervalRule(rule);
                tmpRule.updateClassifier(flatData.get(perm[instanceIdx]));
                // evaluate the extended rule; keep it only while the score
                // is non-decreasing, otherwise stop extending
                tmpRuleScore = ruleScore(tmpRule, flatData);
                if (tmpRuleScore >= ruleScore) {
                    ruleScore = tmpRuleScore;
                    rule = tmpRule;
                } else {
                    break;
                }
            }
            instanceIdx++;
        }
        // keep the best-scoring rule across all restarts
        if (ruleScore > bestRuleScore) {
            bestRuleScore = ruleScore;
            bestRule = rule;
        }
    } // iterations per rule
    return new ValuePair<IntervalRule, Double>(bestRule, bestRuleScore);
}