List of usage examples for org.apache.commons.math3.ml.distance EuclideanDistance EuclideanDistance
EuclideanDistance
From source file:analysis.SilhouetteIndex.java
public double calculateIndex(SimpleKMeans sk, Instances inst, int c) throws Exception { //Map<Integer, Instances> clustermap = sk.clusterInstance; sk.setNumClusters(c);/* w ww.ja v a2 s. c o m*/ sk.buildClusterer(inst); EuclideanDistance ed = new EuclideanDistance(); double avgSilhouetteOverAllPoints = 0.d; if (sk.getNumClusters() == 1) { //Index is not defined for k=1. needs at least 2 clusters return Double.NaN; } for (int i = 0; i < inst.numInstances(); i++) { //for the current element get its cluster int currentcluster = sk.clusterInstance(inst.instance(i)); //System.out.println(inst.instance(i).value(2)); double[] current_attr = new double[inst.numAttributes()]; double[] other_attr = new double[inst.numAttributes()]; //get attributes of the current instance for (int attr = 0; attr < inst.numAttributes(); attr++) { current_attr[attr] = inst.instance(i).value(attr); } // int counter double[] distances = new double[sk.getNumClusters()]; int[] counters = new int[sk.getNumClusters()]; //System.out.println("distances: "+distances.length); double avgInClusterDist = 0, dist = 0; int countsamecluster = 0; distances[currentcluster] = Double.MAX_VALUE; for (int j = 0; j < inst.numInstances(); j++) { for (int attr = 0; attr < inst.numAttributes(); attr++) { other_attr[attr] = inst.instance(j).value(attr); } //get cluster number of j th element int clusternumber = sk.clusterInstance(inst.instance(j)); //check if j and i in the same cluster if (clusternumber == currentcluster) { if (inst.instance(i) != inst.instance(j)) { //calculate average dist to other elements in the cluster //inst. dist = ed.compute(current_attr, other_attr); avgInClusterDist = avgInClusterDist + dist; countsamecluster++; } } else { dist = ed.compute(current_attr, other_attr); distances[clusternumber] = distances[clusternumber] + dist; counters[clusternumber]++; } } //calculate value ai if (countsamecluster > 0) { avgInClusterDist = avgInClusterDist / countsamecluster; //this is value ai } //find average distances to other clusters for (int k = 0; k < distances.length; k++) { if (k != currentcluster) { distances[k] = distances[k] / counters[k]; } } //Find the min value of average distance to other clusters double min = distances[0]; for (int k = 1; k < distances.length; k++) { if (min > distances[k]) { min = distances[k]; } } //si for current element: double si; // if we only have one element in our cluster it makes sense to set // si = 0 if (countsamecluster == 1) { si = 0.0d; } else { si = (min - avgInClusterDist) / Math.max(min, avgInClusterDist); } avgSilhouetteOverAllPoints = avgSilhouetteOverAllPoints + si; } //System.out.println(inst.numInstances()); return avgSilhouetteOverAllPoints / inst.numInstances(); }
From source file:br.unicamp.ic.recod.gpsi.measures.gpsiClusterDistortionScore.java
@Override public double score(double[][][] samples) { double score = 0.0; double[] centroid; double[][] vectors; int nClasses = samples.length, m = 0, m_i = 0, i; VectorialMean meanOperator;/*from www .j av a 2 s . com*/ EuclideanDistance distance = new EuclideanDistance(); for (int label = 0; label < samples.length; label++) { vectors = samples[label]; meanOperator = new VectorialMean(vectors[0].length); for (i = 0; i < vectors.length; i++) meanOperator.increment(vectors[i]); centroid = meanOperator.getResult(); m_i = vectors.length; for (i = 0; i < m_i; i++) { score += distance.compute(centroid, vectors[i]); } m += m_i; } if (score <= 1E-30) return Double.MAX_VALUE; return score / m; }
From source file:br.unicamp.ic.recod.gpsi.measures.gpsiMeanAndStandardDeviationDistanceScore.java
@Override public double score(double[][][] input) { double[][] means = new double[2][]; double[] sDistances; VectorialMean mean;/*from w w w.ja v a 2 s . c o m*/ Mean mean_ = new Mean(); EuclideanDistance distance = new EuclideanDistance(); int i, j; double d = 0; for (i = 0; i < 2; i++) { mean = new VectorialMean(input[i][0].length); for (j = 0; j < input[i].length; j++) mean.increment(input[i][j]); means[i] = mean.getResult(); } d = distance.compute(means[0], means[1]); double deviations = 0.0; for (i = 0; i < 2; i++) { sDistances = new double[input[i].length]; for (j = 0; j < input[i].length; j++) sDistances[j] = distance.compute(means[i], input[i][j]); deviations += mean_.evaluate(sDistances); } return d / deviations; }
From source file:com.hurence.logisland.math.CentroidDistanceMeasure.java
public static void computeNearestCentroid(Trace trace) { double[] point = trace.getPoint(); EuclideanDistance distance = new EuclideanDistance(); trace.setDistanceToNearestCentroid(distance.compute(point, centroids[0])); trace.setCentroidName(centroidNames[0]); for (int i = 1; i < centroids.length; i++) { double d1 = distance.compute(point, centroids[i]); if (d1 < trace.getDistanceToNearestCentroid()) { trace.setDistanceToNearestCentroid(d1); trace.setCentroidName(centroidNames[i]); }//w w w . j a va2 s . com } }
From source file:bigdataproject.KMeansKFinder.java
public int find(double epsilon) { double oldAvDist = 0.0; for (int k = 2; k < numSamples; k++) { KMeansPlusPlusClusterer kmeans = new KMeansPlusPlusClusterer(k, 1000, new EuclideanDistance()); List<Cluster<DoublePoint>> clusterList = kmeans.cluster(list); double[] avDistances = new double[k]; int index = 0; for (Cluster<DoublePoint> c : clusterList) { List cluster = c.getPoints(); int size = cluster.size(); double[] centroid = getCentroid(cluster); double distanceSum = 0.0; for (Object p : cluster) { DoublePoint point = (DoublePoint) p; double[] pointDouble = point.getPoint(); EuclideanDistance dist = new EuclideanDistance(); distanceSum += dist.compute(centroid, pointDouble); }/* w w w .ja va 2s.c o m*/ avDistances[index] = distanceSum / size; index++; } double avDistSum = 0.0; for (int i = 0; i < avDistances.length; i++) { avDistSum += avDistances[i]; } double newAvDist = avDistSum / avDistances.length; double difference = Math.abs(newAvDist - oldAvDist); if (difference >= epsilon) { oldAvDist = newAvDist; } else return k - 1; } return 0; }
From source file:bigdataproject.KDistances.java
public void calculateDistances() { for (int i = 0; i < samples.length; i++) { double[] point = samples[i]; for (int j = i + 1; j < samples.length; j++) { double[] point2 = samples[j]; EuclideanDistance dist = new EuclideanDistance(); distanceMatrix[i][j] = dist.compute(point, point2); }/*from w ww .ja va 2 s .c o m*/ } BlockRealMatrix dist = new BlockRealMatrix(distanceMatrix); BlockRealMatrix distTranspose = dist.transpose(); dist = dist.add(distTranspose); distanceMatrix = dist.getData(); }
From source file:GeMSE.GS.Analysis.Clustering.DistanceMatrix.java
public double[][] GetDistanceMatrix(double[][] space, Metrics metric, ClusteringDomains clusteringDomain) { euclideanDistance = new EuclideanDistance(); manhattanDistance = new ManhattanDistance(); earthMoversDistance = new EarthMoversDistance(); chebyshevDistance = new ChebyshevDistance(); canberraDistance = new CanberraDistance(); double[][] rtv = null; int rowCount = space.length; int colCount = space[0].length; int rtvIndex = 0; switch (clusteringDomain) { case Rows://from w w w. ja va 2 s .co m rtv = new double[1][(int) Math.floor((Math.pow(rowCount, 2) - rowCount) / 2f)]; for (int i = 0; i < rowCount - 1; i++) for (int j = i + 1; j < rowCount; j++) // TODO: replace the following mess with the factory method. switch (metric) { case EuclideanDistance: rtv[0][rtvIndex++] = euclideanDistance.compute(space[i], space[j]); break; case ManhattanDistance: rtv[0][rtvIndex++] = manhattanDistance.compute(space[i], space[j]); break; case EarthMoversDistance: rtv[0][rtvIndex++] = earthMoversDistance.compute(space[i], space[j]); break; case ChebyshevDistance: rtv[0][rtvIndex++] = chebyshevDistance.compute(space[i], space[j]); break; case CanberraDistance: rtv[0][rtvIndex++] = canberraDistance.compute(space[i], space[j]); break; case PearsonCorrelationCoefficient: rtv[0][rtvIndex++] = ComputePCC(space[i], space[j]); break; } break; case Columns: rtv = new double[1][(int) Math.floor((Math.pow(colCount, 2) - colCount) / 2f)]; double[] col_i = new double[rowCount]; double[] col_j = new double[rowCount]; for (int i = 0; i < colCount - 1; i++) { for (int j = i + 1; j < colCount; j++) { for (int row = 0; row < rowCount; row++) { col_i[row] = space[row][i]; col_j[row] = space[row][j]; } // TODO: Replace the following mess with the factory pattern. switch (metric) { case EuclideanDistance: rtv[0][rtvIndex++] = euclideanDistance.compute(col_i, col_j); break; case ManhattanDistance: rtv[0][rtvIndex++] = manhattanDistance.compute(col_i, col_j); break; case EarthMoversDistance: rtv[0][rtvIndex++] = earthMoversDistance.compute(col_i, col_j); break; case ChebyshevDistance: rtv[0][rtvIndex++] = chebyshevDistance.compute(col_i, col_j); break; case CanberraDistance: rtv[0][rtvIndex++] = canberraDistance.compute(col_i, col_j); break; case PearsonCorrelationCoefficient: rtv[0][rtvIndex++] = ComputePCC(col_i, col_j); break; } } } break; } return rtv; }
From source file:br.prof.salesfilho.oci.service.EuclidianClassifier.java
private void classify() { double yesNudeDistance = 0; double noNudeDistance = 0; EuclideanDistance euclideanDistance = new EuclideanDistance(); double[] redChanellDescriptor; double[] greenChanellDescriptor; double[] blueChanellDescriptor; double[] avgChanellDescriptor; double[] grayScaleChanelDescriptor; switch (this.classificationLevel) { case 1:// w ww . j a va 2s . com //RGB AVG avgChanellDescriptor = imageProcessor.getMagnitudeAvarageFromRgbChannels(kernelSize); yesNudeDistance += euclideanDistance.compute(avgChanellDescriptor, nudeBodyPartDescriptor.getAvgChannel()); noNudeDistance += euclideanDistance.compute(avgChanellDescriptor, notNudeBodyPartDescriptor.getAvgChannel()); break; case 2: redChanellDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_RED, kernelSize); greenChanellDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_GREEN, kernelSize); blueChanellDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_BLUE, kernelSize); //Red yesNudeDistance += euclideanDistance.compute(redChanellDescriptor, nudeBodyPartDescriptor.getRedChannel()); noNudeDistance += euclideanDistance.compute(redChanellDescriptor, notNudeBodyPartDescriptor.getRedChannel()); //Green yesNudeDistance += euclideanDistance.compute(greenChanellDescriptor, nudeBodyPartDescriptor.getGreenChannel()); noNudeDistance += euclideanDistance.compute(greenChanellDescriptor, notNudeBodyPartDescriptor.getGreenChannel()); //Blue yesNudeDistance += euclideanDistance.compute(blueChanellDescriptor, nudeBodyPartDescriptor.getBlueChannel()); noNudeDistance += euclideanDistance.compute(blueChanellDescriptor, notNudeBodyPartDescriptor.getBlueChannel()); break; case 3: redChanellDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_RED, kernelSize); greenChanellDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_GREEN, kernelSize); blueChanellDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_BLUE, kernelSize); avgChanellDescriptor = imageProcessor.getMagnitudeAvarageFromRgbChannels(kernelSize); //Red yesNudeDistance += euclideanDistance.compute(redChanellDescriptor, nudeBodyPartDescriptor.getRedChannel()); noNudeDistance += euclideanDistance.compute(redChanellDescriptor, notNudeBodyPartDescriptor.getRedChannel()); //Green yesNudeDistance += euclideanDistance.compute(greenChanellDescriptor, nudeBodyPartDescriptor.getGreenChannel()); noNudeDistance += euclideanDistance.compute(greenChanellDescriptor, notNudeBodyPartDescriptor.getGreenChannel()); //Blue yesNudeDistance += euclideanDistance.compute(blueChanellDescriptor, nudeBodyPartDescriptor.getBlueChannel()); noNudeDistance += euclideanDistance.compute(blueChanellDescriptor, notNudeBodyPartDescriptor.getBlueChannel()); //RGB AVG yesNudeDistance += euclideanDistance.compute(avgChanellDescriptor, nudeBodyPartDescriptor.getAvgChannel()); noNudeDistance += euclideanDistance.compute(avgChanellDescriptor, notNudeBodyPartDescriptor.getAvgChannel()); break; default: //GRAY_SCALE grayScaleChanelDescriptor = imageProcessor.getMagnitude(ImageProcessor.CHANNEL_GRAYSCALE, kernelSize); yesNudeDistance += euclideanDistance.compute(grayScaleChanelDescriptor, nudeBodyPartDescriptor.getGrayScaleChannel()); noNudeDistance += euclideanDistance.compute(grayScaleChanelDescriptor, notNudeBodyPartDescriptor.getGrayScaleChannel()); } System.out.println("-------------------------------------------------------------------------------------"); System.out.println("Distance YES.: " + yesNudeDistance); System.out.println("Distance NO..: " + noNudeDistance); System.out.println("-------------------------------------------------------------------------------------"); if (yesNudeDistance < noNudeDistance) { classification = "YES"; } else if (yesNudeDistance > noNudeDistance) { classification = "NO"; } else { classification = "MAYBE"; } }
From source file:com.yahoo.egads.models.adm.DBScanModel.java
@Override public void tune(DataSequence observedSeries, DataSequence expectedSeries, IntervalSequence anomalySequence) throws Exception { // Compute the time-series of errors. HashMap<String, ArrayList<Float>> allErrors = aes.initAnomalyErrors(observedSeries, expectedSeries); List<IdentifiedDoublePoint> points = new ArrayList<IdentifiedDoublePoint>(); EuclideanDistance ed = new EuclideanDistance(); int n = observedSeries.size(); for (int i = 0; i < n; i++) { double[] d = new double[(aes.getIndexToError().keySet()).size()]; for (int e = 0; e < (aes.getIndexToError().keySet()).size(); e++) { d[e] = allErrors.get(aes.getIndexToError().get(e)).get(i); }/*from ww w . jav a 2 s .co m*/ points.add(new IdentifiedDoublePoint(d, i)); } double sum = 0.0; double count = 0.0; for (int i = 0; i < n; i++) { for (int j = 0; j < n; j++) { sum += ed.compute(points.get(i).getPoint(), points.get(j).getPoint()); count++; } } eps = ((double) this.sDAutoSensitivity) * (sum / count); minPoints = ((int) Math.ceil(((double) this.amntAutoSensitivity) * ((double) n))); dbscan = new DBSCANClusterer<IdentifiedDoublePoint>(eps, minPoints); }
From source file:com.yahoo.egads.utilities.DBSCANClusterer.java
/** * Creates a new instance of a DBSCANClusterer. * <p>/*from w ww . j a v a 2 s . c o m*/ * The euclidean distance will be used as default distance measure. * * @param eps maximum radius of the neighborhood to be considered * @param minPts minimum number of points needed for a cluster * @throws NotPositiveException if {@code eps < 0.0} or {@code minPts < 0} */ public DBSCANClusterer(final double eps, final int minPts) throws NotPositiveException { this(eps, minPts, new EuclideanDistance()); }