Example usage for org.apache.mahout.math Vector getDistanceSquared

List of usage examples for org.apache.mahout.math Vector getDistanceSquared

Introduction

On this page you can find example usages of org.apache.mahout.math Vector getDistanceSquared.

Prototype

double getDistanceSquared(Vector v);

Source Link

Document

Get the square of the distance between this vector and the other vector.

Usage

From source file:com.cloudera.science.ml.core.vectors.Centers.java

License:Open Source License

/**
 * Returns the minimum squared Euclidean distance between the given
 * {@code Vector} and a point contained in this instance.
 * /*from  w ww .j  av  a2s.  c o  m*/
 * @param point The point
 * @return The minimum squared Euclidean distance from the point 
 */
public double getDistanceSquared(Vector point) {
    double min = Double.POSITIVE_INFINITY;
    for (Vector c : centers) {
        min = Math.min(min, c.getDistanceSquared(point));
    }
    return min;
}

From source file:com.cloudera.science.ml.kmeans.core.KMeansEvaluation.java

License:Open Source License

/**
 * Populates the per-clustering evaluation lists ({@code predictionStrengths},
 * {@code trainCosts}, {@code testCosts}, {@code stableClusters} and
 * {@code stablePoints}) with one entry for each index of {@code testCenters}.
 *
 * <p>For each index {@code i}, every weighted test point is assigned to its
 * closest center in {@code testCenters.get(i)} and in {@code trainCenters.get(i)}.
 * The weighted squared distances give the test and train costs, and the
 * weights are tallied into a (test cluster x train cluster) table from which
 * the per-cluster agreement scores are derived.
 *
 * <p>NOTE(review): assumes {@code trainCenters} has at least as many entries
 * as {@code testCenters} - confirm against the caller.
 */
private void init() {
    final int numClusterings = testCenters.size();
    predictionStrengths = Lists.newArrayListWithExpectedSize(numClusterings);
    trainCosts = Lists.newArrayListWithExpectedSize(numClusterings);
    testCosts = Lists.newArrayListWithExpectedSize(numClusterings);
    stableClusters = Lists.newArrayListWithExpectedSize(numClusterings);
    stablePoints = Lists.newArrayListWithExpectedSize(numClusterings);

    for (int i = 0; i < numClusterings; i++) {
        Centers test = testCenters.get(i);
        Centers train = trainCenters.get(i);
        double trainCost = 0.0;
        double testCost = 0.0;
        // assignments[t][r] = total weight of test points whose closest test
        // center is t and whose closest train center is r.
        double[][] assignments = new double[test.size()][train.size()];
        // Kept as a double: "int += double" narrows on every iteration and
        // would silently drop the fractional part of each weight.
        double totalPoints = 0.0;
        for (Weighted<Vector> wv : testPoints) {
            double wt = wv.weight();
            totalPoints += wt;
            Vector v = wv.thing();
            int testId = test.indexOfClosest(v);
            testCost += wt * v.getDistanceSquared(test.get(testId));
            int trainId = train.indexOfClosest(v);
            trainCost += wt * v.getDistanceSquared(train.get(trainId));
            assignments[testId][trainId] += wt;
        }
        trainCosts.add(trainCost);
        testCosts.add(testCost);

        double minScore = Double.POSITIVE_INFINITY;
        double points = 0;
        double clusters = 0;
        for (double[] assignment : assignments) {
            double total = 0.0;
            double same = 0.0;
            for (double a : assignment) {
                total += a;
                same += a * (a - 1);
            }
            // With integral weights this is the fraction of ordered pairs in
            // the test cluster that also share a train cluster. It is NaN when
            // total is 0 or 1; NaN fails both comparisons below, so such
            // clusters are ignored.
            double score = same / (total * (total - 1));
            // Only consider clusters that contain a non-trivial number of obs
            if (total > assignment.length && score < minScore) {
                minScore = score;
            }
            if (score > 0.8) { // stability threshold
                clusters++;
                points += total;
            }
        }
        // minScore stays POSITIVE_INFINITY when no cluster passed the size check.
        predictionStrengths.add(minScore);
        stableClusters.add(clusters / assignments.length);
        stablePoints.add(points / totalPoints);
    }
}