Example usage for org.apache.mahout.math.random WeightedThing getWeight

List of usage examples for org.apache.mahout.math.random WeightedThing getWeight

Introduction

On this page you can find example usages of org.apache.mahout.math.random.WeightedThing.getWeight().

Prototype

public double getWeight() 

Source Link

Usage

From source file:eu.stratosphere.library.clustering.DistributedOnePassKMeans.BallKMeans.java

License:Apache License

/**
 * Examines the datapoints and updates cluster centers to be the centroid of the nearest datapoints.  To
 * compute a new center for cluster c_i, we average all points that are closer than d_i * trimFraction
 * where d_i is
 *
 * d_i = min_j \sqrt ||c_j - c_i||^2
 *
 * By ignoring distant points, the centroids converge more quickly to a good approximation of the
 * optimal k-means solution (given good starting points).
 *
 * @param datapoints the points to cluster.
 */
private void iterativeAssignment(List<? extends WeightedVector> datapoints) {
    final DistanceMeasure measure = centroids.getDistanceMeasure();
    final int pointCount = datapoints.size();

    // nearestNeighborDistances.get(c) holds the distance between centroid c and the closest
    // other centroid; it is rebuilt at the start of every pass.
    List<Double> nearestNeighborDistances = Lists.newArrayListWithExpectedSize(numClusters);
    // assignments.get(p) is the centroid index point p was assigned to on the previous pass.
    // Every point starts at -1, which is not a valid cluster index, so the first pass always
    // registers a change.
    List<Integer> assignments = Lists.newArrayList(Collections.nCopies(pointCount, -1));

    // Keep refining until a full pass leaves every assignment untouched or the iteration
    // budget (maxNumIterations) is exhausted.
    int iteration = 0;
    boolean assignmentsChanged = true;
    while (assignmentsChanged && iteration < maxNumIterations) {
        assignmentsChanged = false;

        // Step 1: for each centroid, record how far away its nearest neighboring centroid is.
        // This distance scales the trimming threshold used in step 3.
        // NOTE(review): the original comment here said "if a centroid has no points assigned to
        // it, the clustering failed", but no such check is performed in this method.
        nearestNeighborDistances.clear();
        for (Vector center : centroids) {
            Vector nearestOther = centroids.searchFirst(center, true).getValue();
            nearestNeighborDistances.add(measure.distance(center, nearestOther));
        }

        // Step 2: clone every current centroid and reset the clone's weight to 0. The clones are
        // mutated while scanning the datapoints, so the searcher's own entries stay untouched.
        List<Centroid> updatedCentroids = Lists.newArrayList();
        for (Vector current : centroids) {
            Centroid copy = (Centroid) current.clone();
            copy.setWeight(0);
            updatedCentroids.add(copy);
        }

        // Step 3: assign each datapoint to its closest centroid and fold it into that centroid's
        // clone when it lies within the trimmed radius.
        // NOTE(review): both index-based lookups below assume a centroid's getIndex() equals its
        // position in the iteration order of centroids -- confirm against the searcher.
        for (int p = 0; p < pointCount; p++) {
            WeightedVector point = datapoints.get(p);
            WeightedThing<Vector> nearest = centroids.searchFirst(point, false);
            int nearestIndex = ((WeightedVector) nearest.getValue()).getIndex();
            double nearestDistance = nearest.getWeight();

            if (assignments.get(p) != nearestIndex) {
                assignments.set(p, nearestIndex);
                assignmentsChanged = true;
            }

            // Points at or beyond the threshold are skipped, so they contribute nothing to the
            // updated centroid.
            double trimThreshold = trimFraction * nearestNeighborDistances.get(nearestIndex);
            if (nearestDistance < trimThreshold) {
                updatedCentroids.get(nearestIndex).update(point);
            }
        }

        // Step 4: swap the updated centroids into the searcher for the next pass.
        centroids.clear();
        centroids.addAll(updatedCentroids);
        iteration++;
    }

    if (!correctWeights) {
        return;
    }

    // Weight correction: zero every centroid's weight, then add each datapoint's weight to its
    // closest centroid, this time without any trimming.
    for (Vector centroid : centroids) {
        ((Centroid) centroid).setWeight(0);
    }
    for (WeightedVector point : datapoints) {
        Centroid owner = (Centroid) centroids.searchFirst(point, false).getValue();
        owner.setWeight(owner.getWeight() + point.getWeight());
    }
}

From source file:zx.soft.mahout.knn.search.AbstractSearchTest.java

License:Apache License

@Test
public void testOrdering() {
    final int queryCount = 100;
    final int dimension = 20;
    final int resultLimit = 200;

    // Build the query matrix: each row is overwritten with a fresh sample() from the generator.
    Matrix queries = new DenseMatrix(queryCount, dimension);
    MultiNormal sampler = new MultiNormal(dimension);
    for (int row = 0; row < queryCount; row++) {
        queries.viewRow(row).assign(sampler.sample());
    }

    // Load the reference data into the searcher under test.
    Searcher searcher = getSearch(dimension);
    searcher.addAllMatrixSlices(testData());

    // For every query, each returned weight must be strictly greater than the one before it
    // (and the first must be strictly greater than 0).
    for (MatrixSlice query : queries) {
        List<WeightedThing<Vector>> results = searcher.search(query.vector(), resultLimit);
        double previousWeight = 0;
        for (WeightedThing<Vector> result : results) {
            double weight = result.getWeight();
            assertTrue("Scores must be monotonic increasing", weight > previousWeight);
            previousWeight = weight;
        }
    }
}