Example usage for the org.apache.commons.math.stat.clustering.Cluster constructor Cluster(T)

List of usage examples for the org.apache.commons.math.stat.clustering.Cluster constructor Cluster(T)

Introduction

On this page you can find example usages of the Cluster(T center) constructor of org.apache.commons.math.stat.clustering.Cluster.

Prototype

public Cluster(final T center) 

Source Link

Document

Build a cluster centered at a specified point.

Usage

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Partitions the given points into clusters using the K-means++ algorithm.
 *
 * <p>Initial centers are obtained from {@code chooseInitialCenters}; the
 * centers are then recomputed pass after pass until a pass leaves every
 * center unchanged or the iteration limit is reached.</p>
 *
 * @param points the points to cluster
 * @param k the number of clusters to split the data into
 * @param maxIterations upper bound on the number of refinement passes;
 *     a negative value removes the bound
 * @return a list of clusters containing the points
 */
public List<Cluster<T>> cluster(final Collection<T> points, final int k, final int maxIterations) {
    // seed the centers and perform the first assignment
    List<Cluster<T>> current = chooseInitialCenters(points, k, random);
    assignPointsToClusters(current, points);

    // a negative limit is treated as "no limit"
    final int limit = (maxIterations >= 0) ? maxIterations : Integer.MAX_VALUE;
    int pass = 0;
    while (pass < limit) {
        final List<Cluster<T>> updated = new ArrayList<Cluster<T>>();
        boolean moved = false;

        for (final Cluster<T> existing : current) {
            final T center;
            if (!existing.getPoints().isEmpty()) {
                // regular case: the new center is the centroid of the members
                center = existing.getCenter().centroidOf(existing.getPoints());
                moved |= !center.equals(existing.getCenter());
            } else {
                // empty cluster: pick a replacement center according to the strategy
                switch (emptyStrategy) {
                case IGNORE:
                    // drop this cluster from the next generation
                    continue;
                case FARTHEST_POINT:
                    center = getFarthestPoint(current);
                    break;
                case LARGEST_POINTS_NUMBER:
                    center = getPointFromLargestNumberCluster(current);
                    break;
                case LARGEST_VARIANCE:
                    center = getPointFromLargestVarianceCluster(current);
                    break;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                moved = true;
            }
            updated.add(new Cluster<T>(center));
        }

        if (!moved) {
            // no center changed during this pass: the current clustering is final
            break;
        }
        assignPointsToClusters(updated, points);
        current = updated;
        pass++;
    }
    return current;
}

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Use K-means++ to choose the initial centers.
 *
 * <p>The first center is drawn uniformly at random from {@code points}. Each
 * further center is drawn from the points not yet chosen, with probability
 * proportional to the squared distance between the point and its nearest
 * already-chosen center. If the candidate points run out before {@code k}
 * centers have been chosen, the centers found so far are returned.</p>
 *
 * @param <T> type of the points to cluster
 * @param points the points to choose the initial centers from
 * @param k the number of centers to choose
 * @param random random generator to use
 * @return the initial centers
 */
private static <T extends Clusterable<T>> List<Cluster<T>> chooseInitialCenters(final Collection<T> points,
        final int k, final Random random) {

    final List<T> pointSet = new ArrayList<T>(points);
    final List<Cluster<T>> resultSet = new ArrayList<Cluster<T>>();

    // Choose one center uniformly at random from among the data points.
    final T firstPoint = pointSet.remove(random.nextInt(pointSet.size()));
    resultSet.add(new Cluster<T>(firstPoint));

    // Cumulative squared distances; sized for the largest candidate set and
    // reused on every pass (only the first pointSet.size() entries are valid).
    final double[] dx2 = new double[pointSet.size()];

    // Stop early when no candidate is left, so that k > points.size() cannot
    // loop forever or remove from an empty list.
    while (resultSet.size() < k && !pointSet.isEmpty()) {
        final int remaining = pointSet.size();

        // For each data point x, compute D(x), the distance between x and
        // the nearest center that has already been chosen, and accumulate
        // D(x)^2. The accumulator must be a double: an int accumulator would
        // silently truncate every partial sum and distort the weights.
        double sum = 0.0;
        for (int i = 0; i < remaining; i++) {
            final T p = pointSet.get(i);
            final Cluster<T> nearest = getNearestCluster(resultSet, p);
            final double d = p.distanceFrom(nearest.getCenter());
            sum += d * d;
            dx2[i] = sum;
        }

        // Add one new data point as a center. Each point x is chosen with
        // probability proportional to D(x)^2: draw r in [0, sum) and take the
        // first point whose cumulative weight reaches r.
        final double r = random.nextDouble() * sum;
        // Fall back to the last candidate so that a center is always selected,
        // even if no comparison succeeds (e.g. a NaN distance).
        int chosen = remaining - 1;
        for (int i = 0; i < remaining; i++) {
            if (dx2[i] >= r) {
                chosen = i;
                break;
            }
        }
        resultSet.add(new Cluster<T>(pointSet.remove(chosen)));
    }

    return resultSet;

}