Example usage for org.apache.commons.math.stat.clustering Cluster getCenter

List of usage examples for org.apache.commons.math.stat.clustering Cluster getCenter

Introduction

On this page you can find example usages of org.apache.commons.math.stat.clustering Cluster getCenter.

Prototype

public T getCenter() 

Source Link

Document

Get the point chosen to be the center of this cluster.

Usage

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Runs the K-means++ clustering algorithm.
 *
 * <p>Initial centers are picked with {@code chooseInitialCenters}, then the
 * centers are recomputed and the points reassigned until either no center
 * changes or the iteration limit is reached. A cluster that ends up without
 * points is handled according to {@code emptyStrategy}.</p>
 *
 * @param points the points to cluster
 * @param k the number of clusters to split the data into
 * @param maxIterations the maximum number of iterations to run the algorithm
 *     for; a negative value means no maximum is applied
 * @return a list of clusters containing the points
 */
public List<Cluster<T>> cluster(final Collection<T> points, final int k, final int maxIterations) {
    // seed the centers, then hand every point to a cluster
    List<Cluster<T>> current = chooseInitialCenters(points, k, random);
    assignPointsToClusters(current, points);

    // a negative limit is mapped to the largest possible iteration count
    final int limit;
    if (maxIterations < 0) {
        limit = Integer.MAX_VALUE;
    } else {
        limit = maxIterations;
    }

    for (int iteration = 0; iteration < limit; iteration++) {
        boolean changed = false;
        final List<Cluster<T>> next = new ArrayList<Cluster<T>>();

        for (final Cluster<T> old : current) {
            final T center;
            if (!old.getPoints().isEmpty()) {
                // regular case: move the center to the centroid of its points
                center = old.getCenter().centroidOf(old.getPoints());
                changed |= !center.equals(old.getCenter());
            } else {
                // empty cluster: pick a replacement center per the configured strategy
                switch (emptyStrategy) {
                case LARGEST_VARIANCE:
                    center = getPointFromLargestVarianceCluster(current);
                    break;
                case LARGEST_POINTS_NUMBER:
                    center = getPointFromLargestNumberCluster(current);
                    break;
                case FARTHEST_POINT:
                    center = getFarthestPoint(current);
                    break;
                case IGNORE:
                    // the empty cluster is simply left out of the next generation
                    continue;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                changed = true;
            }
            next.add(new Cluster<T>(center));
        }

        // nothing moved: the current assignment is the result
        if (!changed) {
            return current;
        }

        assignPointsToClusters(next, points);
        current = next;
    }

    // iteration limit reached
    return current;
}

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Use K-means++ to choose the initial centers.
 *
 * <p>The first center is drawn uniformly at random; every further center is
 * drawn from the remaining points with probability proportional to the
 * squared distance to the nearest center already chosen.</p>
 *
 * <p>If {@code k} is larger than the number of points, selection stops once
 * every point has been used, so fewer than {@code k} clusters are returned.</p>
 *
 * @param <T> type of the points to cluster
 * @param points the points to choose the initial centers from
 * @param k the number of centers to choose
 * @param random random generator to use
 * @return the initial centers
 * @throws IllegalArgumentException if {@code points} is empty (raised by
 *     {@link Random#nextInt(int)} when asked for a bound of zero)
 */
private static <T extends Clusterable<T>> List<Cluster<T>> chooseInitialCenters(final Collection<T> points,
        final int k, final Random random) {

    final List<T> pointSet = new ArrayList<T>(points);
    final List<Cluster<T>> resultSet = new ArrayList<Cluster<T>>();

    // Choose one center uniformly at random from among the data points.
    final T firstPoint = pointSet.remove(random.nextInt(pointSet.size()));
    resultSet.add(new Cluster<T>(firstPoint));

    // Cumulative squared distances; only the first pointSet.size() entries
    // are meaningful in any given round because pointSet shrinks.
    final double[] dx2 = new double[pointSet.size()];
    while (resultSet.size() < k && !pointSet.isEmpty()) {
        final int candidates = pointSet.size();

        // For each data point x, compute D(x), the distance between x and
        // the nearest center that has already been chosen.
        // The accumulator must be a double: an int accumulator with "+="
        // silently narrows on every step and discards fractional distances.
        double sum = 0;
        for (int i = 0; i < candidates; i++) {
            final T p = pointSet.get(i);
            final Cluster<T> nearest = getNearestCluster(resultSet, p);
            final double d = p.distanceFrom(nearest.getCenter());
            sum += d * d;
            dx2[i] = sum;
        }

        // Add one new data point as a center. Each point x is chosen with
        // probability proportional to D(x)^2.
        final double r = random.nextDouble() * sum;

        // Fall back to the last candidate so that a round always makes
        // progress, even if no cumulative entry compares >= r (e.g. NaN).
        int chosen = candidates - 1;
        for (int i = 0; i < candidates; i++) {
            if (dx2[i] >= r) {
                chosen = i;
                break;
            }
        }
        resultSet.add(new Cluster<T>(pointSet.remove(chosen)));
    }

    return resultSet;

}

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Get a random point from the {@link Cluster} with the largest distance variance.
 *
 * <p>For every non-empty cluster the variance of the distances between its
 * points and its center is computed. A random point is then removed from the
 * point list of the cluster with the largest variance and returned.</p>
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point from the selected cluster
 * @throws ConvergenceException if no cluster was selected, e.g. because
 *     every cluster is empty
 */
private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters) {

    Cluster<T> widest = null;
    double widestVariance = Double.NEGATIVE_INFINITY;

    for (final Cluster<T> candidate : clusters) {
        // empty clusters have no distances to measure
        if (candidate.getPoints().isEmpty()) {
            continue;
        }

        // variance of the point-to-center distances of this cluster
        final T center = candidate.getCenter();
        final Variance spread = new Variance();
        for (final T member : candidate.getPoints()) {
            spread.increment(member.distanceFrom(center));
        }
        final double candidateVariance = spread.getResult();

        // keep the cluster with the largest variance seen so far
        if (candidateVariance > widestVariance) {
            widestVariance = candidateVariance;
            widest = candidate;
        }
    }

    // no cluster qualified
    if (widest == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // pull a random point out of the winning cluster
    final List<T> pool = widest.getPoints();
    final int pick = random.nextInt(pool.size());
    return pool.remove(pick);

}

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Get the point farthest from its cluster center.
 *
 * <p>All points of all clusters are scanned; the one with the largest
 * distance to the center of its own cluster is removed from that cluster's
 * point list and returned.</p>
 *
 * @param clusters the {@link Cluster}s to search
 * @return the point farthest from its cluster center
 * @throws ConvergenceException if no point was selected, e.g. because
 *     every cluster is empty
 */
private T getFarthestPoint(final Collection<Cluster<T>> clusters) {

    Cluster<T> owner = null;
    int indexInOwner = -1;
    double farthest = Double.NEGATIVE_INFINITY;

    for (final Cluster<T> candidate : clusters) {
        final T center = candidate.getCenter();
        final List<T> members = candidate.getPoints();

        // track the position alongside the point so it can be removed by index later
        int position = 0;
        for (final T member : members) {
            final double distance = member.distanceFrom(center);
            if (distance > farthest) {
                farthest = distance;
                owner = candidate;
                indexInOwner = position;
            }
            position++;
        }
    }

    // nothing was selected
    if (owner == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    final List<T> ownerPoints = owner.getPoints();
    return ownerPoints.remove(indexInOwner);

}

From source file:org.basketball.MyKMeansPlusPlusClusterer.java

/**
 * Returns the nearest {@link Cluster} to the given point.
 *
 * <p>The distance is measured from the point to each cluster's center; the
 * first cluster with the strictly smallest distance wins.</p>
 *
 * @param <T> type of the points to cluster
 * @param clusters the {@link Cluster}s to search
 * @param point the point to find the nearest {@link Cluster} for
 * @return the nearest {@link Cluster} to the given point, or {@code null} if
 *     no cluster has a distance below {@link Double#MAX_VALUE} (for example
 *     when {@code clusters} is empty)
 */
private static <T extends Clusterable<T>> Cluster<T> getNearestCluster(final Collection<Cluster<T>> clusters,
        final T point) {
    Cluster<T> best = null;
    double bestDistance = Double.MAX_VALUE;
    for (final Cluster<T> candidate : clusters) {
        final T center = candidate.getCenter();
        final double distance = point.distanceFrom(center);
        // only a strictly smaller distance replaces the current best
        if (!(distance < bestDistance)) {
            continue;
        }
        bestDistance = distance;
        best = candidate;
    }
    return best;
}

From source file:playground.christoph.evacuation.analysis.EvacuationTimeClusterer.java

/**
 * Clusters the clusterable locations with K-means++ and collects, per cluster,
 * the values stored in {@code locationMap} for every location of that cluster.
 *
 * @param numClusters the number of clusters passed to the clusterer
 * @param iterations the iteration limit passed to the clusterer
 * @return a map from each cluster center's basic location to the combined
 *     list of values looked up for the cluster's points
 */
Map<BasicLocation, List<Double>> buildCluster(int numClusters, int iterations) {

    createCostMap();

    KMeansPlusPlusClusterer<ClusterableLocation> kMeans = new KMeansPlusPlusClusterer<ClusterableLocation>(
            MatsimRandom.getLocalInstance());

    List<ClusterableLocation> locations = getClusterableLocations();

    buildQuadTree(locations);

    log.info("do clustering...");
    List<Cluster<ClusterableLocation>> clusters = kMeans.cluster(locations, numClusters, iterations);

    Map<BasicLocation, List<Double>> timesByCenter = new HashMap<BasicLocation, List<Double>>();

    for (Cluster<ClusterableLocation> cluster : clusters) {
        BasicLocation centerLocation = cluster.getCenter().getBasicLocation();

        // merge the per-location value lists of all members of this cluster
        List<Double> mergedTimes = new ArrayList<Double>();
        for (ClusterableLocation member : cluster.getPoints()) {
            mergedTimes.addAll(locationMap.get(member.getBasicLocation()));
        }

        timesByCenter.put(centerLocation, mergedTimes);
    }

    log.info("done.");

    return timesByCenter;
}