Example usage for org.apache.commons.math3.ml.clustering CentroidCluster CentroidCluster

List of usage examples for org.apache.commons.math3.ml.clustering CentroidCluster CentroidCluster

Introduction

On this page you can find example usages of the org.apache.commons.math3.ml.clustering.CentroidCluster constructor.

Prototype

public CentroidCluster(final Clusterable center) 

Source Link

Document

Build a cluster centered at a specified point.

Usage

From source file:Clustering.technique.KMeansPlusPlusClusterer.java

/**
 * Partitions the given points into clusters using the K-means++ algorithm.
 *
 * @param points the points to cluster
 * @return a list of clusters containing the points
 * @throws MathIllegalArgumentException if the data points are null or the number
 *     of clusters is larger than the number of data points
 * @throws ConvergenceException if an empty cluster is encountered and the
 * {@link #emptyStrategy} is set to {@code ERROR}
 */
public List<CentroidCluster<T>> cluster(final Collection<T> points)
        throws MathIllegalArgumentException, ConvergenceException {

    // Reject a null collection before touching it.
    MathUtils.checkNotNull(points);

    // There must be at least as many data points as requested clusters.
    if (points.size() < k) {
        throw new NumberIsTooSmallException(points.size(), k, false);
    }

    // Seed the clusters, then run the first point-to-cluster assignment.
    // The assignment array needs no explicit initialisation because this
    // first pass fills every slot.
    List<CentroidCluster<T>> current = chooseInitialCenters(points);
    final int[] assignments = new int[points.size()];
    assignPointsToClusters(current, points, assignments);

    // A negative iteration cap means "iterate without a practical limit".
    final int iterationLimit;
    if (maxIterations < 0) {
        iterationLimit = Integer.MAX_VALUE;
    } else {
        iterationLimit = maxIterations;
    }

    int iteration = 0;
    while (iteration < iterationLimit) {
        boolean sawEmptyCluster = false;
        final List<CentroidCluster<T>> updated = new ArrayList<CentroidCluster<T>>();

        for (final CentroidCluster<T> existing : current) {
            final Clusterable replacement;
            if (!existing.getPoints().isEmpty()) {
                // Non-empty cluster: move its center to the centroid of its points.
                replacement = centroidOf(existing.getCenter(), existing.getPoints(),
                        existing.getCenter().getPoint().length);
            } else {
                // Empty cluster: pick a substitute center per the configured strategy.
                switch (emptyStrategy) {
                case LARGEST_VARIANCE:
                    replacement = getPointFromLargestVarianceCluster(current);
                    break;
                case LARGEST_POINTS_NUMBER:
                    replacement = getPointFromLargestNumberCluster(current);
                    break;
                case FARTHEST_POINT:
                    replacement = getFarthestPoint(current);
                    break;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                sawEmptyCluster = true;
            }
            updated.add(new CentroidCluster<T>(replacement));
        }

        final int reassigned = assignPointsToClusters(updated, points, assignments);
        current = updated;

        // Converged: no point changed cluster and no empty cluster had to be repaired.
        final boolean converged = reassigned == 0 && !sawEmptyCluster;
        if (converged) {
            break;
        }
        iteration++;
    }
    return current;
}

From source file:KMeansRecommender.MyKMeansPlusPlusClusterer.java

/**
 * Runs the K-means++ clustering algorithm.
 *
 * @param points the points to cluster/*w w  w  .ja v a  2  s .co  m*/
 * @return a list of clusters containing the points
 * @throws MathIllegalArgumentException if the data points are null or the number
 *     of clusters is larger than the number of data points
 * @throws ConvergenceException if an empty cluster is encountered and the
 * {@link #emptyStrategy} is set to {@code ERROR}
 */
public List<CentroidCluster<T>> cluster(final Collection<T> points)
        throws MathIllegalArgumentException, ConvergenceException {

    // sanity checks
    MathUtils.checkNotNull(points);

    // number of clusters has to be smaller or equal the number of data points
    if (points.size() < k) {
        throw new NumberIsTooSmallException(points.size(), k, false);
    }

    // create the initial clusters
    List<CentroidCluster<T>> clusters = chooseInitialCenters(points);

    // create an array containing the latest assignment of a point to a cluster
    // no need to initialize the array, as it will be filled with the first assignment
    int[] assignments = new int[points.size()];
    assignPointsToClusters(clusters, points, assignments);

    // iterate through updating the centers until we're done
    int finalchange = 0;
    final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
    for (int count = 0; count < max; count++) {
        boolean emptyCluster = false;
        List<CentroidCluster<T>> newClusters = new ArrayList<CentroidCluster<T>>();
        for (final CentroidCluster<T> cluster : clusters) {
            final Clusterable newCenter;
            if (cluster.getPoints().isEmpty()) {
                switch (emptyStrategy) {
                case LARGEST_VARIANCE:
                    newCenter = getPointFromLargestVarianceCluster(clusters);
                    break;
                case LARGEST_POINTS_NUMBER:
                    newCenter = getPointFromLargestNumberCluster(clusters);
                    break;
                case FARTHEST_POINT:
                    newCenter = getFarthestPoint(clusters);
                    break;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                emptyCluster = true;
            } else {
                newCenter = centroidOf(cluster.getPoints(), cluster.getCenter().getPoint().length);
            }
            newClusters.add(new CentroidCluster<T>(newCenter));
        }
        int changes = assignPointsToClusters(newClusters, points, assignments);
        clusters = newClusters;
        finalchange = changes; //for test
        // if there were no more changes in the point-to-cluster assignment
        // and there are no empty clusters left, return the current clusters
        if (changes == 0 && !emptyCluster) {
            //System.out.println("iteration time: " + count + ", changes : 0");  //for test
            return clusters;
        }

    }
    //System.out.println("iteration time: " + max + ", changes : " + finalchange);  //for test  
    return clusters;
}

From source file:Clustering.technique.KMeansPlusPlusClusterer.java

/**
 * Picks the initial cluster centers using K-means++ seeding: the first center
 * is drawn uniformly at random, and every further center is drawn with
 * probability proportional to its squared distance from the nearest center
 * chosen so far.
 *
 * @param points the points to choose the initial centers from
 * @return the initial centers; fewer than {@code k} if no untaken point remains
 */
private List<CentroidCluster<T>> chooseInitialCenters(final Collection<T> points) {

    // Snapshot the input into a read-only list so it can be indexed and so that
    // nothing in this method can remove items and invalidate the bookkeeping below.
    final List<T> candidates = Collections.unmodifiableList(new ArrayList<T>(points));
    final int total = candidates.size();

    // used[i] is set once candidates.get(i) has been picked as a center.
    final boolean[] used = new boolean[total];

    // The centers selected so far.
    final List<CentroidCluster<T>> centers = new ArrayList<CentroidCluster<T>>();

    // First center: uniformly random among all data points.
    final int seedIndex = random.nextInt(total);
    final T seed = candidates.get(seedIndex);
    centers.add(new CentroidCluster<T>(seed));
    used[seedIndex] = true;

    // nearestSq[i] holds the squared distance from candidate i to the closest
    // center chosen so far. With a single center this is just the distance to the seed.
    final double[] nearestSq = new double[total];
    for (int i = 0; i < total; i++) {
        if (i == seedIndex) {
            continue; // the seed itself is not a candidate any more
        }
        final double dist = distance(seed, candidates.get(i));
        nearestSq[i] = dist * dist;
    }

    while (centers.size() < k) {

        // Total weight of all candidates that are still available.
        double weightTotal = 0.0;
        for (int i = 0; i < total; i++) {
            if (!used[i]) {
                weightTotal += nearestSq[i];
            }
        }

        // Draw a threshold in [0, weightTotal); the candidate at which the running
        // weight first reaches it becomes the next center (probability ~ D(x)^2).
        final double threshold = random.nextDouble() * weightTotal;

        int chosen = -1;
        double running = 0.0;
        for (int i = 0; i < total && chosen < 0; i++) {
            if (used[i]) {
                continue;
            }
            running += nearestSq[i];
            if (running >= threshold) {
                chosen = i;
            }
        }

        // The scan can come up empty, probably because the distances are extremely
        // small. In that case fall back to the last candidate that is still available.
        for (int i = total - 1; i >= 0 && chosen < 0; i--) {
            if (!used[i]) {
                chosen = i;
            }
        }

        // Nothing left to pick: stop here to avoid looping forever.
        if (chosen < 0) {
            break;
        }

        final T picked = candidates.get(chosen);
        centers.add(new CentroidCluster<T>(picked));
        used[chosen] = true;

        if (centers.size() < k) {
            // More centers are needed, so refresh the nearest-center distances.
            // Only the distance to the newly added center has to be computed.
            for (int j = 0; j < total; j++) {
                if (used[j]) {
                    continue;
                }
                final double dist = distance(picked, candidates.get(j));
                final double distSq = dist * dist;
                if (distSq < nearestSq[j]) {
                    nearestSq[j] = distSq;
                }
            }
        }
    }

    return centers;
}

From source file:org.esa.s2tbx.s2msi.idepix.operators.cloudshadow.MyClustering.java

/**
 * Runs the K-means++ clustering algorithm, seeding the first center with the
 * "darkest" point, i.e. the point whose first coordinate is smallest.
 *
 * @param points the points to cluster
 * @return a list of clusters containing the points
 * @throws MathIllegalArgumentException if the data points are null or the number
 *                                      of clusters is larger than the number of data points
 * @throws ConvergenceException         if an empty cluster is encountered and the
 *                                      {@link #emptyStrategy} is set to {@code ERROR}
 */
@Override
public List<CentroidCluster<T>> cluster(final Collection<T> points)
        throws MathIllegalArgumentException, ConvergenceException {

    // sanity checks
    MathUtils.checkNotNull(points);

    // the number of clusters has to be smaller than or equal to the number of data points
    if (points.size() < k) {
        throw new NumberIsTooSmallException(points.size(), k, false);
    }

    // todo adaption for more bands is required
    // Locate the point with the smallest first coordinate. The collection is
    // walked in its own iteration order instead of being cast to a concrete
    // list type, so any Collection implementation is accepted. The resulting
    // index refers to that same iteration order, which is also the order in
    // which chooseInitialCenters copies the points into its list.
    double darkestPoint = Double.MAX_VALUE;
    int darkestPointIndex = -1;
    int index = 0;
    for (final T p : points) {
        final double value = p.getPoint()[0];
        if (value < darkestPoint) {
            darkestPoint = value;
            darkestPointIndex = index;
        }
        index++;
    }
    // No coordinate was strictly below Double.MAX_VALUE (e.g. all NaN): fall
    // back to the first point rather than passing an invalid index of -1 on.
    if (darkestPointIndex < 0 && !points.isEmpty()) {
        darkestPointIndex = 0;
    }

    // create the initial clusters
    List<CentroidCluster<T>> clusters = chooseInitialCenters(points, darkestPointIndex);

    // create an array containing the latest assignment of a point to a cluster
    // no need to initialize the array, as it will be filled with the first assignment
    int[] assignments = new int[points.size()];
    assignPointsToClusters(clusters, points, assignments);

    // iterate through updating the centers until we're done;
    // a negative maxIterations means "no practical limit"
    final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
    for (int count = 0; count < max; count++) {
        boolean emptyCluster = false;
        List<CentroidCluster<T>> newClusters = new ArrayList<>();
        for (final CentroidCluster<T> cluster : clusters) {
            final Clusterable newCenter;
            if (cluster.getPoints().isEmpty()) {
                // an empty cluster gets a substitute center chosen by the configured strategy
                switch (emptyStrategy) {
                case LARGEST_VARIANCE:
                    newCenter = getPointFromLargestVarianceCluster(clusters);
                    break;
                case LARGEST_POINTS_NUMBER:
                    newCenter = getPointFromLargestNumberCluster(clusters);
                    break;
                case FARTHEST_POINT:
                    newCenter = getFarthestPoint(clusters);
                    break;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                emptyCluster = true;
            } else {
                newCenter = centroidOf(cluster.getPoints(), cluster.getCenter().getPoint().length);
            }
            newClusters.add(new CentroidCluster<>(newCenter));
        }
        int changes = assignPointsToClusters(newClusters, points, assignments);
        clusters = newClusters;

        // if there were no more changes in the point-to-cluster assignment
        // and there are no empty clusters left, return the current clusters
        if (changes == 0 && !emptyCluster) {
            return clusters;
        }
    }
    // iteration budget exhausted: return the clusters from the last pass
    return clusters;
}

From source file:org.esa.s2tbx.s2msi.idepix.operators.cloudshadow.MyClustering.java

/**
 * Picks the initial cluster centers. The point at {@code darkestPointIndex}
 * becomes the first center; every further center is drawn at random with
 * probability proportional to its squared distance from the nearest center
 * chosen so far.
 *
 * @param points            the points to choose the initial centers from
 * @param darkestPointIndex position, in the iteration order of {@code points},
 *                          of the point to use as the first center
 * @return the initial centers; fewer than {@code k} if no untaken point remains
 */
private List<CentroidCluster<T>> chooseInitialCenters(final Collection<T> points, int darkestPointIndex) {

    // Snapshot the input into a read-only list so it can be indexed and so that
    // nothing in this method can remove items and invalidate the bookkeeping below.
    final List<T> candidates = Collections.unmodifiableList(new ArrayList<>(points));
    final int total = candidates.size();

    // used[i] is set once candidates.get(i) has been picked as a center.
    final boolean[] used = new boolean[total];

    // The centers selected so far.
    final List<CentroidCluster<T>> centers = new ArrayList<>();

    // First center: the caller-designated darkest point.
    final T darkest = candidates.get(darkestPointIndex);
    centers.add(new CentroidCluster<>(darkest));
    used[darkestPointIndex] = true;

    // nearestSq[i] holds the squared distance from candidate i to the closest
    // center chosen so far. With a single center this is just the distance to it.
    final double[] nearestSq = new double[total];
    for (int i = 0; i < total; i++) {
        if (i == darkestPointIndex) {
            continue; // the first center itself is not a candidate any more
        }
        final double dist = distance(darkest, candidates.get(i));
        nearestSq[i] = dist * dist;
    }

    while (centers.size() < k) {

        // Total weight of all candidates that are still available.
        double weightTotal = 0.0;
        for (int i = 0; i < total; i++) {
            if (!used[i]) {
                weightTotal += nearestSq[i];
            }
        }

        // Draw a threshold in [0, weightTotal); the candidate at which the running
        // weight first reaches it becomes the next center (probability ~ D(x)^2).
        final double threshold = random.nextDouble() * weightTotal;

        int chosen = -1;
        double running = 0.0;
        for (int i = 0; i < total && chosen < 0; i++) {
            if (used[i]) {
                continue;
            }
            running += nearestSq[i];
            if (running >= threshold) {
                chosen = i;
            }
        }

        // The scan can come up empty, probably because the distances are extremely
        // small. In that case fall back to the last candidate that is still available.
        for (int i = total - 1; i >= 0 && chosen < 0; i--) {
            if (!used[i]) {
                chosen = i;
            }
        }

        // Nothing left to pick: stop here to avoid looping forever.
        if (chosen < 0) {
            break;
        }

        final T picked = candidates.get(chosen);
        centers.add(new CentroidCluster<>(picked));
        used[chosen] = true;

        if (centers.size() < k) {
            // More centers are needed, so refresh the nearest-center distances.
            // Only the distance to the newly added center has to be computed.
            for (int j = 0; j < total; j++) {
                if (used[j]) {
                    continue;
                }
                final double dist = distance(picked, candidates.get(j));
                final double distSq = dist * dist;
                if (distSq < nearestSq[j]) {
                    nearestSq[j] = distSq;
                }
            }
        }
    }

    return centers;
}