Example usage for the org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer constructor KMeansPlusPlusClusterer(int, int, DistanceMeasure, RandomGenerator)

Introduction

On this page you can find example usages of the KMeansPlusPlusClusterer(int, int, DistanceMeasure, RandomGenerator) constructor of the class org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer.

Prototype

public KMeansPlusPlusClusterer(final int k, final int maxIterations, final DistanceMeasure measure,
        final RandomGenerator random)

Source Link

Document

Build a clusterer.

Usage

From source file:edu.byu.nlp.data.app.AnnotationStream2Annotators.java

/**
 * Clusters the given annotators with a {@link KMeansPlusPlusClusterer} (Euclidean distance) and
 * records, on each annotator, the index of the cluster it was placed in.
 *
 * <p>Nothing is returned: {@code Annotator.clusterAssignment} is overwritten IN PLACE with the
 * position of the annotator's cluster in the list produced by the clusterer. The centroids
 * themselves are not exposed; per the original author's note, averaging the members of a cluster
 * yields its centroid.
 *
 * @param annotators    the annotators to cluster; must be non-null and non-empty
 * @param k             the number of clusters, passed to the clusterer
 * @param maxIterations the iteration limit, passed to the clusterer
 * @param rnd           the random generator, passed to the clusterer
 */
private static void assignKMeansClusters(List<Annotator> annotators, int k, int maxIterations,
        RandomGenerator rnd) {
    Preconditions.checkNotNull(annotators);
    Preconditions.checkArgument(annotators.size() > 0);

    KMeansPlusPlusClusterer<Annotator> kMeans = new KMeansPlusPlusClusterer<>(k, maxIterations,
            new EuclideanDistance(), rnd);
    List<CentroidCluster<Annotator>> clusters = kMeans.cluster(annotators);

    // Walk the clusters in list order; the running counter is the cluster's index in that list.
    int clusterIndex = 0;
    for (CentroidCluster<Annotator> cluster : clusters) {
        for (Annotator member : cluster.getPoints()) {
            // Tag the member with its cluster's index.
            member.clusterAssignment = clusterIndex;
        }
        clusterIndex++;
    }
}

From source file:msi.gaml.operators.Stats.java

/**
 * GAML {@code kmeans} operator: clusters the rows of {@code data} with a
 * {@link KMeansPlusPlusClusterer} (Euclidean distance) and returns one list per cluster holding
 * the ids of the rows assigned to it.
 *
 * <p>Each row is wrapped in an {@code Instance} constructed with its position in {@code data};
 * the ids reported in the result are whatever {@code Instance.getId()} returns (presumably that
 * same position -- confirm against {@code Instance}).
 *
 * @param scope the current scope; supplies the random seed and is used for value conversion
 * @param data  a list whose elements are themselves lists of numeric coordinates
 * @param k     the number of clusters, passed to the clusterer
 * @param maxIt the iteration limit, passed to the clusterer
 * @return a list with one inner list of row ids per cluster
 * @throws GamaRuntimeException declared by the operator signature
 */
@operator(value = "kmeans", can_be_const = false, type = IType.LIST, category = {
        IOperatorCategory.STATISTICAL }, concept = { IConcept.STATISTIC, IConcept.CLUSTERING })
@doc(value = "returns the list of clusters (list of instance indices) computed with the kmeans++ algorithm from the first operand data according to the number of clusters to split the data into (k) and the maximum number of iterations to run the algorithm for (If negative, no maximum will be used) (maxIt). Usage: kmeans(data,k,maxit)", special_cases = "if the lengths of two vectors in the right-hand aren't equal, returns 0", examples = {
        @example(value = "kmeans ([[2,4,5], [3,8,2], [1,1,3], [4,3,4]],2,10)", isExecutable = false) })
public static GamaList<GamaList> KMeansPlusplusApache(final IScope scope, final GamaList data, final Integer k,
        final Integer maxIt) throws GamaRuntimeException {
    // Seed the generator from the scope's own random seed.
    final MersenneTwister generator = new MersenneTwister(scope.getRandom().getSeed().longValue());

    // Convert every row of the input into a clusterable point tagged with its row position.
    final List<DoublePoint> points = new ArrayList<>();
    for (int row = 0; row < data.size(); row++) {
        final GamaList rowValues = (GamaList) data.get(row);
        final double[] coordinates = new double[rowValues.size()];
        for (int col = 0; col < rowValues.size(); col++) {
            coordinates[col] = Cast.asFloat(scope, rowValues.get(col));
        }
        points.add(new Instance(row, coordinates));
    }

    final KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(k, maxIt,
            new EuclideanDistance(), generator);
    final List<CentroidCluster<DoublePoint>> computedClusters = clusterer.cluster(points);

    // Translate each cluster back into a list of the ids of its member rows.
    final GamaList clusterIdLists = (GamaList) GamaListFactory.create();
    for (final Cluster<DoublePoint> cluster : computedClusters) {
        final GamaList memberIds = (GamaList) GamaListFactory.create();
        for (final DoublePoint member : cluster.getPoints()) {
            memberIds.addValue(scope, ((Instance) member).getId());
        }
        clusterIdLists.addValue(scope, memberIds);
    }
    return clusterIdLists;
}