Example usage for org.apache.commons.math3.ml.clustering CentroidCluster getCenter

Introduction

In this page you can find the example usage for org.apache.commons.math3.ml.clustering CentroidCluster getCenter.

Prototype

public Clusterable getCenter()

Source Link

Document

Get the point chosen to be the center of this cluster.

Usage

From source file:clustering.KMeans.java

public static void main(String[] args) throws UnknownHostException {
    if (args.length != 1) {
        System.out.println("Usage : KMeans <nrClusters>");
        System.exit(-1);//  w  w  w .j  a  v  a 2 s.co m
    }

    int kClusters = Integer.parseInt(args[0]);

    ArrayList<Artist> artists = new ArrayList<Artist>();
    DBHelper dbHelper = DBHelper.getInstance();
    DBCursor result = dbHelper.findArtistsWithFBandTW();

    while (result.hasNext()) {
        DBObject currentArtist = result.next();
        artists.add(Artist.fromDBObject(currentArtist));
    }

    //System.out.println(artists.size());
    KMeansPlusPlusClusterer<Artist> clusterer = new KMeansPlusPlusClusterer<Artist>(kClusters);
    List<CentroidCluster<Artist>> clusters = clusterer.cluster(artists);
    //System.out.println(clusters.size());
    dbHelper.emptyClusterCenters();

    for (CentroidCluster<Artist> cluster : clusters) {
        double[] center = cluster.getCenter().getPoint();
        ObjectId centerId = dbHelper.insertClusterCenter(center[0], center[1], center[2]);

        List<Artist> artC = cluster.getPoints();
        for (Artist artist : artC) {
            dbHelper.updateMatrixRowCluster(artist.getDBObject(), centerId);
            //System.out.print("("+artist.fb_likes+","+artist.twitter_followers+","+artist.album_count+") ");
        }
    }
}

From source file:indexer.DocClusterer.java

public String getClusterVecs() throws Exception {
    StringBuffer buff = new StringBuffer();
    List<CentroidCluster<WordVec>> clusters = clusterWords(dvec.getWordMap(), numClusters);
    if (clusters == null)
        return "";
    int i = 0;//ww w.j a v  a2 s  .co  m

    for (CentroidCluster<WordVec> c : clusters) {
        //List<WordVec> thisClusterPoints = c.getPoints();
        //WordVec clusterCenter = WordVecs.getCentroid(thisClusterPoints);
        Clusterable clusterCenter = c.getCenter();
        WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint());
        //clusterCenter.setWord("Cluster_" + numClusters);
        buff.append(clusterWordVec.toString()).append(":");
        i++;
    }

    return buff.toString();
}

From source file:net.semanticmetadata.lire.imageanalysis.bovw.LocalFeatureHistogramBuilderKmeansPlusPlus.java

/**
 * Uses an existing index, where each and every document should have a set of local features. A number of
 * random images (numDocsForVocabulary) is selected and clustered to get a vocabulary of visual words
 * (the cluster means). For all images a histogram on the visual words is created and added to the documents.
 * Pre-existing histograms are deleted, so this method can be used for re-indexing.
 *
 * @throws java.io.IOException/* www . j a va2 s  . co m*/
 */
public void index() throws IOException {
    df.setMaximumFractionDigits(3);
    // find the documents for building the vocabulary:
    HashSet<Integer> docIDs = selectVocabularyDocs();
    System.out.println("Using " + docIDs.size() + " documents to build the vocabulary.");
    KMeansPlusPlusClusterer kpp = new KMeansPlusPlusClusterer(numClusters, 15);
    // fill the KMeans object:
    LinkedList<DoublePoint> features = new LinkedList<DoublePoint>();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (Iterator<Integer> iterator = docIDs.iterator(); iterator.hasNext();) {
        int nextDoc = iterator.next();
        if (reader.hasDeletions() && !liveDocs.get(nextDoc))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(nextDoc);
        //            features.clear();
        IndexableField[] fields = d.getFields(localFeatureFieldName);
        String file = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        for (int j = 0; j < fields.length; j++) {
            LireFeature f = getFeatureInstance();
            f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset,
                    fields[j].binaryValue().length);
            // copy the data over to new array ...
            double[] feat = new double[f.getDoubleHistogram().length];
            System.arraycopy(f.getDoubleHistogram(), 0, feat, 0, feat.length);
            features.add(new DoublePoint(f.getDoubleHistogram()));
        }
    }
    if (features.size() < numClusters) {
        // this cannot work. You need more data points than clusters.
        throw new UnsupportedOperationException("Only " + features.size() + " features found to cluster in "
                + numClusters + ". Try to use less clusters or more images.");
    }
    // do the clustering:
    System.out.println("Number of local features: " + df.format(features.size()));
    System.out.println("Starting clustering ...");
    List<CentroidCluster<DoublePoint>> clusterList = kpp.cluster(features);
    // TODO: Serializing clusters to a file on the disk ...
    System.out.println("Clustering finished, " + clusterList.size() + " clusters found");
    clusters = new LinkedList<double[]>();
    for (Iterator<CentroidCluster<DoublePoint>> iterator = clusterList.iterator(); iterator.hasNext();) {
        CentroidCluster<DoublePoint> centroidCluster = iterator.next();
        clusters.add(centroidCluster.getCenter().getPoint());
    }
    System.out.println("Creating histograms ...");
    int[] tmpHist = new int[numClusters];
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(), true,
            LuceneUtils.AnalyzerType.WhitespaceAnalyzer, 256d);

    // careful: copy reader to RAM for faster access when reading ...
    //        reader = IndexReader.open(new RAMDirectory(reader.directory()), true);
    LireFeature f = getFeatureInstance();
    for (int i = 0; i < reader.maxDoc(); i++) {
        try {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue;
            for (int j = 0; j < tmpHist.length; j++) {
                tmpHist[j] = 0;
            }
            Document d = reader.document(i);
            IndexableField[] fields = d.getFields(localFeatureFieldName);
            // remove the fields if they are already there ...
            d.removeField(visualWordsFieldName);
            d.removeField(localFeatureHistFieldName);

            // find the appropriate cluster for each feature:
            for (int j = 0; j < fields.length; j++) {
                f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset,
                        fields[j].binaryValue().length);
                tmpHist[clusterForFeature(f, clusters)]++;
            }
            //                System.out.println(Arrays.toString(tmpHist));
            d.add(new StoredField(localFeatureHistFieldName,
                    SerializationUtils.toByteArray(normalize(tmpHist))));
            quantize(tmpHist);
            d.add(new TextField(visualWordsFieldName, arrayToVisualWordString(tmpHist), Field.Store.YES));

            // remove local features to save some space if requested:
            if (DELETE_LOCAL_FEATURES) {
                d.removeFields(localFeatureFieldName);
            }
            // now write the new one. we use the identifier to update ;)
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    iw.commit();
    // this one does the "old" commit(), it removes the deleted local features.
    iw.forceMerge(1);
    iw.close();
    System.out.println("Finished.");
}

From source file:Clustering.technique.KMeansPlusPlusClusterer.java

/**
 * Returns the nearest {@link Cluster} to the given point
 *
 * @param clusters the {@link Cluster}s to search
 * @param point the point to find the nearest {@link Cluster} for
 * @return the index of the nearest {@link Cluster} to the given point
 *//*ww w .  j  a va2  s  .c  o  m*/
private int getNearestCluster(final Collection<CentroidCluster<T>> clusters, final T point) {
    double minDistance = Double.MAX_VALUE;
    int clusterIndex = 0;
    int minCluster = 0;
    for (final CentroidCluster<T> c : clusters) {
        final double distance = distance(point, c.getCenter());
        if (distance < minDistance) {
            minDistance = distance;
            minCluster = clusterIndex;
        }
        clusterIndex++;
    }
    return minCluster;
}

From source file:Clustering.technique.KMeansPlusPlusClusterer.java

/**
 * Get the point farthest to its cluster center
 *
 * @param clusters the {@link Cluster}s to search
 * @return point farthest to its cluster center
 * @throws ConvergenceException if clusters are all empty
 *//*from   w w w.ja  v a2 s . com*/
private T getFarthestPoint(final Collection<CentroidCluster<T>> clusters) throws ConvergenceException {

    double maxDistance = Double.NEGATIVE_INFINITY;
    Cluster<T> selectedCluster = null;
    int selectedPoint = -1;
    for (final CentroidCluster<T> cluster : clusters) {

        // get the farthest point
        final Clusterable center = cluster.getCenter();
        final List<T> points = cluster.getPoints();
        for (int i = 0; i < points.size(); ++i) {
            final double distance = distance(points.get(i), center);
            if (distance > maxDistance) {
                maxDistance = distance;
                selectedCluster = cluster;
                selectedPoint = i;
            }
        }

    }

    // did we find at least one non-empty cluster ?
    if (selectedCluster == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    return selectedCluster.getPoints().remove(selectedPoint);

}

From source file:Clustering.technique.KMeansPlusPlusClusterer.java

/**
 * Get a random point from the {@link Cluster} with the largest distance variance.
 *
 * @param clusters the {@link Cluster}s to search
 * @return a random point from the selected cluster
 * @throws ConvergenceException if clusters are all empty
 *///from  w  w w.j  a  va 2  s  .co  m
private T getPointFromLargestVarianceCluster(final Collection<CentroidCluster<T>> clusters)
        throws ConvergenceException {

    double maxVariance = Double.NEGATIVE_INFINITY;
    Cluster<T> selected = null;
    for (final CentroidCluster<T> cluster : clusters) {
        if (!cluster.getPoints().isEmpty()) {

            // compute the distance variance of the current cluster
            final Clusterable center = cluster.getCenter();
            final Variance stat = new Variance();
            for (final T point : cluster.getPoints()) {
                stat.increment(distance(point, center));
            }
            final double variance = stat.getResult();

            // select the cluster with the largest variance
            if (variance > maxVariance) {
                maxVariance = variance;
                selected = cluster;
            }

        }
    }

    // did we find at least one non-empty cluster ?
    if (selected == null) {
        throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
    }

    // extract a random point from the cluster
    final List<T> selectedPoints = selected.getPoints();
    return selectedPoints.remove(random.nextInt(selectedPoints.size()));

}

From source file:Clustering.technique.KMeansPlusPlusClusterer.java

/**
 * Runs the K-means++ clustering algorithm.
 *
 * @param points the points to cluster//  ww  w .j  a va2 s  . co  m
 * @return a list of clusters containing the points
 * @throws MathIllegalArgumentException if the data points are null or the number
 *     of clusters is larger than the number of data points
 * @throws ConvergenceException if an empty cluster is encountered and the
 * {@link #emptyStrategy} is set to {@code ERROR}
 */
public List<CentroidCluster<T>> cluster(final Collection<T> points)
        throws MathIllegalArgumentException, ConvergenceException {

    // sanity checks
    MathUtils.checkNotNull(points);

    // number of clusters has to be smaller or equal the number of data points
    if (points.size() < k) {
        throw new NumberIsTooSmallException(points.size(), k, false);
    }

    // create the initial clusters
    List<CentroidCluster<T>> clusters = chooseInitialCenters(points);

    // create an array containing the latest assignment of a point to a cluster
    // no need to initialize the array, as it will be filled with the first assignment
    int[] assignments = new int[points.size()];
    assignPointsToClusters(clusters, points, assignments);

    // iterate through updating the centers until we're done
    final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
    for (int count = 0; count < max; count++) {
        boolean emptyCluster = false;
        List<CentroidCluster<T>> newClusters = new ArrayList<CentroidCluster<T>>();
        for (final CentroidCluster<T> cluster : clusters) {
            final Clusterable newCenter;
            if (cluster.getPoints().isEmpty()) {
                switch (emptyStrategy) {
                case LARGEST_VARIANCE:
                    newCenter = getPointFromLargestVarianceCluster(clusters);
                    break;
                case LARGEST_POINTS_NUMBER:
                    newCenter = getPointFromLargestNumberCluster(clusters);
                    break;
                case FARTHEST_POINT:
                    newCenter = getFarthestPoint(clusters);
                    break;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                emptyCluster = true;
            } else {
                newCenter = centroidOf(cluster.getCenter(), cluster.getPoints(),
                        cluster.getCenter().getPoint().length);
            }
            newClusters.add(new CentroidCluster<T>(newCenter));
        }
        int changes = assignPointsToClusters(newClusters, points, assignments);
        clusters = newClusters;

        // if there were no more changes in the point-to-cluster assignment
        // and there are no empty clusters left, return the current clusters
        if (changes == 0 && !emptyCluster) {
            return clusters;
        }
    }
    return clusters;
}

From source file:KMeansRecommender.MyKMeansPlusPlusClusterer.java

/**
 * Runs the K-means++ clustering algorithm.
 *
 * @param points the points to cluster/* w ww.  ja va2  s .  c o m*/
 * @return a list of clusters containing the points
 * @throws MathIllegalArgumentException if the data points are null or the number
 *     of clusters is larger than the number of data points
 * @throws ConvergenceException if an empty cluster is encountered and the
 * {@link #emptyStrategy} is set to {@code ERROR}
 */
public List<CentroidCluster<T>> cluster(final Collection<T> points)
        throws MathIllegalArgumentException, ConvergenceException {

    // sanity checks
    MathUtils.checkNotNull(points);

    // number of clusters has to be smaller or equal the number of data points
    if (points.size() < k) {
        throw new NumberIsTooSmallException(points.size(), k, false);
    }

    // create the initial clusters
    List<CentroidCluster<T>> clusters = chooseInitialCenters(points);

    // create an array containing the latest assignment of a point to a cluster
    // no need to initialize the array, as it will be filled with the first assignment
    int[] assignments = new int[points.size()];
    assignPointsToClusters(clusters, points, assignments);

    // iterate through updating the centers until we're done
    int finalchange = 0;
    final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
    for (int count = 0; count < max; count++) {
        boolean emptyCluster = false;
        List<CentroidCluster<T>> newClusters = new ArrayList<CentroidCluster<T>>();
        for (final CentroidCluster<T> cluster : clusters) {
            final Clusterable newCenter;
            if (cluster.getPoints().isEmpty()) {
                switch (emptyStrategy) {
                case LARGEST_VARIANCE:
                    newCenter = getPointFromLargestVarianceCluster(clusters);
                    break;
                case LARGEST_POINTS_NUMBER:
                    newCenter = getPointFromLargestNumberCluster(clusters);
                    break;
                case FARTHEST_POINT:
                    newCenter = getFarthestPoint(clusters);
                    break;
                default:
                    throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                }
                emptyCluster = true;
            } else {
                newCenter = centroidOf(cluster.getPoints(), cluster.getCenter().getPoint().length);
            }
            newClusters.add(new CentroidCluster<T>(newCenter));
        }
        int changes = assignPointsToClusters(newClusters, points, assignments);
        clusters = newClusters;
        finalchange = changes; //for test
        // if there were no more changes in the point-to-cluster assignment
        // and there are no empty clusters left, return the current clusters
        if (changes == 0 && !emptyCluster) {
            //System.out.println("iteration time: " + count + ", changes : 0");  //for test
            return clusters;
        }

    }
    //System.out.println("iteration time: " + max + ", changes : " + finalchange);  //for test  
    return clusters;
}

From source file:ec.coevolve.MultiPopCoevolutionaryEvaluatorExtra.java

protected Individual[] behaviourElite(EvolutionState state, int subpop) {
    // Generate the dataset
    ArrayList<IndividualClusterable> points = new ArrayList<IndividualClusterable>();
    if (novelChampionsOrigin == NovelChampionsOrigin.halloffame) {
        for (int i = 0; i < hallOfFame[subpop].size(); i++) {
            points.add(new IndividualClusterable(hallOfFame[subpop].get(i), i));
        }//  w w  w. j av a2  s  .  co  m
    } else if (novelChampionsOrigin == NovelChampionsOrigin.archive) {
        for (ArchiveEntry ae : archives[subpop]) {
            points.add(new IndividualClusterable(ae.getIndividual(), ae.getGeneration()));
        }
    }

    // Cap -- only use the individuals with the highest fitness scores
    if (novelChampionsCap > 0) {
        // calculate the percentile
        DescriptiveStatistics ds = new DescriptiveStatistics();
        for (IndividualClusterable ic : points) {
            ds.addValue(ic.getFitness());
        }
        double percentile = ds.getPercentile(novelChampionsCap);

        // remove those below the percentile
        Iterator<IndividualClusterable> iter = points.iterator();
        while (iter.hasNext()) {
            IndividualClusterable next = iter.next();
            if (next.getFitness() < percentile) {
                iter.remove();
            }
        }
    }

    // Check if there are enough points for clustering
    if (points.size() <= novelChampions) {
        Individual[] elite = new Individual[points.size()];
        for (int i = 0; i < elite.length; i++) {
            elite[i] = points.get(i).getIndividual();
        }
        return elite;
    }

    // Do the k-means clustering
    KMeansPlusPlusClusterer<IndividualClusterable> clusterer = new KMeansPlusPlusClusterer<IndividualClusterable>(
            novelChampions, 100);
    List<CentroidCluster<IndividualClusterable>> clusters = clusterer.cluster(points);

    // Return one from each cluster
    Individual[] elite = new Individual[novelChampions];
    for (int i = 0; i < clusters.size(); i++) {
        CentroidCluster<IndividualClusterable> cluster = clusters.get(i);
        List<IndividualClusterable> clusterPoints = cluster.getPoints();
        if (novelChampionsMode == NovelChampionsMode.random) {
            int randIndex = state.random[0].nextInt(clusterPoints.size());
            elite[i] = clusterPoints.get(randIndex).getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.last) {
            IndividualClusterable oldest = null;
            for (IndividualClusterable ic : clusterPoints) {
                if (oldest == null || ic.age > oldest.age) {
                    oldest = ic;
                }
            }
            elite[i] = oldest.getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.centroid) {
            DistanceMeasure dm = clusterer.getDistanceMeasure();
            double[] centroid = cluster.getCenter().getPoint();
            IndividualClusterable closest = null;
            double closestDist = Double.MAX_VALUE;
            for (IndividualClusterable ic : clusterPoints) {
                double dist = dm.compute(centroid, ic.getPoint());
                if (dist < closestDist) {
                    closestDist = dist;
                    closest = ic;
                }
            }
            elite[i] = closest.getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.best) {
            IndividualClusterable best = null;
            float highestFit = Float.NEGATIVE_INFINITY;
            for (IndividualClusterable ic : clusterPoints) {
                if (ic.getFitness() > highestFit) {
                    best = ic;
                    highestFit = ic.getFitness();
                }
            }
            elite[i] = best.getIndividual();
        }
    }
    return elite;
}

From source file:org.apache.solr.client.solrj.io.eval.GetCentroidsEvaluator.java

@Override
public Object doWork(Object value) throws IOException {
    if (!(value instanceof KmeansEvaluator.ClusterTuple)) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for value, expecting a clustering result",
                toExpression(constructingFactory), value.getClass().getSimpleName()));
    } else {/*from  www .  j ava2 s.c om*/
        KmeansEvaluator.ClusterTuple clusterTuple = (KmeansEvaluator.ClusterTuple) value;
        List<CentroidCluster<KmeansEvaluator.ClusterPoint>> clusters = clusterTuple.getClusters();
        double[][] data = new double[clusters.size()][];
        for (int i = 0; i < clusters.size(); i++) {
            CentroidCluster<KmeansEvaluator.ClusterPoint> centroidCluster = clusters.get(i);
            Clusterable clusterable = centroidCluster.getCenter();
            data[i] = clusterable.getPoint();
        }
        Matrix centroids = new Matrix(data);
        centroids.setColumnLabels(clusterTuple.getColumnLabels());
        return centroids;
    }
}