Example usage for org.apache.commons.math3.ml.clustering KMeansPlusPlusClusterer KMeansPlusPlusClusterer

Introduction

On this page you can find example usage of the KMeansPlusPlusClusterer constructor from org.apache.commons.math3.ml.clustering.

Prototype

public KMeansPlusPlusClusterer(final int k, final int maxIterations) 

Document

Build a clusterer.
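
The constructor takes the number of clusters k and the maximum number of iterations to run; with this two-argument form the default Euclidean distance measure is used, and a negative maxIterations means the algorithm runs until it converges. Below is a minimal, self-contained sketch of how the constructor is typically called (it is not taken from any of the source files in the Usage section; the class name and sample points are invented for illustration):

import java.util.Arrays;
import java.util.List;

import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.DoublePoint;
import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;

public class KMeansExample {
    public static void main(String[] args) {
        // k = 2 clusters, at most 100 iterations.
        KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(2, 100);

        // A few made-up two-dimensional points forming two obvious groups.
        List<DoublePoint> points = Arrays.asList(new DoublePoint(new double[] { 1.0, 1.0 }),
                new DoublePoint(new double[] { 1.5, 2.0 }), new DoublePoint(new double[] { 8.0, 8.0 }),
                new DoublePoint(new double[] { 9.0, 8.5 }));

        // cluster(...) returns one CentroidCluster per cluster; getCenter() is the cluster mean.
        List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
        for (CentroidCluster<DoublePoint> cluster : clusters) {
            System.out.println(Arrays.toString(cluster.getCenter().getPoint()) + " -> "
                    + cluster.getPoints().size() + " points");
        }
    }
}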

Usage

From source file:edu.cmu.sv.modelinference.eventtool.classification.Clusterer1D.java

private List<? extends Cluster<DataWrapper>> computeClusters(Collection<DataWrapper> dataCol, int k) {
    List<? extends Cluster<DataWrapper>> clusterResults = null;
    try {
        Clusterer<DataWrapper> clusterer = new MultiKMeansPlusPlusClusterer<>(
                new KMeansPlusPlusClusterer<DataWrapper>(k, maxIterations), trials);
        clusterResults = clusterer.cluster(dataCol);
    } catch (NumberIsTooSmallException e) {
        logger.warn("Too few datapoints for clusters: " + e.getMessage());
    }
    return clusterResults;
}

From source file:net.semanticmetadata.lire.imageanalysis.bovw.LocalFeatureHistogramBuilderKmeansPlusPlus.java

/**
 * Uses an existing index in which each document is expected to have a set of local features. A number of
 * random images (numDocsForVocabulary) are selected and clustered to obtain a vocabulary of visual words
 * (the cluster means). For each image a histogram over the visual words is created and added to its document.
 * Pre-existing histograms are deleted, so this method can be used for re-indexing.
 *
 * @throws java.io.IOException
 */
public void index() throws IOException {
    df.setMaximumFractionDigits(3);
    // find the documents for building the vocabulary:
    HashSet<Integer> docIDs = selectVocabularyDocs();
    System.out.println("Using " + docIDs.size() + " documents to build the vocabulary.");
    KMeansPlusPlusClusterer<DoublePoint> kpp = new KMeansPlusPlusClusterer<>(numClusters, 15);
    // fill the KMeans object:
    LinkedList<DoublePoint> features = new LinkedList<DoublePoint>();
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (Iterator<Integer> iterator = docIDs.iterator(); iterator.hasNext();) {
        int nextDoc = iterator.next();
        if (reader.hasDeletions() && !liveDocs.get(nextDoc))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(nextDoc);
        //            features.clear();
        IndexableField[] fields = d.getFields(localFeatureFieldName);
        String file = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        for (int j = 0; j < fields.length; j++) {
            LireFeature f = getFeatureInstance();
            f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset,
                    fields[j].binaryValue().length);
            // copy the data over to a new array ...
            double[] feat = new double[f.getDoubleHistogram().length];
            System.arraycopy(f.getDoubleHistogram(), 0, feat, 0, feat.length);
            features.add(new DoublePoint(feat));
        }
    }
    if (features.size() < numClusters) {
        // this cannot work. You need more data points than clusters.
        throw new UnsupportedOperationException("Only " + features.size() + " features found to cluster in "
                + numClusters + ". Try to use less clusters or more images.");
    }
    // do the clustering:
    System.out.println("Number of local features: " + df.format(features.size()));
    System.out.println("Starting clustering ...");
    List<CentroidCluster<DoublePoint>> clusterList = kpp.cluster(features);
    // TODO: Serializing clusters to a file on the disk ...
    System.out.println("Clustering finished, " + clusterList.size() + " clusters found");
    clusters = new LinkedList<double[]>();
    for (Iterator<CentroidCluster<DoublePoint>> iterator = clusterList.iterator(); iterator.hasNext();) {
        CentroidCluster<DoublePoint> centroidCluster = iterator.next();
        clusters.add(centroidCluster.getCenter().getPoint());
    }
    System.out.println("Creating histograms ...");
    int[] tmpHist = new int[numClusters];
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(), true,
            LuceneUtils.AnalyzerType.WhitespaceAnalyzer, 256d);

    // careful: copy reader to RAM for faster access when reading ...
    //        reader = IndexReader.open(new RAMDirectory(reader.directory()), true);
    LireFeature f = getFeatureInstance();
    for (int i = 0; i < reader.maxDoc(); i++) {
        try {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue;
            for (int j = 0; j < tmpHist.length; j++) {
                tmpHist[j] = 0;
            }
            Document d = reader.document(i);
            IndexableField[] fields = d.getFields(localFeatureFieldName);
            // remove the fields if they are already there ...
            d.removeField(visualWordsFieldName);
            d.removeField(localFeatureHistFieldName);

            // find the appropriate cluster for each feature:
            for (int j = 0; j < fields.length; j++) {
                f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset,
                        fields[j].binaryValue().length);
                tmpHist[clusterForFeature(f, clusters)]++;
            }
            //                System.out.println(Arrays.toString(tmpHist));
            d.add(new StoredField(localFeatureHistFieldName,
                    SerializationUtils.toByteArray(normalize(tmpHist))));
            quantize(tmpHist);
            d.add(new TextField(visualWordsFieldName, arrayToVisualWordString(tmpHist), Field.Store.YES));

            // remove local features to save some space if requested:
            if (DELETE_LOCAL_FEATURES) {
                d.removeFields(localFeatureFieldName);
            }
            // now write the new document; we use the identifier to update it ;)
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    iw.commit();
    // this does the "old" commit(); it removes the deleted local features.
    iw.forceMerge(1);
    iw.close();
    System.out.println("Finished.");
}

From source file:edu.nyu.vida.data_polygamy.ctdata.TopologicalIndex.java

public double getThreshold(Feature[] f) {

    KMeansPlusPlusClusterer<DoublePoint> kmeans = new KMeansPlusPlusClusterer<DoublePoint>(2, 1000);
    ArrayList<DoublePoint> pts = new ArrayList<DoublePoint>();

    if (f.length < 2) {
        return f[0].wt * 0.4;
    }
    for (int i = 0; i < f.length; i++) {
        DoublePoint dpt = new DoublePoint(new double[] { f[i].wt });
        pts.add(dpt);
    }
    List<CentroidCluster<DoublePoint>> clusters = kmeans.cluster(pts);

    double maxp = 0;
    double minp = 0;
    int ct = 0;
    for (CentroidCluster<DoublePoint> c : clusters) {
        double mp = 0;
        double mnp = Double.MAX_VALUE;
        for (DoublePoint dpt : c.getPoints()) {
            double[] pt = dpt.getPoint();
            mp = Math.max(mp, pt[0]);
            mnp = Math.min(mnp, pt[0]);
        }
        if (mp > maxp) {
            maxp = mp;
            minp = mnp;
        }
        ct++;
    }
    if (ct > 2) {
        Utilities.er("Can there be > 2 clusters?");
    }
    return minp;
}

From source file:ec.coevolve.MultiPopCoevolutionaryEvaluatorExtra.java

protected Individual[] behaviourElite(EvolutionState state, int subpop) {
    // Generate the dataset
    ArrayList<IndividualClusterable> points = new ArrayList<IndividualClusterable>();
    if (novelChampionsOrigin == NovelChampionsOrigin.halloffame) {
        for (int i = 0; i < hallOfFame[subpop].size(); i++) {
            points.add(new IndividualClusterable(hallOfFame[subpop].get(i), i));
        }
    } else if (novelChampionsOrigin == NovelChampionsOrigin.archive) {
        for (ArchiveEntry ae : archives[subpop]) {
            points.add(new IndividualClusterable(ae.getIndividual(), ae.getGeneration()));
        }
    }

    // Cap -- only use the individuals with the highest fitness scores
    if (novelChampionsCap > 0) {
        // calculate the percentile
        DescriptiveStatistics ds = new DescriptiveStatistics();
        for (IndividualClusterable ic : points) {
            ds.addValue(ic.getFitness());
        }
        double percentile = ds.getPercentile(novelChampionsCap);

        // remove those below the percentile
        Iterator<IndividualClusterable> iter = points.iterator();
        while (iter.hasNext()) {
            IndividualClusterable next = iter.next();
            if (next.getFitness() < percentile) {
                iter.remove();
            }
        }
    }

    // Check if there are enough points for clustering
    if (points.size() <= novelChampions) {
        Individual[] elite = new Individual[points.size()];
        for (int i = 0; i < elite.length; i++) {
            elite[i] = points.get(i).getIndividual();
        }
        return elite;
    }

    // Do the k-means clustering
    KMeansPlusPlusClusterer<IndividualClusterable> clusterer = new KMeansPlusPlusClusterer<IndividualClusterable>(
            novelChampions, 100);
    List<CentroidCluster<IndividualClusterable>> clusters = clusterer.cluster(points);

    // Return one from each cluster
    Individual[] elite = new Individual[novelChampions];
    for (int i = 0; i < clusters.size(); i++) {
        CentroidCluster<IndividualClusterable> cluster = clusters.get(i);
        List<IndividualClusterable> clusterPoints = cluster.getPoints();
        if (novelChampionsMode == NovelChampionsMode.random) {
            int randIndex = state.random[0].nextInt(clusterPoints.size());
            elite[i] = clusterPoints.get(randIndex).getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.last) {
            IndividualClusterable oldest = null;
            for (IndividualClusterable ic : clusterPoints) {
                if (oldest == null || ic.age > oldest.age) {
                    oldest = ic;
                }
            }
            elite[i] = oldest.getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.centroid) {
            DistanceMeasure dm = clusterer.getDistanceMeasure();
            double[] centroid = cluster.getCenter().getPoint();
            IndividualClusterable closest = null;
            double closestDist = Double.MAX_VALUE;
            for (IndividualClusterable ic : clusterPoints) {
                double dist = dm.compute(centroid, ic.getPoint());
                if (dist < closestDist) {
                    closestDist = dist;
                    closest = ic;
                }
            }
            elite[i] = closest.getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.best) {
            IndividualClusterable best = null;
            float highestFit = Float.NEGATIVE_INFINITY;
            for (IndividualClusterable ic : clusterPoints) {
                if (ic.getFitness() > highestFit) {
                    best = ic;
                    highestFit = ic.getFitness();
                }
            }
            elite[i] = best.getIndividual();
        }
    }
    return elite;
}

From source file:org.apache.solr.client.solrj.io.eval.KmeansEvaluator.java

@Override
public Object doWork(Object value1, Object value2) throws IOException {

    Matrix matrix = null;
    int k = 0;

    if (value1 instanceof Matrix) {
        matrix = (Matrix) value1;
    } else {
        throw new IOException("The first parameter for kmeans should be the observation matrix.");
    }

    if (value2 instanceof Number) {
        k = ((Number) value2).intValue();
    } else {
        throw new IOException("The second parameter for kmeans should be k.");
    }

    KMeansPlusPlusClusterer<ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k, maxIterations);
    List<ClusterPoint> points = new ArrayList();
    double[][] data = matrix.getData();

    List<String> ids = matrix.getRowLabels();

    for (int i = 0; i < data.length; i++) {
        double[] vec = data[i];
        points.add(new ClusterPoint(ids.get(i), vec));
    }

    Map fields = new HashMap();

    fields.put("k", k);
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new ClusterTuple(fields, kmeans.cluster(points), matrix.getColumnLabels());
}

From source file:org.apache.solr.client.solrj.io.eval.MultiKmeansEvaluator.java

@Override
public Object doWork(Object... values) throws IOException {

    if (values.length != 3) {
        throw new IOException(
                "The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
    }

    Object value1 = values[0];
    Object value2 = values[1];
    Object value3 = values[2];

    Matrix matrix = null;
    int k = 0;
    int trials = 0;

    if (value1 instanceof Matrix) {
        matrix = (Matrix) value1;
    } else {
        throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
    }

    if (value2 instanceof Number) {
        k = ((Number) value2).intValue();
    } else {
        throw new IOException("The second parameter for multiKmeans should be k.");
    }

    if (value3 instanceof Number) {
        trials = ((Number) value3).intValue();
    } else {
        throw new IOException("The third parameter for multiKmeans should be trials.");
    }

    KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k,
            maxIterations);
    MultiKMeansPlusPlusClusterer multiKmeans = new MultiKMeansPlusPlusClusterer(kmeans, trials);

    List<KmeansEvaluator.ClusterPoint> points = new ArrayList();
    double[][] data = matrix.getData();

    List<String> ids = matrix.getRowLabels();

    for (int i = 0; i < data.length; i++) {
        double[] vec = data[i];
        points.add(new KmeansEvaluator.ClusterPoint(ids.get(i), vec));
    }

    Map fields = new HashMap();

    fields.put("k", k);
    fields.put("trials", trials);
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}

From source file:playground.sergioo.facilitiesGenerator2012.WorkFacilitiesGeneration.java

private static List<CentroidCluster<PointPerson>> clusterWorkActivities(Map<String, PointPerson> points) {
    Set<PointPerson> pointsC = getPCATransformation(points.values());
    Random r = new Random();
    List<CentroidCluster<PointPerson>> clusters = new KMeansPlusPlusClusterer<PointPerson>(SIZE, 100)
            .cluster(pointsC);
    //new ClustersWindow("Work times cluster PCA: "+getClustersDeviations(clusters)+" "+getWeightedClustersDeviations(clusters), clusters, pointsC.size()).setVisible(true);
    for (Cluster<PointPerson> cluster : clusters)
        for (PointPerson pointPersonT : cluster.getPoints()) {
            PointPerson pointPerson = points.get(pointPersonT.getId());
            for (int d = 0; d < pointPersonT.getDimension(); d++)
                pointPersonT.setElement(d, pointPerson.getElement(d));
        }
    //new ClustersWindow("Work times cluster PCA back: "+getClustersDeviations(clusters)+" "+getWeightedClustersDeviations(clusters), clusters, pointsC.size()).setVisible(true);
    /*List<Cluster<PointPerson>> clusters2 = new KMeansPlusPlusClusterer<PointPerson>(new Random()).cluster(points.values(), SIZE, 100);
    new ClustersWindow("Work times cluster: "+getClustersDeviations(clusters2)+" "+getWeightedClustersDeviations(clusters2), clusters2, points.size()).setVisible(true);
    for(Cluster<PointPerson> clusterE:clusters) {
    double startTime = clusterE.getCenter().getElement(0);
    double endTime = clusterE.getCenter().getElement(1);
    System.out.println();
    System.out.println("    ("+startTime+","+endTime+")");
    System.out.println("    ("+((int)startTime/(15*60))*(15*60)+","+((int)endTime/(15*60))*(15*60)+")");
    System.out.println("    ("+(int)startTime/3600+":"+((int)startTime%3600)/60+","+(int)endTime/3600+":"+((int)endTime%3600)/60+")");
    System.out.println("    ("+((int)startTime/(15*60))*(15*60)/3600+":"+(((int)startTime/(15*60))*(15*60)%3600)/60+","+((int)endTime/(15*60))*(15*60)/3600+":"+(((int)endTime/(15*60))*(15*60)%3600)/60+")");
    System.out.println("    "+clusterE.getPoints().size());
    }*/
    return clusters;
}

From source file:playground.sergioo.workplaceCapacities2012.MainWorkplaceCapacities.java

private static List<CentroidCluster<PointPerson>> clusterWorkActivities(Map<String, PointPerson> points)
        throws FileNotFoundException, IOException, ClassNotFoundException {
    List<CentroidCluster<PointPerson>> clusters = null;
    Set<PointPerson> pointsC = getPCATransformation(points.values());
    clusters = new KMeansPlusPlusClusterer<PointPerson>(SIZE, 1000).cluster(pointsC);
    new ClustersWindow("Work times cluster PCA: " + getClustersDeviations(clusters) + " "
            + getWeightedClustersDeviations(clusters), clusters).setVisible(true);
    for (Cluster<PointPerson> cluster : clusters)
        for (PointPerson pointPersonT : cluster.getPoints()) {
            PointPerson pointPerson = points.get(pointPersonT.getId());
            for (int d = 0; d < pointPersonT.getDimension(); d++)
                pointPersonT.setElement(d, pointPerson.getElement(d));
        }
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(CLUSTERS_FILE));
    oos.writeObject(clusters);
    oos.close();
    return clusters;
}

From source file:playground.sergioo.workplaceCapacities2012.MainWorkplaceCapacities.java

private static void writeOptimizationParameters2(int numRegions) throws FileNotFoundException, IOException {
    List<double[][]> travelTimes = new ArrayList<double[][]>();
    List<double[]> maximumAreaCapacities = new ArrayList<double[]>();
    List<double[][]> stopScheduleCapacities = new ArrayList<double[][]>();
    Set<StopCoord> pointsC = new HashSet<StopCoord>();
    for (Entry<String, Coord> stop : stopsBase.entrySet())
        pointsC.add(new StopCoord(stop.getValue().getX(), stop.getValue().getY(),
                Id.create(stop.getKey(), TransitStopFacility.class)));
    List<CentroidCluster<StopCoord>> clusters = new KMeansPlusPlusClusterer<StopCoord>(numRegions, 1000)
            .cluster(pointsC);
    for (int n = 0; n < numRegions; n++) {
        double[][] tts = new double[clusters.get(n).getPoints().size()][1];
        for (StopCoord stop : clusters.get(n).getPoints()) {
            for (MPAreaData mPArea : dataMPAreas.values()) {
                Double tt = mPArea.getTravelTime(stop.getId());
                int s = 0;
                int w = 0;
                if (tt != null)
                    tts[s][w] = tt;
            }
        }
        travelTimes.add(tts);
        maximumAreaCapacities.add(new double[1]);
        stopScheduleCapacities.add(new double[clusters.get(n).getPoints().size()][SIZE]);
    }
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(INPUT_FILE));
    oos.writeObject(travelTimes);
    oos.writeObject(maximumAreaCapacities);
    oos.writeObject(stopScheduleCapacities);
    oos.close();
}

From source file:VQVAD.VQVADTrainer.java

/**
 * Create a trainer with default values. Should work fine for most cases.
 */
public VQVADTrainer() {
    trainingFrameBuffer = new CircularFifoBuffer(DEFAULT_FRAME_BUFFER_SIZE);
    clusterer = new KMeansPlusPlusClusterer<DoublePoint>(vqSize, DEFAULT_KMEANS_MAX_ITER);
}