List of usage examples for the org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer constructor
public KMeansPlusPlusClusterer(final int k, final int maxIterations)
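Before the real-world examples below, here is a minimal, self-contained sketch of this constructor in use. The point values, k = 2, and maxIterations = 100 are made-up illustration values, not taken from any of the source files that follow: it clusters a few 2-D points and prints each centroid with its cluster size.

import java.util.Arrays;
import java.util.List;

import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.DoublePoint;
import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;

public class KMeansPlusPlusExample {
    public static void main(String[] args) {
        // k = 2 clusters, at most 100 iterations per run (illustrative values).
        KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(2, 100);
        // Two obvious groups of 2-D points; any Clusterable works, DoublePoint is the stock wrapper.
        List<DoublePoint> points = Arrays.asList(
                new DoublePoint(new double[] { 1.0, 1.0 }),
                new DoublePoint(new double[] { 1.5, 2.0 }),
                new DoublePoint(new double[] { 8.0, 8.0 }),
                new DoublePoint(new double[] { 9.0, 8.5 }));
        List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);
        for (CentroidCluster<DoublePoint> c : clusters) {
            System.out.println(Arrays.toString(c.getCenter().getPoint())
                    + " <- " + c.getPoints().size() + " points");
        }
    }
}

The examples below show the same pattern at scale; several of them also wrap the clusterer in MultiKMeansPlusPlusClusterer to keep the best of multiple randomized runs.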
From source file:edu.cmu.sv.modelinference.eventtool.classification.Clusterer1D.java
private List<? extends Cluster<DataWrapper>> computeClusters(Collection<DataWrapper> dataCol, int k) {
    List<? extends Cluster<DataWrapper>> clusterResults = null;
    try {
        Clusterer<DataWrapper> clusterer = new MultiKMeansPlusPlusClusterer<>(
                new KMeansPlusPlusClusterer<DataWrapper>(k, maxIterations), trials);
        clusterResults = clusterer.cluster(dataCol);
    } catch (NumberIsTooSmallException e) {
        logger.warn("Too few datapoints for clusters: " + e.getMessage());
    }
    return clusterResults;
}
From source file:net.semanticmetadata.lire.imageanalysis.bovw.LocalFeatureHistogramBuilderKmeansPlusPlus.java
/**
 * Uses an existing index, where each and every document should have a set of local features. A number of
 * random images (numDocsForVocabulary) is selected and clustered to get a vocabulary of visual words
 * (the cluster means). For all images a histogram on the visual words is created and added to the documents.
 * Pre-existing histograms are deleted, so this method can be used for re-indexing.
 *
 * @throws java.io.IOException
 */
public void index() throws IOException {
    df.setMaximumFractionDigits(3);
    // find the documents for building the vocabulary:
    HashSet<Integer> docIDs = selectVocabularyDocs();
    System.out.println("Using " + docIDs.size() + " documents to build the vocabulary.");
    KMeansPlusPlusClusterer kpp = new KMeansPlusPlusClusterer(numClusters, 15);
    // fill the KMeans object:
    LinkedList<DoublePoint> features = new LinkedList<DoublePoint>();
    // Needed for check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (Iterator<Integer> iterator = docIDs.iterator(); iterator.hasNext();) {
        int nextDoc = iterator.next();
        if (reader.hasDeletions() && !liveDocs.get(nextDoc))
            continue; // if it is deleted, just ignore it.
        Document d = reader.document(nextDoc);
        // features.clear();
        IndexableField[] fields = d.getFields(localFeatureFieldName);
        String file = d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        for (int j = 0; j < fields.length; j++) {
            LireFeature f = getFeatureInstance();
            f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset,
                    fields[j].binaryValue().length);
            // copy the data over to new array ...
            double[] feat = new double[f.getDoubleHistogram().length];
            System.arraycopy(f.getDoubleHistogram(), 0, feat, 0, feat.length);
            features.add(new DoublePoint(f.getDoubleHistogram()));
        }
    }
    if (features.size() < numClusters) {
        // this cannot work. You need more data points than clusters.
        throw new UnsupportedOperationException("Only " + features.size() + " features found to cluster in "
                + numClusters + ". Try to use less clusters or more images.");
    }
    // do the clustering:
    System.out.println("Number of local features: " + df.format(features.size()));
    System.out.println("Starting clustering ...");
    List<CentroidCluster<DoublePoint>> clusterList = kpp.cluster(features);
    // TODO: Serializing clusters to a file on the disk ...
    System.out.println("Clustering finished, " + clusterList.size() + " clusters found");
    clusters = new LinkedList<double[]>();
    for (Iterator<CentroidCluster<DoublePoint>> iterator = clusterList.iterator(); iterator.hasNext();) {
        CentroidCluster<DoublePoint> centroidCluster = iterator.next();
        clusters.add(centroidCluster.getCenter().getPoint());
    }
    System.out.println("Creating histograms ...");
    int[] tmpHist = new int[numClusters];
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(), true,
            LuceneUtils.AnalyzerType.WhitespaceAnalyzer, 256d);
    // careful: copy reader to RAM for faster access when reading ...
    // reader = IndexReader.open(new RAMDirectory(reader.directory()), true);
    LireFeature f = getFeatureInstance();
    for (int i = 0; i < reader.maxDoc(); i++) {
        try {
            if (reader.hasDeletions() && !liveDocs.get(i))
                continue;
            for (int j = 0; j < tmpHist.length; j++) {
                tmpHist[j] = 0;
            }
            Document d = reader.document(i);
            IndexableField[] fields = d.getFields(localFeatureFieldName);
            // remove the fields if they are already there ...
            d.removeField(visualWordsFieldName);
            d.removeField(localFeatureHistFieldName);
            // find the appropriate cluster for each feature:
            for (int j = 0; j < fields.length; j++) {
                f.setByteArrayRepresentation(fields[j].binaryValue().bytes, fields[j].binaryValue().offset,
                        fields[j].binaryValue().length);
                tmpHist[clusterForFeature(f, clusters)]++;
            }
            // System.out.println(Arrays.toString(tmpHist));
            d.add(new StoredField(localFeatureHistFieldName,
                    SerializationUtils.toByteArray(normalize(tmpHist))));
            quantize(tmpHist);
            d.add(new TextField(visualWordsFieldName, arrayToVisualWordString(tmpHist), Field.Store.YES));
            // remove local features to save some space if requested:
            if (DELETE_LOCAL_FEATURES) {
                d.removeFields(localFeatureFieldName);
            }
            // now write the new one. we use the identifier to update ;)
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
    iw.commit();
    // this one does the "old" commit(), it removes the deleted local features.
    iw.forceMerge(1);
    iw.close();
    System.out.println("Finished.");
}
From source file:edu.nyu.vida.data_polygamy.ctdata.TopologicalIndex.java
public double getThreshold(Feature[] f) {
    KMeansPlusPlusClusterer<DoublePoint> kmeans = new KMeansPlusPlusClusterer<DoublePoint>(2, 1000);
    ArrayList<DoublePoint> pts = new ArrayList<DoublePoint>();
    if (f.length < 2) {
        return f[0].wt * 0.4;
    }
    for (int i = 0; i < f.length; i++) {
        DoublePoint dpt = new DoublePoint(new double[] { f[i].wt });
        pts.add(dpt);
    }
    List<CentroidCluster<DoublePoint>> clusters = kmeans.cluster(pts);
    double maxp = 0;
    double minp = 0;
    int ct = 0;
    for (CentroidCluster<DoublePoint> c : clusters) {
        double mp = 0;
        double mnp = Double.MAX_VALUE;
        for (DoublePoint dpt : c.getPoints()) {
            double[] pt = dpt.getPoint();
            mp = Math.max(mp, pt[0]);
            mnp = Math.min(mnp, pt[0]);
        }
        if (mp > maxp) {
            maxp = mp;
            minp = mnp;
        }
        ct++;
    }
    if (ct > 2) {
        Utilities.er("Can there be > 2 clusters?");
    }
    return minp;
}
From source file:ec.coevolve.MultiPopCoevolutionaryEvaluatorExtra.java
protected Individual[] behaviourElite(EvolutionState state, int subpop) {
    // Generate the dataset
    ArrayList<IndividualClusterable> points = new ArrayList<IndividualClusterable>();
    if (novelChampionsOrigin == NovelChampionsOrigin.halloffame) {
        for (int i = 0; i < hallOfFame[subpop].size(); i++) {
            points.add(new IndividualClusterable(hallOfFame[subpop].get(i), i));
        }
    } else if (novelChampionsOrigin == NovelChampionsOrigin.archive) {
        for (ArchiveEntry ae : archives[subpop]) {
            points.add(new IndividualClusterable(ae.getIndividual(), ae.getGeneration()));
        }
    }

    // Cap -- only use the individuals with the highest fitness scores
    if (novelChampionsCap > 0) {
        // calculate the percentile
        DescriptiveStatistics ds = new DescriptiveStatistics();
        for (IndividualClusterable ic : points) {
            ds.addValue(ic.getFitness());
        }
        double percentile = ds.getPercentile(novelChampionsCap);
        // remove those below the percentile
        Iterator<IndividualClusterable> iter = points.iterator();
        while (iter.hasNext()) {
            IndividualClusterable next = iter.next();
            if (next.getFitness() < percentile) {
                iter.remove();
            }
        }
    }

    // Check if there are enough points for clustering
    if (points.size() <= novelChampions) {
        Individual[] elite = new Individual[points.size()];
        for (int i = 0; i < elite.length; i++) {
            elite[i] = points.get(i).getIndividual();
        }
        return elite;
    }

    // Do the k-means clustering
    KMeansPlusPlusClusterer<IndividualClusterable> clusterer =
            new KMeansPlusPlusClusterer<IndividualClusterable>(novelChampions, 100);
    List<CentroidCluster<IndividualClusterable>> clusters = clusterer.cluster(points);

    // Return one from each cluster
    Individual[] elite = new Individual[novelChampions];
    for (int i = 0; i < clusters.size(); i++) {
        CentroidCluster<IndividualClusterable> cluster = clusters.get(i);
        List<IndividualClusterable> clusterPoints = cluster.getPoints();
        if (novelChampionsMode == NovelChampionsMode.random) {
            int randIndex = state.random[0].nextInt(clusterPoints.size());
            elite[i] = clusterPoints.get(randIndex).getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.last) {
            IndividualClusterable oldest = null;
            for (IndividualClusterable ic : clusterPoints) {
                if (oldest == null || ic.age > oldest.age) {
                    oldest = ic;
                }
            }
            elite[i] = oldest.getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.centroid) {
            DistanceMeasure dm = clusterer.getDistanceMeasure();
            double[] centroid = cluster.getCenter().getPoint();
            IndividualClusterable closest = null;
            double closestDist = Double.MAX_VALUE;
            for (IndividualClusterable ic : clusterPoints) {
                double dist = dm.compute(centroid, ic.getPoint());
                if (dist < closestDist) {
                    closestDist = dist;
                    closest = ic;
                }
            }
            elite[i] = closest.getIndividual();
        } else if (novelChampionsMode == NovelChampionsMode.best) {
            IndividualClusterable best = null;
            float highestFit = Float.NEGATIVE_INFINITY;
            for (IndividualClusterable ic : clusterPoints) {
                if (ic.getFitness() > highestFit) {
                    best = ic;
                    highestFit = ic.getFitness();
                }
            }
            elite[i] = best.getIndividual();
        }
    }
    return elite;
}
From source file:org.apache.solr.client.solrj.io.eval.KmeansEvaluator.java
@Override
public Object doWork(Object value1, Object value2) throws IOException {
    Matrix matrix = null;
    int k = 0;

    if (value1 instanceof Matrix) {
        matrix = (Matrix) value1;
    } else {
        throw new IOException("The first parameter for kmeans should be the observation matrix.");
    }

    if (value2 instanceof Number) {
        k = ((Number) value2).intValue();
    } else {
        throw new IOException("The second parameter for kmeans should be k.");
    }

    KMeansPlusPlusClusterer<ClusterPoint> kmeans = new KMeansPlusPlusClusterer(k, maxIterations);
    List<ClusterPoint> points = new ArrayList();
    double[][] data = matrix.getData();
    List<String> ids = matrix.getRowLabels();

    for (int i = 0; i < data.length; i++) {
        double[] vec = data[i];
        points.add(new ClusterPoint(ids.get(i), vec));
    }

    Map fields = new HashMap();
    fields.put("k", k);
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new ClusterTuple(fields, kmeans.cluster(points), matrix.getColumnLabels());
}
From source file:org.apache.solr.client.solrj.io.eval.MultiKmeansEvaluator.java
@Override
public Object doWork(Object... values) throws IOException {
    if (values.length != 3) {
        throw new IOException(
                "The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
    }

    Object value1 = values[0];
    Object value2 = values[1];
    Object value3 = values[2];

    Matrix matrix = null;
    int k = 0;
    int trials = 0;

    if (value1 instanceof Matrix) {
        matrix = (Matrix) value1;
    } else {
        throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
    }

    if (value2 instanceof Number) {
        k = ((Number) value2).intValue();
    } else {
        throw new IOException("The second parameter for multiKmeans should be k.");
    }

    if (value3 instanceof Number) {
        trials = ((Number) value3).intValue();
    } else {
        throw new IOException("The third parameter for multiKmeans should be trials.");
    }

    KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans =
            new KMeansPlusPlusClusterer(k, maxIterations);
    MultiKMeansPlusPlusClusterer multiKmeans = new MultiKMeansPlusPlusClusterer(kmeans, trials);
    List<KmeansEvaluator.ClusterPoint> points = new ArrayList();
    double[][] data = matrix.getData();
    List<String> ids = matrix.getRowLabels();

    for (int i = 0; i < data.length; i++) {
        double[] vec = data[i];
        points.add(new KmeansEvaluator.ClusterPoint(ids.get(i), vec));
    }

    Map fields = new HashMap();
    fields.put("k", k);
    fields.put("trials", trials);
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}
From source file:playground.sergioo.facilitiesGenerator2012.WorkFacilitiesGeneration.java
private static List<CentroidCluster<PointPerson>> clusterWorkActivities(Map<String, PointPerson> points) {
    Set<PointPerson> pointsC = getPCATransformation(points.values());
    Random r = new Random();
    List<CentroidCluster<PointPerson>> clusters =
            new KMeansPlusPlusClusterer<PointPerson>(SIZE, 100).cluster(pointsC);
    //new ClustersWindow("Work times cluster PCA: "+getClustersDeviations(clusters)+" "+getWeightedClustersDeviations(clusters), clusters, pointsC.size()).setVisible(true);
    for (Cluster<PointPerson> cluster : clusters)
        for (PointPerson pointPersonT : cluster.getPoints()) {
            PointPerson pointPerson = points.get(pointPersonT.getId());
            for (int d = 0; d < pointPersonT.getDimension(); d++)
                pointPersonT.setElement(d, pointPerson.getElement(d));
        }
    //new ClustersWindow("Work times cluster PCA back: "+getClustersDeviations(clusters)+" "+getWeightedClustersDeviations(clusters), clusters, pointsC.size()).setVisible(true);
    /*List<Cluster<PointPerson>> clusters2 = new KMeansPlusPlusClusterer<PointPerson>(new Random()).cluster(points.values(), SIZE, 100);
    new ClustersWindow("Work times cluster: "+getClustersDeviations(clusters2)+" "+getWeightedClustersDeviations(clusters2), clusters2, points.size()).setVisible(true);
    for(Cluster<PointPerson> clusterE:clusters) {
        double startTime = clusterE.getCenter().getElement(0);
        double endTime = clusterE.getCenter().getElement(1);
        System.out.println();
        System.out.println(" ("+startTime+","+endTime+")");
        System.out.println(" ("+((int)startTime/(15*60))*(15*60)+","+((int)endTime/(15*60))*(15*60)+")");
        System.out.println(" ("+(int)startTime/3600+":"+((int)startTime%3600)/60+","+(int)endTime/3600+":"+((int)endTime%3600)/60+")");
        System.out.println(" ("+((int)startTime/(15*60))*(15*60)/3600+":"+(((int)startTime/(15*60))*(15*60)%3600)/60+","+((int)endTime/(15*60))*(15*60)/3600+":"+(((int)endTime/(15*60))*(15*60)%3600)/60+")");
        System.out.println(" "+clusterE.getPoints().size());
    }*/
    return clusters;
}
From source file:playground.sergioo.workplaceCapacities2012.MainWorkplaceCapacities.java
private static List<CentroidCluster<PointPerson>> clusterWorkActivities(Map<String, PointPerson> points)
        throws FileNotFoundException, IOException, ClassNotFoundException {
    List<CentroidCluster<PointPerson>> clusters = null;
    Set<PointPerson> pointsC = getPCATransformation(points.values());
    clusters = new KMeansPlusPlusClusterer<PointPerson>(SIZE, 1000).cluster(pointsC);
    new ClustersWindow("Work times cluster PCA: " + getClustersDeviations(clusters) + " "
            + getWeightedClustersDeviations(clusters), clusters).setVisible(true);
    for (Cluster<PointPerson> cluster : clusters)
        for (PointPerson pointPersonT : cluster.getPoints()) {
            PointPerson pointPerson = points.get(pointPersonT.getId());
            for (int d = 0; d < pointPersonT.getDimension(); d++)
                pointPersonT.setElement(d, pointPerson.getElement(d));
        }
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(CLUSTERS_FILE));
    oos.writeObject(clusters);
    oos.close();
    return clusters;
}
From source file:playground.sergioo.workplaceCapacities2012.MainWorkplaceCapacities.java
private static void writeOptimizationParameters2(int numRegions) throws FileNotFoundException, IOException {
    List<double[][]> travelTimes = new ArrayList<double[][]>();
    List<double[]> maximumAreaCapacities = new ArrayList<double[]>();
    List<double[][]> stopScheduleCapacities = new ArrayList<double[][]>();
    Set<StopCoord> pointsC = new HashSet<StopCoord>();
    for (Entry<String, Coord> stop : stopsBase.entrySet())
        pointsC.add(new StopCoord(stop.getValue().getX(), stop.getValue().getY(),
                Id.create(stop.getKey(), TransitStopFacility.class)));
    List<CentroidCluster<StopCoord>> clusters =
            new KMeansPlusPlusClusterer<StopCoord>(numRegions, 1000).cluster(pointsC);
    for (int n = 0; n < numRegions; n++) {
        double[][] tts = new double[clusters.get(n).getPoints().size()][1];
        for (StopCoord stop : clusters.get(n).getPoints()) {
            for (MPAreaData mPArea : dataMPAreas.values()) {
                Double tt = mPArea.getTravelTime(stop.getId());
                int s = 0;
                int w = 0;
                if (tt != null)
                    tts[s][w] = tt;
            }
        }
        travelTimes.add(tts);
        maximumAreaCapacities.add(new double[1]);
        stopScheduleCapacities.add(new double[clusters.get(n).getPoints().size()][SIZE]);
    }
    ObjectOutputStream oos = new ObjectOutputStream(new FileOutputStream(INPUT_FILE));
    oos.writeObject(travelTimes);
    oos.writeObject(maximumAreaCapacities);
    oos.writeObject(stopScheduleCapacities);
    oos.close();
}
From source file:VQVAD.VQVADTrainer.java
/**
 * Create a trainer with default values. Should work fine for most cases.
 */
public VQVADTrainer() {
    trainingFrameBuffer = new CircularFifoBuffer(DEFAULT_FRAME_BUFFER_SIZE);
    clusterer = new KMeansPlusPlusClusterer<DoublePoint>(vqSize, DEFAULT_KMEANS_MAX_ITER);
}