List of usage examples for org.apache.commons.math3.ml.clustering.Clusterable#getPoint()
double[] getPoint();
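Clusterable is the interface every point handed to a Commons Math clusterer must implement, and getPoint() exposes the point's coordinates as a plain double[]. Before the project-specific examples below, here is a minimal sketch using the library's built-in DoublePoint implementation with KMeansPlusPlusClusterer and reading each centroid back through getPoint(); the class name, sample coordinates, and k value are illustrative only, not taken from the examples.

import java.util.Arrays;
import java.util.List;
import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.Clusterable;
import org.apache.commons.math3.ml.clustering.DoublePoint;
import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;

public class GetPointDemo {
    public static void main(String[] args) {
        // a few two-dimensional points; DoublePoint is the library's ready-made Clusterable
        List<DoublePoint> points = Arrays.asList(
                new DoublePoint(new double[] { 1.0, 1.0 }),
                new DoublePoint(new double[] { 1.5, 2.0 }),
                new DoublePoint(new double[] { 9.0, 8.0 }),
                new DoublePoint(new double[] { 8.5, 9.5 }));

        // k-means++ with k = 2
        KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(2);
        List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);

        for (CentroidCluster<DoublePoint> cluster : clusters) {
            // getCenter() is typed as Clusterable, so getPoint() is how the coordinates come back out
            Clusterable center = cluster.getCenter();
            System.out.println(Arrays.toString(center.getPoint()));
        }
    }
}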
From source file: indexer.DocClusterer.java
public String getClusterVecs() throws Exception {
    StringBuffer buff = new StringBuffer();
    List<CentroidCluster<WordVec>> clusters = clusterWords(dvec.getWordMap(), numClusters);
    if (clusters == null)
        return "";
    int i = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        //List<WordVec> thisClusterPoints = c.getPoints();
        //WordVec clusterCenter = WordVecs.getCentroid(thisClusterPoints);
        Clusterable clusterCenter = c.getCenter();
        WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint());
        //clusterCenter.setWord("Cluster_" + numClusters);
        buff.append(clusterWordVec.toString()).append(":");
        i++;
    }
    return buff.toString();
}
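The WordVec type used above (and again in DocVec below) is not part of Commons Math; it is the application's own Clusterable, which is why the cluster center can be unwrapped with getPoint(). Any class can participate in clustering this way. The following is a hedged sketch of such a wrapper; the class name, fields, and constructor are assumptions for illustration, not the actual WordVec source.

import org.apache.commons.math3.ml.clustering.Clusterable;

// Hypothetical sketch of a labeled-vector Clusterable; the real WordVec class is not shown in these examples.
public class LabeledVector implements Clusterable {
    private final String word;
    private final double[] vec;

    public LabeledVector(String word, double[] vec) {
        this.word = word;
        this.vec = vec;
    }

    public String getWord() {
        return word;
    }

    @Override
    public double[] getPoint() {
        // the clusterer only ever sees this array; the label travels along with the point
        return vec;
    }
}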
From source file: org.apache.solr.client.solrj.io.eval.GetCentroidsEvaluator.java
@Override
public Object doWork(Object value) throws IOException {
    if (!(value instanceof KmeansEvaluator.ClusterTuple)) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for value, expecting a clustering result",
                toExpression(constructingFactory), value.getClass().getSimpleName()));
    } else {
        KmeansEvaluator.ClusterTuple clusterTuple = (KmeansEvaluator.ClusterTuple) value;
        List<CentroidCluster<KmeansEvaluator.ClusterPoint>> clusters = clusterTuple.getClusters();
        double[][] data = new double[clusters.size()][];
        for (int i = 0; i < clusters.size(); i++) {
            CentroidCluster<KmeansEvaluator.ClusterPoint> centroidCluster = clusters.get(i);
            Clusterable clusterable = centroidCluster.getCenter();
            data[i] = clusterable.getPoint();
        }
        Matrix centroids = new Matrix(data);
        centroids.setColumnLabels(clusterTuple.getColumnLabels());
        return centroids;
    }
}
From source file: org.rhwlab.BHC.BalancedKMeansClusterer.java
@Override
public List<Cluster<Clusterable>> cluster(Collection<Clusterable> points)
        throws MathIllegalArgumentException, ConvergenceException {
    s = new int[k];
    for (int i = 0; i < s.length; ++i) {
        s[i] = 0;
    }
    data = points.toArray(new Clusterable[0]);
    int n = data.length;
    dist = new double[n][];
    for (int i = 0; i < dist.length; ++i) {
        dist[i] = new double[n];
    }
    HungarianAlgorithm hungar = new HungarianAlgorithm(dist);
    slots = new int[n];
    // assign the slots to centroids
    int c = 0;
    for (int i = 0; i < slots.length; ++i) {
        slots[i] = c;
        ++s[c];
        ++c;
        if (c == k) {
            c = 0;
        }
    }
    // randomly assign the initial centroids from the data
    Random rnd = new Random();
    centroids = new double[k][];
    HashSet<Clusterable> set = new HashSet<>(); // set used to ensure a data point is used only once
    for (int i = 0; i < k; ++i) {
        Clusterable cl = data[rnd.nextInt(n)];
        while (set.contains(cl)) {
            cl = data[rnd.nextInt(n)];
        }
        set.add(cl);
        centroids[i] = cl.getPoint();
    }
    assigns = hungar.execute();
    // iterate to a solution
    boolean finished = false;
    while (!finished) {
        // calculate the distances from each data point to the centroid occupying each slot
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                dist[i][j] = this.getDistanceMeasure().compute(data[i].getPoint(), centroids[slots[j]]);
            }
        }
        // assign data points to slots by solving the assignment problem
        int[] nextAssigns = hungar.execute();
        // count the changes (the original loop condition "i > assigns.length" never ran)
        int changes = 0;
        for (int i = 0; i < assigns.length; ++i) {
            if (assigns[i] != nextAssigns[i]) {
                ++changes;
            }
        }
        assigns = nextAssigns;
        // calculate new centroids: reset, accumulate assigned points, then divide by cluster size
        for (int i = 0; i < k; ++i) {
            double[] centroid = centroids[i];
            for (int j = 0; j < centroid.length; ++j) {
                centroid[j] = 0.0;
            }
        }
        for (int i = 0; i < n; ++i) {
            double[] p = data[i].getPoint();
            int cluster = slots[assigns[i]];
            for (int j = 0; j < p.length; ++j) {
                centroids[cluster][j] = centroids[cluster][j] + p[j];
            }
        }
        for (int i = 0; i < k; ++i) {
            double[] centroid = centroids[i];
            for (int j = 0; j < centroid.length; ++j) {
                centroid[j] = centroid[j] / s[i];
            }
        }
        finished = changes == 0;
    }
    // note: this example never assembles the resulting cluster objects; it returns null
    return null;
}
From source file: org.rhwlab.BHC.FreqSensKMeansClusterer.java
@Override
public List<? extends Cluster<Clusterable>> cluster(Collection<Clusterable> points)
        throws MathIllegalArgumentException, ConvergenceException {
    ArrayList<Cluster<Clusterable>> ret = new ArrayList<>();
    data = points.toArray(new Clusterable[0]);
    int N = data.length;
    D = data[0].getPoint().length;
    inCluster = new int[N];
    for (int i = 0; i < N; ++i) {
        inCluster[i] = -1;
    }
    n = new double[K];
    logN = new double[K];
    for (int i = 0; i < K; ++i) {
        n[i] = (double) N / (double) K;
        logN[i] = Math.log(n[i]);
    }
    double f = (double) D * (double) N / (double) K;
    // randomly assign the initial means
    Random rnd = new Random();
    centroids = new double[K][];
    HashSet<Clusterable> set = new HashSet<>(); // set used to ensure a data point is used only once
    for (int i = 0; i < K; ++i) {
        Clusterable cl = data[rnd.nextInt(N)];
        while (set.contains(cl)) {
            cl = data[rnd.nextInt(N)];
        }
        set.add(cl);
        centroids[i] = cl.getPoint();
    }
    int changes;
    do {
        changes = 0;
        // make new assignments of each data point
        for (int i = 0; i < N; ++i) {
            double[] point = data[i].getPoint();
            double hMax = Double.NEGATIVE_INFINITY; // start below any score so an index is always chosen
            int index = -1;
            for (int j = 0; j < K; ++j) {
                double h = 0.0;
                for (int d = 0; d < D; ++d) {
                    h = h + centroids[j][d] * point[d];
                }
                h = (h + 1.0 + n[j] * logN[j] / f) / n[j];
                if (h > hMax) {
                    hMax = h;
                    index = j;
                }
            }
            if (inCluster[i] != index) {
                ++changes;
            }
            inCluster[i] = index; // record the assignment (missing in the original, which left inCluster at -1)
        }
        // update the number of points in each cluster
        int[] sums = new int[K];
        for (int i = 0; i < N; ++i) {
            ++sums[inCluster[i]];
        }
        for (int j = 0; j < K; ++j) {
            n[j] = sums[j];
            logN[j] = Math.log(n[j]);
        }
        // update the centroids
        centroids = new double[K][];
        for (int k = 0; k < K; ++k) {
            centroids[k] = new double[D];
        }
        for (int i = 0; i < N; ++i) {
            double[] p = data[i].getPoint();
            int c = inCluster[i];
            for (int d = 0; d < D; ++d) {
                centroids[c][d] = centroids[c][d] + p[d];
            }
        }
        for (int k = 0; k < K; ++k) {
            for (int d = 0; d < D; ++d) {
                centroids[k][d] = centroids[k][d] / n[k];
            }
        }
    } while (changes > 0);
    // note: the returned list is left empty in this example; the final assignments live in inCluster and centroids
    return ret;
}
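Both BalancedKMeansClusterer and FreqSensKMeansClusterer above extend Commons Math's abstract Clusterer<T>, which supplies getDistanceMeasure() and leaves cluster(Collection<T>) to the subclass; getPoint() is the only way such a subclass can reach the raw coordinates of its inputs. Below is a hedged, deliberately trivial sketch of that pattern under the same assumptions (the class name is hypothetical and it simply averages every point into one centroid; it assumes a non-empty input).

import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.Cluster;
import org.apache.commons.math3.ml.clustering.Clusterable;
import org.apache.commons.math3.ml.clustering.Clusterer;
import org.apache.commons.math3.ml.clustering.DoublePoint;
import org.apache.commons.math3.ml.distance.EuclideanDistance;

// Illustrative only: a "clusterer" that lumps every point into a single centroid cluster.
public class SingleClusterClusterer<T extends Clusterable> extends Clusterer<T> {

    public SingleClusterClusterer() {
        super(new EuclideanDistance());
    }

    @Override
    public List<? extends Cluster<T>> cluster(Collection<T> points) {
        // assumes points is non-empty; dimensionality comes from the first point
        int d = points.iterator().next().getPoint().length;
        double[] mean = new double[d];
        for (T p : points) {
            double[] coords = p.getPoint(); // raw coordinates via Clusterable.getPoint()
            for (int i = 0; i < d; ++i) {
                mean[i] += coords[i] / points.size();
            }
        }
        CentroidCluster<T> cluster = new CentroidCluster<>(new DoublePoint(mean));
        for (T p : points) {
            cluster.addPoint(p);
        }
        return Collections.singletonList(cluster);
    }
}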
From source file: wvec.DocVec.java
public DocVec(WeightedTerm[] queryTerms, int numClusters) throws Exception {
    wvecMap = new HashMap<>();
    for (WeightedTerm term : queryTerms) {
        WordVec qwv = WordVecs.getVec(term.getTerm());
        if (qwv != null) {
            qwv.normalize();
            wvecMap.put(qwv.getWord(), qwv);
        }
    }
    List<WordVec> wordList = new ArrayList<>(wvecMap.size());
    for (Map.Entry<String, WordVec> e : wvecMap.entrySet()) {
        wordList.add(e.getValue());
    }
    if (wordList.size() == 0)
        return;
    // Cluster the query word vecs
    clusterer = new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size()));
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);
    wvecMap.clear();
    int i = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        Clusterable clusterCenter = c.getCenter();
        WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint());
        wvecMap.put(clusterWordVec.getWord(), clusterWordVec);
        i++;
    }
}