List of usage examples for org.apache.commons.math3.ml.clustering KMeansPlusPlusClusterer KMeansPlusPlusClusterer
public KMeansPlusPlusClusterer(final int k)
From source file:clustering.KMeans.java
public static void main(String[] args) throws UnknownHostException { if (args.length != 1) { System.out.println("Usage : KMeans <nrClusters>"); System.exit(-1);//from w w w .jav a2s . c om } int kClusters = Integer.parseInt(args[0]); ArrayList<Artist> artists = new ArrayList<Artist>(); DBHelper dbHelper = DBHelper.getInstance(); DBCursor result = dbHelper.findArtistsWithFBandTW(); while (result.hasNext()) { DBObject currentArtist = result.next(); artists.add(Artist.fromDBObject(currentArtist)); } //System.out.println(artists.size()); KMeansPlusPlusClusterer<Artist> clusterer = new KMeansPlusPlusClusterer<Artist>(kClusters); List<CentroidCluster<Artist>> clusters = clusterer.cluster(artists); //System.out.println(clusters.size()); dbHelper.emptyClusterCenters(); for (CentroidCluster<Artist> cluster : clusters) { double[] center = cluster.getCenter().getPoint(); ObjectId centerId = dbHelper.insertClusterCenter(center[0], center[1], center[2]); List<Artist> artC = cluster.getPoints(); for (Artist artist : artC) { dbHelper.updateMatrixRowCluster(artist.getDBObject(), centerId); //System.out.print("("+artist.fb_likes+","+artist.twitter_followers+","+artist.album_count+") "); } } }
From source file:indexer.DocClusterer.java
/**
 * Clusters this document's word vectors into at most {@code numClusters}
 * groups using k-means++.
 *
 * @param wvecMap     word -&gt; vector map of the document's terms
 * @param numClusters desired number of clusters (capped at the vocabulary size)
 * @return the centroid clusters, or null when there are no vectors to cluster
 * @throws Exception propagated from the underlying clustering
 */
public List<CentroidCluster<WordVec>> clusterWords(HashMap<String, WordVec> wvecMap, int numClusters)
        throws Exception {
    System.out.println("Clustering document: " + dvec.getDocId());
    // Bail out BEFORE building the clusterer: the original constructed a
    // k-means clusterer with k == 0 when the map was empty.
    if (wvecMap.isEmpty())
        return null;
    List<WordVec> wordList = new ArrayList<>(wvecMap.values());
    // Never request more clusters than there are points.
    KMeansPlusPlusClusterer<WordVec> clusterer =
            new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size()));
    return clusterer.cluster(wordList);
}
From source file:gedi.atac.Atac.java
public static void testInPeaks(GenomicRegionStorage<? extends AlignedReadsData> storage, String contrasts, String peakFile, String rmq, String compOut, String bicOut, String out, boolean randomizeContrasts) throws IOException { DiskGenomicNumericBuilder clusterRmq = new DiskGenomicNumericBuilder(rmq); LineIterator it = new LineOrientedFile(peakFile).lineIterator(); LineOrientedFile o = new LineOrientedFile(out); o.startWriting();/*from w w w .j av a 2s. c o m*/ o.writef("%s\tComponents\tp.value\n", it.next()); int offset = 4; ContrastMapping contr = new ContrastMapping(); ExtendedIterator<String> coit = new LineOrientedFile(contrasts).lineIterator(); if (randomizeContrasts) { String[] ca = coit.toArray(new String[0]); ArrayUtils.shuffleSlice(ca, 0, ca.length); coit = FunctorUtils.arrayIterator(ca); } coit.forEachRemaining( l -> contr.addMapping(contr.getNumOriginalConditions(), contr.getMappedIndexOrNext(l), l)); LineOrientedFile co = new LineOrientedFile(compOut); co.startWriting(); co.writef("Peak\tComponent"); for (int i = 0; i < contr.getNumMergedConditions(); i++) co.writef("\t%s", contr.getMappedName(i)); co.writeLine(); LineOrientedFile bico = new LineOrientedFile(bicOut); bico.startWriting(); bico.writef("Peak\tk\tBIC\n"); Progress pr = new ConsoleProgress(); pr.init(); int peakCount = (int) new LineOrientedFile(peakFile).lineIterator().count() - 1; pr.setCount(peakCount); while (it.hasNext()) { String line = it.next(); ImmutableReferenceGenomicRegion<Object> peak = ImmutableReferenceGenomicRegion .parse(StringUtils.splitField(line, '\t', 0)); pr.setDescription(peak.toString()); pr.incrementProgress(); HashMap<FixedDoublePoint, Integer> pToPos = new HashMap<FixedDoublePoint, Integer>(); FixedDoublePoint[] m = new FixedDoublePoint[peak.getRegion().getTotalLength()]; for (int i = 0; i < m.length; i++) { m[i] = new FixedDoublePoint(new double[contr.getNumMergedConditions()]); pToPos.put(m[i], peak.getRegion().map(i)); } Consumer<MutableReferenceGenomicRegion<? 
extends AlignedReadsData>> adder = new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() { @Override public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) { try { int start = GenomicRegionPosition.Start.position(mrgr.getReference(), mrgr.getRegion(), offset); if (peak.getRegion().contains(start)) addDownsampled(contr, m[peak.getRegion().induce(start)].getPoint(), mrgr.getData().getTotalCountsForConditions(ReadCountMode.All)); int stop = GenomicRegionPosition.Stop.position(mrgr.getReference(), mrgr.getRegion(), -offset); if (peak.getRegion().contains(stop)) addDownsampled(contr, m[peak.getRegion().induce(stop)].getPoint(), mrgr.getData().getTotalCountsForConditions(ReadCountMode.All)); } catch (Exception e) { throw new RuntimeException(e); } } private void addDownsampled(ContrastMapping contr, double[] re, double[] c) { double max = ArrayUtils.max(c); if (max > 0) ArrayUtils.mult(c, 1 / max); for (int i = 0; i < c.length; i++) if (contr.getMappedIndex(i) > -1) re[contr.getMappedIndex(i)] += c[i]; } }; storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toPlusStrand(), peak.getRegion()).forEachRemaining(adder); storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toMinusStrand(), peak.getRegion()).forEachRemaining(adder); // double[] total = new double[cond]; // for (int i=0; i<m.length; i++) // for (int j=0; j<cond; j++) // total[j]+=m[i].getPoint()[j]; // ArrayUtils.normalize(total); // // double ll = 0; // for (int i=0; i<m.length; i++) // ll+=ddirichlet1(m[i].getPoint(), total); // DoubleArrayList ll = new DoubleArrayList(); ll.add(0); DoubleArrayList bic = new DoubleArrayList(); bic.add(0); ArrayList<FixedDoublePoint> list = new ArrayList<FixedDoublePoint>(); for (FixedDoublePoint p : m) if (ArrayUtils.sum(p.getPoint()) > 0) list.add(p); List<CentroidCluster<FixedDoublePoint>> ocl = null; double op = 0; for (int k = 1; k < Math.min(list.size(), 50); k++) { 
KMeansPlusPlusClusterer<FixedDoublePoint> kmeans = new KMeansPlusPlusClusterer<FixedDoublePoint>(k); List<CentroidCluster<FixedDoublePoint>> cl = kmeans.cluster(list); double cll = 0; for (CentroidCluster<FixedDoublePoint> c : cl) { double[] total = new double[contr.getNumMergedConditions()]; Arrays.fill(total, 1); for (FixedDoublePoint p : c.getPoints()) for (int j = 0; j < contr.getNumMergedConditions(); j++) total[j] += p.getPoint()[j]; ArrayUtils.normalize(total); for (FixedDoublePoint p : c.getPoints()) cll += ddirichlet1(p.getPoint(), total); } // LLR test double LLR = 2 * cll - 2 * ll.getLastDouble(); double p = 1 - new ChiSquaredDistribution(contr.getNumMergedConditions() - 1).cumulativeProbability(LLR); bic.add(-2 * cll + 2 * (contr.getNumMergedConditions() - 1) * k); bico.writef("%s\t%d\t%.1f\n", peak.toLocationString(), k, bic.getLastDouble()); // bonferroni correction p = p * peakCount; if (p > 0.01) { if (ocl.size() > 1) { for (int i = 0; i < ocl.size(); i++) { co.writef("%s\t%d", peak.toLocationString(), i); double[] total = new double[contr.getNumMergedConditions()]; Arrays.fill(total, 1); for (FixedDoublePoint pp : ocl.get(i).getPoints()) { clusterRmq.addValue(peak.getReference(), pToPos.get(pp).intValue(), (byte) i); for (int j = 0; j < contr.getNumMergedConditions(); j++) total[j] += pp.getPoint()[j]; } ArrayUtils.normalize(total); for (int c = 0; c < contr.getNumMergedConditions(); c++) co.writef("\t%.4f", total[c]); co.writeLine(); } } break; } ll.add(cll); ocl = cl; op = p; } o.writef("%s\t%d\t%.4g\n", line, ll.size() - 1, ll.size() == 2 ? Double.NaN : op); } pr.finish(); o.finishWriting(); co.finishWriting(); clusterRmq.build(); }
From source file:org.rhwlab.dispim.datasource.SegmentedTiffDataSource.java
@Override public ClusteredDataSource kMeansCluster(int nClusters, int nPartitions) throws Exception { double[] maxs = segmentation.getMaxs(); double[] mins = segmentation.getMins(); double[] dels = new double[dims.length]; for (int d = 0; d < dims.length; ++d) { dels[d] = (maxs[d] - mins[d]) / nPartitions; }/* w ww .j a v a 2 s . c o m*/ // partition the voxels into separate lists int mx = (int) Math.pow(nPartitions, getD()); ArrayList<Voxel>[] lists = new ArrayList[mx]; for (int i = 0; i < lists.length; ++i) { lists[i] = new ArrayList(); } for (int i = 0; i < getSegmentN(); ++i) { Voxel vox = getSegmentVoxel(i); int index = region(vox.getPoint(), nPartitions, dels); lists[index].add(vox); } // build the clustering threads double f = (double) getSegmentN() / (double) nClusters; ArrayList<VoxelClusterer> clusterers = new ArrayList<>(); for (int i = 0; i < lists.length; ++i) { ArrayList<Voxel> list = lists[i]; if (!list.isEmpty()) { int nc = (int) ((double) list.size() / f); VoxelClusterer clusterer = new VoxelClusterer(list, new KMeansPlusPlusClusterer(nc)); // VoxelClusterer clusterer = new VoxelClusterer(list,new BalancedKMeansClusterer(nc)); clusterers.add(clusterer); } } // do the clustering for (VoxelClusterer clusterer : clusterers) { clusterer.start(); } // wait for them all to finish for (VoxelClusterer clusterer : clusterers) { clusterer.join(); } ClusteredDataSource ret = new ClusteredDataSource(clusterers.toArray(new VoxelClusterer[0]), segmentation.getThreshold(), this.getD()); ret.setPartition(mx); return ret; }
From source file:org.rhwlab.variationalbayesian.OldFaithfulDataSource.java
/**
 * Clusters the data vectors into K groups with k-means++.
 *
 * @param K the number of clusters
 * @return the resulting centroid clusters
 */
public List<CentroidCluster<Clusterable>> cluster(int K) {
    // Typed as Clusterable (not raw): the original raw clusterer produced an
    // unchecked conversion to the declared return type.
    KMeansPlusPlusClusterer<Clusterable> clusterer = new KMeansPlusPlusClusterer<>(K);
    // Wrap each data vector as a Clusterable point.
    List<Clusterable> points = new ArrayList<>();
    for (RealVector v : data) {
        points.add(new DoublePoint(v.toArray()));
    }
    return clusterer.cluster(points);
}
From source file:org.rhwlab.variationalbayesian.TifMaskSuperVoxelDataSource.java
/**
 * Builds super voxels from a TIFF mask image: every voxel whose mask value is
 * not 1 is collected, and the voxels are grouped into roughly 1000-voxel
 * super voxels via k-means++ clustering of their coordinates.
 *
 * @param file path of the TIFF mask image
 */
public TifMaskSuperVoxelDataSource(String file) {
    // Collect the coordinates of all masked voxels (mask value != 1).
    ArrayList<DoublePoint> allVoxels = new ArrayList<>();
    final ImagePlus imp = new Opener().openImage(file);
    final Img image = ImagePlusAdapter.wrap(imp);
    Cursor cursor = image.localizingCursor();
    int[] pos = new int[3];
    while (cursor.hasNext()) {
        UnsignedByteType obj = (UnsignedByteType) cursor.next();
        if (obj.getInteger() != 1) {
            cursor.localize(pos);
            // DoublePoint(int[]) copies the values, so reusing pos is safe.
            allVoxels.add(new DoublePoint(pos));
        }
    }
    this.T = allVoxels.size();

    // Aim for ~1000 voxels per super voxel; clamp to >= 1 so a small mask
    // no longer produces an invalid k == 0 clusterer.
    int K = Math.max(1, allVoxels.size() / 1000);
    superVoxels = new SuperVoxel[K];
    KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(K);
    List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(allVoxels);

    // Convert each cluster into a SuperVoxel (member vectors + center).
    int k = 0;
    for (CentroidCluster<DoublePoint> cluster : clusters) {
        List<DoublePoint> points = cluster.getPoints();
        RealVector[] voxels = new RealVector[points.size()];
        for (int i = 0; i < points.size(); ++i) {
            voxels[i] = new ArrayRealVector(points.get(i).getPoint());
        }
        RealVector center = new ArrayRealVector(cluster.getCenter().getPoint());
        superVoxels[k] = new SuperVoxel(voxels, center);
        ++k;
    }
}
From source file:qupath.lib.gui.panels.classify.RandomTrainingRegionSelector.java
public static Map<Integer, List<PathObject>> objectClusterer(final Collection<PathObject> pathObjects, final BufferedImage imgThumbnail, final double thumbScaleX, final double thumbScaleY, final int nClusters) { Map<Integer, List<PathObject>> map = new HashMap<>(); if (pathObjects.isEmpty()) return map; if (nClusters <= 1 || pathObjects.size() == 1) { map.put(Integer.valueOf(0), new ArrayList<>(pathObjects)); return map; }//w w w . ja v a2s . c o m // int maxIterations = 100; KMeansPlusPlusClusterer<ClusterableObject> km = new KMeansPlusPlusClusterer<>(nClusters); List<ClusterableObject> clusterableObjects = new ArrayList<>(); WritableRaster raster = imgThumbnail.getRaster(); int nChannels = raster.getNumBands(); double[] valueBuffer = new double[nChannels]; int w = imgThumbnail.getWidth(); int h = imgThumbnail.getHeight(); boolean isRGB = imgThumbnail.getSampleModel().getNumBands() == 3 && imgThumbnail.getSampleModel().getSampleSize(0) == 8; for (PathObject pathObject : pathObjects) { // Get pixel values for the ROI centroid // CIE LAB is used rather than RGB where possible, due to better suitability for Euclidean distances ROI roi = pathObject.getROI(); if (roi == null) continue; int x = (int) (roi.getCentroidX() * thumbScaleX + 0.5); int y = (int) (roi.getCentroidY() * thumbScaleY + 0.5); if (x < 0 || x >= w || y < 0 || y >= h) continue; if (isRGB) valueBuffer = makeCIELAB(imgThumbnail.getRGB(x, y), valueBuffer); else { for (int c = 0; c < nChannels; c++) valueBuffer[c] = raster.getSampleDouble(x, y, c); } clusterableObjects.add(new ClusterableObject(pathObject, valueBuffer)); } List<CentroidCluster<ClusterableObject>> results = km.cluster(clusterableObjects); int i = 0; for (CentroidCluster<ClusterableObject> centroidCluster : results) { Integer label = Integer.valueOf(i); List<PathObject> objects = new ArrayList<>(); for (ClusterableObject co : centroidCluster.getPoints()) objects.add(co.getPathObject()); map.put(label, objects); i++; } return map; }
From source file:wvec.DocVec.java
public DocVec(WeightedTerm[] queryTerms, int numClusters) throws Exception { wvecMap = new HashMap<>(); for (WeightedTerm term : queryTerms) { WordVec qwv = WordVecs.getVec(term.getTerm()); if (qwv != null) { qwv.normalize();/* w w w. jav a 2 s .c o m*/ wvecMap.put(qwv.getWord(), qwv); } } List<WordVec> wordList = new ArrayList<>(wvecMap.size()); for (Map.Entry<String, WordVec> e : wvecMap.entrySet()) { wordList.add(e.getValue()); } if (wordList.size() == 0) return; // Cluster the query word vecs clusterer = new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size())); List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList); wvecMap.clear(); int i = 0; for (CentroidCluster<WordVec> c : clusters) { Clusterable clusterCenter = c.getCenter(); WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint()); wvecMap.put(clusterWordVec.getWord(), clusterWordVec); i++; } }
From source file:wvec.WordVecsIndexer.java
void clusterWordVecs(IndexWriter clusterIndexWriter, int numClusters) throws Exception { // Index where word vectors are stored IndexReader reader = DirectoryReader.open(FSDirectory.open((new File(indexPath)).toPath())); int numDocs = reader.numDocs(); KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(numClusters); List<WordVec> wordList = new ArrayList<>(numDocs); // Read every wvec and load in memory for (int i = 0; i < numDocs; i++) { Document doc = reader.document(i); WordVec wvec = new WordVec(doc.get(FIELD_WORD_VEC)); wordList.add(wvec);//www .j a va 2 s. co m } // Call K-means clustering System.out.println("Clustering the entire vocabulary..."); List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList); // Save the cluster info System.out.println("Writing out cluster ids in Lucene index..."); int clusterId = 0; for (CentroidCluster<WordVec> c : clusters) { List<WordVec> pointsInThisClusuter = c.getPoints(); for (WordVec thisPoint : pointsInThisClusuter) { Document clusterInfo = constructDoc(thisPoint.word, String.valueOf(clusterId)); clusterIndexWriter.addDocument(clusterInfo); } clusterId++; } reader.close(); }