List of usage examples for org.apache.commons.math3.ml.clustering KMeansPlusPlusClusterer KMeansPlusPlusClusterer
public KMeansPlusPlusClusterer(final int k)
From source file:clustering.KMeans.java
public static void main(String[] args) throws UnknownHostException { if (args.length != 1) { System.out.println("Usage : KMeans <nrClusters>"); System.exit(-1);//from w w w .jav a2s . c om } int kClusters = Integer.parseInt(args[0]); ArrayList<Artist> artists = new ArrayList<Artist>(); DBHelper dbHelper = DBHelper.getInstance(); DBCursor result = dbHelper.findArtistsWithFBandTW(); while (result.hasNext()) { DBObject currentArtist = result.next(); artists.add(Artist.fromDBObject(currentArtist)); } //System.out.println(artists.size()); KMeansPlusPlusClusterer<Artist> clusterer = new KMeansPlusPlusClusterer<Artist>(kClusters); List<CentroidCluster<Artist>> clusters = clusterer.cluster(artists); //System.out.println(clusters.size()); dbHelper.emptyClusterCenters(); for (CentroidCluster<Artist> cluster : clusters) { double[] center = cluster.getCenter().getPoint(); ObjectId centerId = dbHelper.insertClusterCenter(center[0], center[1], center[2]); List<Artist> artC = cluster.getPoints(); for (Artist artist : artC) { dbHelper.updateMatrixRowCluster(artist.getDBObject(), centerId); //System.out.print("("+artist.fb_likes+","+artist.twitter_followers+","+artist.album_count+") "); } } }
From source file:indexer.DocClusterer.java
/**
 * Clusters this document's word vectors into at most {@code numClusters}
 * groups using k-means++.
 *
 * @param wvecMap     word -&gt; vector map of the document's terms
 * @param numClusters desired number of clusters (capped at the vocabulary size)
 * @return the centroid clusters, or null when there are no vectors to cluster
 * @throws Exception propagated from the underlying clustering
 */
public List<CentroidCluster<WordVec>> clusterWords(HashMap<String, WordVec> wvecMap, int numClusters)
        throws Exception {
    System.out.println("Clustering document: " + dvec.getDocId());
    // Bail out BEFORE building the clusterer: the original constructed a
    // k-means clusterer with k == 0 when the map was empty.
    if (wvecMap.isEmpty())
        return null;
    List<WordVec> wordList = new ArrayList<>(wvecMap.values());
    // Never request more clusters than there are points.
    KMeansPlusPlusClusterer<WordVec> clusterer =
            new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size()));
    return clusterer.cluster(wordList);
}
From source file:gedi.atac.Atac.java
public static void testInPeaks(GenomicRegionStorage<? extends AlignedReadsData> storage, String contrasts, String peakFile, String rmq, String compOut, String bicOut, String out, boolean randomizeContrasts) throws IOException { DiskGenomicNumericBuilder clusterRmq = new DiskGenomicNumericBuilder(rmq); LineIterator it = new LineOrientedFile(peakFile).lineIterator(); LineOrientedFile o = new LineOrientedFile(out); o.startWriting();/*from w w w .j av a 2s. c o m*/ o.writef("%s\tComponents\tp.value\n", it.next()); int offset = 4; ContrastMapping contr = new ContrastMapping(); ExtendedIterator<String> coit = new LineOrientedFile(contrasts).lineIterator(); if (randomizeContrasts) { String[] ca = coit.toArray(new String[0]); ArrayUtils.shuffleSlice(ca, 0, ca.length); coit = FunctorUtils.arrayIterator(ca); } coit.forEachRemaining( l -> contr.addMapping(contr.getNumOriginalConditions(), contr.getMappedIndexOrNext(l), l)); LineOrientedFile co = new LineOrientedFile(compOut); co.startWriting(); co.writef("Peak\tComponent"); for (int i = 0; i < contr.getNumMergedConditions(); i++) co.writef("\t%s", contr.getMappedName(i)); co.writeLine(); LineOrientedFile bico = new LineOrientedFile(bicOut); bico.startWriting(); bico.writef("Peak\tk\tBIC\n"); Progress pr = new ConsoleProgress(); pr.init(); int peakCount = (int) new LineOrientedFile(peakFile).lineIterator().count() - 1; pr.setCount(peakCount); while (it.hasNext()) { String line = it.next(); ImmutableReferenceGenomicRegion<Object> peak = ImmutableReferenceGenomicRegion .parse(StringUtils.splitField(line, '\t', 0)); pr.setDescription(peak.toString()); pr.incrementProgress(); HashMap<FixedDoublePoint, Integer> pToPos = new HashMap<FixedDoublePoint, Integer>(); FixedDoublePoint[] m = new FixedDoublePoint[peak.getRegion().getTotalLength()]; for (int i = 0; i < m.length; i++) { m[i] = new FixedDoublePoint(new double[contr.getNumMergedConditions()]); pToPos.put(m[i], peak.getRegion().map(i)); } Consumer<MutableReferenceGenomicRegion<? 
extends AlignedReadsData>> adder = new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() { @Override public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) { try { int start = GenomicRegionPosition.Start.position(mrgr.getReference(), mrgr.getRegion(), offset); if (peak.getRegion().contains(start)) addDownsampled(contr, m[peak.getRegion().induce(start)].getPoint(), mrgr.getData().getTotalCountsForConditions(ReadCountMode.All)); int stop = GenomicRegionPosition.Stop.position(mrgr.getReference(), mrgr.getRegion(), -offset); if (peak.getRegion().contains(stop)) addDownsampled(contr, m[peak.getRegion().induce(stop)].getPoint(), mrgr.getData().getTotalCountsForConditions(ReadCountMode.All)); } catch (Exception e) { throw new RuntimeException(e); } } private void addDownsampled(ContrastMapping contr, double[] re, double[] c) { double max = ArrayUtils.max(c); if (max > 0) ArrayUtils.mult(c, 1 / max); for (int i = 0; i < c.length; i++) if (contr.getMappedIndex(i) > -1) re[contr.getMappedIndex(i)] += c[i]; } }; storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toPlusStrand(), peak.getRegion()).forEachRemaining(adder); storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toMinusStrand(), peak.getRegion()).forEachRemaining(adder); // double[] total = new double[cond]; // for (int i=0; i<m.length; i++) // for (int j=0; j<cond; j++) // total[j]+=m[i].getPoint()[j]; // ArrayUtils.normalize(total); // // double ll = 0; // for (int i=0; i<m.length; i++) // ll+=ddirichlet1(m[i].getPoint(), total); // DoubleArrayList ll = new DoubleArrayList(); ll.add(0); DoubleArrayList bic = new DoubleArrayList(); bic.add(0); ArrayList<FixedDoublePoint> list = new ArrayList<FixedDoublePoint>(); for (FixedDoublePoint p : m) if (ArrayUtils.sum(p.getPoint()) > 0) list.add(p); List<CentroidCluster<FixedDoublePoint>> ocl = null; double op = 0; for (int k = 1; k < Math.min(list.size(), 50); k++) { 
KMeansPlusPlusClusterer<FixedDoublePoint> kmeans = new KMeansPlusPlusClusterer<FixedDoublePoint>(k); List<CentroidCluster<FixedDoublePoint>> cl = kmeans.cluster(list); double cll = 0; for (CentroidCluster<FixedDoublePoint> c : cl) { double[] total = new double[contr.getNumMergedConditions()]; Arrays.fill(total, 1); for (FixedDoublePoint p : c.getPoints()) for (int j = 0; j < contr.getNumMergedConditions(); j++) total[j] += p.getPoint()[j]; ArrayUtils.normalize(total); for (FixedDoublePoint p : c.getPoints()) cll += ddirichlet1(p.getPoint(), total); } // LLR test double LLR = 2 * cll - 2 * ll.getLastDouble(); double p = 1 - new ChiSquaredDistribution(contr.getNumMergedConditions() - 1).cumulativeProbability(LLR); bic.add(-2 * cll + 2 * (contr.getNumMergedConditions() - 1) * k); bico.writef("%s\t%d\t%.1f\n", peak.toLocationString(), k, bic.getLastDouble()); // bonferroni correction p = p * peakCount; if (p > 0.01) { if (ocl.size() > 1) { for (int i = 0; i < ocl.size(); i++) { co.writef("%s\t%d", peak.toLocationString(), i); double[] total = new double[contr.getNumMergedConditions()]; Arrays.fill(total, 1); for (FixedDoublePoint pp : ocl.get(i).getPoints()) { clusterRmq.addValue(peak.getReference(), pToPos.get(pp).intValue(), (byte) i); for (int j = 0; j < contr.getNumMergedConditions(); j++) total[j] += pp.getPoint()[j]; } ArrayUtils.normalize(total); for (int c = 0; c < contr.getNumMergedConditions(); c++) co.writef("\t%.4f", total[c]); co.writeLine(); } } break; } ll.add(cll); ocl = cl; op = p; } o.writef("%s\t%d\t%.4g\n", line, ll.size() - 1, ll.size() == 2 ? Double.NaN : op); } pr.finish(); o.finishWriting(); co.finishWriting(); clusterRmq.build(); }
From source file:org.rhwlab.dispim.datasource.SegmentedTiffDataSource.java
@Override public ClusteredDataSource kMeansCluster(int nClusters, int nPartitions) throws Exception { double[] maxs = segmentation.getMaxs(); double[] mins = segmentation.getMins(); double[] dels = new double[dims.length]; for (int d = 0; d < dims.length; ++d) { dels[d] = (maxs[d] - mins[d]) / nPartitions; }/* w ww .j a v a 2 s . c o m*/ // partition the voxels into separate lists int mx = (int) Math.pow(nPartitions, getD()); ArrayList<Voxel>[] lists = new ArrayList[mx]; for (int i = 0; i < lists.length; ++i) { lists[i] = new ArrayList(); } for (int i = 0; i < getSegmentN(); ++i) { Voxel vox = getSegmentVoxel(i); int index = region(vox.getPoint(), nPartitions, dels); lists[index].add(vox); } // build the clustering threads double f = (double) getSegmentN() / (double) nClusters; ArrayList<VoxelClusterer> clusterers = new ArrayList<>(); for (int i = 0; i < lists.length; ++i) { ArrayList<Voxel> list = lists[i]; if (!list.isEmpty()) { int nc = (int) ((double) list.size() / f); VoxelClusterer clusterer = new VoxelClusterer(list, new KMeansPlusPlusClusterer(nc)); // VoxelClusterer clusterer = new VoxelClusterer(list,new BalancedKMeansClusterer(nc)); clusterers.add(clusterer); } } // do the clustering for (VoxelClusterer clusterer : clusterers) { clusterer.start(); } // wait for them all to finish for (VoxelClusterer clusterer : clusterers) { clusterer.join(); } ClusteredDataSource ret = new ClusteredDataSource(clusterers.toArray(new VoxelClusterer[0]), segmentation.getThreshold(), this.getD()); ret.setPartition(mx); return ret; }
From source file:org.rhwlab.variationalbayesian.OldFaithfulDataSource.java
/**
 * Clusters the data vectors into K groups with k-means++.
 *
 * @param K the number of clusters
 * @return the resulting centroid clusters
 */
public List<CentroidCluster<Clusterable>> cluster(int K) {
    // Typed as Clusterable (not raw): the original raw clusterer produced an
    // unchecked conversion to the declared return type.
    KMeansPlusPlusClusterer<Clusterable> clusterer = new KMeansPlusPlusClusterer<>(K);
    // Wrap each data vector as a Clusterable point.
    List<Clusterable> points = new ArrayList<>();
    for (RealVector v : data) {
        points.add(new DoublePoint(v.toArray()));
    }
    return clusterer.cluster(points);
}
From source file:org.rhwlab.variationalbayesian.TifMaskSuperVoxelDataSource.java
/**
 * Builds super voxels from a TIFF mask image: every voxel whose mask value is
 * not 1 is collected, and the voxels are grouped into roughly 1000-voxel
 * super voxels via k-means++ clustering of their coordinates.
 *
 * @param file path of the TIFF mask image
 */
public TifMaskSuperVoxelDataSource(String file) {
    // Collect the coordinates of all masked voxels (mask value != 1).
    ArrayList<DoublePoint> allVoxels = new ArrayList<>();
    final ImagePlus imp = new Opener().openImage(file);
    final Img image = ImagePlusAdapter.wrap(imp);
    Cursor cursor = image.localizingCursor();
    int[] pos = new int[3];
    while (cursor.hasNext()) {
        UnsignedByteType obj = (UnsignedByteType) cursor.next();
        if (obj.getInteger() != 1) {
            cursor.localize(pos);
            // DoublePoint(int[]) copies the values, so reusing pos is safe.
            allVoxels.add(new DoublePoint(pos));
        }
    }
    this.T = allVoxels.size();

    // Aim for ~1000 voxels per super voxel; clamp to >= 1 so a small mask
    // no longer produces an invalid k == 0 clusterer.
    int K = Math.max(1, allVoxels.size() / 1000);
    superVoxels = new SuperVoxel[K];
    KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(K);
    List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(allVoxels);

    // Convert each cluster into a SuperVoxel (member vectors + center).
    int k = 0;
    for (CentroidCluster<DoublePoint> cluster : clusters) {
        List<DoublePoint> points = cluster.getPoints();
        RealVector[] voxels = new RealVector[points.size()];
        for (int i = 0; i < points.size(); ++i) {
            voxels[i] = new ArrayRealVector(points.get(i).getPoint());
        }
        RealVector center = new ArrayRealVector(cluster.getCenter().getPoint());
        superVoxels[k] = new SuperVoxel(voxels, center);
        ++k;
    }
}
From source file:qupath.lib.gui.panels.classify.RandomTrainingRegionSelector.java
public static Map<Integer, List<PathObject>> objectClusterer(final Collection<PathObject> pathObjects, final BufferedImage imgThumbnail, final double thumbScaleX, final double thumbScaleY, final int nClusters) { Map<Integer, List<PathObject>> map = new HashMap<>(); if (pathObjects.isEmpty()) return map; if (nClusters <= 1 || pathObjects.size() == 1) { map.put(Integer.valueOf(0), new ArrayList<>(pathObjects)); return map; }//w w w . ja v a2s . c o m // int maxIterations = 100; KMeansPlusPlusClusterer<ClusterableObject> km = new KMeansPlusPlusClusterer<>(nClusters); List<ClusterableObject> clusterableObjects = new ArrayList<>(); WritableRaster raster = imgThumbnail.getRaster(); int nChannels = raster.getNumBands(); double[] valueBuffer = new double[nChannels]; int w = imgThumbnail.getWidth(); int h = imgThumbnail.getHeight(); boolean isRGB = imgThumbnail.getSampleModel().getNumBands() == 3 && imgThumbnail.getSampleModel().getSampleSize(0) == 8; for (PathObject pathObject : pathObjects) { // Get pixel values for the ROI centroid // CIE LAB is used rather than RGB where possible, due to better suitability for Euclidean distances ROI roi = pathObject.getROI(); if (roi == null) continue; int x = (int) (roi.getCentroidX() * thumbScaleX + 0.5); int y = (int) (roi.getCentroidY() * thumbScaleY + 0.5); if (x < 0 || x >= w || y < 0 || y >= h) continue; if (isRGB) valueBuffer = makeCIELAB(imgThumbnail.getRGB(x, y), valueBuffer); else { for (int c = 0; c < nChannels; c++) valueBuffer[c] = raster.getSampleDouble(x, y, c); } clusterableObjects.add(new ClusterableObject(pathObject, valueBuffer)); } List<CentroidCluster<ClusterableObject>> results = km.cluster(clusterableObjects); int i = 0; for (CentroidCluster<ClusterableObject> centroidCluster : results) { Integer label = Integer.valueOf(i); List<PathObject> objects = new ArrayList<>(); for (ClusterableObject co : centroidCluster.getPoints()) objects.add(co.getPathObject()); map.put(label, objects); i++; } return map; }
From source file:wvec.DocVec.java
public DocVec(WeightedTerm[] queryTerms, int numClusters) throws Exception { wvecMap = new HashMap<>(); for (WeightedTerm term : queryTerms) { WordVec qwv = WordVecs.getVec(term.getTerm()); if (qwv != null) { qwv.normalize();/* w w w. jav a 2 s .c o m*/ wvecMap.put(qwv.getWord(), qwv); } } List<WordVec> wordList = new ArrayList<>(wvecMap.size()); for (Map.Entry<String, WordVec> e : wvecMap.entrySet()) { wordList.add(e.getValue()); } if (wordList.size() == 0) return; // Cluster the query word vecs clusterer = new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size())); List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList); wvecMap.clear(); int i = 0; for (CentroidCluster<WordVec> c : clusters) { Clusterable clusterCenter = c.getCenter(); WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint()); wvecMap.put(clusterWordVec.getWord(), clusterWordVec); i++; } }
From source file:wvec.WordVecsIndexer.java
void clusterWordVecs(IndexWriter clusterIndexWriter, int numClusters) throws Exception { // Index where word vectors are stored IndexReader reader = DirectoryReader.open(FSDirectory.open((new File(indexPath)).toPath())); int numDocs = reader.numDocs(); KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(numClusters); List<WordVec> wordList = new ArrayList<>(numDocs); // Read every wvec and load in memory for (int i = 0; i < numDocs; i++) { Document doc = reader.document(i); WordVec wvec = new WordVec(doc.get(FIELD_WORD_VEC)); wordList.add(wvec);//www .j a va 2 s. co m } // Call K-means clustering System.out.println("Clustering the entire vocabulary..."); List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList); // Save the cluster info System.out.println("Writing out cluster ids in Lucene index..."); int clusterId = 0; for (CentroidCluster<WordVec> c : clusters) { List<WordVec> pointsInThisClusuter = c.getPoints(); for (WordVec thisPoint : pointsInThisClusuter) { Document clusterInfo = constructDoc(thisPoint.word, String.valueOf(clusterId)); clusterIndexWriter.addDocument(clusterInfo); } clusterId++; } reader.close(); }