Example usage for org.apache.commons.math3.ml.clustering KMeansPlusPlusClusterer KMeansPlusPlusClusterer

Introduction

On this page you can find example usages of the org.apache.commons.math3.ml.clustering KMeansPlusPlusClusterer(int) constructor.

Prototype

public KMeansPlusPlusClusterer(final int k) 

Document

Build a clusterer. The default strategy for handling empty clusters that may appear during the iterations is to split the cluster with the largest distance variance.
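
The following is a minimal, self-contained sketch of this constructor in use. It is not taken from the usage examples below; the class name KMeansPlusPlusExample and the sample coordinates are purely illustrative. The one-argument constructor uses the default Euclidean distance measure and places no cap on the number of iterations. Points only need to implement the Clusterable interface; the library's own DoublePoint is used here.

import java.util.Arrays;
import java.util.List;

import org.apache.commons.math3.ml.clustering.CentroidCluster;
import org.apache.commons.math3.ml.clustering.DoublePoint;
import org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer;

public class KMeansPlusPlusExample {
    public static void main(String[] args) {
        // Four 2-D points forming two obvious groups (illustrative data).
        List<DoublePoint> points = Arrays.asList(new DoublePoint(new double[] { 1.0, 1.0 }),
                new DoublePoint(new double[] { 1.5, 2.0 }), new DoublePoint(new double[] { 8.0, 8.0 }),
                new DoublePoint(new double[] { 9.0, 8.5 }));

        // k = 2: the one-argument constructor shown in the Prototype above.
        KMeansPlusPlusClusterer<DoublePoint> clusterer = new KMeansPlusPlusClusterer<>(2);
        List<CentroidCluster<DoublePoint>> clusters = clusterer.cluster(points);

        // Print each cluster center and the number of points assigned to it.
        for (CentroidCluster<DoublePoint> cluster : clusters) {
            System.out.println(Arrays.toString(cluster.getCenter().getPoint()) + ": "
                    + cluster.getPoints().size() + " points");
        }
    }
}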

Usage

From source file:clustering.KMeans.java

// Clusters artists by their social-media statistics (Facebook likes,
// Twitter followers, album count) and writes the cluster centers and
// per-artist assignments back to the database.
public static void main(String[] args) throws UnknownHostException {
    if (args.length != 1) {
        System.out.println("Usage : KMeans <nrClusters>");
        System.exit(-1);
    }

    int kClusters = Integer.parseInt(args[0]);

    ArrayList<Artist> artists = new ArrayList<Artist>();
    DBHelper dbHelper = DBHelper.getInstance();
    DBCursor result = dbHelper.findArtistsWithFBandTW();

    while (result.hasNext()) {
        DBObject currentArtist = result.next();
        artists.add(Artist.fromDBObject(currentArtist));
    }

    KMeansPlusPlusClusterer<Artist> clusterer = new KMeansPlusPlusClusterer<Artist>(kClusters);
    List<CentroidCluster<Artist>> clusters = clusterer.cluster(artists);
    dbHelper.emptyClusterCenters();

    for (CentroidCluster<Artist> cluster : clusters) {
        double[] center = cluster.getCenter().getPoint();
        ObjectId centerId = dbHelper.insertClusterCenter(center[0], center[1], center[2]);

        List<Artist> artC = cluster.getPoints();
        for (Artist artist : artC) {
            dbHelper.updateMatrixRowCluster(artist.getDBObject(), centerId);
        }
    }
}

From source file:indexer.DocClusterer.java

// Clusters the word vectors of a single document into at most numClusters groups.
public List<CentroidCluster<WordVec>> clusterWords(HashMap<String, WordVec> wvecMap, int numClusters)
        throws Exception {
    System.out.println("Clustering document: " + dvec.getDocId());
    List<WordVec> wordList = new ArrayList<>(wvecMap.values());

    // Guard against an empty word list before building the clusterer,
    // and never request more clusters than there are points.
    if (wordList.isEmpty())
        return null;
    KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(
            Math.min(numClusters, wordList.size()));
    return clusterer.cluster(wordList);
}
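
Note the Math.min guard: if the method were asked for more clusters than there are points, cluster(...) would fail, since Commons Math reports a NumberIsTooSmallException when the number of data points is smaller than k.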

From source file:gedi.atac.Atac.java

// For each peak, fits k-means models with increasing k to the per-position,
// per-condition read-count profiles and uses a likelihood-ratio test
// (Bonferroni corrected) to decide how many components the peak supports.
public static void testInPeaks(GenomicRegionStorage<? extends AlignedReadsData> storage, String contrasts,
        String peakFile, String rmq, String compOut, String bicOut, String out, boolean randomizeContrasts)
        throws IOException {

    DiskGenomicNumericBuilder clusterRmq = new DiskGenomicNumericBuilder(rmq);
    LineIterator it = new LineOrientedFile(peakFile).lineIterator();
    LineOrientedFile o = new LineOrientedFile(out);
    o.startWriting();
    o.writef("%s\tComponents\tp.value\n", it.next());
    int offset = 4;

    ContrastMapping contr = new ContrastMapping();
    ExtendedIterator<String> coit = new LineOrientedFile(contrasts).lineIterator();
    if (randomizeContrasts) {
        String[] ca = coit.toArray(new String[0]);
        ArrayUtils.shuffleSlice(ca, 0, ca.length);
        coit = FunctorUtils.arrayIterator(ca);
    }
    coit.forEachRemaining(
            l -> contr.addMapping(contr.getNumOriginalConditions(), contr.getMappedIndexOrNext(l), l));

    LineOrientedFile co = new LineOrientedFile(compOut);
    co.startWriting();
    co.writef("Peak\tComponent");
    for (int i = 0; i < contr.getNumMergedConditions(); i++)
        co.writef("\t%s", contr.getMappedName(i));
    co.writeLine();

    LineOrientedFile bico = new LineOrientedFile(bicOut);
    bico.startWriting();
    bico.writef("Peak\tk\tBIC\n");

    Progress pr = new ConsoleProgress();
    pr.init();
    int peakCount = (int) new LineOrientedFile(peakFile).lineIterator().count() - 1;
    pr.setCount(peakCount);

    while (it.hasNext()) {
        String line = it.next();
        ImmutableReferenceGenomicRegion<Object> peak = ImmutableReferenceGenomicRegion
                .parse(StringUtils.splitField(line, '\t', 0));

        pr.setDescription(peak.toString());
        pr.incrementProgress();

        HashMap<FixedDoublePoint, Integer> pToPos = new HashMap<FixedDoublePoint, Integer>();
        FixedDoublePoint[] m = new FixedDoublePoint[peak.getRegion().getTotalLength()];
        for (int i = 0; i < m.length; i++) {
            m[i] = new FixedDoublePoint(new double[contr.getNumMergedConditions()]);
            pToPos.put(m[i], peak.getRegion().map(i));
        }

        Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>> adder = new Consumer<MutableReferenceGenomicRegion<? extends AlignedReadsData>>() {

            @Override
            public void accept(MutableReferenceGenomicRegion<? extends AlignedReadsData> mrgr) {
                try {

                    int start = GenomicRegionPosition.Start.position(mrgr.getReference(), mrgr.getRegion(),
                            offset);
                    if (peak.getRegion().contains(start))
                        addDownsampled(contr, m[peak.getRegion().induce(start)].getPoint(),
                                mrgr.getData().getTotalCountsForConditions(ReadCountMode.All));

                    int stop = GenomicRegionPosition.Stop.position(mrgr.getReference(), mrgr.getRegion(),
                            -offset);
                    if (peak.getRegion().contains(stop))
                        addDownsampled(contr, m[peak.getRegion().induce(stop)].getPoint(),
                                mrgr.getData().getTotalCountsForConditions(ReadCountMode.All));
                } catch (Exception e) {
                    throw new RuntimeException(e);
                }
            }

            private void addDownsampled(ContrastMapping contr, double[] re, double[] c) {
                double max = ArrayUtils.max(c);
                if (max > 0)
                    ArrayUtils.mult(c, 1 / max);
                for (int i = 0; i < c.length; i++)
                    if (contr.getMappedIndex(i) > -1)
                        re[contr.getMappedIndex(i)] += c[i];
            }
        };
        storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toPlusStrand(),
                peak.getRegion()).forEachRemaining(adder);
        storage.iterateIntersectingMutableReferenceGenomicRegions(peak.getReference().toMinusStrand(),
                peak.getRegion()).forEachRemaining(adder);

        DoubleArrayList ll = new DoubleArrayList();
        ll.add(0);
        DoubleArrayList bic = new DoubleArrayList();
        bic.add(0);

        ArrayList<FixedDoublePoint> list = new ArrayList<FixedDoublePoint>();
        for (FixedDoublePoint p : m)
            if (ArrayUtils.sum(p.getPoint()) > 0)
                list.add(p);

        List<CentroidCluster<FixedDoublePoint>> ocl = null;
        double op = 0;

        for (int k = 1; k < Math.min(list.size(), 50); k++) {

            KMeansPlusPlusClusterer<FixedDoublePoint> kmeans = new KMeansPlusPlusClusterer<FixedDoublePoint>(k);
            List<CentroidCluster<FixedDoublePoint>> cl = kmeans.cluster(list);

            double cll = 0;
            for (CentroidCluster<FixedDoublePoint> c : cl) {
                double[] total = new double[contr.getNumMergedConditions()];
                Arrays.fill(total, 1);
                for (FixedDoublePoint p : c.getPoints())
                    for (int j = 0; j < contr.getNumMergedConditions(); j++)
                        total[j] += p.getPoint()[j];
                ArrayUtils.normalize(total);

                for (FixedDoublePoint p : c.getPoints())
                    cll += ddirichlet1(p.getPoint(), total);
            }

            // LLR test
            double LLR = 2 * cll - 2 * ll.getLastDouble();
            double p = 1
                    - new ChiSquaredDistribution(contr.getNumMergedConditions() - 1).cumulativeProbability(LLR);

            bic.add(-2 * cll + 2 * (contr.getNumMergedConditions() - 1) * k);
            bico.writef("%s\t%d\t%.1f\n", peak.toLocationString(), k, bic.getLastDouble());

            // bonferroni correction
            p = p * peakCount;

            if (p > 0.01) {
                // ocl is still null if the very first model (k = 1) is rejected
                if (ocl != null && ocl.size() > 1) {
                    for (int i = 0; i < ocl.size(); i++) {
                        co.writef("%s\t%d", peak.toLocationString(), i);
                        double[] total = new double[contr.getNumMergedConditions()];
                        Arrays.fill(total, 1);
                        for (FixedDoublePoint pp : ocl.get(i).getPoints()) {
                            clusterRmq.addValue(peak.getReference(), pToPos.get(pp).intValue(), (byte) i);
                            for (int j = 0; j < contr.getNumMergedConditions(); j++)
                                total[j] += pp.getPoint()[j];
                        }
                        ArrayUtils.normalize(total);
                        for (int c = 0; c < contr.getNumMergedConditions(); c++)
                            co.writef("\t%.4f", total[c]);
                        co.writeLine();

                    }
                }
                break;
            }

            ll.add(cll);
            ocl = cl;
            op = p;
        }

        o.writef("%s\t%d\t%.4g\n", line, ll.size() - 1, ll.size() == 2 ? Double.NaN : op);
    }

    pr.finish();
    o.finishWriting();
    co.finishWriting();

    clusterRmq.build();
}

From source file:org.rhwlab.dispim.datasource.SegmentedTiffDataSource.java

@Override
public ClusteredDataSource kMeansCluster(int nClusters, int nPartitions) throws Exception {
    double[] maxs = segmentation.getMaxs();
    double[] mins = segmentation.getMins();

    double[] dels = new double[dims.length];
    for (int d = 0; d < dims.length; ++d) {
        dels[d] = (maxs[d] - mins[d]) / nPartitions;
    }
    // partition the voxels into separate lists
    int mx = (int) Math.pow(nPartitions, getD());
    ArrayList<Voxel>[] lists = new ArrayList[mx];
    for (int i = 0; i < lists.length; ++i) {
        lists[i] = new ArrayList<>();
    }
    for (int i = 0; i < getSegmentN(); ++i) {
        Voxel vox = getSegmentVoxel(i);
        int index = region(vox.getPoint(), nPartitions, dels);
        lists[index].add(vox);
    }
    // build the clustering threads
    double f = (double) getSegmentN() / (double) nClusters;
    ArrayList<VoxelClusterer> clusterers = new ArrayList<>();
    for (int i = 0; i < lists.length; ++i) {
        ArrayList<Voxel> list = lists[i];
        if (!list.isEmpty()) {
            int nc = (int) ((double) list.size() / f);
            VoxelClusterer clusterer = new VoxelClusterer(list, new KMeansPlusPlusClusterer(nc));
            //                VoxelClusterer clusterer = new VoxelClusterer(list,new BalancedKMeansClusterer(nc));
            clusterers.add(clusterer);
        }
    }

    // do the clustering
    for (VoxelClusterer clusterer : clusterers) {
        clusterer.start();
    }

    // wait for them all to finish
    for (VoxelClusterer clusterer : clusterers) {
        clusterer.join();
    }

    ClusteredDataSource ret = new ClusteredDataSource(clusterers.toArray(new VoxelClusterer[0]),
            segmentation.getThreshold(), this.getD());
    ret.setPartition(mx);
    return ret;
}
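
The design choice here trades global optimality for speed: the voxels are first binned into spatial partitions, each non-empty partition is clustered on its own VoxelClusterer thread, and each partition's cluster count is scaled to its share of the total voxel count.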

From source file:org.rhwlab.variationalbayesian.OldFaithfulDataSource.java

public List<CentroidCluster<Clusterable>> cluster(int K) {
    // Raw type: the DoublePoint list is returned through the Clusterable interface.
    KMeansPlusPlusClusterer clusterer = new KMeansPlusPlusClusterer(K);
    ArrayList<DoublePoint> points = new ArrayList<>();
    for (RealVector v : data) {
        DoublePoint point = new DoublePoint(v.toArray());
        points.add(point);
    }
    return clusterer.cluster(points);
}

From source file:org.rhwlab.variationalbayesian.TifMaskSuperVoxelDataSource.java

public TifMaskSuperVoxelDataSource(String file) {
    ArrayList<DoublePoint> allVoxels = new ArrayList<>();
    final ImagePlus imp = new Opener().openImage(file);
    final Img image = ImagePlusAdapter.wrap(imp);
    Cursor cursor = image.localizingCursor();
    int[] pos = new int[3];
    while (cursor.hasNext()) {
        UnsignedByteType obj = (UnsignedByteType) cursor.next();
        if (obj.getInteger() != 1) {
            cursor.localize(pos);
            DoublePoint point = new DoublePoint(pos);
            allVoxels.add(point);
        }
    }
    this.T = allVoxels.size();
    int K = allVoxels.size() / 1000; // aim for roughly 1000 voxels per super-voxel
    superVoxels = new SuperVoxel[K];
    KMeansPlusPlusClusterer clusterer = new KMeansPlusPlusClusterer(K);
    List<CentroidCluster<Clusterable>> clusters = clusterer.cluster(allVoxels);
    int k = 0;
    for (CentroidCluster<Clusterable> cluster : clusters) {
        List<Clusterable> points = cluster.getPoints();
        RealVector[] voxels = new RealVector[points.size()];
        for (int i = 0; i < points.size(); ++i) {
            voxels[i] = new ArrayRealVector(points.get(i).getPoint());
        }
        RealVector center = new ArrayRealVector(cluster.getCenter().getPoint());
        superVoxels[k] = new SuperVoxel(voxels, center);
        ++k;
    }
}

From source file:qupath.lib.gui.panels.classify.RandomTrainingRegionSelector.java

// Groups PathObjects into nClusters by the color of the thumbnail pixel
// under each object's ROI centroid.
public static Map<Integer, List<PathObject>> objectClusterer(final Collection<PathObject> pathObjects,
        final BufferedImage imgThumbnail, final double thumbScaleX, final double thumbScaleY,
        final int nClusters) {

    Map<Integer, List<PathObject>> map = new HashMap<>();
    if (pathObjects.isEmpty())
        return map;

    if (nClusters <= 1 || pathObjects.size() == 1) {
        map.put(Integer.valueOf(0), new ArrayList<>(pathObjects));
        return map;
    }

    KMeansPlusPlusClusterer<ClusterableObject> km = new KMeansPlusPlusClusterer<>(nClusters);
    List<ClusterableObject> clusterableObjects = new ArrayList<>();
    WritableRaster raster = imgThumbnail.getRaster();
    int nChannels = raster.getNumBands();
    double[] valueBuffer = new double[nChannels];
    int w = imgThumbnail.getWidth();
    int h = imgThumbnail.getHeight();
    boolean isRGB = imgThumbnail.getSampleModel().getNumBands() == 3
            && imgThumbnail.getSampleModel().getSampleSize(0) == 8;

    for (PathObject pathObject : pathObjects) {
        // Get pixel values for the ROI centroid
        // CIE LAB is used rather than RGB where possible, due to better suitability for Euclidean distances
        ROI roi = pathObject.getROI();
        if (roi == null)
            continue;
        int x = (int) (roi.getCentroidX() * thumbScaleX + 0.5);
        int y = (int) (roi.getCentroidY() * thumbScaleY + 0.5);
        if (x < 0 || x >= w || y < 0 || y >= h)
            continue;

        if (isRGB)
            valueBuffer = makeCIELAB(imgThumbnail.getRGB(x, y), valueBuffer);
        else {
            for (int c = 0; c < nChannels; c++)
                valueBuffer[c] = raster.getSampleDouble(x, y, c);
        }

        clusterableObjects.add(new ClusterableObject(pathObject, valueBuffer));
    }
    List<CentroidCluster<ClusterableObject>> results = km.cluster(clusterableObjects);

    int i = 0;
    for (CentroidCluster<ClusterableObject> centroidCluster : results) {
        Integer label = Integer.valueOf(i);
        List<PathObject> objects = new ArrayList<>();
        for (ClusterableObject co : centroidCluster.getPoints())
            objects.add(co.getPathObject());
        map.put(label, objects);
        i++;
    }

    return map;
}

From source file:wvec.DocVec.java

// Builds a compressed document vector for a query: each cluster of the
// query's word vectors is replaced by its centroid.
public DocVec(WeightedTerm[] queryTerms, int numClusters) throws Exception {
    wvecMap = new HashMap<>();

    for (WeightedTerm term : queryTerms) {
        WordVec qwv = WordVecs.getVec(term.getTerm());
        if (qwv != null) {
            qwv.normalize();
            wvecMap.put(qwv.getWord(), qwv);
        }
    }

    List<WordVec> wordList = new ArrayList<>(wvecMap.values());

    if (wordList.size() == 0)
        return;

    // Cluster the query word vecs
    clusterer = new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size()));
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);

    wvecMap.clear();
    int i = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        Clusterable clusterCenter = c.getCenter();
        WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint());
        wvecMap.put(clusterWordVec.getWord(), clusterWordVec);
        i++;
    }
}

From source file:wvec.WordVecsIndexer.java

// Clusters the whole word-vector vocabulary and stores each word's cluster id
// in a separate Lucene index.
void clusterWordVecs(IndexWriter clusterIndexWriter, int numClusters) throws Exception {
    // Index where word vectors are stored
    IndexReader reader = DirectoryReader.open(FSDirectory.open((new File(indexPath)).toPath()));
    int numDocs = reader.numDocs();
    KMeansPlusPlusClusterer<WordVec> clusterer = new KMeansPlusPlusClusterer<>(numClusters);
    List<WordVec> wordList = new ArrayList<>(numDocs);

    // Read every wvec and load in memory
    for (int i = 0; i < numDocs; i++) {
        Document doc = reader.document(i);
        WordVec wvec = new WordVec(doc.get(FIELD_WORD_VEC));
        wordList.add(wvec);
    }

    // Call K-means clustering
    System.out.println("Clustering the entire vocabulary...");
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);

    // Save the cluster info
    System.out.println("Writing out cluster ids in Lucene index...");
    int clusterId = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        List<WordVec> pointsInThisCluster = c.getPoints();
        for (WordVec thisPoint : pointsInThisCluster) {
            Document clusterInfo = constructDoc(thisPoint.word, String.valueOf(clusterId));
            clusterIndexWriter.addDocument(clusterInfo);
        }
        clusterId++;
    }

    reader.close();
}