Example usage for org.apache.commons.math3.ml.clustering Clusterable getPoint

List of usage examples for org.apache.commons.math3.ml.clustering Clusterable getPoint

Introduction

In this page you can find the example usage for org.apache.commons.math3.ml.clustering Clusterable getPoint.

Prototype

double[] getPoint();

Source Link

Document

Gets the n-dimensional point.

Usage

From source file:indexer.DocClusterer.java

/**
 * Clusters this document's word vectors and serializes each cluster
 * centroid as a synthetic {@code WordVec} named {@code Cluster_<i>},
 * joined with {@code ":"} separators.
 *
 * @return the concatenated centroid string, or {@code ""} when clustering
 *         produced no result
 * @throws Exception propagated from {@code clusterWords}
 */
public String getClusterVecs() throws Exception {
    List<CentroidCluster<WordVec>> clusters = clusterWords(dvec.getWordMap(), numClusters);
    if (clusters == null)
        return "";
    // StringBuilder instead of StringBuffer: no shared-thread access here,
    // so the synchronized variant is unnecessary overhead.
    StringBuilder buff = new StringBuilder();
    int i = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        // Wrap the centroid coordinates in a WordVec carrying the cluster name.
        Clusterable clusterCenter = c.getCenter();
        WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint());
        buff.append(clusterWordVec.toString()).append(":");
        i++;
    }
    return buff.toString();
}

From source file:org.apache.solr.client.solrj.io.eval.GetCentroidsEvaluator.java

/**
 * Extracts the centroid of every cluster from a k-means clustering result
 * and returns them as a labeled {@code Matrix}, one row per cluster.
 *
 * @param value expected to be a {@code KmeansEvaluator.ClusterTuple}
 * @return a {@code Matrix} whose rows are cluster centroids
 * @throws IOException when {@code value} is not a clustering result
 */
@Override
public Object doWork(Object value) throws IOException {
    // Guard clause: reject anything that is not a clustering result.
    if (!(value instanceof KmeansEvaluator.ClusterTuple)) {
        throw new IOException(String.format(Locale.ROOT,
                "Invalid expression %s - found type %s for value, expecting a clustering result",
                toExpression(constructingFactory), value.getClass().getSimpleName()));
    }
    KmeansEvaluator.ClusterTuple clusterTuple = (KmeansEvaluator.ClusterTuple) value;
    List<CentroidCluster<KmeansEvaluator.ClusterPoint>> clusters = clusterTuple.getClusters();
    double[][] data = new double[clusters.size()][];
    int row = 0;
    for (CentroidCluster<KmeansEvaluator.ClusterPoint> cluster : clusters) {
        // Each centroid's coordinate array becomes one matrix row.
        data[row++] = cluster.getCenter().getPoint();
    }
    Matrix centroids = new Matrix(data);
    centroids.setColumnLabels(clusterTuple.getColumnLabels());
    return centroids;
}

From source file:org.rhwlab.BHC.BalancedKMeansClusterer.java

/**
 * Balanced k-means: every cluster holds (nearly) the same number of points.
 * Each data point is matched to one of n "slots" (slots are bound round-robin
 * to the k centroids) by solving an assignment problem with the Hungarian
 * algorithm, then centroids are recomputed until assignments stop changing.
 *
 * @param points the data to cluster
 * @return {@code null} — clusters are computed in the {@code centroids}/
 *         {@code assigns} fields but never materialized as Cluster objects
 *         (original behavior preserved; TODO(review): build and return them)
 */
@Override
public List<Cluster<Clusterable>> cluster(Collection<Clusterable> points)
        throws MathIllegalArgumentException, ConvergenceException {
    s = new int[k]; // number of points per cluster
    for (int i = 0; i < s.length; ++i) {
        s[i] = 0;
    }
    data = points.toArray(new Clusterable[0]);
    int n = data.length;
    dist = new double[n][];
    for (int i = 0; i < dist.length; ++i) {
        dist[i] = new double[n];
    }
    HungarianAlgorithm hungar = new HungarianAlgorithm(dist);

    slots = new int[n];
    // Bind slots to centroids round-robin, which fixes the balanced sizes s[c].
    int c = 0;
    for (int i = 0; i < slots.length; ++i) {
        slots[i] = c;
        ++s[c];
        ++c;
        if (c == k) {
            c = 0;
        }
    }

    // Randomly pick k distinct data points as the initial centroids.
    Random rnd = new Random();
    centroids = new double[k][];
    HashSet<Clusterable> set = new HashSet<>(); // ensures each point is used only once
    for (int i = 0; i < k; ++i) {
        Clusterable cl = data[rnd.nextInt(n)];
        while (set.contains(cl)) {
            cl = data[rnd.nextInt(n)];
        }
        set.add(cl);
        centroids[i] = cl.getPoint();
    }
    assigns = hungar.execute();

    // Iterate: distance matrix -> optimal assignment -> recompute centroids.
    boolean finished = false;
    while (!finished) {
        // dist[i][j] = distance from point i to the centroid behind slot j.
        for (int i = 0; i < n; ++i) {
            for (int j = 0; j < n; ++j) {
                dist[i][j] = this.getDistanceMeasure().compute(data[i].getPoint(), centroids[slots[j]]);
            }
        }

        // Re-solve the assignment and count how many points moved.
        int[] nextAssigns = hungar.execute();
        int changes = 0;
        // BUG FIX: condition was 'i > assigns.length', so the loop never ran,
        // 'changes' stayed 0, and the algorithm stopped after a single pass.
        for (int i = 0; i < assigns.length; ++i) {
            if (assigns[i] != nextAssigns[i]) {
                ++changes;
            }
        }
        // BUG FIX: keep the new assignment for the next iteration's comparison.
        assigns = nextAssigns;

        // Zero the centroids before accumulating.
        for (int i = 0; i < k; ++i) {
            double[] centroid = centroids[i];
            for (int j = 0; j < centroid.length; ++j) {
                centroid[j] = 0.0;
            }
        }
        for (int i = 0; i < n; ++i) {
            double[] p = data[i].getPoint();
            // BUG FIX: accumulate into the cluster the point was ASSIGNED to
            // (slots[assigns[i]]); the original used slots[i], which ignores
            // the Hungarian assignment entirely.
            int cl = slots[assigns[i]];
            for (int j = 0; j < p.length; ++j) {
                centroids[cl][j] = centroids[cl][j] + p[j];
            }
        }
        for (int i = 0; i < k; ++i) {
            double[] centroid = centroids[i];
            for (int j = 0; j < centroid.length; ++j) {
                // BUG FIX: divide by the size of cluster i (s[i]); the original
                // used s[j], indexing cluster sizes by dimension.
                centroid[j] = centroid[j] / s[i];
            }
        }
        finished = changes == 0;

    }
    return null;
}

From source file:org.rhwlab.BHC.FreqSensKMeansClusterer.java

/**
 * Frequency-sensitive k-means: the score of assigning a point to a cluster
 * is penalized by the cluster's current population (n[j], logN[j]), which
 * discourages any one cluster from absorbing all the points.
 *
 * @param points the data to cluster
 * @return an empty list — cluster membership is tracked in the
 *         {@code inCluster}/{@code centroids} fields but never materialized
 *         (original behavior preserved; TODO(review): populate the result)
 */
@Override
public List<? extends Cluster<Clusterable>> cluster(Collection<Clusterable> points)
        throws MathIllegalArgumentException, ConvergenceException {
    ArrayList<Cluster<Clusterable>> ret = new ArrayList<>();

    data = points.toArray(new Clusterable[0]);
    int N = data.length;
    D = data[0].getPoint().length; // dimensionality taken from the first point
    inCluster = new int[N];
    for (int i = 0; i < N; ++i) {
        inCluster[i] = -1; // -1 = not yet assigned
    }
    n = new double[K];
    logN = new double[K];
    for (int i = 0; i < K; ++i) {
        n[i] = (double) N / (double) K; // start with uniform populations
        logN[i] = Math.log(n[i]);
    }

    double f = (double) D * (double) N / (double) K;

    // Randomly pick K distinct data points as the initial means.
    Random rnd = new Random();
    centroids = new double[K][];
    HashSet<Clusterable> set = new HashSet<>(); // ensures each point is used only once
    for (int i = 0; i < K; ++i) {

        Clusterable cl = data[rnd.nextInt(N)];
        while (set.contains(cl)) {
            cl = data[rnd.nextInt(N)];
        }
        set.add(cl);
        centroids[i] = cl.getPoint();
    }
    int changes;
    do {
        changes = 0;
        // Reassign each data point to its best-scoring cluster.
        for (int i = 0; i < N; ++i) {
            double[] point = data[i].getPoint();
            // BUG FIX: start at -infinity (was 0.0) so 'index' is always set
            // even when every score h is non-positive; otherwise index stays
            // -1 and the sums[] update below throws AIOOBE.
            double hMax = Double.NEGATIVE_INFINITY;
            int index = -1;
            for (int j = 0; j < K; ++j) {
                double h = 0.0;
                for (int d = 0; d < D; ++d) {
                    h = h + centroids[j][d] * point[d]; // dot product with centroid j
                }
                h = (h + 1.0 + n[j] * logN[j] / f) / n[j]; // frequency-sensitive score
                if (h > hMax) {
                    hMax = h;
                    index = j;
                }
            }
            if (inCluster[i] != index) {
                ++changes;
            }
            // BUG FIX: record the assignment. The original never stored
            // 'index', so inCluster[i] stayed -1 and sums[inCluster[i]]
            // below threw ArrayIndexOutOfBoundsException on the first pass.
            inCluster[i] = index;
        }

        // Update the population of each cluster.
        int[] sums = new int[K];
        for (int i = 0; i < N; ++i) {
            ++sums[inCluster[i]];
        }
        for (int j = 0; j < K; ++j) {
            // NOTE(review): an empty cluster gives n[j] == 0, so logN[j] and
            // the centroid division below produce -Inf/NaN — confirm K and the
            // data guarantee non-empty clusters.
            n[j] = sums[j];
            logN[j] = Math.log(n[j]);
        }

        // Recompute centroids as the mean of their assigned points.
        centroids = new double[K][];
        for (int k = 0; k < K; ++k) {
            centroids[k] = new double[D];
        }
        for (int i = 0; i < N; ++i) {
            double[] p = data[i].getPoint();
            int c = inCluster[i];
            for (int d = 0; d < D; ++d) {
                centroids[c][d] = centroids[c][d] + p[d];
            }
        }
        for (int k = 0; k < K; ++k) {
            for (int d = 0; d < D; ++d) {
                centroids[k][d] = centroids[k][d] / n[k];
            }
        }

    } while (changes > 0);

    return ret;
}

From source file:wvec.DocVec.java

/**
 * Builds a document vector from weighted query terms: looks up and
 * normalizes each term's word vector, k-means-clusters them, and replaces
 * the map contents with one synthetic {@code WordVec} per cluster centroid.
 *
 * @param queryTerms  the weighted query terms to vectorize
 * @param numClusters upper bound on the number of clusters (capped at the
 *                    number of distinct vectors found)
 * @throws Exception propagated from vector lookup / clustering
 */
public DocVec(WeightedTerm[] queryTerms, int numClusters) throws Exception {
    wvecMap = new HashMap<>();

    for (WeightedTerm term : queryTerms) {
        WordVec qwv = WordVecs.getVec(term.getTerm());
        // Terms without a known vector are silently skipped.
        if (qwv != null) {
            qwv.normalize();
            wvecMap.put(qwv.getWord(), qwv);
        }
    }

    // Idiom: copy the map's values directly instead of iterating entrySet.
    List<WordVec> wordList = new ArrayList<>(wvecMap.values());

    if (wordList.isEmpty())
        return;

    // Cluster the query word vecs
    clusterer = new KMeansPlusPlusClusterer<>(Math.min(numClusters, wordList.size()));
    List<CentroidCluster<WordVec>> clusters = clusterer.cluster(wordList);

    // Replace individual word vectors with one vector per cluster centroid.
    wvecMap.clear();
    int i = 0;
    for (CentroidCluster<WordVec> c : clusters) {
        Clusterable clusterCenter = c.getCenter();
        WordVec clusterWordVec = new WordVec("Cluster_" + i, clusterCenter.getPoint());
        wvecMap.put(clusterWordVec.getWord(), clusterWordVec);
        i++;
    }
}