Example usage of the org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer constructor

Introduction

This page collects example usages of the KMeansPlusPlusClusterer constructor from the org.apache.commons.math3.ml.clustering package.

Prototype

public KMeansPlusPlusClusterer(final int k, final int maxIterations, final DistanceMeasure measure)

Source Link

Document

Build a clusterer.

Usage

From source file:bigdataproject.KMeansKFinder.java

/**
 * Searches for a cluster count at which the mean intra-cluster distance stops changing.
 *
 * <p>For each candidate {@code k}, starting at 2 and staying below {@code numSamples}, the
 * points in {@code list} are clustered with k-means++ (Euclidean distance, at most 1000
 * iterations). For every resulting cluster the average distance between its points and the
 * centroid returned by {@code getCentroid} is computed; those per-cluster averages are then
 * averaged over all {@code k} slots. The search stops at the first {@code k} for which this
 * value differs from the previous candidate's value by less than {@code epsilon}. For
 * {@code k == 2} the "previous" value is the initial {@code 0.0}.
 *
 * @param epsilon threshold on the absolute change of the mean intra-cluster distance
 *                between two consecutive candidates
 * @return {@code k - 1} for the first candidate {@code k} whose change is below
 *         {@code epsilon}, or {@code 0} if no candidate satisfies that condition
 */
public int find(double epsilon) {
    // One distance instance is reused for the clusterers and for every point-to-centroid
    // computation instead of allocating a new one per point.
    final EuclideanDistance euclidean = new EuclideanDistance();
    double oldAvDist = 0.0;
    for (int k = 2; k < numSamples; k++) {
        KMeansPlusPlusClusterer<DoublePoint> kmeans =
                new KMeansPlusPlusClusterer<DoublePoint>(k, 1000, euclidean);
        // cluster() yields a list of a Cluster subtype, hence the bounded wildcard.
        List<? extends Cluster<DoublePoint>> clusterList = kmeans.cluster(list);
        double[] avDistances = new double[k];
        int index = 0;
        for (Cluster<DoublePoint> c : clusterList) {
            List<DoublePoint> cluster = c.getPoints();
            int size = cluster.size();
            double[] centroid = getCentroid(cluster);
            double distanceSum = 0.0;
            for (DoublePoint point : cluster) {
                distanceSum += euclidean.compute(centroid, point.getPoint());
            }
            avDistances[index] = distanceSum / size;
            index++;
        }
        double avDistSum = 0.0;
        for (double avDistance : avDistances) {
            avDistSum += avDistance;
        }
        double newAvDist = avDistSum / avDistances.length;
        double difference = Math.abs(newAvDist - oldAvDist);
        if (difference >= epsilon) {
            oldAvDist = newAvDist;
        } else {
            // The metric has stabilised: report the previous candidate.
            return k - 1;
        }
    }
    // No candidate below numSamples produced a change smaller than epsilon.
    return 0;
}

From source file:de.bund.bfr.knime.openkrise.util.cluster.DBSCANNodeModel.java

/**
 * {@inheritDoc}
 *
 * <p>Reads the ID, latitude and longitude cells of every row in the first input table,
 * clusters the usable rows with either {@code DBSCANClusterer} or a
 * {@code MultiKMeansPlusPlusClusterer} (both using {@code HaversineDistance} on coordinates
 * converted to radians), and returns a table that copies every input column and fills the
 * {@code TracingColumns.CLUSTER_ID} column with the index of the cluster the row fell into.
 *
 * @throws InvalidSettingsException if the configured model is neither
 *         {@code DBSCANNSettings.MODEL_DBSCAN} nor {@code DBSCANNSettings.MODEL_K_MEANS}
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getSpec();

    TracingUtils.assertColumnNotMissing(spec, TracingColumns.ID, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LATITUDE_COLUMN, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LONGITUDE_COLUMN, null);

    NodePropertySchema nodeSchema = new NodePropertySchema(TracingUtils.getTableColumns(table.getSpec()),
            TracingColumns.ID);
    Collection<GraphNode> nodes = TracingUtils.readGraphNodes(table, nodeSchema).values();
    Set<String> filteredOut = new LinkedHashSet<>();

    // Nodes for which the optional filter yields 0.0 are excluded from clustering.
    if (set.getFilter() != null) {
        set.getFilter().getValues(nodes).forEach((node, value) -> {
            if (value == 0.0) {
                filteredOut.add(node.getId());
            }
        });
    }

    // Column positions do not change while iterating, so look them up once instead of per row.
    final int idIndex = spec.findColumnIndex(TracingColumns.ID);
    final int latIndex = spec.findColumnIndex(GeocodingNodeModel.LATITUDE_COLUMN);
    final int lonIndex = spec.findColumnIndex(GeocodingNodeModel.LONGITUDE_COLUMN);

    List<ClusterableRow> clusterableRows = new ArrayList<>();

    for (DataRow row : table) {
        String id = IO.getToCleanString(row.getCell(idIndex));
        Double lat = IO.getDouble(row.getCell(latIndex));
        Double lon = IO.getDouble(row.getCell(lonIndex));

        // Rows with a missing ID or coordinate, or rejected by the filter, are not clustered.
        if (id == null || lat == null || lon == null || filteredOut.contains(id)) {
            continue;
        }

        clusterableRows.add(new ClusterableRow(row.getKey(), Math.toRadians(lat), Math.toRadians(lon)));
    }

    List<? extends Cluster<ClusterableRow>> clusters;

    if (set.getModel().equals(DBSCANNSettings.MODEL_DBSCAN)) {
        clusters = new DBSCANClusterer<ClusterableRow>(set.getMaxDistance(), set.getMinPoints(),
                new HaversineDistance()).cluster(clusterableRows);
    } else if (set.getModel().equals(DBSCANNSettings.MODEL_K_MEANS)) {
        // 5 trials of k-means++; -1 is passed as maxIterations.
        clusters = new MultiKMeansPlusPlusClusterer<ClusterableRow>(
                new KMeansPlusPlusClusterer<>(set.getNumClusters(), -1, new HaversineDistance()), 5)
                        .cluster(clusterableRows);
    } else {
        throw new InvalidSettingsException(set.getModel());
    }

    // The position of a cluster in the result list serves as its cluster ID.
    Map<RowKey, Integer> clusterIds = new LinkedHashMap<>();

    for (int i = 0; i < clusters.size(); i++) {
        for (ClusterableRow r : clusters.get(i).getPoints()) {
            clusterIds.put(r.getKey(), i);
        }
    }

    DataTableSpec outSpec = createSpec(spec);
    BufferedDataContainer container = exec.createDataContainer(outSpec);

    // Resolve the input-to-output column mapping once rather than for every row.
    final String[] inputColumns = spec.getColumnNames();
    final int[] sourceIndices = new int[inputColumns.length];
    final int[] targetIndices = new int[inputColumns.length];

    for (int i = 0; i < inputColumns.length; i++) {
        sourceIndices[i] = spec.findColumnIndex(inputColumns[i]);
        targetIndices[i] = outSpec.findColumnIndex(inputColumns[i]);
    }

    final int clusterIdIndex = outSpec.findColumnIndex(TracingColumns.CLUSTER_ID);
    final int numOutColumns = outSpec.getNumColumns();

    for (DataRow row : table) {
        DataCell[] cells = new DataCell[numOutColumns];

        for (int i = 0; i < inputColumns.length; i++) {
            cells[targetIndices[i]] = row.getCell(sourceIndices[i]);
        }

        // NOTE(review): rows that were skipped above or not placed in any cluster have no
        // entry, so get() returns null here - confirm IO.createCell handles a null value.
        cells[clusterIdIndex] = IO.createCell(clusterIds.get(row.getKey()));
        container.addRowToTable(new DefaultRow(row.getKey(), cells));
        exec.checkCanceled();
    }

    container.close();

    return new BufferedDataTable[] { container.getTable() };
}

From source file:bigdataproject.MainJFrame.java

private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed
    // Action handler for jButton1: loads and filters the data set, reduces it with PCA,
    // clusters the reduced points with k-means++ or DBSCAN depending on the kMeans flag,
    // and opens a ScatterPlot window with the result.
    this.jLabel8.setText("");

    ReadDataSet dataSet = new ReadDataSet();
    dataSet.readFromFile();
    dataSet.filter();
    matrix = dataSet.getMatrix();

    double[][] matrix2DPCA = new PCA(matrix).reduceDimensions();
    BlockRealMatrix transposed = new BlockRealMatrix(matrix2DPCA).transpose();
    List<DoublePoint> list = dataSet.getCollection(dataSet.getHashMap(transposed.getData()));

    List<Cluster<DoublePoint>> clusterList;
    if (!kMeans) {
        // DBSCAN: either fixed defaults or the values entered in the form.
        int minPts;
        double eps;
        if (!this.jCheckBox2.isSelected()) {
            minPts = (int) this.jSpinner2.getValue();
            try {
                eps = Double.parseDouble(this.jTextField1.getText());
            } catch (NumberFormatException e) {
                // Report the unparsable eps value and abort without clustering.
                this.jLabel8.setText("Wrong eps Value");
                return;
            }
        } else {
            minPts = 6;
            //KDistances dist = new KDistances(pcaMatrixTranspose.getData());
            //dist.calculateDistances();
            //dist.getKSortedNearestNeighbors(minPts);
            //dist.printKdistances();
            eps = 1.0;
        }
        clusterList = new DBSCANClusterer(eps, minPts).cluster(list);
    } else {
        // k-means++: k is either estimated by KMeansKFinder or taken from the spinner.
        int k = this.jCheckBox1.isSelected()
                ? new KMeansKFinder(list).find(0.15)
                : (int) this.jSpinner1.getValue();
        clusterList = new KMeansPlusPlusClusterer(k, 1000, new EuclideanDistance()).cluster(list);
    }

    final ScatterPlot plot = new ScatterPlot("Big Data Clustering Project", matrix2DPCA, clusterList);
    plot.pack();
    RefineryUtilities.centerFrameOnScreen(plot);
    plot.setVisible(true);
}