Example usage of the org.apache.commons.math3.ml.clustering.MultiKMeansPlusPlusClusterer constructor

Introduction

This page collects example usages of the MultiKMeansPlusPlusClusterer constructor from the org.apache.commons.math3.ml.clustering package.

Prototype

public MultiKMeansPlusPlusClusterer(final KMeansPlusPlusClusterer<T> clusterer, final int numTrials)

Source Link

Document

Build a clusterer.

Usage

From source file:edu.cmu.sv.modelinference.eventtool.classification.Clusterer1D.java

/**
 * Clusters the given data points into {@code k} groups using a
 * {@link MultiKMeansPlusPlusClusterer} that wraps a {@link KMeansPlusPlusClusterer}.
 * The iteration limit and the number of trials are read from {@code maxIterations}
 * and {@code trials}, which are defined outside this method.
 *
 * @param dataCol the data points to cluster
 * @param k       the number of clusters to request
 * @return the clusters returned by the clusterer, or {@code null} if a
 *         {@link NumberIsTooSmallException} was thrown (a warning is logged in that case)
 */
private List<? extends Cluster<DataWrapper>> computeClusters(Collection<DataWrapper> dataCol, int k) {
    try {
        KMeansPlusPlusClusterer<DataWrapper> singleRun =
                new KMeansPlusPlusClusterer<DataWrapper>(k, maxIterations);
        Clusterer<DataWrapper> multiRun = new MultiKMeansPlusPlusClusterer<>(singleRun, trials);
        return multiRun.cluster(dataCol);
    } catch (NumberIsTooSmallException tooFew) {
        // Best effort: report the problem and signal "no result" to the caller with null.
        logger.warn("Too few datapoints for clusters: " + tooFew.getMessage());
        return null;
    }
}

From source file:de.bund.bfr.knime.openkrise.util.cluster.DBSCANNodeModel.java

/**
 * {@inheritDoc}
 * <p>
 * Clusters the rows of the first input table by their latitude/longitude columns and
 * returns a single output table containing every input row plus a cluster id cell.
 * Rows with a missing id, latitude or longitude, and rows whose node is excluded by the
 * configured filter, do not take part in the clustering.
 *
 * @throws InvalidSettingsException if the configured model is neither DBSCAN nor k-means
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getSpec();

    TracingUtils.assertColumnNotMissing(spec, TracingColumns.ID, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LATITUDE_COLUMN, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LONGITUDE_COLUMN, null);

    NodePropertySchema nodeSchema = new NodePropertySchema(TracingUtils.getTableColumns(table.getSpec()),
            TracingColumns.ID);
    Collection<GraphNode> nodes = TracingUtils.readGraphNodes(table, nodeSchema).values();
    Set<String> filteredOut = new LinkedHashSet<>();

    // Nodes for which the filter yields 0.0 are excluded from clustering.
    if (set.getFilter() != null) {
        set.getFilter().getValues(nodes).forEach((node, value) -> {
            if (value == 0.0) {
                filteredOut.add(node.getId());
            }
        });
    }

    // Column positions do not change between rows, so resolve them once up front.
    final int idIndex = spec.findColumnIndex(TracingColumns.ID);
    final int latIndex = spec.findColumnIndex(GeocodingNodeModel.LATITUDE_COLUMN);
    final int lonIndex = spec.findColumnIndex(GeocodingNodeModel.LONGITUDE_COLUMN);

    List<ClusterableRow> clusterableRows = new ArrayList<>();

    for (DataRow row : table) {
        String id = IO.getToCleanString(row.getCell(idIndex));
        Double lat = IO.getDouble(row.getCell(latIndex));
        Double lon = IO.getDouble(row.getCell(lonIndex));

        if (id == null || lat == null || lon == null || filteredOut.contains(id)) {
            continue;
        }

        // Coordinates are converted to radians before being handed to the clusterer.
        clusterableRows.add(new ClusterableRow(row.getKey(), Math.toRadians(lat), Math.toRadians(lon)));
    }

    List<? extends Cluster<ClusterableRow>> clusters;

    if (set.getModel().equals(DBSCANNSettings.MODEL_DBSCAN)) {
        clusters = new DBSCANClusterer<ClusterableRow>(set.getMaxDistance(), set.getMinPoints(),
                new HaversineDistance()).cluster(clusterableRows);
    } else if (set.getModel().equals(DBSCANNSettings.MODEL_K_MEANS)) {
        // NOTE(review): -1 is passed as the iteration limit and 5 as the number of trials;
        // presumably a negative limit means "unbounded" - confirm against the Commons Math docs.
        clusters = new MultiKMeansPlusPlusClusterer<ClusterableRow>(
                new KMeansPlusPlusClusterer<>(set.getNumClusters(), -1, new HaversineDistance()), 5)
                        .cluster(clusterableRows);
    } else {
        throw new InvalidSettingsException(set.getModel());
    }

    // The position of a cluster in the result list serves as its id.
    Map<RowKey, Integer> clusterIds = new LinkedHashMap<>();

    for (int i = 0; i < clusters.size(); i++) {
        for (ClusterableRow r : clusters.get(i).getPoints()) {
            clusterIds.put(r.getKey(), i);
        }
    }

    DataTableSpec outSpec = createSpec(spec);
    BufferedDataContainer container = exec.createDataContainer(outSpec);

    // Resolve the input -> output column mapping once instead of once per row and column.
    final String[] columnNames = spec.getColumnNames();
    final int[] sourceIndices = new int[columnNames.length];
    final int[] targetIndices = new int[columnNames.length];

    for (int c = 0; c < columnNames.length; c++) {
        sourceIndices[c] = spec.findColumnIndex(columnNames[c]);
        targetIndices[c] = outSpec.findColumnIndex(columnNames[c]);
    }

    final int clusterIdIndex = outSpec.findColumnIndex(TracingColumns.CLUSTER_ID);
    final int numOutColumns = outSpec.getNumColumns();

    for (DataRow row : table) {
        DataCell[] cells = new DataCell[numOutColumns];

        for (int c = 0; c < columnNames.length; c++) {
            cells[targetIndices[c]] = row.getCell(sourceIndices[c]);
        }

        // Rows that were skipped above (or not placed in any cluster) have no entry here, so
        // the lookup yields null; presumably IO.createCell maps null to a missing cell - TODO confirm.
        cells[clusterIdIndex] = IO.createCell(clusterIds.get(row.getKey()));
        container.addRowToTable(new DefaultRow(row.getKey(), cells));
        exec.checkCanceled();
    }

    container.close();

    return new BufferedDataTable[] { container.getTable() };
}

From source file:org.apache.solr.client.solrj.io.eval.MultiKmeansEvaluator.java

/**
 * Clusters the rows of an observation matrix with a {@link MultiKMeansPlusPlusClusterer}.
 *
 * @param values exactly three values, in order: the {@code Matrix} to cluster, a
 *               {@link Number} giving k, and a {@link Number} giving the number of trials;
 *               both numbers are converted with {@link Number#intValue()}
 * @return a {@code KmeansEvaluator.ClusterTuple} holding the settings used, the clusters
 *         and the matrix column labels
 * @throws IOException if the number of values is not three or a value has the wrong type
 */
@Override
public Object doWork(Object... values) throws IOException {

    if (values.length != 3) {
        throw new IOException(
                "The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
    }

    // Validate the arguments in positional order so the first offending one is reported.
    if (!(values[0] instanceof Matrix)) {
        throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
    }
    if (!(values[1] instanceof Number)) {
        throw new IOException("The second parameter for multiKmeans should be k.");
    }
    if (!(values[2] instanceof Number)) {
        throw new IOException("The third parameter for multiKmeans should be trials.");
    }

    final Matrix matrix = (Matrix) values[0];
    final int k = ((Number) values[1]).intValue();
    final int trials = ((Number) values[2]).intValue();

    KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans = new KMeansPlusPlusClusterer<>(k,
            maxIterations);
    MultiKMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> multiKmeans =
            new MultiKMeansPlusPlusClusterer<>(kmeans, trials);

    double[][] data = matrix.getData();
    // NOTE(review): ids.get(i) below assumes the matrix carries one row label per data row - confirm.
    List<String> ids = matrix.getRowLabels();

    // Each matrix row becomes one point, identified by its row label.
    List<KmeansEvaluator.ClusterPoint> points = new ArrayList<>(data.length);
    for (int i = 0; i < data.length; i++) {
        points.add(new KmeansEvaluator.ClusterPoint(ids.get(i), data[i]));
    }

    // Settings recorded alongside the result.
    Map<String, Object> fields = new HashMap<>();
    fields.put("k", k);
    fields.put("trials", trials);
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}