Example usage for the org.apache.commons.math3.ml.clustering.MultiKMeansPlusPlusClusterer constructor MultiKMeansPlusPlusClusterer

List of usage examples for the org.apache.commons.math3.ml.clustering.MultiKMeansPlusPlusClusterer constructor MultiKMeansPlusPlusClusterer

Introduction

On this page you can find example usages of the org.apache.commons.math3.ml.clustering.MultiKMeansPlusPlusClusterer constructor MultiKMeansPlusPlusClusterer.

Prototype

public MultiKMeansPlusPlusClusterer(final KMeansPlusPlusClusterer<T> clusterer, final int numTrials) 

Source Link

Document

Build a clusterer.

Usage

From source file:edu.cmu.sv.modelinference.eventtool.classification.Clusterer1D.java

/**
 * Clusters the given data points into {@code k} groups by running k-means++
 * {@code trials} times (each run capped at {@code maxIterations}) through a
 * {@link MultiKMeansPlusPlusClusterer}.
 *
 * @param dataCol the data points to cluster
 * @param k       the number of clusters requested
 * @return the clusters produced by the clusterer, or {@code null} when the
 *         clusterer raised a {@link NumberIsTooSmallException} (a warning is
 *         logged in that case)
 */
private List<? extends Cluster<DataWrapper>> computeClusters(Collection<DataWrapper> dataCol, int k) {
    try {
        KMeansPlusPlusClusterer<DataWrapper> singleRun = new KMeansPlusPlusClusterer<DataWrapper>(k,
                maxIterations);
        Clusterer<DataWrapper> multiRun = new MultiKMeansPlusPlusClusterer<>(singleRun, trials);
        return multiRun.cluster(dataCol);
    } catch (NumberIsTooSmallException e) {
        logger.warn("Too few datapoints for clusters: " + e.getMessage());
        // Callers receive null as the "no clustering available" signal.
        return null;
    }
}

From source file:de.bund.bfr.knime.openkrise.util.cluster.DBSCANNodeModel.java

/**
 * {@inheritDoc}
 *
 * <p>
 * Reads the first input table, clusters its rows by latitude/longitude and
 * returns a single table built from {@code createSpec(spec)} that carries
 * every input column plus a cluster-id cell.
 *
 * <p>
 * Rows are left out of the clustering when their id, latitude or longitude
 * cannot be read, or when the configured filter maps their node to
 * {@code 0.0}. Such rows are still written to the output; their cluster-id
 * cell is created from {@code null}. The same applies to rows the clusterer
 * did not assign to any cluster.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec   used to create the output container and to check for
 *               cancellation after each written row
 * @return a one-element array holding the output table
 * @throws InvalidSettingsException if the configured model is neither
 *                                  {@code MODEL_DBSCAN} nor
 *                                  {@code MODEL_K_MEANS}
 * @throws Exception                any other failure raised by the called
 *                                  helpers
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getSpec();

    TracingUtils.assertColumnNotMissing(spec, TracingColumns.ID, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LATITUDE_COLUMN, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LONGITUDE_COLUMN, null);

    NodePropertySchema nodeSchema = new NodePropertySchema(TracingUtils.getTableColumns(table.getSpec()),
            TracingColumns.ID);
    Collection<GraphNode> nodes = TracingUtils.readGraphNodes(table, nodeSchema).values();
    Set<String> filteredOut = new LinkedHashSet<>();

    // Collect the ids of all nodes the filter evaluates to 0.0.
    if (set.getFilter() != null) {
        set.getFilter().getValues(nodes).forEach((node, value) -> {
            if (value == 0.0) {
                filteredOut.add(node.getId());
            }
        });
    }

    // Column positions do not change while iterating, so resolve them once
    // instead of once per row.
    final int idIndex = spec.findColumnIndex(TracingColumns.ID);
    final int latIndex = spec.findColumnIndex(GeocodingNodeModel.LATITUDE_COLUMN);
    final int lonIndex = spec.findColumnIndex(GeocodingNodeModel.LONGITUDE_COLUMN);

    List<ClusterableRow> clusterableRows = new ArrayList<>();

    for (DataRow row : table) {
        String id = IO.getToCleanString(row.getCell(idIndex));
        Double lat = IO.getDouble(row.getCell(latIndex));
        Double lon = IO.getDouble(row.getCell(lonIndex));

        if (id == null || lat == null || lon == null || filteredOut.contains(id)) {
            continue;
        }

        // Coordinates are handed to the distance measure in radians.
        clusterableRows.add(new ClusterableRow(row.getKey(), Math.toRadians(lat), Math.toRadians(lon)));
    }

    List<? extends Cluster<ClusterableRow>> clusters;

    if (set.getModel().equals(DBSCANNSettings.MODEL_DBSCAN)) {
        clusters = new DBSCANClusterer<ClusterableRow>(set.getMaxDistance(), set.getMinPoints(),
                new HaversineDistance()).cluster(clusterableRows);
    } else if (set.getModel().equals(DBSCANNSettings.MODEL_K_MEANS)) {
        // 5 k-means++ trials; per the Commons Math documentation a negative
        // maxIterations means no iteration limit.
        clusters = new MultiKMeansPlusPlusClusterer<ClusterableRow>(
                new KMeansPlusPlusClusterer<>(set.getNumClusters(), -1, new HaversineDistance()), 5)
                        .cluster(clusterableRows);
    } else {
        throw new InvalidSettingsException(set.getModel());
    }

    // The position of a cluster in the result list serves as its id.
    Map<RowKey, Integer> clusterIds = new LinkedHashMap<>();

    for (int i = 0; i < clusters.size(); i++) {
        for (ClusterableRow r : clusters.get(i).getPoints()) {
            clusterIds.put(r.getKey(), i);
        }
    }

    DataTableSpec outSpec = createSpec(spec);
    BufferedDataContainer container = exec.createDataContainer(outSpec);

    // Resolve the input -> output column mapping once; it is identical for
    // every row.
    List<Integer> sourceIndices = new ArrayList<>();
    List<Integer> targetIndices = new ArrayList<>();

    for (String column : spec.getColumnNames()) {
        sourceIndices.add(spec.findColumnIndex(column));
        targetIndices.add(outSpec.findColumnIndex(column));
    }

    final int numOutColumns = outSpec.getNumColumns();
    final int clusterIdIndex = outSpec.findColumnIndex(TracingColumns.CLUSTER_ID);

    for (DataRow row : table) {
        DataCell[] cells = new DataCell[numOutColumns];

        for (int c = 0; c < sourceIndices.size(); c++) {
            cells[targetIndices.get(c)] = row.getCell(sourceIndices.get(c));
        }

        // Rows that were skipped or not assigned to a cluster have no entry,
        // so createCell receives null for them.
        cells[clusterIdIndex] = IO.createCell(clusterIds.get(row.getKey()));
        container.addRowToTable(new DefaultRow(row.getKey(), cells));
        exec.checkCanceled();
    }

    container.close();

    return new BufferedDataTable[] { container.getTable() };
}

From source file:org.apache.solr.client.solrj.io.eval.MultiKmeansEvaluator.java

/**
 * Runs multi-trial k-means++ clustering over the rows of an observation
 * matrix.
 *
 * @param values exactly three arguments: the {@link Matrix} to cluster, the
 *               number of clusters {@code k} (a {@link Number}) and the
 *               number of trials (a {@link Number})
 * @return a {@code KmeansEvaluator.ClusterTuple} holding the settings used
 *         ({@code k}, {@code trials}, {@code distance},
 *         {@code maxIterations}), the resulting clusters and the matrix
 *         column labels
 * @throws IOException if the number of arguments is not three or an
 *                     argument has the wrong type
 */
@Override
public Object doWork(Object... values) throws IOException {

    if (values.length != 3) {
        throw new IOException(
                "The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
    }

    // Validate the arguments in positional order so the first offending
    // parameter is the one reported.
    if (!(values[0] instanceof Matrix)) {
        throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
    }

    if (!(values[1] instanceof Number)) {
        throw new IOException("The second parameter for multiKmeans should be k.");
    }

    if (!(values[2] instanceof Number)) {
        throw new IOException("The third parameter for multiKmeans should be trials.");
    }

    Matrix matrix = (Matrix) values[0];
    int k = ((Number) values[1]).intValue();
    int trials = ((Number) values[2]).intValue();

    // Fully parameterized clusterers: no raw types, no unchecked calls.
    KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans = new KMeansPlusPlusClusterer<>(k,
            maxIterations);
    MultiKMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> multiKmeans = new MultiKMeansPlusPlusClusterer<>(
            kmeans, trials);

    double[][] data = matrix.getData();
    List<String> ids = matrix.getRowLabels();
    List<KmeansEvaluator.ClusterPoint> points = new ArrayList<>(data.length);

    // Pair every matrix row with the row label at the same index.
    for (int i = 0; i < data.length; i++) {
        points.add(new KmeansEvaluator.ClusterPoint(ids.get(i), data[i]));
    }

    Map<String, Object> fields = new HashMap<>();

    fields.put("k", k);
    fields.put("trials", trials);
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}