List of usage examples for the constructor of org.apache.commons.math3.ml.clustering.MultiKMeansPlusPlusClusterer
public MultiKMeansPlusPlusClusterer(final KMeansPlusPlusClusterer<T> clusterer, final int numTrials)
From source file:edu.cmu.sv.modelinference.eventtool.classification.Clusterer1D.java
private List<? extends Cluster<DataWrapper>> computeClusters(Collection<DataWrapper> dataCol, int k) { List<? extends Cluster<DataWrapper>> clusterResults = null; try {//from ww w.j a va 2 s. c om Clusterer<DataWrapper> clusterer = new MultiKMeansPlusPlusClusterer<>( new KMeansPlusPlusClusterer<DataWrapper>(k, maxIterations), trials); clusterResults = clusterer.cluster(dataCol); } catch (NumberIsTooSmallException e) { logger.warn("Too few datapoints for clusters: " + e.getMessage()); } return clusterResults; }
From source file:de.bund.bfr.knime.openkrise.util.cluster.DBSCANNodeModel.java
/**
 * {@inheritDoc}
 *
 * <p>Clusters the rows of the first input table by their latitude/longitude and
 * returns a single table containing every input row plus a cluster-id cell.
 *
 * <p>Rows are left out of the clustering when their id, latitude or longitude is
 * missing, or when the configured filter (if any) reports the value {@code 0.0}
 * for the corresponding node. Such rows are still written to the output; their
 * cluster-id cell is created from {@code null}.
 *
 * @param inData the input tables; only {@code inData[0]} is read
 * @param exec   used to create the output container and to check for cancellation
 * @return a one-element array holding the output table
 * @throws InvalidSettingsException if the configured model is neither DBSCAN nor k-means
 * @throws Exception                propagated from column validation, table reading,
 *                                  clustering or cancellation checks
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getSpec();

    TracingUtils.assertColumnNotMissing(spec, TracingColumns.ID, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LATITUDE_COLUMN, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LONGITUDE_COLUMN, null);

    NodePropertySchema nodeSchema = new NodePropertySchema(TracingUtils.getTableColumns(table.getSpec()),
            TracingColumns.ID);
    Collection<GraphNode> nodes = TracingUtils.readGraphNodes(table, nodeSchema).values();

    // Ids of nodes for which the optional filter yields 0.0; these are not clustered.
    Set<String> filteredOut = new LinkedHashSet<>();

    if (set.getFilter() != null) {
        set.getFilter().getValues(nodes).forEach((node, value) -> {
            if (value == 0.0) {
                filteredOut.add(node.getId());
            }
        });
    }

    // Column positions do not change while iterating, so look them up once
    // instead of once per row.
    int idIndex = spec.findColumnIndex(TracingColumns.ID);
    int latitudeIndex = spec.findColumnIndex(GeocodingNodeModel.LATITUDE_COLUMN);
    int longitudeIndex = spec.findColumnIndex(GeocodingNodeModel.LONGITUDE_COLUMN);

    List<ClusterableRow> clusterableRows = new ArrayList<>();

    for (DataRow row : table) {
        String id = IO.getToCleanString(row.getCell(idIndex));
        Double lat = IO.getDouble(row.getCell(latitudeIndex));
        Double lon = IO.getDouble(row.getCell(longitudeIndex));

        if (id == null || lat == null || lon == null || filteredOut.contains(id)) {
            continue;
        }

        // Coordinates are converted to radians before being handed to HaversineDistance.
        clusterableRows.add(new ClusterableRow(row.getKey(), Math.toRadians(lat), Math.toRadians(lon)));
    }

    List<? extends Cluster<ClusterableRow>> clusters;

    if (set.getModel().equals(DBSCANNSettings.MODEL_DBSCAN)) {
        clusters = new DBSCANClusterer<ClusterableRow>(set.getMaxDistance(), set.getMinPoints(),
                new HaversineDistance()).cluster(clusterableRows);
    } else if (set.getModel().equals(DBSCANNSettings.MODEL_K_MEANS)) {
        // maxIterations = -1: Commons Math treats a negative value as "no iteration limit".
        // The k-means++ run is repeated 5 times by MultiKMeansPlusPlusClusterer.
        clusters = new MultiKMeansPlusPlusClusterer<ClusterableRow>(
                new KMeansPlusPlusClusterer<>(set.getNumClusters(), -1, new HaversineDistance()), 5)
                        .cluster(clusterableRows);
    } else {
        throw new InvalidSettingsException(set.getModel());
    }

    // The cluster id of a row is the position of its cluster in the result list.
    Map<RowKey, Integer> clusterIds = new LinkedHashMap<>();

    for (int i = 0; i < clusters.size(); i++) {
        for (ClusterableRow r : clusters.get(i).getPoints()) {
            clusterIds.put(r.getKey(), i);
        }
    }

    DataTableSpec outSpec = createSpec(spec);
    BufferedDataContainer container = exec.createDataContainer(outSpec);

    // Input column index -> output column index, computed once for all rows.
    Map<Integer, Integer> inputToOutputIndex = new LinkedHashMap<>();

    for (String column : spec.getColumnNames()) {
        inputToOutputIndex.put(spec.findColumnIndex(column), outSpec.findColumnIndex(column));
    }

    int clusterIdIndex = outSpec.findColumnIndex(TracingColumns.CLUSTER_ID);

    for (DataRow row : table) {
        DataCell[] cells = new DataCell[outSpec.getNumColumns()];

        for (Map.Entry<Integer, Integer> mapping : inputToOutputIndex.entrySet()) {
            cells[mapping.getValue()] = row.getCell(mapping.getKey());
        }

        // Rows that were not clustered have no entry, so the cell is created from null.
        cells[clusterIdIndex] = IO.createCell(clusterIds.get(row.getKey()));
        container.addRowToTable(new DefaultRow(row.getKey(), cells));
        exec.checkCanceled();
    }

    container.close();

    return new BufferedDataTable[] { container.getTable() };
}
From source file:org.apache.solr.client.solrj.io.eval.MultiKmeansEvaluator.java
/**
 * Clusters the rows of an observation matrix using k-means++ repeated over a
 * number of trials.
 *
 * @param values exactly three values, in order: the observation {@code Matrix},
 *               {@code k} as a {@code Number}, and the number of trials as a
 *               {@code Number}
 * @return a {@code KmeansEvaluator.ClusterTuple} built from the settings used
 *         ({@code k}, {@code trials}, {@code distance}, {@code maxIterations}),
 *         the resulting clusters and the matrix's column labels
 * @throws IOException if the number of values is not three, or a value does not
 *                     have the expected type
 */
@Override
public Object doWork(Object... values) throws IOException {
    if (values.length != 3) {
        throw new IOException(
                "The multiKmeans function expects three parameters; a matrix to cluster, k and number of trials.");
    }

    // Validate in parameter order so the first offending argument is the one reported.
    if (!(values[0] instanceof Matrix)) {
        throw new IOException("The first parameter for multiKmeans should be the observation matrix.");
    }
    if (!(values[1] instanceof Number)) {
        throw new IOException("The second parameter for multiKmeans should be k.");
    }
    if (!(values[2] instanceof Number)) {
        throw new IOException("The third parameter for multiKmeans should be trials.");
    }

    Matrix matrix = (Matrix) values[0];
    int k = ((Number) values[1]).intValue();
    int trials = ((Number) values[2]).intValue();

    KMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> kmeans =
            new KMeansPlusPlusClusterer<>(k, maxIterations);
    MultiKMeansPlusPlusClusterer<KmeansEvaluator.ClusterPoint> multiKmeans =
            new MultiKMeansPlusPlusClusterer<>(kmeans, trials);

    // One cluster point per matrix row, identified by that row's label.
    double[][] data = matrix.getData();
    List<String> ids = matrix.getRowLabels();
    List<KmeansEvaluator.ClusterPoint> points = new ArrayList<>(data.length);
    for (int i = 0; i < data.length; i++) {
        points.add(new KmeansEvaluator.ClusterPoint(ids.get(i), data[i]));
    }

    Map<String, Object> fields = new HashMap<>();
    fields.put("k", k);
    fields.put("trials", trials);
    // No distance measure is passed to the clusterer above; Commons Math then uses Euclidean distance.
    fields.put("distance", "euclidean");
    fields.put("maxIterations", maxIterations);

    return new KmeansEvaluator.ClusterTuple(fields, multiKmeans.cluster(points), matrix.getColumnLabels());
}