Example usage for the org.apache.commons.math3.ml.clustering.DBSCANClusterer constructor DBSCANClusterer(double, int, DistanceMeasure)

Introduction

On this page you can find example usages of the org.apache.commons.math3.ml.clustering.DBSCANClusterer constructor DBSCANClusterer(double, int, DistanceMeasure).

Prototype

public DBSCANClusterer(final double eps, final int minPts, final DistanceMeasure measure)
        throws NotPositiveException

Source Link

Document

Creates a new instance of a DBSCANClusterer.

Usage

From source file:de.bund.bfr.knime.openkrise.util.cluster.DBSCANNodeModel.java

/**
 * {@inheritDoc}
 *
 * <p>
 * Clusters the rows of the first input table by latitude/longitude and returns a copy of that
 * table with an additional {@code TracingColumns.CLUSTER_ID} cell per row.
 *
 * <p>
 * Rows are skipped for clustering when their id, latitude or longitude is {@code null}, or when
 * the configured filter maps their node to {@code 0.0}. Depending on the configured model the
 * points are clustered either with DBSCAN or with multi-trial k-means++, both using a
 * {@code HaversineDistance}. The cluster id written to the output is the index of the cluster
 * in the list returned by the clusterer.
 *
 * @param inData input tables; only {@code inData[0]} is read
 * @param exec   execution context used to create the output container and to check for
 *               cancellation while writing the output rows
 * @return a single-element array holding the output table
 * @throws InvalidSettingsException if the configured model is neither DBSCAN nor k-means
 * @throws Exception                any other failure raised by the called helpers
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    BufferedDataTable table = inData[0];
    DataTableSpec spec = table.getSpec();

    TracingUtils.assertColumnNotMissing(spec, TracingColumns.ID, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LATITUDE_COLUMN, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LONGITUDE_COLUMN, null);

    NodePropertySchema nodeSchema = new NodePropertySchema(TracingUtils.getTableColumns(table.getSpec()),
            TracingColumns.ID);
    Collection<GraphNode> nodes = TracingUtils.readGraphNodes(table, nodeSchema).values();
    Set<String> filteredOut = new LinkedHashSet<>();

    // Collect the ids of all nodes the optional filter evaluates to 0.0; they are excluded below.
    if (set.getFilter() != null) {
        set.getFilter().getValues(nodes).forEach((node, value) -> {
            if (value == 0.0) {
                filteredOut.add(node.getId());
            }
        });
    }

    // The column positions are the same for every row, so resolve them once instead of per row.
    int idIndex = spec.findColumnIndex(TracingColumns.ID);
    int latitudeIndex = spec.findColumnIndex(GeocodingNodeModel.LATITUDE_COLUMN);
    int longitudeIndex = spec.findColumnIndex(GeocodingNodeModel.LONGITUDE_COLUMN);
    List<ClusterableRow> clusterableRows = new ArrayList<>();

    for (DataRow row : table) {
        String id = IO.getToCleanString(row.getCell(idIndex));
        Double lat = IO.getDouble(row.getCell(latitudeIndex));
        Double lon = IO.getDouble(row.getCell(longitudeIndex));

        if (id == null || lat == null || lon == null || filteredOut.contains(id)) {
            continue;
        }

        // Coordinates are handed to the distance measure after Math.toRadians conversion.
        clusterableRows.add(new ClusterableRow(row.getKey(), Math.toRadians(lat), Math.toRadians(lon)));
    }

    List<? extends Cluster<ClusterableRow>> clusters;

    if (set.getModel().equals(DBSCANNSettings.MODEL_DBSCAN)) {
        clusters = new DBSCANClusterer<ClusterableRow>(set.getMaxDistance(), set.getMinPoints(),
                new HaversineDistance()).cluster(clusterableRows);
    } else if (set.getModel().equals(DBSCANNSettings.MODEL_K_MEANS)) {
        // Per the Commons Math API: -1 is passed as the iteration limit of the inner clusterer
        // and 5 as the number of trials of the multi-start wrapper.
        clusters = new MultiKMeansPlusPlusClusterer<ClusterableRow>(
                new KMeansPlusPlusClusterer<>(set.getNumClusters(), -1, new HaversineDistance()), 5)
                        .cluster(clusterableRows);
    } else {
        throw new InvalidSettingsException(set.getModel());
    }

    // Map every clustered row to the index of the cluster that contains it.
    Map<RowKey, Integer> clusterIds = new LinkedHashMap<>();

    for (int i = 0; i < clusters.size(); i++) {
        for (ClusterableRow r : clusters.get(i).getPoints()) {
            clusterIds.put(r.getKey(), i);
        }
    }

    DataTableSpec outSpec = createSpec(spec);
    BufferedDataContainer container = exec.createDataContainer(outSpec);

    // Resolve the input -> output column mapping once; each entry is {inputIndex, outputIndex}.
    List<int[]> columnMapping = new ArrayList<>();

    for (String column : spec.getColumnNames()) {
        columnMapping.add(new int[] { spec.findColumnIndex(column), outSpec.findColumnIndex(column) });
    }

    int clusterIdIndex = outSpec.findColumnIndex(TracingColumns.CLUSTER_ID);

    for (DataRow row : table) {
        DataCell[] cells = new DataCell[outSpec.getNumColumns()];

        for (int[] mapping : columnMapping) {
            cells[mapping[1]] = row.getCell(mapping[0]);
        }

        // Rows that were skipped or not assigned to any cluster have no entry, so null is passed
        // to IO.createCell here (presumably yielding a missing cell - confirm against IO).
        cells[clusterIdIndex] = IO.createCell(clusterIds.get(row.getKey()));
        container.addRowToTable(new DefaultRow(row.getKey(), cells));
        exec.checkCanceled();
    }

    container.close();

    return new BufferedDataTable[] { container.getTable() };
}

From source file:org.lpe.common.util.LpeNumericUtils.java

/**
 * Clusters the given pairs with DBSCAN using a {@code NormalizedDistanceMeasure} that is
 * constructed from the reciprocals of {@code keyRange} and {@code valueRange}.
 *
 * <p>
 * Progress banners are written to {@code System.out} before clustering, after clustering and
 * before returning.
 *
 * @param points       the pairs to cluster
 * @param epsilon      forwarded to the {@code DBSCANClusterer} constructor as {@code eps}
 * @param minNumPoints forwarded to the {@code DBSCANClusterer} constructor as {@code minPts}
 * @param keyRange     divisor used to derive the first argument of the distance measure
 * @param valueRange   divisor used to derive the second argument of the distance measure
 * @return one {@code NumericPairList} per cluster found, in the order returned by the clusterer
 */
public static <T extends Number, S extends Number> List<NumericPairList<T, S>> dbscanNormalized(
        NumericPairList<T, S> points, double epsilon, int minNumPoints, double keyRange, double valueRange) {
    final double keyScale = 1.0 / keyRange;
    final double valueScale = 1.0 / valueRange;
    final NormalizedDistanceMeasure normalizedMeasure = new NormalizedDistanceMeasure(keyScale, valueScale);
    System.out.println("########## STARTED CLUSTERING ###################");

    final List<Cluster<NumericPair<T, S>>> found = new DBSCANClusterer<NumericPair<T, S>>(epsilon,
            minNumPoints, normalizedMeasure).cluster(points.getPairs());
    System.out.println("########## FINISHED CLUSTERING ###################");

    // Repackage every cluster's points into a NumericPairList of its own.
    final List<NumericPairList<T, S>> grouped = new ArrayList<>();
    for (final Cluster<NumericPair<T, S>> cluster : found) {
        final NumericPairList<T, S> members = new NumericPairList<>();
        for (final NumericPair<T, S> member : cluster.getPoints()) {
            members.add(member);
        }
        grouped.add(members);
    }

    System.out.println("########## RETURNED CLUSTERS ###################");
    return grouped;
}

From source file:org.meresco.lucene.search.MerescoClusterer.java

/**
 * Replaces {@code clusters} with a fresh list and fills it with the DBSCAN clusters of every
 * entry in {@code strategyClusterers}.
 *
 * <p>
 * Each entry supplies its own {@code eps}, {@code minPoints} and {@code docvectors}; a new
 * {@code GeneralizedJaccardDistance} is created per entry.
 */
public void finish() {
    final ArrayList<Cluster<MerescoVector>> collected = new ArrayList<Cluster<MerescoVector>>();
    // Publish the new (still empty) list first, then fill it strategy by strategy.
    this.clusters = collected;

    for (final StrategyClusterer strategy : this.strategyClusterers) {
        collected.addAll(new DBSCANClusterer<MerescoVector>(strategy.eps, strategy.minPoints,
                new GeneralizedJaccardDistance()).cluster(strategy.docvectors));
    }
}

From source file:webservice.ImportantPlacesWorker.java

public ImportantPlacesWorker(ImportantPlacesThread thread, Map locations) {
    this.locationsMap = locations;
    dbscan = new DBSCANClusterer(30, NUMBER_OF_POINTS, new GPSDistance());
    points = new ArrayList<>();
}