Usage examples for the constructor KMeansPlusPlusClusterer(int, int, DistanceMeasure) of the class org.apache.commons.math3.ml.clustering.KMeansPlusPlusClusterer
public KMeansPlusPlusClusterer(final int k, final int maxIterations, final DistanceMeasure measure)
From source file:bigdataproject.KMeansKFinder.java
public int find(double epsilon) { double oldAvDist = 0.0; for (int k = 2; k < numSamples; k++) { KMeansPlusPlusClusterer kmeans = new KMeansPlusPlusClusterer(k, 1000, new EuclideanDistance()); List<Cluster<DoublePoint>> clusterList = kmeans.cluster(list); double[] avDistances = new double[k]; int index = 0; for (Cluster<DoublePoint> c : clusterList) { List cluster = c.getPoints(); int size = cluster.size(); double[] centroid = getCentroid(cluster); double distanceSum = 0.0; for (Object p : cluster) { DoublePoint point = (DoublePoint) p; double[] pointDouble = point.getPoint(); EuclideanDistance dist = new EuclideanDistance(); distanceSum += dist.compute(centroid, pointDouble); }// w w w.j av a 2 s . c o m avDistances[index] = distanceSum / size; index++; } double avDistSum = 0.0; for (int i = 0; i < avDistances.length; i++) { avDistSum += avDistances[i]; } double newAvDist = avDistSum / avDistances.length; double difference = Math.abs(newAvDist - oldAvDist); if (difference >= epsilon) { oldAvDist = newAvDist; } else return k - 1; } return 0; }
From source file:de.bund.bfr.knime.openkrise.util.cluster.DBSCANNodeModel.java
/**
 * {@inheritDoc}
 *
 * <p>Reads id, latitude and longitude from the first input table, clusters the rows
 * with the model selected in the settings, and returns a copy of the input table
 * extended by a cluster-id column.
 */
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData, final ExecutionContext exec)
        throws Exception {
    final BufferedDataTable table = inData[0];
    final DataTableSpec spec = table.getSpec();

    TracingUtils.assertColumnNotMissing(spec, TracingColumns.ID, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LATITUDE_COLUMN, null);
    TracingUtils.assertColumnNotMissing(spec, GeocodingNodeModel.LONGITUDE_COLUMN, null);

    final NodePropertySchema nodeSchema =
            new NodePropertySchema(TracingUtils.getTableColumns(table.getSpec()), TracingColumns.ID);
    final Collection<GraphNode> nodes = TracingUtils.readGraphNodes(table, nodeSchema).values();

    // Ids of nodes for which the optional filter yields 0.0; they are excluded from clustering.
    final Set<String> filteredOut = new LinkedHashSet<>();
    if (set.getFilter() != null) {
        set.getFilter().getValues(nodes).forEach((node, value) -> {
            if (value == 0.0) {
                filteredOut.add(node.getId());
            }
        });
    }

    // Column positions do not change while iterating, so look them up once.
    final int idIndex = spec.findColumnIndex(TracingColumns.ID);
    final int latIndex = spec.findColumnIndex(GeocodingNodeModel.LATITUDE_COLUMN);
    final int lonIndex = spec.findColumnIndex(GeocodingNodeModel.LONGITUDE_COLUMN);

    final List<ClusterableRow> clusterableRows = new ArrayList<>();
    for (DataRow row : table) {
        final String id = IO.getToCleanString(row.getCell(idIndex));
        final Double lat = IO.getDouble(row.getCell(latIndex));
        final Double lon = IO.getDouble(row.getCell(lonIndex));

        // Only rows with complete data that were not filtered out take part in clustering.
        if (id != null && lat != null && lon != null && !filteredOut.contains(id)) {
            // Coordinates are handed on in radians.
            clusterableRows.add(new ClusterableRow(row.getKey(), Math.toRadians(lat), Math.toRadians(lon)));
        }
    }

    final List<? extends Cluster<ClusterableRow>> clusters;
    if (set.getModel().equals(DBSCANNSettings.MODEL_DBSCAN)) {
        final DBSCANClusterer<ClusterableRow> dbscan = new DBSCANClusterer<ClusterableRow>(
                set.getMaxDistance(), set.getMinPoints(), new HaversineDistance());
        clusters = dbscan.cluster(clusterableRows);
    } else if (set.getModel().equals(DBSCANNSettings.MODEL_K_MEANS)) {
        // NOTE(review): -1 is passed as maxIterations; per the Commons Math docs a negative
        // value means "no maximum" - confirm this is intended.
        final KMeansPlusPlusClusterer<ClusterableRow> kMeans =
                new KMeansPlusPlusClusterer<>(set.getNumClusters(), -1, new HaversineDistance());
        clusters = new MultiKMeansPlusPlusClusterer<ClusterableRow>(kMeans, 5).cluster(clusterableRows);
    } else {
        throw new InvalidSettingsException(set.getModel());
    }

    // Map every clustered row to the position of its cluster in the result list.
    final Map<RowKey, Integer> clusterIds = new LinkedHashMap<>();
    int clusterIndex = 0;
    for (Cluster<ClusterableRow> cluster : clusters) {
        for (ClusterableRow member : cluster.getPoints()) {
            clusterIds.put(member.getKey(), clusterIndex);
        }
        clusterIndex++;
    }

    final DataTableSpec outSpec = createSpec(spec);
    final int clusterColumn = outSpec.findColumnIndex(TracingColumns.CLUSTER_ID);
    final BufferedDataContainer container = exec.createDataContainer(outSpec);

    for (DataRow row : table) {
        final DataCell[] cells = new DataCell[outSpec.getNumColumns()];

        // Copy each input cell to the position its column has in the output spec.
        for (String column : spec.getColumnNames()) {
            final int source = spec.findColumnIndex(column);
            final int target = outSpec.findColumnIndex(column);
            cells[target] = row.getCell(source);
        }

        // Rows that were skipped above have no entry, so the looked-up id is null for them.
        cells[clusterColumn] = IO.createCell(clusterIds.get(row.getKey()));
        container.addRowToTable(new DefaultRow(row.getKey(), cells));
        exec.checkCanceled();
    }

    container.close();
    return new BufferedDataTable[] { container.getTable() };
}
From source file:bigdataproject.MainJFrame.java
private void jButton1ActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_jButton1ActionPerformed this.jLabel8.setText(""); ReadDataSet read = new ReadDataSet(); read.readFromFile();/*from w ww . j av a 2 s . c om*/ read.filter(); matrix = read.getMatrix(); PCA pca = new PCA(matrix); double[][] matrix2DPCA = pca.reduceDimensions(); BlockRealMatrix pcaMatrix = new BlockRealMatrix(matrix2DPCA); BlockRealMatrix pcaMatrixTranspose = pcaMatrix.transpose(); List<DoublePoint> list = read.getCollection(read.getHashMap(pcaMatrixTranspose.getData())); List<Cluster<DoublePoint>> clusterList; if (kMeans) { int k; if (this.jCheckBox1.isSelected()) { KMeansKFinder kFinder = new KMeansKFinder(list); k = kFinder.find(0.15); } else k = (int) this.jSpinner1.getValue(); KMeansPlusPlusClusterer kmeans = new KMeansPlusPlusClusterer(k, 1000, new EuclideanDistance()); clusterList = kmeans.cluster(list); } else { int minPts; double eps; if (this.jCheckBox2.isSelected()) { minPts = 6; //KDistances dist = new KDistances(pcaMatrixTranspose.getData()); //dist.calculateDistances(); //dist.getKSortedNearestNeighbors(minPts); //dist.printKdistances(); eps = 1.0; } else { minPts = (int) this.jSpinner2.getValue(); try { eps = Double.parseDouble(this.jTextField1.getText()); } catch (NumberFormatException e) { this.jLabel8.setText("Wrong eps Value"); return; } } DBSCANClusterer dbscan = new DBSCANClusterer(eps, minPts); clusterList = dbscan.cluster(list); } final ScatterPlot demo = new ScatterPlot("Big Data Clustering Project", matrix2DPCA, clusterList); demo.pack(); RefineryUtilities.centerFrameOnScreen(demo); demo.setVisible(true); }