List of usage examples for org.apache.mahout.clustering ClusteringUtils daviesBouldinIndex
public static double daviesBouldinIndex(List<? extends Vector> centroids, DistanceMeasure distanceMeasure, List<OnlineSummarizer> clusterDistanceSummaries)
From source file: org.conan.mymahout.clustering.streaming.tools.ClusterQualitySummarizer.java
License:Apache License
/**
 * Summarizes clustering quality for one (or optionally two) sets of centroids.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Parse the command-line arguments via {@code parseArgs}.</li>
 *   <li>Open {@code outputFile} and write the CSV header line.</li>
 *   <li>Load the primary centroids and, when {@code centroidCompareFile} is set, the comparison
 *       centroids.</li>
 *   <li>Pass the per-cluster distance summaries of the training set (and of the test set, when
 *       {@code testFile} is set) to {@code printSummaries}, using squared Euclidean distance.</li>
 *   <li>Print the Dunn and Davies-Bouldin indices to standard output, computed over all loaded
 *       datapoints with the configured {@code distanceMeasure}.</li>
 * </ol>
 *
 * @param args command-line arguments, forwarded to {@code parseArgs}
 * @return {@code 0} on success; {@code -1} if the arguments could not be parsed or an
 *     {@link IOException} occurred while reading the inputs or opening the output file
 * @throws IOException if closing the output writer fails
 */
public int run(String[] args) throws IOException {
  if (!parseArgs(args)) {
    return -1;
  }

  Configuration conf = new Configuration();
  try {
    fileOut = new PrintWriter(new FileOutputStream(outputFile));
    fileOut.printf("cluster,distance.mean,distance.sd,distance.q0,distance.q1,distance.q2,distance.q3,"
        + "distance.q4,count,is.train\n");

    // Reading in the centroids (both sets, if the comparison file was given).
    List<Centroid> centroids = loadCentroids(centroidFile, mahoutKMeansFormat, conf);
    List<Centroid> centroidsCompare = null;
    if (centroidCompareFile != null) {
      centroidsCompare = loadCentroids(centroidCompareFile, mahoutKMeansFormatCompare, conf);
    }

    // Reading in the "training" set.
    Iterable<Vector> trainDatapoints = loadVectors(trainFile, conf);
    Iterable<Vector> datapoints = trainDatapoints;
    printSummaries(ClusteringUtils.summarizeClusterDistances(trainDatapoints, centroids,
        new SquaredEuclideanDistanceMeasure()), "train");

    // Also adding in the "test" set.
    if (testFile != null) {
      Iterable<Vector> testDatapoints = loadVectors(testFile, conf);
      printSummaries(ClusteringUtils.summarizeClusterDistances(testDatapoints, centroids,
          new SquaredEuclideanDistanceMeasure()), "test");
      datapoints = Iterables.concat(trainDatapoints, testDatapoints);
    }

    // At this point, all train/test summaries have been handed to printSummaries.
    // We now compute quality metrics over the combined datapoints.
    List<OnlineSummarizer> summaries =
        ClusteringUtils.summarizeClusterDistances(datapoints, centroids, distanceMeasure);
    List<OnlineSummarizer> compareSummaries = null;
    if (centroidsCompare != null) {
      compareSummaries =
          ClusteringUtils.summarizeClusterDistances(datapoints, centroidsCompare, distanceMeasure);
    }

    System.out.printf("[Dunn Index] First: %f",
        ClusteringUtils.dunnIndex(centroids, distanceMeasure, summaries));
    if (compareSummaries != null) {
      System.out.printf(" Second: %f\n",
          ClusteringUtils.dunnIndex(centroidsCompare, distanceMeasure, compareSummaries));
    } else {
      System.out.printf("\n");
    }

    System.out.printf("[Davies-Bouldin Index] First: %f",
        ClusteringUtils.daviesBouldinIndex(centroids, distanceMeasure, summaries));
    if (compareSummaries != null) {
      System.out.printf(" Second: %f\n",
          ClusteringUtils.daviesBouldinIndex(centroidsCompare, distanceMeasure, compareSummaries));
    } else {
      System.out.printf("\n");
    }
  } catch (IOException e) {
    // Report the failure and signal it through the exit code; previously this path fell
    // through to "return 0", so callers could not distinguish failure from success.
    System.err.println(e.getMessage());
    return -1;
  } finally {
    // swallowIOException = false: a failure to close the output is propagated to the caller.
    Closeables.close(fileOut, false);
  }
  return 0;
}

/**
 * Loads every centroid found under the given path glob.
 *
 * @param file path glob of the sequence file(s) to read
 * @param kMeansFormat {@code true} to read the values as {@code ClusterWritable},
 *     {@code false} to read them as {@code CentroidWritable}
 * @param conf Hadoop configuration used to open the files
 * @return a new list holding the centroids that were read
 */
private List<Centroid> loadCentroids(String file, boolean kMeansFormat, Configuration conf) {
  if (kMeansFormat) {
    SequenceFileDirValueIterable<ClusterWritable> clusterIterable =
        new SequenceFileDirValueIterable<ClusterWritable>(new Path(file), PathType.GLOB, conf);
    return Lists.newArrayList(IOUtils.getCentroidsFromClusterWritableIterable(clusterIterable));
  }
  SequenceFileDirValueIterable<CentroidWritable> centroidIterable =
      new SequenceFileDirValueIterable<CentroidWritable>(new Path(file), PathType.GLOB, conf);
  return Lists.newArrayList(IOUtils.getCentroidsFromCentroidWritableIterable(centroidIterable));
}

/**
 * Builds an iterable over the vectors stored as {@code VectorWritable} values under the given
 * path glob.
 *
 * @param file path glob of the sequence file(s) to read
 * @param conf Hadoop configuration used to open the files
 * @return the vectors produced by {@code IOUtils.getVectorsFromVectorWritableIterable}
 */
private Iterable<Vector> loadVectors(String file, Configuration conf) {
  SequenceFileDirValueIterable<VectorWritable> vectorIterable =
      new SequenceFileDirValueIterable<VectorWritable>(new Path(file), PathType.GLOB, conf);
  return IOUtils.getVectorsFromVectorWritableIterable(vectorIterable);
}