List of usage examples for org.apache.mahout.utils.clustering ClusterWriter write
void write(ClusterWritable clusterWritable) throws IOException;
From source file: tk.summerway.mahout9.tools.MyClusterDumper.java
License: Apache License
public void printClusters(String[] dictionary) throws Exception { Configuration conf = new Configuration(); if (this.termDictionary != null) { if ("text".equals(dictionaryFormat)) { dictionary = VectorHelper.loadTermDictionary(new File(this.termDictionary)); } else if ("sequencefile".equals(dictionaryFormat)) { dictionary = VectorHelper.loadTermDictionary(conf, this.termDictionary); } else {/*from w ww .j a va 2 s. c om*/ throw new IllegalArgumentException("Invalid dictionary format"); } } Writer writer; boolean shouldClose; if (this.outputFile == null) { shouldClose = false; writer = new OutputStreamWriter(System.out, Charsets.UTF_8); } else { shouldClose = true; if (outputFile.getName().startsWith("s3n://")) { Path p = outputPath; FileSystem fs = FileSystem.get(p.toUri(), conf); writer = new OutputStreamWriter(fs.create(p), Charsets.UTF_8); } else { Files.createParentDirs(outputFile); writer = Files.newWriter(this.outputFile, Charsets.UTF_8); } } ClusterWriter clusterWriter = createClusterWriter(writer, dictionary); try { long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<ClusterWritable>( new Path(seqFileDir, "part-*"), PathType.GLOB, conf)); writer.flush(); if (runEvaluation) { MyClusterEvaluator ce = new MyClusterEvaluator(pointsDir.toString(), seqFileDir.toString(), "~/cluster_evaluate_result.txt", measure, 1000L); ce.evaluateClusters(conf); } // if (runEvaluation) { // HadoopUtil.delete(conf, new Path("tmp/representative")); // int numIters = 5; // RepresentativePointsDriver.main(new String[] { "--input", // seqFileDir.toString(), "--output", // "tmp/representative", "--clusteredPoints", // pointsDir.toString(), "--distanceMeasure", // measure.getClass().getName(), "--maxIter", // String.valueOf(numIters) }); // conf.set(RepresentativePointsDriver.DISTANCE_MEASURE_KEY, // measure.getClass().getName()); // conf.set(RepresentativePointsDriver.STATE_IN_KEY, // "tmp/representative/representativePoints-" + numIters); // ClusterEvaluator ce = 
new ClusterEvaluator(conf, seqFileDir); // writer.append("\n"); // writer.append("Inter-Cluster Density: ") // .append(String.valueOf(ce.interClusterDensity())) // .append("\n"); // writer.append("Intra-Cluster Density: ") // .append(String.valueOf(ce.intraClusterDensity())) // .append("\n"); // CDbwEvaluator cdbw = new CDbwEvaluator(conf, seqFileDir); // writer.append("CDbw Inter-Cluster Density: ") // .append(String.valueOf(cdbw.interClusterDensity())) // .append("\n"); // writer.append("CDbw Intra-Cluster Density: ") // .append(String.valueOf(cdbw.intraClusterDensity())) // .append("\n"); // writer.append("CDbw Separation: ") // .append(String.valueOf(cdbw.separation())).append("\n"); // writer.flush(); // } log.info("Wrote {} clusters", numWritten); } finally { if (shouldClose) { Closeables.close(clusterWriter, false); } else { if (clusterWriter instanceof GraphMLClusterWriter) { clusterWriter.close(); } } } }