Example usage for org.apache.mahout.utils.clustering ClusterWriter write

List of usage examples for org.apache.mahout.utils.clustering ClusterWriter write

Introduction

On this page you can find example usage of the write method of org.apache.mahout.utils.clustering.ClusterWriter.

Prototype

void write(ClusterWritable clusterWritable) throws IOException;

Source Link

Document

Write out a Cluster

Usage

From source file: tk.summerway.mahout9.tools.MyClusterDumper.java

License: Apache License

/**
 * Writes the clusters stored under {@code seqFileDir} (files matching {@code part-*})
 * through a {@link ClusterWriter} and, when {@code runEvaluation} is set, runs a
 * {@code MyClusterEvaluator} over them afterwards.
 *
 * <p>Output goes to {@code outputFile} when that field is non-null, otherwise to
 * {@code System.out}. When writing to {@code System.out} the cluster writer is only
 * closed if it is a {@code GraphMLClusterWriter}; in every other stdout case the
 * stream is just flushed.
 *
 * @param dictionary term dictionary handed to the cluster writer; it is replaced by
 *        the dictionary loaded from {@code termDictionary} when that field is non-null
 * @throws IllegalArgumentException if {@code termDictionary} is set and
 *         {@code dictionaryFormat} is neither {@code "text"} nor {@code "sequencefile"}
 * @throws Exception if loading the dictionary, opening the output, writing the
 *         clusters, or the evaluation fails
 */
public void printClusters(String[] dictionary) throws Exception {
    Configuration conf = new Configuration();

    if (this.termDictionary != null) {
        if ("text".equals(dictionaryFormat)) {
            dictionary = VectorHelper.loadTermDictionary(new File(this.termDictionary));
        } else if ("sequencefile".equals(dictionaryFormat)) {
            dictionary = VectorHelper.loadTermDictionary(conf, this.termDictionary);
        } else {
            throw new IllegalArgumentException("Invalid dictionary format");
        }
    }

    // Only a writer we opened ourselves gets closed; System.out is left open.
    boolean shouldClose = this.outputFile != null;
    Writer writer;
    if (!shouldClose) {
        writer = new OutputStreamWriter(System.out, Charsets.UTF_8);
    } else if (outputPath != null && "s3n".equals(outputPath.toUri().getScheme())) {
        // File.getName() returns only the last path component, so it can never start
        // with "s3n://"; the scheme has to be read from the Hadoop path instead.
        // NOTE(review): assumes outputPath denotes the same destination as outputFile
        // whenever both are set - confirm against where these fields are assigned.
        FileSystem fs = FileSystem.get(outputPath.toUri(), conf);
        writer = new OutputStreamWriter(fs.create(outputPath), Charsets.UTF_8);
    } else {
        Files.createParentDirs(outputFile);
        writer = Files.newWriter(this.outputFile, Charsets.UTF_8);
    }

    ClusterWriter clusterWriter = null;
    try {
        // Created inside the try so a failure here cannot leak the writer opened above.
        clusterWriter = createClusterWriter(writer, dictionary);

        long numWritten = clusterWriter.write(new SequenceFileDirValueIterable<ClusterWritable>(
                new Path(seqFileDir, "part-*"), PathType.GLOB, conf));

        writer.flush();
        if (runEvaluation) {
            // NOTE(review): Java does not expand "~"; whether MyClusterEvaluator resolves
            // it to the user's home directory is not visible from here - confirm.
            MyClusterEvaluator ce = new MyClusterEvaluator(pointsDir.toString(), seqFileDir.toString(),
                    "~/cluster_evaluate_result.txt", measure, 1000L);
            ce.evaluateClusters(conf);
        }
        log.info("Wrote {} clusters", numWritten);
    } finally {
        if (clusterWriter == null) {
            // createClusterWriter failed: release the raw writer ourselves. The IOException
            // from close is swallowed so it does not mask the original failure.
            if (shouldClose) {
                Closeables.close(writer, true);
            }
        } else if (shouldClose) {
            Closeables.close(clusterWriter, false);
        } else if (clusterWriter instanceof GraphMLClusterWriter) {
            clusterWriter.close();
        }
    }
}