List of usage examples for org.apache.mahout.clustering.kmeans.KMeansDriver.run
public static void run(Path input, Path clustersIn, Path output, double convergenceDelta, int maxIterations, boolean runClustering, double clusterClassificationThreshold, boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException
From source file:DisplayKMeans.java
License:Apache License
/**
 * Seeds the clustering with randomly chosen clusters and then runs k-means through
 * {@code KMeansDriver.run}, finally handing the output path to {@code loadClustersWritable}.
 *
 * @param conf             Hadoop configuration passed to the seed generator
 * @param samples          path of the input samples
 * @param output           output path; the seed clusters go to the {@code random-seeds} child of it
 * @param measure          distance measure passed to the seed generator
 * @param numClusters      number of seed clusters requested from the seed generator
 * @param maxIterations    iteration limit forwarded to the driver
 * @param convergenceDelta convergence delta forwarded to the driver
 * @throws IOException            propagated from the seed generator, driver or loader
 * @throws InterruptedException   propagated from the driver
 * @throws ClassNotFoundException propagated from the driver
 */
private static void runSequentialKMeansClusterer(
    Configuration conf,
    Path samples,
    Path output,
    DistanceMeasure measure,
    int numClusters,
    int maxIterations,
    double convergenceDelta)
    throws IOException, InterruptedException, ClassNotFoundException {
  final Path seedDir = new Path(output, "random-seeds");
  RandomSeedGenerator.buildRandom(conf, samples, seedDir, numClusters, measure);

  // Named after the corresponding parameters of KMeansDriver.run.
  final boolean runClustering = true;
  final double clusterClassificationThreshold = 0.0;
  final boolean runSequential = true;

  KMeansDriver.run(
      samples,
      seedDir,
      output,
      convergenceDelta,
      maxIterations,
      runClustering,
      clusterClassificationThreshold,
      runSequential);

  loadClustersWritable(output);
}
From source file:io.github.thushear.display.DisplayKMeans.java
License:Apache License
private static void runSequentialKMeansClusterer(Configuration conf, Path samples, Path output, DistanceMeasure measure, int maxIterations) throws IOException, InterruptedException, ClassNotFoundException { Path clusters = RandomSeedGenerator.buildRandom(conf, samples, new Path(output, "clusters-0"), 3, measure); double distanceThreshold = 0.001; KMeansDriver.run(samples, clusters, output, measure, distanceThreshold, maxIterations, true, true); loadClusters(output);// www.jav a 2 s .co m }
From source file:net.aprendizajengrande.ontocluster.Clusterer.java
License:Open Source License
public static void main(String[] args) throws ClassNotFoundException, IOException, InterruptedException { if (args.length != 3) { System.err.println(//from ww w .j a va 2 s . c om "Usage: <input hdfs folder with vectors> <hdfs folder for output> <local folder for output>"); System.exit(1); } Configuration conf = new Configuration(); DistanceMeasure measure = new CosineDistanceMeasure(); long seed = 67241; int numClusters = 250; int numIterations = 500; // see // http://stackoverflow.com/questions/17265002/hadoop-no-filesystem-for-scheme-file conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName()); // crear vectores en HDFS System.out.println("Input: " + args[0]); Path input = new Path(args[0] + "/input"); // first centroids are an input parameter to clustering Path clusters = new Path(args[0] + "/clusters"); clusters = RandomSeedGenerator.buildRandom(conf, input, clusters, numClusters, measure, seed); Path output = new Path(args[1]); // cluster KMeansDriver.run(input, clusters, output, 0.005, numIterations, true, 0.0, false); // read the rel names, to pretty print Path inputRels = new Path(args[0] + "/rels"); FileSystem fs = inputRels.getFileSystem(conf); FSDataInputStream fsdis = fs.open(inputRels); BufferedReader br = new BufferedReader(new InputStreamReader(fsdis)); String line = br.readLine(); Map<Integer, String> relIdToName = new HashMap<>(); while (line != null) { String[] parts = line.split("\\t"); relIdToName.put(Integer.parseInt(parts[0]), parts[1]); line = br.readLine(); } // read output Path outputFinal = ClusterExtractor.findFinalClusters(args[1], conf); if (outputFinal == null) { System.err.println("Couldn't find final clusters at '" + args[1] + "-\\d+-final'"); System.exit(1); } Path successFile = new Path(outputFinal, "_SUCCESS"); if (fs.exists(successFile)) { fs.delete(successFile, false); } SequenceFileDirIterable<Text, Writable> it = new 
SequenceFileDirIterable<>(outputFinal, PathType.LIST, conf); PrintWriter pw = new PrintWriter(new FileWriter(new File(args[2]))); int clusterNum = 0; for (Pair<Text, Writable> p : it) { Object obj = p.getSecond(); if (!(obj instanceof ClusterWritable)) continue; pw.println(clusterNum + ") " + p.getFirst()); Cluster cluster = ((ClusterWritable) obj).getValue(); Vector center = cluster.getCenter(); for (int i = 0; i < center.size(); i++) { String name = relIdToName.get(i); if (name == null) name = "?"; if (center.get(i) >= 0.01) pw.println("\t" + name + ": " + center.get(i)); } pw.println(); clusterNum++; } pw.close(); }