Usage examples for the org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable constructor
public SequenceFileValueIterable(Path path, Configuration conf)
From source file: edu/indiana/d2i/htrc/kmeans/MemCachedKMeansDriver.java
License:Apache License
private static void clusterDataSeq(Configuration conf, Path input, Path clustersIn, Path output, DistanceMeasure measure) throws IOException { KMeansClusterer clusterer = new KMeansClusterer(measure); Collection<Cluster> clusters = Lists.newArrayList(); MemKMeansUtil.configureWithClusterInfo(conf, clustersIn, clusters); if (clusters.isEmpty()) { throw new IllegalStateException("Clusters is empty!"); }//w w w. ja va2s . c o m FileSystem fs = FileSystem.get(input.toUri(), conf); FileStatus[] status = fs.listStatus(input, PathFilters.logsCRCFilter()); int part = 0; for (FileStatus s : status) { SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(output, "part-m-" + part), IntWritable.class, WeightedVectorWritable.class); try { for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) { clusterer.emitPointToNearestCluster(value.get(), clusters, writer); } } finally { Closeables.closeQuietly(writer); } } }