Usage examples for the org.apache.mahout.math.MatrixSlice constructor
public MatrixSlice(Vector v, int index)
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/**
 * Returns an iterator over the rows stored under {@code rowPath}, each exposed as a
 * {@link MatrixSlice} built from the record's vector (value) and its integer key.
 *
 * <p>When {@code rowPath} is a directory, a {@code "*"} glob over its children is read;
 * otherwise {@code rowPath} itself is used as the glob pattern.
 *
 * @return an iterator that converts each (key, vector) record into a {@code MatrixSlice}
 * @throws IllegalStateException wrapping any {@link IOException} raised while inspecting
 *         the path or opening the underlying sequence-file iterator
 */
@Override
public Iterator<MatrixSlice> iterateAll() {
  try {
    boolean isDirectory = FileSystem.get(conf).getFileStatus(rowPath).isDir();
    Path pathPattern = isDirectory ? new Path(rowPath, "*") : rowPath;

    SequenceFileDirIterator<IntWritable, VectorWritable> records =
        new SequenceFileDirIterator<IntWritable, VectorWritable>(
            pathPattern, PathType.GLOB, PathFilters.logsCRCFilter(), null, true, conf);

    // Maps one (row index, row vector) record to the MatrixSlice handed to callers.
    Function<Pair<IntWritable, VectorWritable>, MatrixSlice> toSlice =
        new Function<Pair<IntWritable, VectorWritable>, MatrixSlice>() {
          @Override
          public MatrixSlice apply(Pair<IntWritable, VectorWritable> from) {
            VectorWritable rowVector = from.getSecond();
            IntWritable rowIndex = from.getFirst();
            return new MatrixSlice(rowVector.get(), rowIndex.get());
          }
        };

    return Iterators.transform(records, toSlice);
  } catch (IOException ioe) {
    throw new IllegalStateException(ioe);
  }
}
From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansDriver.java
License:Apache License
private void StreamingKMeansConfigHelper(Configuration conf, String input, int maxCluster) throws IOException { // get samples to calculate scale factor FileSystem fs = FileSystem.get(conf); FileStatus[] status = fs.listStatus(new Path(input), Utilities.HIDDEN_FILE_FILTER); int index = 0 + (int) (Math.random() * (status.length)); SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, status[index].getPath(), conf); int count = 0; Text key = new Text(); VectorWritable value = new VectorWritable(); List<MatrixSlice> slices = new ArrayList<MatrixSlice>(); while (seqReader.next(key, value) && count < samplesNum) { MatrixSlice slice = new MatrixSlice(value.get().clone(), count); slices.add(slice);/*from w w w . java 2s .c om*/ count++; } // set cutoff float cutoff = (float) StreamingKmeans.estimateCutoff(slices, samplesNum); conf.setFloat(StreamingKMeansConfigKeys.CUTOFF, cutoff); logger.info("Scale factor (cutoff) is: " + cutoff); // set vector dimension int dim = value.get().size(); conf.setInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, dim); logger.info("Dimemsion of a vector is: " + dim); // set maximum #cluster conf.setInt(StreamingKMeansConfigKeys.MAXCLUSTER, maxCluster); // set distance measurement conf.set(StreamingKMeansConfigKeys.DIST_MEASUREMENT, EuclideanDistanceMeasure.class.getName()); }