Example usage for the org.apache.mahout.math.MatrixSlice constructor

List of usage examples for the org.apache.mahout.math.MatrixSlice constructor

Introduction

On this page you can find example usages of the org.apache.mahout.math.MatrixSlice constructor.

Prototype

public MatrixSlice(Vector v, int index) 

Source Link

Usage

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

/**
 * Returns an iterator that exposes the key/value pairs found under
 * {@code rowPath} as {@link MatrixSlice} objects.
 *
 * <p>When {@code rowPath} is a directory, the glob pattern {@code rowPath/*} is
 * used; otherwise {@code rowPath} itself is the pattern. For every pair read, the
 * {@code IntWritable} key becomes the slice index and the vector held by the
 * {@code VectorWritable} value becomes the slice vector.
 *
 * @return an iterator of slices built from the pairs matched by the pattern
 * @throws IllegalStateException wrapping any {@link IOException} raised while
 *         inspecting {@code rowPath} or constructing the underlying iterator
 */
@Override
public Iterator<MatrixSlice> iterateAll() {
    // Converts one (index, vector) pair into the corresponding MatrixSlice.
    final Function<Pair<IntWritable, VectorWritable>, MatrixSlice> toSlice =
            new Function<Pair<IntWritable, VectorWritable>, MatrixSlice>() {
                @Override
                public MatrixSlice apply(Pair<IntWritable, VectorWritable> from) {
                    final int rowIndex = from.getFirst().get();
                    return new MatrixSlice(from.getSecond().get(), rowIndex);
                }
            };

    try {
        final boolean isDirectory = FileSystem.get(conf).getFileStatus(rowPath).isDir();
        // A directory is expanded to "everything directly inside it".
        final Path pattern = isDirectory ? new Path(rowPath, "*") : rowPath;

        final SequenceFileDirIterator<IntWritable, VectorWritable> rows =
                new SequenceFileDirIterator<IntWritable, VectorWritable>(pattern, PathType.GLOB,
                        PathFilters.logsCRCFilter(), null, true, conf);
        return Iterators.transform(rows, toSlice);
    } catch (IOException e) {
        throw new IllegalStateException(e);
    }
}

From source file:edu.indiana.d2i.htrc.skmeans.StreamingKMeansDriver.java

License:Apache License

/**
 * Fills {@code conf} with the streaming k-means settings computed here: the
 * cutoff (scale factor), the vector dimension, the maximum number of clusters
 * and the distance measure class name.
 *
 * <p>The cutoff is estimated from up to {@code samplesNum} vectors read from one
 * randomly chosen, non-hidden file under {@code input}.
 *
 * @param conf       configuration to update
 * @param input      path whose files (as filtered by
 *                   {@code Utilities.HIDDEN_FILE_FILTER}) are candidate sample sources
 * @param maxCluster value stored under {@code StreamingKMeansConfigKeys.MAXCLUSTER}
 * @throws IOException if the file system cannot be accessed, if no candidate file
 *                     exists under {@code input}, or if no vector could be read
 *                     from the chosen file
 */
private void StreamingKMeansConfigHelper(Configuration conf, String input, int maxCluster) throws IOException {
    // get samples to calculate scale factor
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path(input), Utilities.HIDDEN_FILE_FILTER);
    if (status == null || status.length == 0) {
        // Previously this surfaced as an opaque NullPointerException or
        // ArrayIndexOutOfBoundsException at status[index].
        throw new IOException("No input files found under " + input);
    }

    // Pick one candidate file at random; Math.random() < 1.0 keeps index < status.length.
    int index = (int) (Math.random() * status.length);
    Path samplePath = status[index].getPath();

    VectorWritable value = new VectorWritable();
    List<MatrixSlice> slices = new ArrayList<MatrixSlice>();
    SequenceFile.Reader seqReader = new SequenceFile.Reader(fs, samplePath, conf);
    try {
        int count = 0;
        Text key = new Text();
        while (seqReader.next(key, value) && count < samplesNum) {
            // clone() because the reader reuses the same VectorWritable for every record
            slices.add(new MatrixSlice(value.get().clone(), count));
            count++;
        }
    } finally {
        // Always release the underlying stream, even if reading fails.
        seqReader.close();
    }

    // NOTE(review): assumes a VectorWritable that was never populated returns null
    // from get(); in that case the dimension lookup below would otherwise fail
    // with a NullPointerException.
    if (value.get() == null) {
        throw new IOException("No vectors could be read from " + samplePath);
    }

    // set cutoff
    float cutoff = (float) StreamingKmeans.estimateCutoff(slices, samplesNum);
    conf.setFloat(StreamingKMeansConfigKeys.CUTOFF, cutoff);
    logger.info("Scale factor (cutoff) is: " + cutoff);

    // set vector dimension (taken from the last vector read)
    int dim = value.get().size();
    conf.setInt(StreamingKMeansConfigKeys.VECTOR_DIMENSION, dim);
    logger.info("Dimemsion of a vector is: " + dim);

    // set maximum #cluster
    conf.setInt(StreamingKMeansConfigKeys.MAXCLUSTER, maxCluster);

    // set distance measurement
    conf.set(StreamingKMeansConfigKeys.DIST_MEASUREMENT, EuclideanDistanceMeasure.class.getName());
}