Example usage for org.apache.mahout.math AbstractVector AbstractVector

List of usage examples for the org.apache.mahout.math.AbstractVector constructor AbstractVector(int size)

Introduction

On this page you can find example usage of the org.apache.mahout.math.AbstractVector constructor AbstractVector(int size).

Prototype

protected AbstractVector(int size) 

Source Link

Usage

From source file:edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java

License:Open Source License

/**
 * Clusters the keys of {@code dimension()} with k-means, exchanging data with the
 * clustering run through sequence files under a {@link TemporaryFolder}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>Expose every key of {@code dimension()} as a read-only {@link NamedVector} whose
 *       components are looked up lazily in {@code dataset().values()}.</li>
 *   <li>Write those vectors to {@code points/file1}, keyed by a running record number.</li>
 *   <li>Seed {@code clusters/part-00000} with the first {@code k()} vectors as initial
 *       clusters (Euclidean distance).</li>
 *   <li>Invoke {@code run(...)} (presumably Mahout's k-means driver, statically imported;
 *       confirm against the file's imports) and read the clustered points back from the
 *       output folder.</li>
 *   <li>Group the vector names by the cluster key they were assigned to.</li>
 * </ol>
 *
 * <p>The temporary folder is a try-with-resources resource, so it is closed when this
 * method exits, whether normally or exceptionally.
 *
 * @return a {@link KMeans} carrying this builder's dataset, dimension, name and type, plus
 *         one set of key names per cluster read from the output
 * @throws DatasetException wrapping any {@link IOException}, {@link ClassNotFoundException}
 *         or {@link InterruptedException} raised while writing, running or reading; on
 *         interruption the thread's interrupt flag is restored before throwing
 */
@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();

        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);
        // The dimension opposite to the one being clustered supplies the vector components.
        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);

        // Lazy list view: one NamedVector per key of the clustered dimension, created on demand.
        List<NamedVector> vectors = new AbstractList<NamedVector>() {

            @Override
            public NamedVector get(int index) {
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {

                    @Override
                    public void setQuick(int index, double value) {
                        // The vector is a read-only view over the dataset.
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public Vector like() {
                        return new RandomAccessSparseVector(size());
                    }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            private int current = 0;

                            @Override
                            public boolean hasNext() {
                                return current < other.keys().size();
                            }

                            @Override
                            public Element next() {
                                // Honor the Iterator contract instead of handing out an
                                // element whose index is past the end of the vector.
                                if (!hasNext()) {
                                    throw new java.util.NoSuchElementException();
                                }
                                return new Element() {
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) {
                                        throw new UnsupportedOperationException();
                                    }

                                    @Override
                                    public int index() {
                                        return index;
                                    }

                                    @Override
                                    public double get() {
                                        // Same lookup as the enclosing vector's getQuick.
                                        return getQuick(index);
                                    }
                                };
                            }

                            @Override
                            public void remove() {
                                throw new UnsupportedOperationException();
                            }
                        };
                    }

                    @Override
                    public Iterator<Element> iterateNonZero() {
                        // Every component is reported; zeros are not skipped.
                        return iterator();
                    }

                    @Override
                    public boolean isSequentialAccess() {
                        return true;
                    }

                    @Override
                    public boolean isDense() {
                        return true;
                    }

                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        // Argument order of values().get depends on which dimension is clustered.
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() {
                        return other.keys().size();
                    }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) {
                        throw new UnsupportedOperationException();
                    }
                }, vector);
            }

            @Override
            public int size() {
                return dimension().keys().size();
            }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()), LongWritable.class,
                VectorWritable.class)) {
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters: the first k() vectors become the initial clusters
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()), Text.class, Cluster.class)) {
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();

        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(), true,
                    false);

            try (Reader reader = new Reader(system, new Path(
                    new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000").getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                Map<String, Set<String>> result = new HashMap<>();

                // Group vector names by the cluster key each record was written under.
                while (reader.next(key, value)) {
                    String clusterKey = key.toString();
                    Set<String> cluster = result.get(clusterKey);
                    if (cluster == null) {
                        cluster = new HashSet<>();
                        result.put(clusterKey, cluster);
                    }
                    cluster.add(((NamedVector) value.getVector()).getName());
                }

                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers up the stack can still observe it.
            Thread.currentThread().interrupt();
            throw new DatasetException(e);
        } catch (ClassNotFoundException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}