Example usage for the org.apache.mahout.math.AbstractVector constructor AbstractVector(int)

Introduction

On this page you can find example usage of the org.apache.mahout.math.AbstractVector constructor AbstractVector(int size).

Prototype

protected AbstractVector(int size)

Source Link

Usage

From source file: edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java

License: Open Source License

/**
 * Clusters the keys of {@code dimension()} by writing them out as vectors,
 * invoking the statically imported {@code run(...)} driver, and reading the
 * point-to-cluster assignments back.
 *
 * <p>Steps, all performed inside a {@link TemporaryFolder} that is closed when
 * the method returns:
 * <ol>
 *   <li>Expose every key of {@code dimension()} as a read-only
 *       {@link NamedVector} whose components are looked up lazily in
 *       {@code dataset().values()} along the opposite dimension.</li>
 *   <li>Write those vectors to {@code points/file1} keyed by a running record
 *       number.</li>
 *   <li>Seed {@code k()} clusters from the first {@code k()} vectors and write
 *       them to {@code clusters/part-00000}.</li>
 *   <li>Call {@code run(...)} with {@code metric.measurer()},
 *       {@code convergence()} and {@code iterations()}.</li>
 *   <li>Read {@code CLUSTERED_POINTS_DIR/part-m-00000} from the output folder
 *       and group vector names by cluster key.</li>
 * </ol>
 *
 * @return a k-means result carrying this builder's dataset, dimension, name,
 *         type and the sets of clustered key names
 * @throws DatasetException wrapping any {@link IOException},
 *         {@link ClassNotFoundException} or {@link InterruptedException}
 *         raised while writing, running or reading; on interruption the
 *         thread's interrupt flag is restored before the exception is thrown
 */
@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();

        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);
        // The dimension we are NOT clustering on supplies the vector components.
        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);

        // Lazy, read-only view: one NamedVector per key of the clustered dimension.
        List<NamedVector> vectors = new AbstractList<NamedVector>() {

            @Override
            public NamedVector get(int index) {
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {

                    @Override
                    public void setQuick(int index, double value) {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public Vector like() {
                        return new RandomAccessSparseVector(size());
                    }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            private int current = 0;

                            @Override
                            public boolean hasNext() {
                                return current < other.keys().size();
                            }

                            @Override
                            public Element next() {
                                // Honour the Iterator contract instead of handing out an
                                // element whose index is past the end of the vector.
                                if (!hasNext()) {
                                    throw new java.util.NoSuchElementException();
                                }
                                return new Element() {
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) {
                                        throw new UnsupportedOperationException();
                                    }

                                    @Override
                                    public int index() {
                                        return index;
                                    }

                                    @Override
                                    public double get() {
                                        // Single source of truth for the coordinate lookup.
                                        return getQuick(index);
                                    }
                                };
                            }

                            @Override
                            public void remove() {
                                throw new UnsupportedOperationException();
                            }
                        };
                    }

                    @Override
                    public Iterator<Element> iterateNonZero() {
                        // Every component is reported; no zero-skipping is attempted.
                        return iterator();
                    }

                    @Override
                    public boolean isSequentialAccess() {
                        return true;
                    }

                    @Override
                    public boolean isDense() {
                        return true;
                    }

                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        // Argument order of values().get(...) depends on which
                        // dimension is being clustered.
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() {
                        return other.keys().size();
                    }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) {
                        throw new UnsupportedOperationException();
                    }
                }, vector);
            }

            @Override
            public int size() {
                return dimension().keys().size();
            }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()), LongWritable.class,
                VectorWritable.class)) {
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters: the first k() vectors become the initial centres
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()), Text.class, Cluster.class)) {
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();

        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(), true,
                    false);

            try (Reader reader = new Reader(system, new Path(
                    new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000").getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                // cluster key (as text) -> names of the vectors assigned to it
                Map<String, Set<String>> result = new HashMap<>();

                while (reader.next(key, value)) {
                    Set<String> cluster = result.get(key.toString());
                    if (cluster == null) {
                        cluster = new HashSet<>();
                        result.put(key.toString(), cluster);
                    }
                    cluster.add(((NamedVector) value.getVector()).getName());
                }

                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can still observe it.
            Thread.currentThread().interrupt();
            throw new DatasetException(e);
        } catch (ClassNotFoundException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}