Example usage for org.apache.mahout.math VectorWritable get

List of usage examples for org.apache.mahout.math VectorWritable get

Introduction

This page collects usage examples for the get() method of org.apache.mahout.math.VectorWritable.

Prototype

public Vector get() 

Source Link

Usage

From source file:Vectors.java

License:Apache License

/**
 * Folds a sequence of partial vectors into one vector.
 *
 * <p>The vector wrapped by the first element serves as the accumulator and is
 * modified in place. For every later non-null element, each entry yielded by
 * {@code iterateNonZero()} is written onto the same index of the accumulator,
 * replacing whatever value was there.
 *
 * @param partialVectors the partial vectors to combine; {@code next()} is
 *        called once without a preceding {@code hasNext()}, and the first
 *        element is dereferenced without a null check
 * @return the vector taken from the first element, after all entries were copied
 */
public static Vector merge(Iterable<VectorWritable> partialVectors) {
    Iterator<VectorWritable> remaining = partialVectors.iterator();
    Vector merged = remaining.next().get();
    while (remaining.hasNext()) {
        VectorWritable partial = remaining.next();
        if (partial == null) {
            // Null entries are skipped rather than treated as an error.
            continue;
        }
        for (Iterator<Vector.Element> entries = partial.get().iterateNonZero(); entries.hasNext();) {
            Vector.Element entry = entries.next();
            merged.setQuick(entry.index(), entry.get());
        }
    }
    return merged;
}

From source file:Vectors.java

License:Apache License

/**
 * Copies the non-zero entries of the wrapped vector into an array of
 * {@code TemporaryElement} snapshots (index and value).
 *
 * <p>The array is first sized with {@code getNumNondefaultElements()} and then
 * filled from {@code iterateNonZero()}. Those two counts are not guaranteed to
 * agree (NOTE(review): e.g. a dense backing store that reports its full length
 * while the iterator skips zeros -- confirm against the Vector implementations
 * in use). When fewer entries are produced than were allocated, the result is
 * trimmed so it never ends in {@code null} slots.
 *
 * @param vectorWritable wrapper whose vector is read; it is not modified
 * @return one element per entry yielded by {@code iterateNonZero()}, in
 *         iteration order
 */
public static Vector.Element[] toArray(VectorWritable vectorWritable) {
    // Fetch the vector once instead of unwrapping it for every use.
    Vector vector = vectorWritable.get();
    Vector.Element[] elements = new Vector.Element[vector.getNumNondefaultElements()];
    int k = 0;
    Iterator<Vector.Element> nonZeroElements = vector.iterateNonZero();
    while (nonZeroElements.hasNext()) {
        Vector.Element nonZeroElement = nonZeroElements.next();
        elements[k++] = new TemporaryElement(nonZeroElement.index(), nonZeroElement.get());
    }
    if (k < elements.length) {
        // Drop the unfilled tail so callers never see null elements.
        return java.util.Arrays.copyOf(elements, k);
    }
    return elements;
}

From source file:Vectors.java

License:Apache License

/**
 * Reads the first record found in the "part-" files directly under
 * {@code path} and returns its vector.
 *
 * <p>Entries whose file name does not contain {@code "part-"} are ignored.
 * Part files are visited in the order returned by {@code listStatus}. A part
 * file with no records is skipped and the search continues with the next one,
 * so an empty part file no longer ends the search with an unpopulated value.
 *
 * @param path directory to list
 * @param conf configuration used to obtain the file system and to
 *        instantiate the key/value objects
 * @return the vector of the first record read, or {@code null} if no part
 *         file contains a record
 * @throws IOException if listing or reading fails
 */
public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fileStatus : fs.listStatus(path)) {
        if (!fileStatus.getPath().getName().contains("part-")) {
            continue;
        }
        SequenceFile.Reader reader = null;
        try {
            reader = new SequenceFile.Reader(fs, fileStatus.getPath(), conf);
            // The key is instantiated from the file's declared key class and cast to Text,
            // exactly as before; only the value is used.
            Text key = (Text) ReflectionUtils.newInstance(reader.getKeyClass(), conf);
            VectorWritable value = (VectorWritable) ReflectionUtils.newInstance(reader.getValueClass(),
                    conf);
            // next() reports whether a record was actually read; only then is value populated.
            if (reader.next(key, value)) {
                return value.get();
            }
        } finally {
            IOUtils.closeStream(reader);
        }
    }
    return null;
}

From source file:DisplayClustering.java

License:Apache License

/**
 * Draws the two reference rectangles in black and then one small dark-gray
 * rectangle for every vector in {@code SAMPLE_DATA}.
 *
 * <p>The graphics transform is replaced by a uniform scale of
 * {@code res / DS} before anything is drawn.
 *
 * @param g2 graphics context to draw into; its transform and color are changed
 */
protected static void plotSampleData(Graphics2D g2) {
    double scale = (double) res / DS;
    AffineTransform scaling = AffineTransform.getScaleInstance(scale, scale);
    g2.setTransform(scaling);

    // Reference rectangles ("axes"), both drawn with the same extent vector.
    g2.setColor(Color.BLACK);
    Vector extent = new DenseVector(2).assign(SIZE / 2.0);
    Vector positiveAnchor = new DenseVector(2).assign(2);
    plotRectangle(g2, positiveAnchor, extent);
    Vector negativeAnchor = new DenseVector(2).assign(-2);
    plotRectangle(g2, negativeAnchor, extent);

    // Sample points: the extent vector is reused, overwritten with the point size.
    g2.setColor(Color.DARK_GRAY);
    extent.assign(0.03);
    for (VectorWritable sample : SAMPLE_DATA) {
        Vector position = sample.get();
        plotRectangle(g2, position, extent);
    }
}

From source file:DisplayClustering.java

License:Apache License

/**
 * This method plots points and colors them according to their cluster
 * membership, rather than drawing ellipses.
 *
 * As of commit, this method is used only by K-means spectral clustering.
 * Since the cluster assignments are set within the eigenspace of the data, it
 * is not inherent that the original data cluster as they would in K-means:
 * that is, as symmetric gaussian mixtures.
 *
 * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw
 * output is not directly usable. Rather, the cluster assignments from the raw
 * output need to be transferred back to the original data. As such, this
 * method will read the SequenceFile cluster results of K-means and transfer
 * the cluster assignments to the original data, coloring them appropriately.
 *
 * @param g2/*from w  w  w .  ja va2  s  .c o  m*/
 * @param data
 */
protected static void plotClusteredSampleData(Graphics2D g2, Path data) {
    double sx = (double) res / DS;
    g2.setTransform(AffineTransform.getScaleInstance(sx, sx));

    g2.setColor(Color.BLACK);
    Vector dv = new DenseVector(2).assign(SIZE / 2.0);
    plotRectangle(g2, new DenseVector(2).assign(2), dv);
    plotRectangle(g2, new DenseVector(2).assign(-2), dv);

    // plot the sample data, colored according to the cluster they belong to
    dv.assign(0.03);

    Path clusteredPointsPath = new Path(data, "clusteredPoints");
    Path inputPath = new Path(clusteredPointsPath, "part-m-00000");
    Map<Integer, Color> colors = new HashMap<Integer, Color>();
    int point = 0;
    for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>(
            inputPath, new Configuration())) {
        int clusterId = record.getFirst().get();
        VectorWritable v = SAMPLE_DATA.get(point++);
        Integer key = clusterId;
        if (!colors.containsKey(key)) {
            colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]);
        }
        plotClusteredRectangle(g2, v.get(), dv, colors.get(key));
    }
}

From source file:ac.keio.sslab.nlp.lda.RowIdJob.java

License:Apache License

/**
 * Rewrites the input sequence files as an integer-keyed matrix plus an index
 * that maps each assigned row number back to the original text key.
 *
 * <p>Two sequence files are created under the output path: {@code docIndex}
 * (row number to original key) and {@code matrix} (row number to vector).
 * Row numbers start at 0 and increase by one per input record.
 *
 * @param args command-line arguments, handed to {@code parseArguments}
 * @return {@code -1} if argument parsing yields {@code null}, otherwise {@code 0}
 * @throws Exception if reading the input or writing either output fails
 */
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();

    if (parseArguments(args) == null) {
        return -1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = getOutputPath();
    Path indexPath = new Path(outputPath, "docIndex");
    Path matrixPath = new Path(outputPath, "matrix");

    try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class,
            Text.class);
            SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath,
                    IntWritable.class, VectorWritable.class)) {
        // A single writable is reused as the key for both outputs.
        IntWritable rowKey = new IntWritable();
        int rowCount = 0;
        int columnCount = 0;
        Iterable<Pair<Text, VectorWritable>> records = new SequenceFileDirIterable<Text, VectorWritable>(
                getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf);
        for (Pair<Text, VectorWritable> entry : records) {
            VectorWritable row = entry.getSecond();
            rowKey.set(rowCount);
            indexWriter.append(rowKey, entry.getFirst());
            matrixWriter.append(rowKey, row);
            rowCount++;
            // The reported column count is the size of the most recently written row.
            columnCount = row.get().size();
        }

        log.info("Wrote out matrix with {} rows and {} columns to {}", rowCount, columnCount, matrixPath);
        return 0;
    }
}

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

/**
 * Reads tuples of two vectors from the peer and, for every non-zero entry of
 * one vector, sends the other vector scaled by that entry to the master task.
 *
 * <p>The vector whose size is checked against {@code outCardinality} decides
 * the roles: the "out fragment" is the vector that gets scaled, the
 * "multiplier" supplies the scalars and the row indices of the sent messages.
 * After all input has been consumed the peer is synchronized once.
 *
 * @param peer the BSP peer used for reading input, sending messages and syncing
 * @throws IOException if reading, logging or sending fails
 * @throws SyncException if the final barrier synchronization fails
 * @throws InterruptedException if the peer is interrupted while syncing
 */
@Override
public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer)
        throws IOException, SyncException, InterruptedException {

    IntWritable inputKey = new IntWritable();
    TupleWritable inputTuple = new TupleWritable();
    while (peer.readNext(inputKey, inputTuple)) {

        // Debug dump of every vector in the incoming tuple.
        if (isDebuggingEnabled) {
            for (int pos = 0; pos < inputTuple.size(); pos++) {
                Vector logged = ((VectorWritable) inputTuple.get(pos)).get();
                logger.writeChars("bsp,input,key=" + inputKey + ",value=" + logged.toString() + "\n");
            }
        }

        Vector firstVector = ((VectorWritable) inputTuple.get(0)).get();
        Vector secondVector = ((VectorWritable) inputTuple.get(1)).get();

        // outCardinality is the resulting column size n:
        // (l x m) * (m x n) = (l x n)
        boolean firstIsOutFrag = secondVector.size() == outCardinality;

        // outFrag comes from the matrix with the resulting column cardinality (matrixB);
        // multiplier comes from the matrix with the resulting row count (transposed matrixA).
        Vector outFrag;
        Vector multiplier;
        if (firstIsOutFrag) {
            outFrag = secondVector;
            multiplier = firstVector;
        } else {
            outFrag = firstVector;
            multiplier = secondVector;
        }

        if (isDebuggingEnabled) {
            logger.writeChars("bsp,firstIsOutFrag=" + firstIsOutFrag + "\n");
            logger.writeChars("bsp,outFrag=" + outFrag + "\n");
            logger.writeChars("bsp,multiplier=" + multiplier + "\n");
        }

        for (Vector.Element scalar : multiplier.nonZeroes()) {

            // Scalar multiplication (vector x element), wrapped for transport.
            VectorWritable scaledRow = new VectorWritable();
            scaledRow.set(outFrag.times(scalar.get()));

            peer.send(masterTask, new MatrixRowMessage(scalar.index(), scaledRow));

            if (isDebuggingEnabled) {
                logger.writeChars("bsp,send,key=" + scalar.index() + ",value=" + scaledRow.get().toString() + "\n");
            }
        }
        if (isDebuggingEnabled) {
            logger.flush();
        }
    }
    peer.sync();
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Loads a sequence file of (IntWritable, VectorWritable) records into a dense
 * two-dimensional array, one array row per record.
 *
 * <p>Rows appear in the order the records are read; the record key is not
 * used for placement. Every position of each vector is copied, so vectors that
 * store only some of their entries are expanded correctly. The reader is
 * always closed, including when reading fails.
 *
 * @param fileName path of the sequence file, resolved against the file
 *        system of a default {@code Configuration}
 * @return the matrix rows; each row has the length of its vector's {@code size()}
 * @throws Exception if the file cannot be opened, read or closed
 */
public static double[][] readMatrixSequenceFile(String fileName) throws Exception {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration);
    try {
        List<double[]> rows = new ArrayList<double[]>();
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();
        while (matrixReader.next(key, value)) {
            Vector vector = value.get();
            double[] row = new double[vector.size()];
            // Copy by position: getElement(i) addresses index i, not the i-th stored
            // entry, so bounding the loop by the non-default count would miss entries.
            for (int i = 0; i < row.length; i++) {
                row[i] = vector.get(i);
            }
            rows.add(row);
        }
        return rows.toArray(new double[rows.size()][]);
    } finally {
        // Release the underlying stream even if reading throws.
        matrixReader.close();
    }
}

From source file:com.elex.dmp.lda.CachingCVB0Mapper.java

License:Apache License

/**
 * Trains the model on one document, starting from a fresh topic vector in
 * which each of the {@code numTopics} entries is {@code 1.0 / numTopics}.
 *
 * @param docId key of the document; not used here
 * @param document the document vector passed to the trainer
 * @param context mapper context; nothing is written to it here
 */
@Override
public void map(Text docId, VectorWritable document, Context context) throws IOException, InterruptedException {
    /* where to get docTopics? */
    Vector blankTopics = new DenseVector(new double[numTopics]);
    double uniformWeight = 1.0 / numTopics;
    Vector uniformTopics = blankTopics.assign(uniformWeight);
    modelTrainer.train(document.get(), uniformTopics, true, maxIters);
}

From source file:com.elex.dmp.lda.CachingCVB0PerplexityMapper.java

License:Apache License

/**
 * Emits the 1-norm of a document together with its perplexity, for a random
 * sample of the input.
 *
 * <p>When {@code testFraction} is below 1, a document is dropped if the drawn
 * random float is greater than or equal to {@code testFraction}. Every kept
 * document increments the {@code SAMPLED_DOCUMENTS} counter.
 *
 * @param docId key of the document; not used here
 * @param document the document vector to evaluate
 * @param context receives the counter update and the (norm, perplexity) pair
 */
@Override
public void map(Text docId, VectorWritable document, Context context) throws IOException, InterruptedException {
    // The random draw only happens when sampling is actually enabled.
    boolean sampling = testFraction < 1;
    if (sampling && random.nextFloat() >= testFraction) {
        return;
    }
    context.getCounter(Counters.SAMPLED_DOCUMENTS).increment(1);

    double documentNorm = document.get().norm(1);
    outKey.set(documentNorm);

    // The shared topic vector is reset to a uniform value before each evaluation.
    double uniformWeight = 1.0 / numTopics;
    outValue.set(
            modelTrainer.calculatePerplexity(document.get(), topicVector.assign(uniformWeight), maxIters));
    context.write(outKey, outValue);
}