Example usage for org.apache.mahout.math VectorWritable set

List of usage examples for org.apache.mahout.math VectorWritable set

Introduction

On this page you can find example usages of org.apache.mahout.math VectorWritable set.

Prototype

public void set(Vector vector) 

Source Link

Usage

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

/**
 * Reads (key, tuple) records from the peer and, for every record, sends one scaled
 * copy of the "out fragment" vector to {@code masterTask} per non-zero element of the
 * "multiplier" vector. Finishes with a single {@code peer.sync()}.
 *
 * <p>Each tuple is expected to hold two {@link VectorWritable}s at positions 0 and 1.
 * The vector whose size equals {@code outCardinality} is chosen by inspecting the
 * second vector only; the other vector becomes the multiplier.
 *
 * @param peer the BSP peer used for reading input, sending messages and syncing
 * @throws IOException propagated from reading, logging or sending
 * @throws SyncException propagated from {@code peer.sync()}
 * @throws InterruptedException propagated from {@code peer.sync()}
 */
@Override
public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer)
        throws IOException, SyncException, InterruptedException {

    final IntWritable rowKey = new IntWritable();
    final TupleWritable rowPair = new TupleWritable();

    while (peer.readNext(rowKey, rowPair)) {

        // Debug output: dump every vector contained in the input tuple
        if (isDebuggingEnabled) {
            int position = 0;
            while (position < rowPair.size()) {
                Vector inputVector = ((VectorWritable) rowPair.get(position)).get();
                logger.writeChars("bsp,input,key=" + rowKey + ",value=" + inputVector.toString() + "\n");
                position++;
            }
        }

        final Vector first = ((VectorWritable) rowPair.get(0)).get();
        final Vector second = ((VectorWritable) rowPair.get(1)).get();

        // outCardinality is the resulting column size n:
        // (l x m) * (m x n) = (l x n)
        final boolean secondMatchesOutCardinality = second.size() == outCardinality;

        // outFrag:    row of the matrix carrying the resulting column cardinality (matrixB)
        // multiplier: row of the matrix carrying the resulting row count (transposed matrixA)
        final Vector outFrag;
        final Vector multiplier;
        if (secondMatchesOutCardinality) {
            outFrag = second;
            multiplier = first;
        } else {
            outFrag = first;
            multiplier = second;
        }

        if (isDebuggingEnabled) {
            logger.writeChars("bsp,firstIsOutFrag=" + secondMatchesOutCardinality + "\n");
            logger.writeChars("bsp,outFrag=" + outFrag + "\n");
            logger.writeChars("bsp,multiplier=" + multiplier + "\n");
        }

        for (Vector.Element element : multiplier.nonZeroes()) {

            // Scalar multiplication (vector x element), wrapped for transport
            VectorWritable scaledRow = new VectorWritable();
            scaledRow.set(outFrag.times(element.get()));

            peer.send(masterTask, new MatrixRowMessage(element.index(), scaledRow));

            if (isDebuggingEnabled) {
                logger.writeChars(
                        "bsp,send,key=" + element.index() + ",value=" + scaledRow.get().toString() + "\n");
            }
        }

        if (isDebuggingEnabled) {
            logger.flush();
        }
    }

    peer.sync();
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Writes a matrix to a Hadoop sequence file, one record per row.
 *
 * <p>Each record has the row index as an {@link IntWritable} key and the row values,
 * wrapped in a {@link DenseVector}, as a {@link VectorWritable} value. The column
 * count is taken from the first row and applied to every row. An empty matrix
 * produces a sequence file without records.
 *
 * @param matrixSeqFileName path of the sequence file to create
 * @param covarianceMatrix the matrix to write, indexed as {@code [row][column]}
 * @throws Exception if the file system cannot be obtained or the file cannot be written
 */
public static void writeMatrixSequenceFile(String matrixSeqFileName, double[][] covarianceMatrix)
        throws Exception {
    int rowCount = covarianceMatrix.length;
    // Guard the first-row lookup so that an empty matrix does not fail with an index error.
    int columnCount = rowCount == 0 ? 0 : covarianceMatrix[0].length;

    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer matrixWriter = new SequenceFile.Writer(fs, configuration, new Path(matrixSeqFileName),
            IntWritable.class, VectorWritable.class);

    // Close the writer even when an append fails, so the underlying stream is not leaked.
    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();

        double[] doubleValues = new double[columnCount];
        for (int i = 0; i < rowCount; i++) {
            key.set(i);
            System.arraycopy(covarianceMatrix[i], 0, doubleValues, 0, columnCount);
            value.set(new DenseVector(doubleValues));

            matrixWriter.append(key, value);
        }
    } finally {
        matrixWriter.close();
    }
}

From source file:com.lakhani.anchorgraph.applestovectors.java

/**
 * Writes five sample apple vectors (weight, colour, size) to a sequence file keyed by
 * the apple's name, then prints every record of a sequence file to standard output.
 *
 * @param args unused
 * @throws Exception if the file system cannot be obtained or reading/writing fails
 */
public static void main(String args[]) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();

    apples.add(new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple"));
    // The original built this vector but never added it to the list, so it was never written.
    apples.add(new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path("/user/cloudera/anchorgraph/output");
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        // A single VectorWritable is reused; set() swaps in the vector to serialize.
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : apples) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        writer.close();
    }

    // NOTE(review): this reads "appledata/apples", not the path written above.
    // If the intent is to read back what was just written, pass `path` here - confirm.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("appledata/apples"), conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

/**
 * Converts one tuple into a named Mahout vector and writes it to the record writer.
 *
 * <p>Field 0 of {@code t} is the integer row key, which is also used as the vector
 * name. Field 1 is a tuple describing the row. Each of its entries is either a plain
 * number, stored at the entry's position, or a nested {@code (column, value)} tuple,
 * stored at the given column.
 *
 * @param t the tuple to store
 * @throws IOException propagated from tuple access or from the record writer
 * @throws IllegalArgumentException if {@code dimensions} is 0
 * @throws RuntimeException if a row entry is neither numeric nor a tuple
 */
@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    outputKey.set((Integer) t.get(0));
    Tuple currRow = (Tuple) t.get(1);
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    } else {
        currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            // Convert through Number: a direct (Double) cast fails with ClassCastException
            // for the Integer, Long and Float values these cases are meant to accept.
            currRowVector.set(ii, ((Number) o).doubleValue());
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set column and element
            Tuple subt = (Tuple) o;
            currRowVector.set(((Number) subt.get(0)).intValue(), ((Number) subt.get(1)).doubleValue());
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        LOG.error("Interrupted while writing", e);
        // Restore the interrupt flag so callers further up can still observe the interruption.
        Thread.currentThread().interrupt();
    }
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Convert an in-memory representation of a matrix to a distributed MapDir
 * format. It then can be used in distributed jobs
 * /*from  w  ww  .  j  a  v  a2  s.c o  m*/
 * @param oriMatrix
 * @return path that will contain the matrix files
 * @throws Exception
 */
public static DistributedRowMatrix toMapDir(Matrix origMatrix, Path outPath, Path tmpPath, String label)
        throws Exception {
    Configuration conf = new Configuration();
    Path outputDir = new Path(outPath, label + origMatrix.numRows() + "x" + origMatrix.numCols());
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (!fs.exists(outputDir)) {
        Path mapDir = new Path(outputDir, "matrix-k-0");
        Path outputFile = new Path(mapDir, "data");
        @SuppressWarnings("deprecation")
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
                VectorWritable.class);
        VectorWritable vectorw = new VectorWritable();
        IntWritable intw = new IntWritable();
        try {
            for (int r = 0; r < origMatrix.numRows(); r++) {
                Vector vector = origMatrix.viewRow(r);
                vectorw.set(vector);
                intw.set(r);
                writer.append(intw, vectorw);
            }
        } finally {
            writer.close();
        }
        MapFile.fix(fs, mapDir, IntWritable.class, VectorWritable.class, false, conf);
    } else {
        log.warn("----------- Skip matrix " + outputDir + " - already exists");
    }
    DistributedRowMatrix dMatrix = new DistributedRowMatrix(outputDir, tmpPath, origMatrix.numRows(),
            origMatrix.numCols());
    dMatrix.setConf(conf);
    return dMatrix;
}

From source file:com.twitter.algebra.AlgebraCommon.java

License:Apache License

/**
 * Write a vector to filesystem so that it can be used by distributed jobs
 * @param vector/* w ww  .  j  av a 2s .  c  o m*/
 * @param outputDir
 * @param label the unique label that be used in naming the vector file
 * @param conf
 * @return
 * @throws IOException
 */
public static Path toDistributedVector(Vector vector, Path outputDir, String label, Configuration conf)
        throws IOException {
    Path outputFile = new Path(outputDir, "Vector-" + label);
    FileSystem fs = FileSystem.get(outputDir.toUri(), conf);
    if (fs.exists(outputFile)) {
        log.warn("----------- OVERWRITE " + outputFile + " already exists");
        fs.delete(outputFile, false);
    }
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, outputFile, IntWritable.class,
            VectorWritable.class);
    VectorWritable vectorw = new VectorWritable();
    vectorw.set(vector);
    writer.append(new IntWritable(0), vectorw);
    writer.close();
    return outputFile;
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

/**
 * Get the value associated with the key
 * @param key/* w w  w . ja va2  s  .  c o m*/
 * @param val the object that will be filled with the retrieved value 
 * @return the retrieved value
 * @throws IOException
 */
public VectorWritable get(IntWritable key, VectorWritable val) throws IOException {
    if (lastReader == null && noMorePartitions)
        return null;
    if (lastReader == null) {
        loadReader(key);
        nextKey.set(key.get());
        boolean eof = lastReader.getClosest(nextKey, nextValue, true) == null;
        if (eof) {
            lastReader = null;
            return null;
        }
    }
    boolean eof = false;
    //skip over keys until find the one that the user is asking for. This should rarely 
    //occur as the user normally asks for sequential keys
    while (!eof && nextKey.compareTo(key) < 0)
        eof = !lastReader.next(nextKey, nextValue);
    //If the requested key is not in the current MapFile, reset the process and 
    //search in the next MapFile using recursive call
    if (eof) {
        lastReader = null;
        return get(key, val);
    }
    if (nextKey.equals(key)) {
        val.set(nextValue.get());
        //update nextKey and nextValue for the next call
        eof = !lastReader.next(nextKey, nextValue);
        if (eof)
            lastReader = null;
        return val;
    }
    return null;
}

From source file:csvToSequence.ConvertToSeqLargeTxtVec.java

/**
 * Converts a CSV file into a sequence file of (name, vector) records, streaming one
 * line at a time.
 *
 * <p>The first line of the input is skipped. For each remaining line the first eight
 * columns are parsed as doubles and form the vector. The ninth column is parsed as an
 * integer and turned into the record name: {@code "Fraud/1"} when it equals 1,
 * otherwise {@code "Normal/<value>"}. Only the unnamed delegate vector is written as
 * the value; the name is written as the {@link Text} key.
 *
 * @param args unused
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws IOException {
    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    // Nested try/finally so both the writer and the reader are closed on every path.
    try {
        VectorWritable vec = new VectorWritable();

        BufferedReader br = new BufferedReader(new FileReader(filename));
        try {
            br.readLine(); // skip the first line

            String s;
            while ((s = br.readLine()) != null) {
                String[] value = s.split(",");
                double[] numValue = new double[8];

                for (int i = 0; i < 8; i++) {
                    numValue[i] = Double.parseDouble(value[i]);
                }

                String prefix = Integer.parseInt(value[8]) == 1 ? "Fraud/" : "Normal/";
                NamedVector oneV = new NamedVector(new DenseVector(numValue), prefix + value[8]);

                vec.set(oneV.getDelegate());
                writer.append(new Text(oneV.getName()), vec);
            }
        } finally {
            br.close();
        }
    } finally {
        writer.close();
    }
}

From source file:csvToSequence.ConvertToSeqTextVecWritable.java

/**
 * Converts a CSV file into a sequence file of (name, vector) records, collecting all
 * vectors in memory before writing them.
 *
 * <p>The first line of the input is skipped. For each remaining line the first eight
 * columns are parsed as doubles and form the vector. The ninth column is parsed as an
 * integer and turned into the record name: {@code "Fraud/1"} when it equals 1,
 * otherwise {@code "Normal/<value>"}. Only the unnamed delegate vector is written as
 * the value; the name is written as the {@link Text} key.
 *
 * @param args unused
 * @throws FileNotFoundException if the input file does not exist
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws FileNotFoundException, IOException {

    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();
    List<NamedVector> namedVectors = new ArrayList<>();

    // try-with-resources closes the reader even when a line fails to parse.
    try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
        br.readLine(); // skip the first line

        String s;
        while ((s = br.readLine()) != null) {
            String[] value = s.split(",");
            double[] numValue = new double[8];

            for (int i = 0; i < 8; i++) {
                numValue[i] = Double.parseDouble(value[i]);
            }

            String prefix = Integer.parseInt(value[8]) == 1 ? "Fraud/" : "Normal/";
            namedVectors.add(new NamedVector(new DenseVector(numValue), prefix + value[8]));
        }
    }

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    // The writer is only created once parsing succeeded, and is closed on every path.
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
            VectorWritable.class)) {
        VectorWritable vec = new VectorWritable();
        for (NamedVector iter : namedVectors) {
            vec.set(iter.getDelegate());
            writer.append(new Text(iter.getName()), vec);
        }
    }
}

From source file:de.tuberlin.dima.cuttlefish.preprocessing.vectorization.Vectorizer.java

License:Open Source License

/**
 * Turns every document of a Lucene index into a weighted sparse feature vector and
 * writes the vectors to {@code documentVectors.seq} in the output directory.
 *
 * <p>Works in two passes over the index:
 * <ol>
 *   <li>All terms of all fields except {@code itemID} and fields starting with
 *       {@code bip:} are registered in a {@link FeatureDictionary}.</li>
 *   <li>For each document, the term vectors of those fields are weighted and stored at
 *       the dictionary index of the term. Values of {@code bip:} fields are collected
 *       as codes. The normalized vector is appended to the sequence file under a key
 *       built from the document's {@code itemID} and its codes.</li>
 * </ol>
 * Finally the dictionary is written to {@code features.txt} in the output directory.
 *
 * @param luceneIndexDir directory holding the Lucene index to read
 * @param outputDir directory receiving the sequence file and the feature dictionary
 * @throws Exception propagated from index access or from writing the output
 */
public void vectorize(File luceneIndexDir, File outputDir) throws Exception {

    Configuration hadoopConf = new Configuration();
    FileSystem localFs = FileSystem.getLocal(hadoopConf);
    SequenceFile.Writer vectorWriter = null;

    FeatureDictionary dictionary = new FeatureDictionary();

    DirectoryReader indexReader = null;
    try {
        indexReader = DirectoryReader.open(new SimpleFSDirectory(luceneIndexDir));

        Path vectorsFile = new Path(outputDir.toString(), "documentVectors.seq");
        vectorWriter = SequenceFile.createWriter(localFs, hadoopConf, vectorsFile, IDAndCodes.class,
                VectorWritable.class);
        // Key and value holders are reused for every appended record.
        IDAndCodes recordKey = new IDAndCodes();
        VectorWritable recordValue = new VectorWritable();

        // Pass 1: register every term of every text field in the dictionary.
        Fields indexFields = MultiFields.getFields(indexReader);
        if (indexFields != null) {
            for (Iterator<String> names = indexFields.iterator(); names.hasNext();) {
                String name = names.next();
                if (name.startsWith("bip:") || "itemID".equals(name)) {
                    continue;
                }

                TermsEnum fieldTerms = indexFields.terms(name).iterator(null);
                BytesRef rawTerm;
                while ((rawTerm = fieldTerms.next()) != null) {
                    dictionary.addTextFeature(name, rawTerm.utf8ToString());
                }
            }
        }

        // Pass 2: build, normalize and write one vector per document.
        int numDocsVectorized = 0;

        for (int docID = 0; docID < indexReader.maxDoc(); docID++) {
            Document doc = indexReader.document(docID);

            int itemID = doc.getField("itemID").numericValue().intValue();

            RandomAccessSparseVector weights = new RandomAccessSparseVector(dictionary.numFeatures());
            Multimap<String, String> codes = HashMultimap.create();

            for (IndexableField docField : doc.getFields()) {

                String fieldName = docField.name();

                if (fieldName.startsWith("bip:")) {
                    // Code fields are not vectorized; their values travel with the key.
                    for (String code : doc.getValues(fieldName)) {
                        codes.put(fieldName, code);
                    }
                } else if (!"itemID".equals(fieldName)) {

                    Terms termVector = indexReader.getTermVector(docID, fieldName);
                    if (termVector == null) {
                        continue;
                    }

                    int maxTermFrequency = maxTermFrequency(termVector);

                    TermsEnum vectorTerms = termVector.iterator(null);
                    BytesRef rawTerm;
                    while ((rawTerm = vectorTerms.next()) != null) {

                        String termText = rawTerm.utf8ToString();
                        int termFrequency = (int) vectorTerms.totalTermFreq();

                        int documentFrequency = indexReader.docFreq(new Term(fieldName, rawTerm));
                        int numDocs = indexReader.numDocs();

                        double weight = weighting.weight(fieldName, termText, termFrequency,
                                documentFrequency, maxTermFrequency, numDocs);

                        weights.setQuick(dictionary.index(fieldName, termText), weight);
                    }
                }
            }

            Vector featureVector = new SequentialAccessSparseVector(weights);
            weighting.normalize(featureVector);

            recordKey.set(itemID, codes);
            recordValue.set(featureVector);
            vectorWriter.append(recordKey, recordValue);

            numDocsVectorized++;
            // Progress message every 100 documents.
            if (numDocsVectorized % 100 == 0) {
                log.info("Vectorized {} documents", numDocsVectorized);
            }
        }

        log.info("Vectorized {} documents", numDocsVectorized);

        dictionary.writeToFile(new File(outputDir, "features.txt"));

        log.info("Wrote feature dictionary");

    } finally {
        Closeables.close(indexReader, true);
        Closeables.close(vectorWriter, true);
    }

}