Example usage for the org.apache.mahout.math.VectorWritable constructor VectorWritable()

Introduction

On this page you can find example usages of the no-argument constructor VectorWritable() of org.apache.mahout.math.VectorWritable.

Prototype

public VectorWritable()

Source Link

Usage

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

/**
 * Reads every (key, tuple) record from the peer, scales one of the two
 * vectors in the tuple by each non-zero entry of the other, and sends each
 * scaled vector to {@code masterTask} as a {@link MatrixRowMessage} keyed by
 * the entry's index. Calls {@code peer.sync()} once all input is consumed.
 *
 * @param peer the BSP peer supplying input records and message delivery
 * @throws IOException if reading, sending or debug logging fails
 * @throws SyncException if the final synchronization fails
 * @throws InterruptedException if the peer is interrupted
 */
@Override
public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer)
        throws IOException, SyncException, InterruptedException {

    final IntWritable inputKey = new IntWritable();
    final TupleWritable inputTuple = new TupleWritable();

    while (peer.readNext(inputKey, inputTuple)) {

        // Dump every vector of the incoming tuple when debugging.
        if (isDebuggingEnabled) {
            for (int pos = 0; pos < inputTuple.size(); pos++) {
                Vector logged = ((VectorWritable) inputTuple.get(pos)).get();
                logger.writeChars("bsp,input,key=" + inputKey + ",value=" + logged.toString() + "\n");
            }
        }

        final Vector first = ((VectorWritable) inputTuple.get(0)).get();
        final Vector second = ((VectorWritable) inputTuple.get(1)).get();

        // outCardinality is the resulting column size n:
        // (l x m) * (m x n) = (l x n)
        final boolean secondHasOutCardinality = second.size() == outCardinality;

        // outFrag: row of the matrix with the resulting column cardinality (matrixB).
        // multiplier: row of the matrix with the resulting row count (transposed matrixA).
        final Vector outFrag;
        final Vector multiplier;
        if (secondHasOutCardinality) {
            outFrag = second;
            multiplier = first;
        } else {
            outFrag = first;
            multiplier = second;
        }

        if (isDebuggingEnabled) {
            logger.writeChars("bsp,firstIsOutFrag=" + secondHasOutCardinality + "\n");
            logger.writeChars("bsp,outFrag=" + outFrag + "\n");
            logger.writeChars("bsp,multiplier=" + multiplier + "\n");
        }

        for (Vector.Element entry : multiplier.nonZeroes()) {
            // Scalar multiplication (vector x element).
            VectorWritable scaledRow = new VectorWritable();
            scaledRow.set(outFrag.times(entry.get()));

            peer.send(masterTask, new MatrixRowMessage(entry.index(), scaledRow));

            if (isDebuggingEnabled) {
                logger.writeChars(
                        "bsp,send,key=" + entry.index() + ",value=" + scaledRow.get().toString() + "\n");
            }
        }

        if (isDebuggingEnabled) {
            logger.flush();
        }
    }

    peer.sync();
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Reads a sequence file of (IntWritable, VectorWritable) records into a
 * dense two-dimensional array, one array row per record in file order.
 * The record key is read but not used to position the row.
 *
 * @param fileName path of the sequence file to read
 * @return the matrix rows; an empty array if the file holds no records
 * @throws Exception if the file cannot be opened or read
 */
public static double[][] readMatrixSequenceFile(String fileName) throws Exception {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration);

    List<double[]> rows = new ArrayList<double[]>();
    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();
        while (matrixReader.next(key, value)) {
            Vector vector = value.get();
            double[] row = new double[vector.size()];
            // Walk every index of the vector. Bounding the loop by the number
            // of non-default elements only visits the first k indices, which
            // drops values whenever the vector is sparse.
            for (int i = 0; i < row.length; i++) {
                row[i] = vector.get(i);
            }
            rows.add(row);
        }
    } finally {
        // Release the underlying stream even when a read fails.
        matrixReader.close();
    }
    return rows.toArray(new double[rows.size()][]);
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Writes a matrix to a sequence file as (IntWritable, VectorWritable)
 * records: the key is the row index and the value is a DenseVector built
 * from that row. An empty matrix produces a file with no records.
 *
 * @param matrixSeqFileName path of the sequence file to create
 * @param covarianceMatrix the matrix rows to write
 * @throws Exception if the file cannot be created or written
 */
public static void writeMatrixSequenceFile(String matrixSeqFileName, double[][] covarianceMatrix)
        throws Exception {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer matrixWriter = new SequenceFile.Writer(fs, configuration, new Path(matrixSeqFileName),
            IntWritable.class, VectorWritable.class);

    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();

        // Iterating over the rows directly avoids indexing row 0 up front,
        // which failed for an empty matrix.
        for (int i = 0; i < covarianceMatrix.length; i++) {
            key.set(i);
            // The record is serialized by append() before the next iteration,
            // so no intermediate copy of the row is required.
            value.set(new DenseVector(covarianceMatrix[i]));
            matrixWriter.append(key, value);
        }
    } finally {
        // Always close so buffered records are flushed and the handle is
        // released even when an append fails.
        matrixWriter.close();
    }
}

From source file:com.gsvic.csmr.DocumentWritable.java

License:Apache License

/**
 * Creates a document whose key is a new empty {@code Text} and whose value
 * is a new {@code VectorWritable} created with its no-argument constructor.
 */
public DocumentWritable() {
    this.key = new Text();
    this.value = new VectorWritable();
}

From source file:com.gsvic.csmr.io.InputData.java

License:Apache License

/**
 * Reads a vectorized text sequence file (for example tf or tf-idf vectors)
 * into memory.
 *
 * @param conf the Hadoop configuration used to obtain the file system
 * @param input path of the sequence file holding (Text, VectorWritable) records
 * @return a map from a copy of each record key to a new VectorWritable
 *         wrapping that record's vector
 * @throws IOException if the file cannot be opened or read
 */
public static HashMap<Text, VectorWritable> vectorizedTextReader(Configuration conf, Path input)
        throws IOException {

    FileSystem fs = FileSystem.get(conf);
    HashMap<Text, VectorWritable> dcf = new HashMap<>();

    // try-with-resources closes the reader on every exit path.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, input, conf)) {
        Text key = new Text();
        VectorWritable value = new VectorWritable();

        while (reader.next(key, value)) {
            // key and value are reused by the reader on each call, so store
            // fresh wrapper objects in the map.
            dcf.put(new Text(key.toString()), new VectorWritable(value.get()));
        }
    }

    return dcf;
}

From source file:com.lakhani.anchorgraph.applestovectors.java

/**
 * Builds five named apple feature vectors, writes them to a sequence file
 * as (Text name, VectorWritable vector) records, then prints the records
 * of a sequence file to standard output.
 *
 * @param args unused
 * @throws Exception if any file-system operation fails
 */
public static void main(String args[]) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();

    apples.add(new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple"));
    // This fifth vector was previously constructed but never added to the
    // list, so it was silently missing from the output file.
    apples.add(new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path("/user/cloudera/anchorgraph/output");
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : apples) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        // Close even if an append fails so the handle is not leaked.
        writer.close();
    }

    // NOTE(review): this reads "appledata/apples", not the path written
    // above — confirm whether the two paths are meant to be the same file.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("appledata/apples"), conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}

From source file:com.luca.filipponi.tweetAnalysis.SentimentClassifier.CustomTestNaiveBayesDriver.java

License:Apache License

/**
 * Parses the command-line options, classifies the input vectors either
 * sequentially in this process or through {@code runMapReduce}, then loads
 * the label index and analyzes the classification output.
 *
 * @param args command-line arguments
 * @return -1 if argument parsing or the map-reduce run fails, 0 otherwise
 * @throws Exception if reading the model, input or output fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    // NOTE(review): the nested addOption call looks like it registers the
    // overwrite option twice — confirm this is intended.
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }
        // Nested try/finally guarantees both files are closed even when
        // classification or I/O throws part-way through.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, getInputPath(), getConf());
            try {
                Text key = new Text();
                VectorWritable vw = new VectorWritable();
                while (reader.next(key, vw)) {
                    // The output key is the second segment of the input key
                    // after splitting with SLASH.
                    writer.append(new Text(SLASH.split(key.toString())[1]),
                            new VectorWritable(classifier.classifyFull(vw.get())));
                }
            } finally {
                reader.close();
            }
        } finally {
            writer.close();
        }
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    //load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    //loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}

From source file:com.missionsky.scp.dataanalysis.mahout.TestNaiveBayesDriver.java

License:Apache License

/**
 * Parses the command-line options, classifies the input vectors either
 * sequentially in this process or through {@code runMapReduce}, then loads
 * the label index and analyzes the classification output.
 *
 * @param args command-line arguments
 * @return -1 if argument parsing or the map-reduce run fails, 0 otherwise
 * @throws Exception if reading the model, input or output fails
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    // NOTE(review): the nested addOption call looks like it registers the
    // overwrite option twice — confirm this is intended.
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }
        // Nested try/finally guarantees both files are closed even when
        // classification or I/O throws part-way through.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        try {
            Reader reader = new Reader(fs, getInputPath(), getConf());
            try {
                Text key = new Text();
                VectorWritable vw = new VectorWritable();
                while (reader.next(key, vw)) {
                    // The output key is the second segment of the input key
                    // after splitting with SLASH.
                    writer.append(new Text(SLASH.split(key.toString())[1]),
                            new VectorWritable(classifier.classifyFull(vw.get())));
                }
            } finally {
                reader.close();
            }
        } finally {
            writer.close();
        }
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    //load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    //loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}

From source file:com.mozilla.grouperfish.mahout.clustering.display.lda.OriginalText.java

License:Apache License

/**
 * Groups documents by their highest-scoring topic.
 *
 * For every (docId, vector) record under {@code docTopicsPath}, finds the
 * vector index with the largest positive value and enqueues the document,
 * with that score, in the queue for that index. Prints the maximum score
 * seen per topic to standard output. An IOException while reading is
 * logged, and whatever was collected up to that point is returned.
 *
 * @param docTopicsPath location of the document-topic sequence files
 * @param numDocs initial capacity of each per-topic queue; also passed to
 *        {@code enqueue} (presumably as a size bound — confirm against
 *        that helper)
 * @return map from topic index to the queue of (score, docId) pairs
 */
public static Map<Integer, PriorityQueue<Pair<Double, String>>> getDocIds(Path docTopicsPath, int numDocs) {
    Map<Integer, PriorityQueue<Pair<Double, String>>> docIdMap = new HashMap<Integer, PriorityQueue<Pair<Double, String>>>();
    Map<Integer, Double> maxDocScores = new HashMap<Integer, Double>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        Text k = new Text();
        VectorWritable vw = new VectorWritable();
        pointsReader = new SequenceFileDirectoryReader(docTopicsPath);
        while (pointsReader.next(k, vw)) {
            String docId = k.toString();
            Vector normGamma = vw.get();
            Iterator<Element> iter = normGamma.iterateNonZero();
            double maxTopicScore = 0.0;
            int topic = 0;
            while (iter.hasNext()) {
                Element e = iter.next();
                double score = e.get();
                if (score > maxTopicScore) {
                    maxTopicScore = score;
                    // Use the element's own index. A running counter over the
                    // non-zero iterator drifts from the real topic index as
                    // soon as any entry of the vector is zero.
                    topic = e.index();
                }
            }

            PriorityQueue<Pair<Double, String>> docIdsForTopic = docIdMap.get(topic);
            if (docIdsForTopic == null) {
                docIdsForTopic = new PriorityQueue<Pair<Double, String>>(numDocs);
                docIdMap.put(topic, docIdsForTopic);
            }

            Double maxDocScoreForTopic = maxDocScores.get(topic);
            if (maxDocScoreForTopic == null) {
                maxDocScoreForTopic = 0.0;
            }
            if (maxTopicScore > maxDocScoreForTopic) {
                maxDocScores.put(topic, maxTopicScore);
            }

            enqueue(docIdsForTopic, docId, maxTopicScore, numDocs);
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }

    for (Map.Entry<Integer, Double> entry : maxDocScores.entrySet()) {
        System.out.println("For topic: " + entry.getKey() + " max score: " + entry.getValue());
    }

    return docIdMap;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

/**
 * Converts one Pig tuple into a (IntWritable, VectorWritable) record and
 * writes it.
 *
 * Field 0 of {@code t} is the row key; field 1 is a tuple describing the
 * row. Each entry of the row tuple is either a number, stored at the
 * entry's own position, or a nested (column, value) tuple, stored at the
 * given column. The row becomes a NamedVector named after the key, backed
 * by a dense, sequential-access sparse or random-access sparse vector
 * depending on STORE_AS_DENSE / STORE_AS_SEQUENTIAL.
 *
 * @param t the tuple to store
 * @throws IOException if writing fails or the write is interrupted
 * @throws IllegalArgumentException if {@code dimensions} is 0
 * @throws RuntimeException if a row entry is neither numeric nor a tuple
 */
@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    // Convert through Number so any integral Pig type is accepted as the key.
    outputKey.set(((Number) t.get(0)).intValue());
    Tuple currRow = (Tuple) t.get(1);
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    } else {
        currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            // A direct (Double) cast throws ClassCastException for the
            // INTEGER, LONG and FLOAT cases; widen through Number instead.
            currRowVector.set(ii, ((Number) o).doubleValue());
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set column and element
            Tuple subt = (Tuple) o;
            currRowVector.set(((Number) subt.get(0)).intValue(), ((Number) subt.get(1)).doubleValue());
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        // Restore the interrupt flag for callers up the stack and surface
        // the failure, so the record is not silently dropped.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while writing", e);
        throw new IOException("Interrupted while writing", e);
    }
}