Example usage for the org.apache.mahout.math.VectorWritable constructor VectorWritable()

List of usage examples for the org.apache.mahout.math.VectorWritable constructor VectorWritable()

Introduction

On this page you can find example usages of the no-argument constructor VectorWritable() of org.apache.mahout.math.VectorWritable.

Prototype

public VectorWritable() 

Source Link

Usage

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

/**
 * Consumes every (key, tuple) input record, multiplies the two vectors found in
 * the tuple element-wise-by-scalar and ships one {@code MatrixRowMessage} per
 * non-zero element of the multiplier vector to {@code masterTask}. After all
 * input has been read the peer enters a barrier via {@code peer.sync()}.
 *
 * @param peer the BSP peer used to read input, send messages and synchronize
 * @throws IOException          propagated from reading input, sending messages or debug logging
 * @throws SyncException        propagated from {@code peer.sync()}
 * @throws InterruptedException propagated from {@code peer.sync()}
 */
@Override
public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer)
        throws IOException, SyncException, InterruptedException {

    final IntWritable rowKey = new IntWritable();
    final TupleWritable rowPair = new TupleWritable();

    while (peer.readNext(rowKey, rowPair)) {

        // Debug trace of every vector contained in the incoming tuple
        if (isDebuggingEnabled) {
            int pos = 0;
            while (pos < rowPair.size()) {
                Vector traced = ((VectorWritable) rowPair.get(pos)).get();
                logger.writeChars("bsp,input,key=" + rowKey + ",value=" + traced.toString() + "\n");
                pos++;
            }
        }

        final Vector left = ((VectorWritable) rowPair.get(0)).get();
        final Vector right = ((VectorWritable) rowPair.get(1)).get();

        // outCardinality is the resulting column size n: (l x m) * (m x n) = (l x n).
        // The flag is true when the SECOND vector has that cardinality; it is
        // still logged under the historical label "firstIsOutFrag".
        final boolean secondHasOutCardinality = right.size() == outCardinality;

        // outFrag:    the vector carrying the resulting column cardinality (matrixB)
        // multiplier: the vector carrying the resulting row count (transposed matrixA)
        final Vector outFrag;
        final Vector multiplier;
        if (secondHasOutCardinality) {
            outFrag = right;
            multiplier = left;
        } else {
            outFrag = left;
            multiplier = right;
        }

        if (isDebuggingEnabled) {
            logger.writeChars("bsp,firstIsOutFrag=" + secondHasOutCardinality + "\n");
            logger.writeChars("bsp,outFrag=" + outFrag + "\n");
            logger.writeChars("bsp,multiplier=" + multiplier + "\n");
        }

        for (Vector.Element element : multiplier.nonZeroes()) {
            final double factor = element.get();
            final int rowIndex = element.index();

            // Scalar multiplication (vector x element)
            final VectorWritable scaled = new VectorWritable();
            scaled.set(outFrag.times(factor));

            peer.send(masterTask, new MatrixRowMessage(rowIndex, scaled));

            if (isDebuggingEnabled) {
                logger.writeChars("bsp,send,key=" + rowIndex + ",value=" + scaled.get().toString() + "\n");
            }
        }

        if (isDebuggingEnabled) {
            logger.flush();
        }
    }

    peer.sync();
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Reads a sequence file of (IntWritable, VectorWritable) records into a dense
 * two-dimensional array, one array row per record.
 *
 * <p>Rows are stored in the order the records are read; the record key is read
 * but not used for positioning. Every index of each vector is copied, so the
 * result is correct for sparse as well as dense vectors.
 *
 * @param fileName path of the sequence file to read
 * @return the matrix rows, in file order; an empty array if the file has no records
 * @throws Exception if the file system cannot be obtained or the file cannot be read
 */
public static double[][] readMatrixSequenceFile(String fileName) throws Exception {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration);

    List<double[]> rows = new ArrayList<double[]>();
    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();
        while (matrixReader.next(key, value)) {
            Vector vector = value.get();
            double[] row = new double[vector.size()];
            // Copy by index: pairing getNumNondefaultElements() with getElement(i)
            // only visits indices 0..k-1 and therefore drops entries of sparse vectors.
            for (int i = 0; i < row.length; i++) {
                row[i] = vector.get(i);
            }
            rows.add(row);
        }
    } finally {
        // Release the underlying stream even when reading fails.
        matrixReader.close();
    }
    return rows.toArray(new double[rows.size()][]);
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Writes a matrix to a sequence file as (IntWritable, VectorWritable) records,
 * one record per row, keyed by the zero-based row index.
 *
 * <p>The column count is taken from the first row and applied to every row.
 * An empty matrix produces a sequence file without records.
 *
 * @param matrixSeqFileName path of the sequence file to create
 * @param covarianceMatrix  the matrix to write; must not be {@code null}
 * @throws Exception if the file system cannot be obtained or the file cannot be written
 */
public static void writeMatrixSequenceFile(String matrixSeqFileName, double[][] covarianceMatrix)
        throws Exception {
    int rowCount = covarianceMatrix.length;
    // An empty matrix has no first row to measure; treat it as zero columns.
    int columnCount = rowCount == 0 ? 0 : covarianceMatrix[0].length;

    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer matrixWriter = new SequenceFile.Writer(fs, configuration, new Path(matrixSeqFileName),
            IntWritable.class, VectorWritable.class);
    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();

        // Scratch buffer reused for every row.
        double[] doubleValues = new double[columnCount];
        for (int i = 0; i < rowCount; i++) {
            key.set(i);
            System.arraycopy(covarianceMatrix[i], 0, doubleValues, 0, columnCount);
            Vector vector = new DenseVector(doubleValues);
            value.set(vector);

            matrixWriter.append(key, value);
        }
    } finally {
        // Close the writer even when an append fails so the stream is not leaked.
        matrixWriter.close();
    }
}

From source file:com.gsvic.csmr.DocumentWritable.java

License:Apache License

/**
 * Creates a document holder whose key is a new, empty {@code Text} and whose
 * value is a new {@code VectorWritable} with no vector assigned yet.
 */
public DocumentWritable() {
    this.key = new Text();
    this.value = new VectorWritable();
}

From source file:com.gsvic.csmr.io.InputData.java

License:Apache License

/**
 * Reads a vectorized text file (for example tf-idf or tf vectors) stored as a
 * sequence file of (Text, VectorWritable) records.
 *
 * @param conf  configuration used to obtain the file system and open the file
 * @param input path of the sequence file to read
 * @return a map from each record key to its vector; if a key occurs more than
 *         once, the last record read wins
 * @throws IOException if the file system cannot be obtained or the file cannot be read
 */
public static HashMap<Text, VectorWritable> vectorizedTextReader(Configuration conf, Path input)
        throws IOException {

    FileSystem fs = FileSystem.get(conf);
    HashMap<Text, VectorWritable> dcf = new HashMap<>();

    // try-with-resources guarantees the reader is closed, also on failure.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, input, conf)) {
        Text key = new Text();
        VectorWritable value = new VectorWritable();

        while (reader.next(key, value)) {
            // The same key/value instances are passed to every next() call,
            // so fresh wrappers are stored in the map.
            dcf.put(new Text(key.toString()), new VectorWritable(value.get()));
        }
    }

    return dcf;
}

From source file:com.lakhani.anchorgraph.applestovectors.java

/**
 * Builds five named apple feature vectors, writes them to a sequence file as
 * (Text name, VectorWritable vector) records, then reads the same file back
 * and prints every record to standard output.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the file system cannot be obtained or the file cannot be written or read
 */
public static void main(String[] args) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();

    apples.add(new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple"));
    // Previously this vector was created but never added, so it was never written.
    apples.add(new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path("/user/cloudera/anchorgraph/output");
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : apples) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        // Close even when an append fails so the output stream is not leaked.
        writer.close();
    }

    // Read back the file that was just written; the earlier code opened a
    // different path ("appledata/apples") that this program never creates.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}

From source file:com.luca.filipponi.tweetAnalysis.SentimentClassifier.CustomTestNaiveBayesDriver.java

License:Apache License

/**
 * Tests a trained naive Bayes model against the input vectors and logs the
 * resulting analysis.
 *
 * <p>Depending on the {@code runSequential} option the input is classified
 * either in-process (reading the input sequence file and writing one score
 * vector per record to the output path) or via {@code runMapReduce}. The
 * output is then read back and passed to {@code analyzeResults} together with
 * the label index.
 *
 * @param args command-line arguments, parsed by {@code parseArguments}
 * @return {@code 0} on success; {@code -1} if the arguments could not be parsed
 *         or {@code runMapReduce} reported failure
 * @throws Exception propagated from option handling, file access or classification
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }
        // Nested try/finally so that both streams are released even when
        // reading, classifying or appending throws.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        try {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, getInputPath(), getConf());
            try {
                Text key = new Text();
                VectorWritable vw = new VectorWritable();
                while (reader.next(key, vw)) {
                    // The output key is the second segment of the input key as split by SLASH
                    // (SLASH is defined outside this method; presumably it splits on '/' - confirm).
                    writer.append(new Text(SLASH.split(key.toString())[1]),
                            new VectorWritable(classifier.classifyFull(vw.get())));
                }
            } finally {
                reader.close();
            }
        } finally {
            writer.close();
        }
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    //load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    //loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}

From source file:com.missionsky.scp.dataanalysis.mahout.TestNaiveBayesDriver.java

License:Apache License

/**
 * Tests a trained naive Bayes model against the input vectors and logs the
 * resulting analysis.
 *
 * <p>When the {@code runSequential} option is set, the input sequence file is
 * classified in-process and one score vector per record is written to the
 * output path; otherwise the work is delegated to {@code runMapReduce}. The
 * output is then read back and handed to {@code analyzeResults} together with
 * the label index.
 *
 * @param args command-line arguments, parsed by {@code parseArguments}
 * @return {@code 0} on success; {@code -1} if the arguments could not be parsed
 *         or {@code runMapReduce} reported failure
 * @throws Exception propagated from option handling, file access or classification
 */
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption(addOption(DefaultOptionCreator.overwriteOption().create()));
    addOption("model", "m", "The path to the model built during training", true);
    addOption(
            buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false)));
    addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false)));
    addOption("labelIndex", "l", "The path to the location of the label index", true);
    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
        HadoopUtil.delete(getConf(), getOutputPath());
    }

    boolean complementary = hasOption("testComplementary");
    boolean sequential = hasOption("runSequential");
    if (sequential) {
        FileSystem fs = FileSystem.get(getConf());
        NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf());
        AbstractNaiveBayesClassifier classifier;
        if (complementary) {
            classifier = new ComplementaryNaiveBayesClassifier(model);
        } else {
            classifier = new StandardNaiveBayesClassifier(model);
        }
        // Nested try/finally: neither stream may leak if reading,
        // classifying or appending fails part-way through.
        SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class,
                VectorWritable.class);
        try {
            Reader reader = new Reader(fs, getInputPath(), getConf());
            try {
                Text key = new Text();
                VectorWritable vw = new VectorWritable();
                while (reader.next(key, vw)) {
                    // The output key is the second segment of the input key as split by SLASH
                    // (SLASH is defined outside this method; presumably it splits on '/' - confirm).
                    writer.append(new Text(SLASH.split(key.toString())[1]),
                            new VectorWritable(classifier.classifyFull(vw.get())));
                }
            } finally {
                reader.close();
            }
        } finally {
            writer.close();
        }
    } else {
        boolean succeeded = runMapReduce(parsedArgs);
        if (!succeeded) {
            return -1;
        }
    }

    //load the labels
    Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex")));

    //loop over the results and create the confusion matrix
    SequenceFileDirIterable<Text, VectorWritable> dirIterable = new SequenceFileDirIterable<Text, VectorWritable>(
            getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf());
    ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT");
    analyzeResults(labelMap, dirIterable, analyzer);

    log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer);
    return 0;
}

From source file:com.mozilla.grouperfish.mahout.clustering.display.lda.OriginalText.java

License:Apache License

/**
 * Groups document ids by their highest-scoring topic.
 *
 * <p>For every (document id, topic vector) record under {@code docTopicsPath}
 * the topic with the largest positive score is determined and the document is
 * handed to {@code enqueue} for that topic's queue. A document whose vector has
 * no positive entry is assigned to topic 0 with score 0.0. The best score seen
 * per topic is printed to standard output before returning.
 *
 * <p>An {@code IOException} while reading is logged and not rethrown; the map
 * built up to that point is returned.
 *
 * @param docTopicsPath location of the document/topic sequence file(s)
 * @param numDocs       initial capacity of each per-topic queue, also forwarded to
 *                      {@code enqueue} (presumably the number of documents kept per
 *                      topic - confirm against {@code enqueue}); must be at least 1
 * @return a map from topic index to the queue of (score, document id) pairs
 */
public static Map<Integer, PriorityQueue<Pair<Double, String>>> getDocIds(Path docTopicsPath, int numDocs) {
    Map<Integer, PriorityQueue<Pair<Double, String>>> docIdMap = new HashMap<Integer, PriorityQueue<Pair<Double, String>>>();
    Map<Integer, Double> maxDocScores = new HashMap<Integer, Double>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        Text k = new Text();
        VectorWritable vw = new VectorWritable();
        pointsReader = new SequenceFileDirectoryReader(docTopicsPath);
        while (pointsReader.next(k, vw)) {
            String docId = k.toString();
            Vector normGamma = vw.get();
            Iterator<Element> iter = normGamma.iterateNonZero();
            double maxTopicScore = 0.0;
            int topic = 0;
            while (iter.hasNext()) {
                Element e = iter.next();
                double score = e.get();
                if (score > maxTopicScore) {
                    maxTopicScore = score;
                    // Use the element's own index: the iterator skips zero entries,
                    // so a running counter would drift away from the real topic id.
                    topic = e.index();
                }
            }

            PriorityQueue<Pair<Double, String>> docIdsForTopic = docIdMap.get(topic);
            if (docIdsForTopic == null) {
                docIdsForTopic = new PriorityQueue<Pair<Double, String>>(numDocs);
            }

            Double maxDocScoreForTopic = maxDocScores.get(topic);
            if (maxDocScoreForTopic == null) {
                maxDocScoreForTopic = 0.0;
            }
            if (maxTopicScore > maxDocScoreForTopic) {
                maxDocScores.put(topic, maxTopicScore);
            }

            enqueue(docIdsForTopic, docId, maxTopicScore, numDocs);
            docIdMap.put(topic, docIdsForTopic);
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }

    for (Map.Entry<Integer, Double> entry : maxDocScores.entrySet()) {
        System.out.println("For topic: " + entry.getKey() + " max score: " + entry.getValue());
    }

    return docIdMap;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    outputKey.set((Integer) t.get(0));
    Tuple currRow = (Tuple) t.get(1);/*w  w w  .j ava 2s . c om*/
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    } else {
        currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            currRowVector.set(ii, (Double) o);
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set column and element
            Tuple subt = (Tuple) o;
            currRowVector.set((Integer) subt.get(0), (Double) subt.get(1));
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        LOG.error("Interrupted while writing", e);
    }
}