Usage examples for the no-argument constructor of org.apache.mahout.math.VectorWritable
public VectorWritable()
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java
License:Apache License
@Override public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer) throws IOException, SyncException, InterruptedException { IntWritable key = new IntWritable(); TupleWritable value = new TupleWritable(); while (peer.readNext(key, value)) { // Logging if (isDebuggingEnabled) { for (int i = 0; i < value.size(); i++) { Vector vector = ((VectorWritable) value.get(i)).get(); logger.writeChars("bsp,input,key=" + key + ",value=" + vector.toString() + "\n"); }/* www .j ava 2 s. co m*/ } Vector firstVector = ((VectorWritable) value.get(0)).get(); Vector secondVector = ((VectorWritable) value.get(1)).get(); // outCardinality is resulting column size n // (l x m) * (m x n) = (l x n) boolean firstIsOutFrag = secondVector.size() == outCardinality; // outFrag is Matrix which has the resulting column cardinality // (matrixB) Vector outFrag = firstIsOutFrag ? secondVector : firstVector; // multiplier is Matrix which has the resulting row count // (transposed matrixA) Vector multiplier = firstIsOutFrag ? firstVector : secondVector; if (isDebuggingEnabled) { logger.writeChars("bsp,firstIsOutFrag=" + firstIsOutFrag + "\n"); logger.writeChars("bsp,outFrag=" + outFrag + "\n"); logger.writeChars("bsp,multiplier=" + multiplier + "\n"); } for (Vector.Element e : multiplier.nonZeroes()) { VectorWritable outVector = new VectorWritable(); // Scalar Multiplication (Vector x Element) outVector.set(outFrag.times(e.get())); peer.send(masterTask, new MatrixRowMessage(e.index(), outVector)); if (isDebuggingEnabled) { logger.writeChars("bsp,send,key=" + e.index() + ",value=" + outVector.get().toString() + "\n"); } } if (isDebuggingEnabled) { logger.flush(); } } peer.sync(); }
From source file:com.chimpler.example.eigenface.Helper.java
License:Apache License
public static double[][] readMatrixSequenceFile(String fileName) throws Exception { Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(configuration); Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration); List<double[]> rows = new ArrayList<double[]>(); IntWritable key = new IntWritable(); VectorWritable value = new VectorWritable(); while (matrixReader.next(key, value)) { Vector vector = value.get(); double[] row = new double[vector.size()]; for (int i = 0; i < vector.getNumNondefaultElements(); i++) { Element element = vector.getElement(i); row[element.index()] = element.get(); }//from w ww. j av a2s .c om rows.add(row); } return rows.toArray(new double[rows.size()][]); }
From source file:com.chimpler.example.eigenface.Helper.java
License:Apache License
/**
 * Writes a matrix to a sequence file as (IntWritable row index, VectorWritable row) records.
 *
 * <p>The column count is taken from the first row and applied to every row; a row shorter
 * than the first one causes an index-out-of-bounds exception. An empty matrix produces a
 * sequence file without records.
 *
 * @param matrixSeqFileName path of the sequence file to create
 * @param covarianceMatrix  the matrix to write, one inner array per row
 * @throws Exception if the file system or the sequence file cannot be written
 */
public static void writeMatrixSequenceFile(String matrixSeqFileName, double[][] covarianceMatrix)
        throws Exception {
    int rowCount = covarianceMatrix.length;
    // Guard the first-row lookup so an empty matrix does not fail before anything is written.
    int columnCount = rowCount == 0 ? 0 : covarianceMatrix[0].length;

    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Writer matrixWriter = new SequenceFile.Writer(fs, configuration, new Path(matrixSeqFileName),
            IntWritable.class, VectorWritable.class);
    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();
        double[] doubleValues = new double[columnCount];
        for (int i = 0; i < rowCount; i++) {
            key.set(i);
            System.arraycopy(covarianceMatrix[i], 0, doubleValues, 0, columnCount);
            Vector vector = new DenseVector(doubleValues);
            value.set(vector);
            matrixWriter.append(key, value);
        }
    } finally {
        // Close on every path so a failed append does not leak the output stream.
        matrixWriter.close();
    }
}
From source file:com.gsvic.csmr.DocumentWritable.java
License:Apache License
/**
 * Creates a document holder whose key is an empty {@code Text} and whose value is an
 * empty {@code VectorWritable}.
 */
public DocumentWritable() {
    this.key = new Text();
    this.value = new VectorWritable();
}
From source file:com.gsvic.csmr.io.InputData.java
License:Apache License
/**
 * Reads a vectorized text file (tf-idf or tf vectors) stored as a sequence file of
 * (Text, VectorWritable) records.
 *
 * <p>Each record's key is copied into a fresh {@code Text} and its vector is wrapped in a
 * fresh {@code VectorWritable}, because the reader reuses the same key/value holders for
 * every record. Records with equal keys overwrite one another in the returned map.
 *
 * @param conf  the Hadoop configuration used to resolve the file system
 * @param input path of the sequence file to read
 * @return the contents of the file as a map from document key to vector
 * @throws IOException if the file system or the sequence file cannot be read
 */
public static HashMap<Text, VectorWritable> vectorizedTextReader(Configuration conf, Path input)
        throws IOException {
    FileSystem fs = FileSystem.get(conf);
    HashMap<Text, VectorWritable> dcf = new HashMap<>();

    // try-with-resources guarantees the reader is closed on success and on failure.
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, input, conf)) {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            dcf.put(new Text(key.toString()), new VectorWritable(value.get()));
        }
    }

    return dcf;
}
From source file:com.lakhani.anchorgraph.applestovectors.java
public static void main(String args[]) throws Exception { List<NamedVector> apples = new ArrayList<NamedVector>(); NamedVector apple;// w ww .j a va 2 s.co m apple = new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple"); apples.add(apple); apple = new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple"); apples.add(apple); apple = new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple"); apples.add(apple); apple = new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple"); apples.add(apple); apple = new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple"); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); Path path = new Path("/user/cloudera/anchorgraph/output"); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class); VectorWritable vec = new VectorWritable(); for (NamedVector vector : apples) { vec.set(vector); writer.append(new Text(vector.getName()), vec); } writer.close(); SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("appledata/apples"), conf); Text key = new Text(); VectorWritable value = new VectorWritable(); while (reader.next(key, value)) { System.out.println(key.toString() + " " + value.get().asFormatString()); } reader.close(); }
From source file:com.luca.filipponi.tweetAnalysis.SentimentClassifier.CustomTestNaiveBayesDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from w w w . jav a 2 s. c o m*/ addOutputOption(); addOption(addOption(DefaultOptionCreator.overwriteOption().create())); addOption("model", "m", "The path to the model built during training", true); addOption( buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false))); addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false))); addOption("labelIndex", "l", "The path to the location of the label index", true); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), getOutputPath()); } boolean complementary = hasOption("testComplementary"); boolean sequential = hasOption("runSequential"); if (sequential) { FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); AbstractNaiveBayesClassifier classifier; if (complementary) { classifier = new ComplementaryNaiveBayesClassifier(model); } else { classifier = new StandardNaiveBayesClassifier(model); } SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class, VectorWritable.class); SequenceFile.Reader reader = new SequenceFile.Reader(fs, getInputPath(), getConf()); Text key = new Text(); VectorWritable vw = new VectorWritable(); while (reader.next(key, vw)) { writer.append(new Text(SLASH.split(key.toString())[1]), new VectorWritable(classifier.classifyFull(vw.get()))); } writer.close(); reader.close(); } else { boolean succeeded = runMapReduce(parsedArgs); if (!succeeded) { return -1; } } //load the labels Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); //loop over the results and create the confusion matrix SequenceFileDirIterable<Text, VectorWritable> dirIterable 
= new SequenceFileDirIterable<Text, VectorWritable>( getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer); log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer); return 0; }
From source file:com.missionsky.scp.dataanalysis.mahout.TestNaiveBayesDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();//from ww w.java 2s. c om addOutputOption(); addOption(addOption(DefaultOptionCreator.overwriteOption().create())); addOption("model", "m", "The path to the model built during training", true); addOption( buildOption("testComplementary", "c", "test complementary?", false, false, String.valueOf(false))); addOption(buildOption("runSequential", "seq", "run sequential?", false, false, String.valueOf(false))); addOption("labelIndex", "l", "The path to the location of the label index", true); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) { HadoopUtil.delete(getConf(), getOutputPath()); } boolean complementary = hasOption("testComplementary"); boolean sequential = hasOption("runSequential"); if (sequential) { FileSystem fs = FileSystem.get(getConf()); NaiveBayesModel model = NaiveBayesModel.materialize(new Path(getOption("model")), getConf()); AbstractNaiveBayesClassifier classifier; if (complementary) { classifier = new ComplementaryNaiveBayesClassifier(model); } else { classifier = new StandardNaiveBayesClassifier(model); } SequenceFile.Writer writer = new SequenceFile.Writer(fs, getConf(), getOutputPath(), Text.class, VectorWritable.class); Reader reader = new Reader(fs, getInputPath(), getConf()); Text key = new Text(); VectorWritable vw = new VectorWritable(); while (reader.next(key, vw)) { writer.append(new Text(SLASH.split(key.toString())[1]), new VectorWritable(classifier.classifyFull(vw.get()))); } writer.close(); reader.close(); } else { boolean succeeded = runMapReduce(parsedArgs); if (!succeeded) { return -1; } } //load the labels Map<Integer, String> labelMap = BayesUtils.readLabelIndex(getConf(), new Path(getOption("labelIndex"))); //loop over the results and create the confusion matrix SequenceFileDirIterable<Text, VectorWritable> dirIterable = new 
SequenceFileDirIterable<Text, VectorWritable>( getOutputPath(), PathType.LIST, PathFilters.partFilter(), getConf()); ResultAnalyzer analyzer = new ResultAnalyzer(labelMap.values(), "DEFAULT"); analyzeResults(labelMap, dirIterable, analyzer); log.info("{} Results: {}", complementary ? "Complementary" : "Standard NB", analyzer); return 0; }
From source file:com.mozilla.grouperfish.mahout.clustering.display.lda.OriginalText.java
License:Apache License
/**
 * Groups documents by their highest-scoring topic.
 *
 * <p>Reads (Text document id, VectorWritable topic scores) records from
 * {@code docTopicsPath}. For each document the non-zero entry with the largest score is
 * selected and its vector index is used as the topic id; a document with no entry scoring
 * above 0.0 is assigned to topic 0 with score 0.0. The document is then handed to
 * {@code enqueue} together with the queue for that topic. The largest score seen per topic
 * is printed to standard output once reading has finished.
 *
 * <p>An {@code IOException} while reading is logged and whatever was collected up to that
 * point is returned.
 *
 * @param docTopicsPath location of the document/topic sequence files
 * @param numDocs       initial capacity of each per-topic queue; also passed to
 *                      {@code enqueue} (presumably as the maximum queue size — confirm
 *                      against that helper)
 * @return a map from topic id to the queue of (score, document id) pairs for that topic
 */
public static Map<Integer, PriorityQueue<Pair<Double, String>>> getDocIds(Path docTopicsPath, int numDocs) {
    Map<Integer, PriorityQueue<Pair<Double, String>>> docIdMap = new HashMap<Integer, PriorityQueue<Pair<Double, String>>>();
    Map<Integer, Double> maxDocScores = new HashMap<Integer, Double>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        Text k = new Text();
        VectorWritable vw = new VectorWritable();
        pointsReader = new SequenceFileDirectoryReader(docTopicsPath);
        while (pointsReader.next(k, vw)) {
            String docId = k.toString();
            Vector normGamma = vw.get();
            Iterator<Element> iter = normGamma.iterateNonZero();
            double maxTopicScore = 0.0;
            int topic = 0;
            while (iter.hasNext()) {
                Element e = iter.next();
                double score = e.get();
                if (score > maxTopicScore) {
                    maxTopicScore = score;
                    // Use the element's own index: a running counter over the non-zero
                    // iteration drifts from the topic id as soon as a zero entry is skipped.
                    topic = e.index();
                }
            }

            PriorityQueue<Pair<Double, String>> docIdsForTopic = docIdMap.get(topic);
            if (docIdsForTopic == null) {
                docIdsForTopic = new PriorityQueue<Pair<Double, String>>(numDocs);
            }

            Double maxDocScoreForTopic = maxDocScores.get(topic);
            if (maxDocScoreForTopic == null) {
                maxDocScoreForTopic = 0.0;
            }
            if (maxTopicScore > maxDocScoreForTopic) {
                maxDocScores.put(topic, maxTopicScore);
            }

            enqueue(docIdsForTopic, docId, maxTopicScore, numDocs);
            docIdMap.put(topic, docIdsForTopic);
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }

    for (Map.Entry<Integer, Double> entry : maxDocScores.entrySet()) {
        System.out.println("For topic: " + entry.getKey() + " max score: " + entry.getValue());
    }

    return docIdMap;
}
From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java
License:Apache License
@Override public void putNext(Tuple t) throws IOException { IntWritable outputKey = new IntWritable(); VectorWritable outputValue = new VectorWritable(); outputKey.set((Integer) t.get(0)); Tuple currRow = (Tuple) t.get(1);/*w w w .j ava 2s . c om*/ Vector currRowVector; if (dimensions == 0) { throw new IllegalArgumentException("Trying to create 0 dimension vector"); } if (STORE_AS_DENSE) { currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString()); } else if (STORE_AS_SEQUENTIAL) { currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()), outputKey.toString()); } else { currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()), outputKey.toString()); } for (int ii = 0; ii < currRow.size(); ii++) { Object o = currRow.get(ii); switch (currRow.getType(ii)) { case DataType.INTEGER: case DataType.LONG: case DataType.FLOAT: case DataType.DOUBLE: currRowVector.set(ii, (Double) o); break; case DataType.TUPLE: // If this is a tuple then we want to set column and element Tuple subt = (Tuple) o; currRowVector.set((Integer) subt.get(0), (Double) subt.get(1)); break; default: throw new RuntimeException("Unexpected tuple form"); } } outputValue.set(currRowVector); try { writer.write(outputKey, outputValue); } catch (InterruptedException e) { LOG.error("Interrupted while writing", e); } }