List of usage examples for org.apache.mahout.math.VectorWritable.get()
public Vector get()
From source file:Vectors.java
License:Apache License
/**
 * Folds a sequence of partial vectors into a single vector.
 *
 * <p>The vector wrapped by the first writable acts as the accumulator and is modified in place.
 * For every later, non-null writable, each element produced by {@code iterateNonZero()} is
 * written into the accumulator with {@code setQuick}, replacing whatever value the accumulator
 * held at that index.
 *
 * @param partialVectors the partial vectors to combine; the iterator's first element is
 *     dereferenced without a null or emptiness check
 * @return the first vector, updated with the entries of all following vectors
 */
public static Vector merge(Iterable<VectorWritable> partialVectors) {
    Iterator<VectorWritable> remaining = partialVectors.iterator();
    Vector merged = remaining.next().get();

    while (remaining.hasNext()) {
        VectorWritable partial = remaining.next();
        if (partial == null) {
            continue;
        }
        for (Iterator<Vector.Element> entries = partial.get().iterateNonZero(); entries.hasNext();) {
            Vector.Element entry = entries.next();
            merged.setQuick(entry.index(), entry.get());
        }
    }
    return merged;
}
From source file:Vectors.java
License:Apache License
/**
 * Copies the non-zero elements of the wrapped vector into an array of detached elements.
 *
 * <p>Each entry produced by {@code iterateNonZero()} is copied into a new
 * {@code TemporaryElement} carrying the same index and value.
 *
 * <p>The array is first sized by {@code getNumNondefaultElements()}. That count can be larger
 * than the number of entries the non-zero iterator actually yields (for example when the vector
 * implementation reports stored-but-zero positions), which previously left {@code null} slots at
 * the end of the result. The array is therefore trimmed to the number of entries copied.
 *
 * @param vectorWritable wrapper around the vector to export
 * @return an array holding exactly one element per non-zero entry, with no {@code null} slots
 */
public static Vector.Element[] toArray(VectorWritable vectorWritable) {
    Vector vector = vectorWritable.get();
    Vector.Element[] elements = new Vector.Element[vector.getNumNondefaultElements()];
    int filled = 0;
    Iterator<Vector.Element> nonZeroElements = vector.iterateNonZero();
    while (nonZeroElements.hasNext()) {
        Vector.Element nonZeroElement = nonZeroElements.next();
        elements[filled++] = new TemporaryElement(nonZeroElement.index(), nonZeroElement.get());
    }
    if (filled == elements.length) {
        return elements;
    }
    // Fewer non-zero entries than reserved slots: drop the unused (null) tail.
    // Fully qualified so the block does not depend on an extra import.
    return java.util.Arrays.copyOf(elements, filled);
}
From source file:Vectors.java
License:Apache License
/**
 * Reads one vector from a directory of sequence files.
 *
 * <p>Lists the entries under {@code path}, picks the first one whose file name contains
 * {@code "part-"}, instantiates its key and value holders reflectively, performs a single
 * {@code next} call and returns the vector held by the value. The result of that {@code next}
 * call is not inspected. The reader is always closed before returning.
 *
 * @param path directory whose entries are scanned
 * @param conf Hadoop configuration used to obtain the file system and open the reader
 * @return the vector held by the value holder after one read, or {@code null} when no entry
 *     name contains {@code "part-"}
 * @throws IOException if listing or reading fails
 */
public static Vector readSequenceFile(Path path, Configuration conf) throws IOException {
    FileSystem fileSystem = FileSystem.get(conf);

    for (FileStatus candidate : fileSystem.listStatus(path)) {
        Path partPath = candidate.getPath();
        if (!partPath.getName().contains("part-")) {
            continue;
        }

        SequenceFile.Reader partReader = null;
        try {
            partReader = new SequenceFile.Reader(fileSystem, partPath, conf);
            Text rowKey = (Text) ReflectionUtils.newInstance(partReader.getKeyClass(), conf);
            VectorWritable rowValue =
                    (VectorWritable) ReflectionUtils.newInstance(partReader.getValueClass(), conf);
            partReader.next(rowKey, rowValue);
            return rowValue.get();
        } finally {
            IOUtils.closeStream(partReader);
        }
    }
    return null;
}
From source file:DisplayClustering.java
License:Apache License
protected static void plotSampleData(Graphics2D g2) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); // plot the axes g2.setColor(Color.BLACK);/* w w w .j a va 2s .c o m*/ Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data g2.setColor(Color.DARK_GRAY); dv.assign(0.03); for (VectorWritable v : SAMPLE_DATA) { plotRectangle(g2, v.get(), dv); } }
From source file:DisplayClustering.java
License:Apache License
/** * This method plots points and colors them according to their cluster * membership, rather than drawing ellipses. * * As of commit, this method is used only by K-means spectral clustering. * Since the cluster assignments are set within the eigenspace of the data, it * is not inherent that the original data cluster as they would in K-means: * that is, as symmetric gaussian mixtures. * * Since Spectral K-Means uses K-Means to cluster the eigenspace data, the raw * output is not directly usable. Rather, the cluster assignments from the raw * output need to be transferred back to the original data. As such, this * method will read the SequenceFile cluster results of K-means and transfer * the cluster assignments to the original data, coloring them appropriately. * * @param g2/*from w w w . ja va2 s .c o m*/ * @param data */ protected static void plotClusteredSampleData(Graphics2D g2, Path data) { double sx = (double) res / DS; g2.setTransform(AffineTransform.getScaleInstance(sx, sx)); g2.setColor(Color.BLACK); Vector dv = new DenseVector(2).assign(SIZE / 2.0); plotRectangle(g2, new DenseVector(2).assign(2), dv); plotRectangle(g2, new DenseVector(2).assign(-2), dv); // plot the sample data, colored according to the cluster they belong to dv.assign(0.03); Path clusteredPointsPath = new Path(data, "clusteredPoints"); Path inputPath = new Path(clusteredPointsPath, "part-m-00000"); Map<Integer, Color> colors = new HashMap<Integer, Color>(); int point = 0; for (Pair<IntWritable, WeightedVectorWritable> record : new SequenceFileIterable<IntWritable, WeightedVectorWritable>( inputPath, new Configuration())) { int clusterId = record.getFirst().get(); VectorWritable v = SAMPLE_DATA.get(point++); Integer key = clusterId; if (!colors.containsKey(key)) { colors.put(key, COLORS[Math.min(COLORS.length - 1, colors.size())]); } plotClusteredRectangle(g2, v.get(), dv, colors.get(key)); } }
From source file:ac.keio.sslab.nlp.lda.RowIdJob.java
License:Apache License
@SuppressWarnings("deprecation") @Override// w w w. j a va 2 s .co m public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path outputPath = getOutputPath(); Path indexPath = new Path(outputPath, "docIndex"); Path matrixPath = new Path(outputPath, "matrix"); try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class, Text.class); SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath, IntWritable.class, VectorWritable.class)) { IntWritable docId = new IntWritable(); int i = 0; int numCols = 0; for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>( getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) { VectorWritable value = record.getSecond(); docId.set(i); indexWriter.append(docId, record.getFirst()); matrixWriter.append(docId, value); i++; numCols = value.get().size(); } log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath); return 0; } }
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java
License:Apache License
@Override public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer) throws IOException, SyncException, InterruptedException { IntWritable key = new IntWritable(); TupleWritable value = new TupleWritable(); while (peer.readNext(key, value)) { // Logging if (isDebuggingEnabled) { for (int i = 0; i < value.size(); i++) { Vector vector = ((VectorWritable) value.get(i)).get(); logger.writeChars("bsp,input,key=" + key + ",value=" + vector.toString() + "\n"); }// w w w . ja v a 2s . c o m } Vector firstVector = ((VectorWritable) value.get(0)).get(); Vector secondVector = ((VectorWritable) value.get(1)).get(); // outCardinality is resulting column size n // (l x m) * (m x n) = (l x n) boolean firstIsOutFrag = secondVector.size() == outCardinality; // outFrag is Matrix which has the resulting column cardinality // (matrixB) Vector outFrag = firstIsOutFrag ? secondVector : firstVector; // multiplier is Matrix which has the resulting row count // (transposed matrixA) Vector multiplier = firstIsOutFrag ? firstVector : secondVector; if (isDebuggingEnabled) { logger.writeChars("bsp,firstIsOutFrag=" + firstIsOutFrag + "\n"); logger.writeChars("bsp,outFrag=" + outFrag + "\n"); logger.writeChars("bsp,multiplier=" + multiplier + "\n"); } for (Vector.Element e : multiplier.nonZeroes()) { VectorWritable outVector = new VectorWritable(); // Scalar Multiplication (Vector x Element) outVector.set(outFrag.times(e.get())); peer.send(masterTask, new MatrixRowMessage(e.index(), outVector)); if (isDebuggingEnabled) { logger.writeChars("bsp,send,key=" + e.index() + ",value=" + outVector.get().toString() + "\n"); } } if (isDebuggingEnabled) { logger.flush(); } } peer.sync(); }
From source file:com.chimpler.example.eigenface.Helper.java
License:Apache License
public static double[][] readMatrixSequenceFile(String fileName) throws Exception { Configuration configuration = new Configuration(); FileSystem fs = FileSystem.get(configuration); Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration); List<double[]> rows = new ArrayList<double[]>(); IntWritable key = new IntWritable(); VectorWritable value = new VectorWritable(); while (matrixReader.next(key, value)) { Vector vector = value.get(); double[] row = new double[vector.size()]; for (int i = 0; i < vector.getNumNondefaultElements(); i++) { Element element = vector.getElement(i); row[element.index()] = element.get(); }//from w w w .j a v a 2s.c o m rows.add(row); } return rows.toArray(new double[rows.size()][]); }
From source file:com.elex.dmp.lda.CachingCVB0Mapper.java
License:Apache License
/**
 * Trains the model on a single document.
 *
 * <p>A fresh dense vector of length {@code numTopics}, with every entry set to
 * {@code 1.0 / numTopics}, is handed to the trainer together with the document's vector.
 * Nothing is written to the context.
 *
 * @param docId key of the document; not used here
 * @param document wrapper around the document's vector
 * @param context mapper context; not used here
 * @throws IOException declared by the mapper contract
 * @throws InterruptedException declared by the mapper contract
 */
@Override
public void map(Text docId, VectorWritable document, Context context)
        throws IOException, InterruptedException {
    /* where to get docTopics? */
    double[] topicSlots = new double[numTopics];
    Vector initialTopics = new DenseVector(topicSlots).assign(1.0 / numTopics);
    Vector documentVector = document.get();
    modelTrainer.train(documentVector, initialTopics, true, maxIters);
}
From source file:com.elex.dmp.lda.CachingCVB0PerplexityMapper.java
License:Apache License
@Override public void map(Text docId, VectorWritable document, Context context) throws IOException, InterruptedException { if (1 > testFraction && random.nextFloat() >= testFraction) { return;//w w w . j a v a2s . co m } context.getCounter(Counters.SAMPLED_DOCUMENTS).increment(1); outKey.set(document.get().norm(1)); outValue.set( modelTrainer.calculatePerplexity(document.get(), topicVector.assign(1.0 / numTopics), maxIters)); context.write(outKey, outValue); }