List of usage examples for the org.apache.mahout.utils.vectors.io.SequenceFileVectorWriter constructor SequenceFileVectorWriter(SequenceFile.Writer)
public SequenceFileVectorWriter(SequenceFile.Writer writer)
From source file:com.grantingersoll.intell.clustering.KMeansClusteringEngine.java
License:Apache License
private static VectorWriter getSeqFileWriter(String outFile) throws IOException { Path path = new Path(outFile); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); // TODO: Make this parameter driven SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class); return new SequenceFileVectorWriter(seqWriter); }
From source file:com.tamingtext.mahout.VectorExamplesTest.java
License:Apache License
@Test public void testProgrammatic() throws Exception { //<start id="vec.examples.programmatic"/> double[] vals = new double[] { 0.3, 1.8, 200.228 }; Vector dense = new DenseVector(vals);//<co id="vec.exam.dense"/> assertTrue(dense.size() == 3);/*from www. ja va 2 s . c o m*/ Vector sparseSame = new SequentialAccessSparseVector(3);//<co id="vec.exam.sparse.same"/> Vector sparse = new SequentialAccessSparseVector(3000);//<co id="vec.exam.sparse"/> for (int i = 0; i < vals.length; i++) {//<co id="vec.exam.assign.sparse"/> sparseSame.set(i, vals[i]); sparse.set(i, vals[i]); } assertFalse(dense.equals(sparse));//<co id="vec.exam.notequals.d.s"/> assertEquals(dense, sparseSame);//<co id="vec.exam.equals.d.s"/> assertFalse(sparse.equals(sparseSame)); /* <calloutlist> <callout arearefs="vec.exam.dense"><para>Create a <classname>DenseVector</classname> with a label of "my-dense" and 3 values. The cardinality of this vector is 3 </para></callout> <callout arearefs="vec.exam.sparse.same"><para>Create a <classname>SparseVector</classname> with a label of my-sparse-same that has cardinality of 3</para></callout> <callout arearefs="vec.exam.sparse"><para>Create a <classname>SparseVector</classname> with a label of my-sparse and a cardinality of 3000.</para></callout> <callout arearefs="vec.exam.assign.sparse"><para>Set the values to the first 3 items in the sparse vectors.</para></callout> <callout arearefs="vec.exam.notequals.d.s"><para>The dense and the sparse <classname>Vector</classname>s are not equal because they have different cardinality.</para></callout> <callout arearefs="vec.exam.equals.d.s"><para>The dense and sparseSame <classname>Vector</classname>s are equal because they have the same values and cardinality</para></callout> </calloutlist> */ //<end id="vec.examples.programmatic"/> //<start id="vec.examples.seq.file"/> File tmpDir = new File(System.getProperty("java.io.tmpdir")); File tmpLoc = new File(tmpDir, "sfvwt"); tmpLoc.mkdirs(); File tmpFile = 
File.createTempFile("sfvwt", ".dat", tmpLoc); Path path = new Path(tmpFile.getAbsolutePath()); Configuration conf = new Configuration();//<co id="vec.examples.seq.conf"/> FileSystem fs = FileSystem.get(conf); SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class, VectorWritable.class);//<co id="vec.examples.seq.writer"/> VectorWriter vecWriter = new SequenceFileVectorWriter(seqWriter);//<co id="vec.examples.seq.vecwriter"/> List<Vector> vectors = new ArrayList<Vector>(); vectors.add(sparse); vectors.add(sparseSame); vecWriter.write(vectors);//<co id="vec.examples.seq.write"/> vecWriter.close(); /* <calloutlist> <callout arearefs="vec.examples.seq.conf"><para>Create a <classname>Configuration</classname> for Hadoop</para></callout> <callout arearefs="vec.examples.seq.writer"><para>Create a Hadoop <classname>SequenceFile.Writer</classname> to handle the job of physically writing out the vectors to a file in HDFS</para></callout> <callout arearefs="vec.examples.seq.vecwriter"><para>A <classname>VectorWriter</classname> processes the <classname>Vector</classname>s and invokes the underlying write methods on the <classname>SequenceFile.Writer</classname></para></callout> <callout arearefs="vec.examples.seq.write"><para>Do the work of writing out the files</para></callout> </calloutlist> */ //<end id="vec.examples.seq.file"/> }