Example usage for org.apache.mahout.utils.vectors.io VectorWriter write

List of usage examples for org.apache.mahout.utils.vectors.io VectorWriter write

Introduction

On this page you can find example usage for org.apache.mahout.utils.vectors.io VectorWriter write.

Prototype

void write(Vector vector) throws IOException;

Source Link

Document

Write out a vector

Usage

From source file: com.tamingtext.mahout.VectorExamplesTest.java

License: Apache License

/**
 * Builds one dense and two sparse vectors from the same three values, checks how they
 * compare with one another, and then writes the two sparse vectors to a temporary
 * sequence file through a {@link VectorWriter}.
 *
 * @throws Exception if the temporary file cannot be created or the vectors cannot be written
 */
@Test
public void testProgrammatic() throws Exception {
    //<start id="vec.examples.programmatic"/>
    double[] vals = new double[] { 0.3, 1.8, 200.228 };
    Vector dense = new DenseVector(vals);//<co id="vec.exam.dense"/>
    // assertEquals reports expected vs. actual on failure, unlike assertTrue(a == b).
    assertEquals(3, dense.size());
    Vector sparseSame = new SequentialAccessSparseVector(3);//<co id="vec.exam.sparse.same"/>
    Vector sparse = new SequentialAccessSparseVector(3000);//<co id="vec.exam.sparse"/>
    for (int i = 0; i < vals.length; i++) {//<co id="vec.exam.assign.sparse"/>
        sparseSame.set(i, vals[i]);
        sparse.set(i, vals[i]);
    }
    assertFalse(dense.equals(sparse));//<co id="vec.exam.notequals.d.s"/>
    assertEquals(dense, sparseSame);//<co id="vec.exam.equals.d.s"/>
    assertFalse(sparse.equals(sparseSame));
    /*
    <calloutlist>
    <callout arearefs="vec.exam.dense"><para>Create a <classname>DenseVector</classname> holding 3 values.  The cardinality of this vector is 3 </para></callout>
    <callout arearefs="vec.exam.sparse.same"><para>Create a <classname>SequentialAccessSparseVector</classname> that has a cardinality of 3</para></callout>

    <callout arearefs="vec.exam.sparse"><para>Create a <classname>SequentialAccessSparseVector</classname> with a cardinality of 3000.</para></callout>
    <callout arearefs="vec.exam.assign.sparse"><para>Set the values to the first 3 items in the sparse vectors.</para></callout>
    <callout arearefs="vec.exam.notequals.d.s"><para>The dense and the sparse <classname>Vector</classname>s are not equal because they have different cardinality.</para></callout>
    <callout arearefs="vec.exam.equals.d.s"><para>The dense and sparseSame <classname>Vector</classname>s are equal because they have the same values and cardinality</para></callout>

    </calloutlist>
    */
    //<end id="vec.examples.programmatic"/>
    //<start id="vec.examples.seq.file"/>
    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    File tmpLoc = new File(tmpDir, "sfvwt");
    tmpLoc.mkdirs();
    File tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
    // Each run creates a fresh file; ask the JVM to remove it so runs do not pile up files.
    tmpFile.deleteOnExit();

    Path path = new Path(tmpFile.getAbsolutePath());
    Configuration conf = new Configuration();//<co id="vec.examples.seq.conf"/>
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class,
            VectorWritable.class);//<co id="vec.examples.seq.writer"/>
    VectorWriter vecWriter = new SequenceFileVectorWriter(seqWriter);//<co id="vec.examples.seq.vecwriter"/>
    try {
        List<Vector> vectors = new ArrayList<Vector>();
        vectors.add(sparse);
        vectors.add(sparseSame);
        vecWriter.write(vectors);//<co id="vec.examples.seq.write"/>
    } finally {
        // Close even when write() throws so the writer is never left open.
        vecWriter.close();
    }
    /*
    <calloutlist>
    <callout arearefs="vec.examples.seq.conf"><para>Create a <classname>Configuration</classname> for Hadoop</para></callout>
    <callout arearefs="vec.examples.seq.writer"><para>Create a Hadoop <classname>SequenceFile.Writer</classname> to handle the job of physically writing out the vectors to a file on the configured <classname>FileSystem</classname></para></callout>
    <callout arearefs="vec.examples.seq.vecwriter"><para>A <classname>VectorWriter</classname> processes the <classname>Vector</classname>s and invokes the underlying write methods on the <classname>SequenceFile.Writer</classname></para></callout>
    <callout arearefs="vec.examples.seq.write"><para>Do the work of writing out the files</para></callout>

    </calloutlist>
    */
    //<end id="vec.examples.seq.file"/>
}