Example usage for org.apache.mahout.math SequentialAccessSparseVector SequentialAccessSparseVector

List of usage examples for org.apache.mahout.math SequentialAccessSparseVector SequentialAccessSparseVector

Introduction

On this page you can find an example usage for org.apache.mahout.math SequentialAccessSparseVector SequentialAccessSparseVector.

Prototype

public SequentialAccessSparseVector(SequentialAccessSparseVector other) 

Source Link

Usage

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Resets the model to its initial state: every topic's term-count row is
 * replaced with an empty sparse vector, all topic sums are set back to 1.0,
 * and the worker thread pool is re-initialized.
 */
public void reset() {
    for (int topic = 0; topic < numTopics; topic++) {
        topicTermCounts.assignRow(topic, new SequentialAccessSparseVector(numTerms));
    }
    topicSums.assign(1.0);
    initializeThreadPool();
}

From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java

License:Apache License

/**
 * Builds a term-frequency vector for one document from its token stream and
 * writes it out, optionally converting to sequential access and/or wrapping
 * in a {@link NamedVector}. Documents whose tokens are entirely absent from
 * the dictionary are counted instead of emitted.
 *
 * @param key     document identifier
 * @param values  tokenized document content; only the first tuple is used
 * @param context Hadoop context used to emit the vector or bump the counter
 */
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
        throws IOException, InterruptedException {
    Iterator<StringTuple> it = values.iterator();
    if (!it.hasNext()) {
        return;
    }
    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
        ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
                maxNGramSize);
        try {
            // TokenStream workflow contract: reset() first, and incrementToken()
            // must return true BEFORE attributes are read. The original do-while
            // read the term attribute before ever advancing the stream, relying
            // on the isEmpty() guard to skip the bogus first read.
            sf.reset();
            while (sf.incrementToken()) {
                String term = sf.getAttribute(CharTermAttribute.class).toString();
                if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
                    int termId = dictionary.get(term);
                    vector.setQuick(termId, vector.getQuick(termId) + 1);
                }
            }
            sf.end();
        } finally {
            Closeables.closeQuietly(sf);
        }
    } else {
        for (String term : value.getEntries()) {
            if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram
                int termId = dictionary.get(term);
                vector.setQuick(termId, vector.getQuick(termId) + 1);
            }
        }
    }
    if (sequentialAccess) {
        vector = new SequentialAccessSparseVector(vector);
    }

    if (namedVector) {
        vector = new NamedVector(vector, key.toString());
    }

    // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk.
    if (vector.getNumNondefaultElements() > 0) {
        VectorWritable vectorWritable = new VectorWritable(vector);
        context.write(key, vectorWritable);
    } else {
        // NOTE(review): the counter group misspells "Partial"; kept as-is because
        // external tooling may already match this exact string.
        context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
    }
}

From source file:com.ml.ira.algos.RunLogistic.java

License:Apache License

/**
 * Runs a previously trained logistic-regression model over an input CSV file
 * and reports per-line scores, AUC, and/or confusion and entropy matrices on
 * the given writer, depending on the parsed command-line flags.
 *
 * @param args   command-line arguments; parsed into the static option fields
 * @param output destination for all report output
 * @throws Exception if the model or the input cannot be read
 */
static void mainToOutput(String[] args, PrintWriter output) throws Exception {
    if (parseArgs(args)) {
        if (!showAuc && !showConfusion && !showScores) {
            // No explicit report requested: default to AUC + confusion matrix.
            showAuc = true;
            showConfusion = true;
        }

        Auc collector = new Auc();
        LogisticModelParameters lmp;
        if (modelFile.startsWith("hdfs://")) {
            lmp = LogisticModelParameters.loadFrom(new Path(modelFile));
        } else {
            lmp = LogisticModelParameters.loadFrom(new File(modelFile));
        }
        CsvRecordFactory csv = lmp.getCsvRecordFactory();
        OnlineLogisticRegression lr = lmp.createRegression();
        BufferedReader in = TrainLogistic.open(inputFile);
        try {
            // The CSV header either comes from the model ("internal") or from
            // the first line of the input file.
            String line;
            if (fieldNames != null && fieldNames.equalsIgnoreCase("internal")) {
                csv.firstLine(lmp.getFieldNames());
            } else {
                csv.firstLine(in.readLine());
            }
            line = in.readLine();
            if (showScores) {
                output.println("\"target\",\"model-output\",\"log-likelihood\"");
            }
            while (line != null) {
                Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures());
                int target = csv.processLine(line, v);

                double score = lr.classifyScalar(v);
                if (showScores) {
                    output.printf(Locale.ENGLISH, "%d,%.3f,%.6f%n", target, score, lr.logLikelihood(target, v));
                }
                collector.add(target, score);
                line = in.readLine();
            }
        } finally {
            // Fix: the original never closed the reader, leaking the file handle.
            in.close();
        }

        if (showAuc) {
            output.printf(Locale.ENGLISH, "AUC = %.2f%n", collector.auc());
        }
        if (showConfusion) {
            Matrix m = collector.confusion();
            output.printf(Locale.ENGLISH, "confusion: [[%.1f, %.1f], [%.1f, %.1f]]%n", m.get(0, 0), m.get(1, 0),
                    m.get(0, 1), m.get(1, 1));
            m = collector.entropy();
            output.printf(Locale.ENGLISH, "entropy: [[%.1f, %.1f], [%.1f, %.1f]]%n", m.get(0, 0), m.get(1, 0),
                    m.get(0, 1), m.get(1, 1));
        }
    }
}

From source file:com.skp.experiment.cf.als.hadoop.SolveImplicitFeedbackMultithreadedMapper.java

License:Apache License

/**
 * Solves for a single user (or item) feature vector from its ratings row and
 * emits the result keyed by the same ID.
 *
 * @param userOrItemID    ID of the user or item this row belongs to
 * @param ratingsWritable the ratings row for that user/item
 * @param ctx             Hadoop context used to emit the solved vector
 */
@Override
protected void map(IntWritable userOrItemID, VectorWritable ratingsWritable, Context ctx)
        throws IOException, InterruptedException {
    Vector ratingRow = new SequentialAccessSparseVector(ratingsWritable.get());
    ctx.write(userOrItemID, new VectorWritable(solver.solve(ratingRow)));
}

From source file:com.tamingtext.mahout.VectorExamplesTest.java

License:Apache License

/**
 * Demonstrates creating dense and sparse Mahout vectors programmatically and
 * writing them to a Hadoop SequenceFile. The {@code <start id=...>}/callout
 * comments are book-extraction markup and are preserved verbatim.
 */
@Test
public void testProgrammatic() throws Exception {
    //<start id="vec.examples.programmatic"/>
    double[] vals = new double[] { 0.3, 1.8, 200.228 };
    Vector dense = new DenseVector(vals);//<co id="vec.exam.dense"/>
    assertTrue(dense.size() == 3);
    Vector sparseSame = new SequentialAccessSparseVector(3);//<co id="vec.exam.sparse.same"/>
    Vector sparse = new SequentialAccessSparseVector(3000);//<co id="vec.exam.sparse"/>
    for (int i = 0; i < vals.length; i++) {//<co id="vec.exam.assign.sparse"/>
        sparseSame.set(i, vals[i]);
        sparse.set(i, vals[i]);
    }
    assertFalse(dense.equals(sparse));//<co id="vec.exam.notequals.d.s"/>
    assertEquals(dense, sparseSame);//<co id="vec.exam.equals.d.s"/>
    assertFalse(sparse.equals(sparseSame));
    /*
    <calloutlist>
    <callout arearefs="vec.exam.dense"><para>Create a <classname>DenseVector</classname> with a label of "my-dense" and 3 values.  The cardinality of this vector is 3 </para></callout>
    <callout arearefs="vec.exam.sparse.same"><para>Create a <classname>SparseVector</classname> with a label of my-sparse-same that has cardinality of 3</para></callout>
            
    <callout arearefs="vec.exam.sparse"><para>Create a <classname>SparseVector</classname> with a label of my-sparse and a cardinality of 3000.</para></callout>
    <callout arearefs="vec.exam.assign.sparse"><para>Set the values to the first 3 items in the sparse vectors.</para></callout>
    <callout arearefs="vec.exam.notequals.d.s"><para>The dense and the sparse <classname>Vector</classname>s are not equal because they have different cardinality.</para></callout>
    <callout arearefs="vec.exam.equals.d.s"><para>The dense and sparseSame <classname>Vector</classname>s are equal because they have the same values and cardinality</para></callout>
            
    </calloutlist>
    */
    //<end id="vec.examples.programmatic"/>
    //<start id="vec.examples.seq.file"/>
    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    File tmpLoc = new File(tmpDir, "sfvwt");
    tmpLoc.mkdirs();
    File tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
    // Fix: clean up the temp file when the JVM exits; the original left a new
    // file behind on every test run.
    tmpFile.deleteOnExit();

    Path path = new Path(tmpFile.getAbsolutePath());
    Configuration conf = new Configuration();//<co id="vec.examples.seq.conf"/>
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path, LongWritable.class,
            VectorWritable.class);//<co id="vec.examples.seq.writer"/>
    VectorWriter vecWriter = new SequenceFileVectorWriter(seqWriter);//<co id="vec.examples.seq.vecwriter"/>
    List<Vector> vectors = new ArrayList<Vector>();
    vectors.add(sparse);
    vectors.add(sparseSame);
    vecWriter.write(vectors);//<co id="vec.examples.seq.write"/>
    vecWriter.close();
    /*
    <calloutlist>
    <callout arearefs="vec.examples.seq.conf"><para>Create a <classname>Configuration</classname> for Hadoop</para></callout>
    <callout arearefs="vec.examples.seq.writer"><para>Create a Hadoop <classname>SequenceFile.Writer</classname> to handle the job of physically writing out the vectors to a file in HDFS</para></callout>
    <callout arearefs="vec.examples.seq.vecwriter"><para>A <classname>VectorWriter</classname> processes the <classname>Vector</classname>s and invokes the underlying write methods on the <classname>SequenceFile.Writer</classname></para></callout>
    <callout arearefs="vec.examples.seq.write"><para>Do the work of writing out the files</para></callout>
            
    </calloutlist>
    */
    //<end id="vec.examples.seq.file"/>
}

From source file:com.twitter.algebra.MergeVectorsReducer.java

License:Apache License

/**
 * Merges all partial vectors that share one key into a single vector and
 * writes it out in sequential-access form.
 *
 * @param key     the row key shared by the partial vectors
 * @param vectors the partial vectors to merge
 * @param context Hadoop context used to emit the merged vector
 */
@Override
public void reduce(WritableComparable<?> key, Iterable<VectorWritable> vectors, Context context)
        throws IOException, InterruptedException {
    Vector combined = VectorWritable.merge(vectors.iterator()).get();
    VectorWritable out = new VectorWritable(new SequentialAccessSparseVector(combined));
    context.write(key, out);
}

From source file:de.isabeldrostfromm.sof.naive.Vectoriser.java

License:Open Source License

/**
 * Encodes the given text into a sparse feature vector using Lucene's
 * StandardAnalyzer via a {@code LuceneTextValueEncoder}.
 *
 * @param probes number of hash probes; also the cardinality of the result
 * @param text   raw text to encode
 * @return the encoded feature vector
 */
private static Vector luceneEncode(int probes, String text) {
    LuceneTextValueEncoder textEncoder = new LuceneTextValueEncoder("sof");
    textEncoder.setAnalyzer(new StandardAnalyzer(Version.LUCENE_36));
    textEncoder.setProbes(probes);
    textEncoder.addText(text);
    Vector encoded = new SequentialAccessSparseVector(probes);
    textEncoder.flush(1, encoded);
    return encoded;
}

From source file:de.isabeldrostfromm.sof.util.Vectors.java

License:Open Source License

/**
 * Concatenates the given vectors into one sparse vector: the i-th input
 * occupies the index range immediately after the (i-1)-th. Elements that are
 * never set remain zero.
 *
 * @param vectors the vectors to concatenate, in order
 * @return a sparse vector whose cardinality is the sum of the input sizes
 */
public static Vector append(Vector... vectors) {
    int totalSize = 0;
    for (Vector vec : vectors) {
        totalSize += vec.size();
    }

    // A freshly constructed sparse vector is already all-zero; the original's
    // result.assign(0) was a redundant (and potentially costly) full pass.
    Vector result = new SequentialAccessSparseVector(totalSize);

    int offset = 0;
    for (Vector vector : vectors) {
        for (Element elem : vector) {
            result.setQuick(offset + elem.index(), elem.get());
        }
        offset += vector.size();
    }
    return result;
}

From source file:de.isabeldrostfromm.sof.util.Vectors.java

License:Open Source License

/**
 * Creates a {@code SequentialAccessSparseVector} holding the given values in
 * order; the result's cardinality equals the number of values.
 *
 * @param ds the values to store, assigned to indices 0..ds.length-1
 * @return a new sparse vector containing the given values
 */
public static SequentialAccessSparseVector newSequentialAccessSparseVector(double... ds) {
    SequentialAccessSparseVector vector = new SequentialAccessSparseVector(ds.length);
    int index = 0;
    for (double value : ds) {
        vector.setQuick(index++, value);
    }
    return vector;
}

From source file:de.isabeldrostfromm.sof.util.VectorsTest.java

License:Open Source License

/**
 * Builds a sparse vector of random cardinality (at most 100) with every
 * position filled by a random double.
 */
private SequentialAccessSparseVector randomVector() {
    int cardinality = atMost(100);
    SequentialAccessSparseVector vector = new SequentialAccessSparseVector(cardinality);
    for (int index = 0; index < cardinality; index++) {
        vector.setQuick(index, randomDouble());
    }
    return vector;
}