List of usage examples for the org.apache.mahout.math SequentialAccessSparseVector copy constructor
public SequentialAccessSparseVector(SequentialAccessSparseVector other)
From source file:com.elex.dmp.core.TopicModel.java
License:Apache License
public void reset() { for (int x = 0; x < numTopics; x++) { topicTermCounts.assignRow(x, new SequentialAccessSparseVector(numTerms)); }//from w ww . jav a 2s .com topicSums.assign(1.0); initializeThreadPool(); }
From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java
License:Apache License
/**
 * Builds a term-frequency vector for one document key from its token stream.
 * Counts unigrams (or shingled n-grams when maxNGramSize >= 2) against the
 * shared dictionary, then optionally converts to sequential-access form and
 * wraps in a NamedVector before writing. Empty vectors are counted, not written.
 */
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
        throws IOException, InterruptedException {
    Iterator<StringTuple> it = values.iterator();
    if (!it.hasNext()) {
        return;
    }
    // Only the first StringTuple per key is consumed; any further values are ignored.
    StringTuple value = it.next();
    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size
    if (maxNGramSize >= 2) {
        ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
                maxNGramSize);
        try {
            // NOTE(review): this do-while reads the term attribute BEFORE the first
            // incrementToken() call, and sf.reset() is never invoked — both look
            // contrary to the Lucene TokenStream contract. Verify against the
            // Lucene version in use before changing; older Mahout code shipped
            // this exact pattern.
            do {
                String term = sf.getAttribute(CharTermAttribute.class).toString();
                if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
                    int termId = dictionary.get(term);
                    vector.setQuick(termId, vector.getQuick(termId) + 1);
                }
            } while (sf.incrementToken());
            sf.end();
        } finally {
            Closeables.closeQuietly(sf);
        }
    } else {
        for (String term : value.getEntries()) {
            if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram
                int termId = dictionary.get(term);
                vector.setQuick(termId, vector.getQuick(termId) + 1);
            }
        }
    }
    if (sequentialAccess) {
        vector = new SequentialAccessSparseVector(vector);
    }
    if (namedVector) {
        vector = new NamedVector(vector, key.toString());
    }
    // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk.
    if (vector.getNumNondefaultElements() > 0) {
        VectorWritable vectorWritable = new VectorWritable(vector);
        context.write(key, vectorWritable);
    } else {
        // NOTE(review): counter group name is misspelled ("TFPartical…"); left
        // unchanged because counter names may be consumed by monitoring/jobs.
        context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
    }
}
From source file:com.ml.ira.algos.RunLogistic.java
License:Apache License
static void mainToOutput(String[] args, PrintWriter output) throws Exception { if (parseArgs(args)) { if (!showAuc && !showConfusion && !showScores) { showAuc = true;//from w ww . ja v a 2 s . co m showConfusion = true; } Auc collector = new Auc(); LogisticModelParameters lmp; if (modelFile.startsWith("hdfs://")) { lmp = LogisticModelParameters.loadFrom(new Path(modelFile)); } else { lmp = LogisticModelParameters.loadFrom(new File(modelFile)); } CsvRecordFactory csv = lmp.getCsvRecordFactory(); OnlineLogisticRegression lr = lmp.createRegression(); BufferedReader in = TrainLogistic.open(inputFile); //String line = in.readLine(); //csv.firstLine(line); String line; if (fieldNames != null && fieldNames.equalsIgnoreCase("internal")) { csv.firstLine(lmp.getFieldNames()); } else { csv.firstLine(in.readLine()); } line = in.readLine(); if (showScores) { output.println("\"target\",\"model-output\",\"log-likelihood\""); } while (line != null) { Vector v = new SequentialAccessSparseVector(lmp.getNumFeatures()); int target = csv.processLine(line, v); double score = lr.classifyScalar(v); if (showScores) { output.printf(Locale.ENGLISH, "%d,%.3f,%.6f%n", target, score, lr.logLikelihood(target, v)); } collector.add(target, score); line = in.readLine(); } if (showAuc) { output.printf(Locale.ENGLISH, "AUC = %.2f%n", collector.auc()); } if (showConfusion) { Matrix m = collector.confusion(); output.printf(Locale.ENGLISH, "confusion: [[%.1f, %.1f], [%.1f, %.1f]]%n", m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1)); m = collector.entropy(); output.printf(Locale.ENGLISH, "entropy: [[%.1f, %.1f], [%.1f, %.1f]]%n", m.get(0, 0), m.get(1, 0), m.get(0, 1), m.get(1, 1)); } } }
From source file:com.skp.experiment.cf.als.hadoop.SolveImplicitFeedbackMultithreadedMapper.java
License:Apache License
/**
 * Solves the implicit-feedback least-squares system for one user or item:
 * copies the ratings row into sequential-access sparse form, runs the solver,
 * and emits the resulting feature vector under the same ID.
 */
@Override
protected void map(IntWritable userOrItemID, VectorWritable ratingsWritable, Context ctx)
        throws IOException, InterruptedException {
    // Copy to sequential-access form — presumably for efficient in-order
    // iteration inside the solver; TODO confirm against solver implementation.
    Vector ratingsRow = new SequentialAccessSparseVector(ratingsWritable.get());
    Vector featureVector = solver.solve(ratingsRow);
    ctx.write(userOrItemID, new VectorWritable(featureVector));
}
From source file:com.tamingtext.mahout.VectorExamplesTest.java
License:Apache License
/**
 * Book example: builds dense and sparse vectors programmatically, checks
 * their equality semantics, then writes the sparse vectors to a Hadoop
 * SequenceFile via a SequenceFileVectorWriter.
 */
@Test
public void testProgrammatic() throws Exception {
    //<start id="vec.examples.programmatic"/>
    double[] vals = new double[] { 0.3, 1.8, 200.228 };
    Vector dense = new DenseVector(vals); //<co id="vec.exam.dense"/>
    assertTrue(dense.size() == 3);
    Vector sparseSame = new SequentialAccessSparseVector(3); //<co id="vec.exam.sparse.same"/>
    Vector sparse = new SequentialAccessSparseVector(3000); //<co id="vec.exam.sparse"/>
    for (int i = 0; i < vals.length; i++) { //<co id="vec.exam.assign.sparse"/>
        sparseSame.set(i, vals[i]);
        sparse.set(i, vals[i]);
    }
    // Equality in Mahout requires matching cardinality, not just matching values.
    assertFalse(dense.equals(sparse)); //<co id="vec.exam.notequals.d.s"/>
    assertEquals(dense, sparseSame); //<co id="vec.exam.equals.d.s"/>
    assertFalse(sparse.equals(sparseSame));
    /*
    <calloutlist>
    <callout arearefs="vec.exam.dense"><para>Create a <classname>DenseVector</classname>
    with a label of "my-dense" and 3 values. The cardinality of this vector is 3</para></callout>
    <callout arearefs="vec.exam.sparse.same"><para>Create a <classname>SparseVector</classname>
    with a label of my-sparse-same that has cardinality of 3</para></callout>
    <callout arearefs="vec.exam.sparse"><para>Create a <classname>SparseVector</classname>
    with a label of my-sparse and a cardinality of 3000.</para></callout>
    <callout arearefs="vec.exam.assign.sparse"><para>Set the values to the first 3 items
    in the sparse vectors.</para></callout>
    <callout arearefs="vec.exam.notequals.d.s"><para>The dense and the sparse
    <classname>Vector</classname>s are not equal because they have different cardinality.</para></callout>
    <callout arearefs="vec.exam.equals.d.s"><para>The dense and sparseSame
    <classname>Vector</classname>s are equal because they have the same values and cardinality</para></callout>
    </calloutlist>
    */
    //<end id="vec.examples.programmatic"/>
    //<start id="vec.examples.seq.file"/>
    // Write the two sparse vectors out to a temp SequenceFile under java.io.tmpdir.
    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    File tmpLoc = new File(tmpDir, "sfvwt");
    tmpLoc.mkdirs();
    File tmpFile = File.createTempFile("sfvwt", ".dat", tmpLoc);
    Path path = new Path(tmpFile.getAbsolutePath());
    Configuration conf = new Configuration(); //<co id="vec.examples.seq.conf"/>
    FileSystem fs = FileSystem.get(conf);
    SequenceFile.Writer seqWriter = SequenceFile.createWriter(fs, conf, path,
            LongWritable.class, VectorWritable.class); //<co id="vec.examples.seq.writer"/>
    VectorWriter vecWriter = new SequenceFileVectorWriter(seqWriter); //<co id="vec.examples.seq.vecwriter"/>
    List<Vector> vectors = new ArrayList<Vector>();
    vectors.add(sparse);
    vectors.add(sparseSame);
    vecWriter.write(vectors); //<co id="vec.examples.seq.write"/>
    vecWriter.close(); // closing the VectorWriter also closes the underlying seqWriter
    /*
    <calloutlist>
    <callout arearefs="vec.examples.seq.conf"><para>Create a <classname>Configuration</classname>
    for Hadoop</para></callout>
    <callout arearefs="vec.examples.seq.writer"><para>Create a Hadoop
    <classname>SequenceFile.Writer</classname> to handle the job of physically writing out
    the vectors to a file in HDFS</para></callout>
    <callout arearefs="vec.examples.seq.vecwriter"><para>A <classname>VectorWriter</classname>
    processes the <classname>Vector</classname>s and invokes the underlying write methods
    on the <classname>SequenceFile.Writer</classname></para></callout>
    <callout arearefs="vec.examples.seq.write"><para>Do the work of writing out the files</para></callout>
    </calloutlist>
    */
    //<end id="vec.examples.seq.file"/>
}
From source file:com.twitter.algebra.MergeVectorsReducer.java
License:Apache License
/**
 * Merges all partial vectors that share a key into a single vector and
 * writes it back out in sequential-access sparse form.
 */
@Override
public void reduce(WritableComparable<?> key, Iterable<VectorWritable> vectors, Context context)
        throws IOException, InterruptedException {
    VectorWritable combined = VectorWritable.merge(vectors.iterator());
    Vector sequential = new SequentialAccessSparseVector(combined.get());
    context.write(key, new VectorWritable(sequential));
}
From source file:de.isabeldrostfromm.sof.naive.Vectoriser.java
License:Open Source License
/**
 * Encodes free text into a sparse vector using a LuceneTextValueEncoder
 * backed by Lucene's StandardAnalyzer.
 *
 * @param probes number of hash probes; also used as the vector cardinality
 * @param text   raw text to encode
 * @return the encoded feature vector
 */
private static Vector luceneEncode(int probes, String text) {
    LuceneTextValueEncoder encoder = new LuceneTextValueEncoder("sof");
    encoder.setAnalyzer(new StandardAnalyzer(Version.LUCENE_36));
    encoder.setProbes(probes);
    encoder.addText(text);
    Vector encoded = new SequentialAccessSparseVector(probes);
    encoder.flush(1, encoded);
    return encoded;
}
From source file:de.isabeldrostfromm.sof.util.Vectors.java
License:Open Source License
/** * Appends two vectors directly after one another, leaving all non set elements zero. * *///from w w w.jav a 2s .c om public static Vector append(Vector... vectors) { int totalSize = 0; for (Vector vec : vectors) { totalSize += vec.size(); } Vector result = new SequentialAccessSparseVector(totalSize); result.assign(0); int lastIndex = 0; for (Vector vector : vectors) { for (Element elem : vector) { result.setQuick(lastIndex + elem.index(), elem.get()); } lastIndex += vector.size(); } return result; }
From source file:de.isabeldrostfromm.sof.util.Vectors.java
License:Open Source License
/** * Creates a new SequentialSparseAccessVector and assigns the given values one after another to it. * *///from w w w. ja va 2s . c o m public static SequentialAccessSparseVector newSequentialAccessSparseVector(double... ds) { SequentialAccessSparseVector result = new SequentialAccessSparseVector(ds.length); for (int i = 0; i < ds.length; i++) { result.setQuick(i, ds[i]); } return result; }
From source file:de.isabeldrostfromm.sof.util.VectorsTest.java
License:Open Source License
private SequentialAccessSparseVector randomVector() { int length = atMost(100); SequentialAccessSparseVector vec = new SequentialAccessSparseVector(length); for (int i = 0; i < length; i++) { vec.setQuick(i, randomDouble()); }//w w w .j av a2 s. c om return vec; }