List of usage examples for org.apache.mahout.math.Vector#nonZeroes()
/**
 * Signature of the {@code nonZeroes} method that the usage examples on this page call on vectors.
 *
 * NOTE(review): judging by the method name, the iteration presumably covers only the
 * non-zero entries of the vector - confirm against the Mahout Javadoc.
 *
 * @return an {@code Iterable} over {@code Element} objects
 */
Iterable<Element> nonZeroes();
From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java
License:Apache License
@Override public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer) throws IOException, SyncException, InterruptedException { IntWritable key = new IntWritable(); TupleWritable value = new TupleWritable(); while (peer.readNext(key, value)) { // Logging if (isDebuggingEnabled) { for (int i = 0; i < value.size(); i++) { Vector vector = ((VectorWritable) value.get(i)).get(); logger.writeChars("bsp,input,key=" + key + ",value=" + vector.toString() + "\n"); }// w w w.ja v a2s. c o m } Vector firstVector = ((VectorWritable) value.get(0)).get(); Vector secondVector = ((VectorWritable) value.get(1)).get(); // outCardinality is resulting column size n // (l x m) * (m x n) = (l x n) boolean firstIsOutFrag = secondVector.size() == outCardinality; // outFrag is Matrix which has the resulting column cardinality // (matrixB) Vector outFrag = firstIsOutFrag ? secondVector : firstVector; // multiplier is Matrix which has the resulting row count // (transposed matrixA) Vector multiplier = firstIsOutFrag ? firstVector : secondVector; if (isDebuggingEnabled) { logger.writeChars("bsp,firstIsOutFrag=" + firstIsOutFrag + "\n"); logger.writeChars("bsp,outFrag=" + outFrag + "\n"); logger.writeChars("bsp,multiplier=" + multiplier + "\n"); } for (Vector.Element e : multiplier.nonZeroes()) { VectorWritable outVector = new VectorWritable(); // Scalar Multiplication (Vector x Element) outVector.set(outFrag.times(e.get())); peer.send(masterTask, new MatrixRowMessage(e.index(), outVector)); if (isDebuggingEnabled) { logger.writeChars("bsp,send,key=" + e.index() + ",value=" + outVector.get().toString() + "\n"); } } if (isDebuggingEnabled) { logger.flush(); } } peer.sync(); }
From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java
License:Apache License
/**
 * Copies the values yielded by {@code ratingVector.nonZeroes()} into a one-column
 * array and wraps that array in a {@link DenseMatrix}.
 *
 * The backing array is sized by {@code getNumNondefaultElements()}.
 *
 * @param ratingVector ratings; must report {@code isSequentialAccess()}
 * @return a dense matrix with one column holding the iterated values
 * @throws IllegalArgumentException if the vector is not sequential-access
 */
static Matrix createRiIiMaybeTransposed(Vector ratingVector) {
    Preconditions.checkArgument(ratingVector.isSequentialAccess(),
            "Ratings should be iterable in Index or Sequential Order");

    final int rowCount = ratingVector.getNumNondefaultElements();
    final double[][] column = new double[rowCount][1];

    int row = 0;
    for (Vector.Element rating : ratingVector.nonZeroes()) {
        column[row][0] = rating.get();
        row++;
    }

    return new DenseMatrix(column, true);
}
From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
/** Y' (Cu - I) Y + I */ private Matrix getYtransponseCuMinusIYPlusLambdaI(Vector userRatings) { Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!"); /* (Cu -I) Y */ OpenIntObjectHashMap<Vector> CuMinusIY = new OpenIntObjectHashMap<Vector>( userRatings.getNumNondefaultElements()); for (Element e : userRatings.nonZeroes()) { CuMinusIY.put(e.index(), Y.get(e.index()).times(confidence(e.get()) - 1)); }//from w ww . j a v a2s. com Matrix YtransponseCuMinusIY = new DenseMatrix(numFeatures, numFeatures); /* Y' (Cu -I) Y by outer products */ for (Element e : userRatings.nonZeroes()) { for (Vector.Element feature : Y.get(e.index()).all()) { Vector partial = CuMinusIY.get(e.index()).times(feature.get()); YtransponseCuMinusIY.viewRow(feature.index()).assign(partial, Functions.PLUS); } } /* Y' (Cu - I) Y + I add lambda on the diagonal */ for (int feature = 0; feature < numFeatures; feature++) { YtransponseCuMinusIY.setQuick(feature, feature, YtransponseCuMinusIY.getQuick(feature, feature) + lambda); } return YtransponseCuMinusIY; }
From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java
License:Apache License
/** Y' Cu p(u) */ private Matrix getYtransponseCuPu(Vector userRatings) { Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!"); Vector YtransponseCuPu = new DenseVector(numFeatures); for (Element e : userRatings.nonZeroes()) { YtransponseCuPu.assign(Y.get(e.index()).times(confidence(e.get())), Functions.PLUS); }// ww w .j av a 2 s. co m return columnVectorAsMatrix(YtransponseCuPu); }
From source file:com.netease.news.classifier.naivebayes.AbstractNaiveBayesClassifier.java
License:Apache License
/**
 * Scores an instance against a label by summing, over the entries of
 * {@code instance.nonZeroes()}, the entry value times the per-feature score
 * returned by {@code getScoreForLabelFeature}.
 *
 * @param label    label to score against
 * @param instance feature vector of the instance
 * @return the accumulated score; {@code 0.0} when nothing is iterated
 */
protected double getScoreForLabelInstance(int label, Vector instance) {
    double score = 0.0;
    for (Element feature : instance.nonZeroes()) {
        score = score + feature.get() * getScoreForLabelFeature(label, feature.index());
    }
    return score;
}
From source file:com.netease.news.classifier.naivebayes.ComplementaryThetaTrainer.java
License:Apache License
/**
 * Feeds one weight per entry of {@code perLabelWeight.nonZeroes()} into the
 * per-label theta normalizer, using the complementary naive Bayes weight formula.
 *
 * @param label          label whose normalizer is updated
 * @param perLabelWeight feature weights observed for that label
 */
@Override
public void train(int label, Vector perLabelWeight) {
    final double weightOfLabel = labelWeight(label);

    for (Vector.Element entry : perLabelWeight.nonZeroes()) {
        updatePerLabelThetaNormalizer(
                label,
                ComplementaryNaiveBayesClassifier.computeWeight(
                        featureWeight(entry.index()),
                        entry.get(),
                        totalWeightSum(),
                        weightOfLabel,
                        alphaI(),
                        numFeatures()));
    }
}
From source file:com.netease.news.classifier.naivebayes.StandardThetaTrainer.java
License:Apache License
@Override public void train(int label, Vector perLabelWeight) { double labelWeight = labelWeight(label); for (Vector.Element e : perLabelWeight.nonZeroes()) { updatePerLabelThetaNormalizer(label, StandardNaiveBayesClassifier.computeWeight(e.get(), labelWeight, alphaI(), numFeatures())); }// w w w . j av a2 s.c om }
From source file:com.pocketx.gravity.recommender.cf.similarity.mapreduce.ToItemVectorsMapper.java
License:Apache License
@Override protected void map(VarLongWritable rowIndex, VectorWritable vectorWritable, Context ctx) throws IOException, InterruptedException { Vector userRatings = vectorWritable.get(); int numElementsBeforeSampling = userRatings.getNumNondefaultElements(); userRatings = Vectors.maybeSample(userRatings, sampleSize); int numElementsAfterSampling = userRatings.getNumNondefaultElements(); int column = TasteHadoopUtils.idToIndex(rowIndex.get()); VectorWritable itemVector = new VectorWritable(new RandomAccessSparseVector(Integer.MAX_VALUE, 1)); itemVector.setWritesLaxPrecision(true); ///*from w w w. j a v a2s. c o m*/ Iterator<Vector.Element> iterator = userRatings.nonZeroes().iterator(); // while (iterator.hasNext()) { Vector.Element elem = iterator.next(); itemVector.get().setQuick(column, elem.get()); ctx.write(new IntWritable(elem.index()), itemVector); } ctx.getCounter(Elements.USER_RATINGS_USED).increment(numElementsAfterSampling); ctx.getCounter(Elements.USER_RATINGS_NEGLECTED) .increment(numElementsBeforeSampling - numElementsAfterSampling); }
From source file:com.scaleunlimited.classify.model.HashedFeaturesLibLinearModel.java
License:Apache License
/** * Given a map from term to count, generate a feature array using * _maxFeatureIndex as the max index, based on the hash of the term. * /*from ww w . j a v a2 s . c om*/ * @param terms * @return array of LibLinear features */ private Feature[] getFeatures(Map<String, Integer> terms) { // First create the vector, where each term's index is the hash // of the term, and the value is the term count. Map<Integer, Integer> collisionCount = new HashMap<>(); Vector v = new RandomAccessSparseVector(_maxFeatureIndex); for (String term : terms.keySet()) { int index = calcHashJoaat(term, _maxFeatureIndex); double curValue = v.getQuick(index); if (_averageCollisions && (curValue != 0.0)) { Integer curCollisionCount = collisionCount.get(index); if (curCollisionCount == null) { // Number of values we'll need to divide by collisionCount.put(index, 2); } else { collisionCount.put(index, curCollisionCount + 1); } v.setQuick(index, curValue + terms.get(term)); } else { v.setQuick(index, terms.get(term)); } } // Now adjust the vector for collisions, if needed. if (_averageCollisions && !collisionCount.isEmpty()) { for (Integer index : collisionCount.keySet()) { double curValue = v.getQuick(index); v.setQuick(index, curValue / collisionCount.get(index)); } } // Apply the term vector normalizer. getNormalizer().normalize(v); List<FeatureNode> features = new ArrayList<FeatureNode>(terms.size()); for (Element e : v.nonZeroes()) { features.add(new FeatureNode(e.index() + 1, e.get())); } // We need to sort by increasing index. Collections.sort(features, new Comparator<FeatureNode>() { @Override public int compare(FeatureNode o1, FeatureNode o2) { return o1.index - o2.index; } }); return features.toArray(new FeatureNode[features.size()]); }
From source file:com.twitter.algebra.nmf.ErrDMJ.java
License:Apache License
public static long run(Configuration conf, DistributedRowMatrix X, Vector xColSumVec, DistributedRowMatrix A, DistributedRowMatrix Yt, String label) throws IOException, InterruptedException, ClassNotFoundException { log.info("running " + ErrDMJ.class.getName()); if (X.numRows() != A.numRows()) { throw new CardinalityException(A.numRows(), A.numRows()); }// ww w . j a v a 2s .c o m if (A.numCols() != Yt.numCols()) { throw new CardinalityException(A.numCols(), Yt.numCols()); } if (X.numCols() != Yt.numRows()) { throw new CardinalityException(X.numCols(), Yt.numRows()); } Path outPath = new Path(A.getOutputTempPath(), label); FileSystem fs = FileSystem.get(outPath.toUri(), conf); ErrDMJ job = new ErrDMJ(); long totalErr = -1; if (!fs.exists(outPath)) { Job hJob = job.run(conf, X.getRowPath(), A.getRowPath(), Yt.getRowPath(), outPath, A.numRows(), Yt.numRows(), Yt.numCols()); Counters counters = hJob.getCounters(); counters.findCounter("Result", "sumAbs").getValue(); log.info("FINAL ERR is " + totalErr); } else { log.warn("----------- Skip already exists: " + outPath); } Vector sumErrVec = AlgebraCommon.mapDirToSparseVector(outPath, 1, X.numCols(), conf); double maxColErr = Double.MIN_VALUE; double sumColErr = 0; int cntColErr = 0; Iterator<Vector.Element> it = sumErrVec.nonZeroes().iterator(); while (it.hasNext()) { Vector.Element el = it.next(); double errP2 = el.get(); double origP2 = xColSumVec.get(el.index()); double colErr = Math.sqrt(errP2 / origP2); log.info("col: " + el.index() + " sum(err^2): " + errP2 + " sum(val^2): " + origP2 + " colErr: " + colErr); maxColErr = Math.max(colErr, maxColErr); sumColErr += colErr; cntColErr++; } log.info(" Max Col Err: " + maxColErr); log.info(" Avg Col Err: " + sumColErr / cntColErr); return totalErr; }