Example usage for org.apache.mahout.math Vector nonZeroes

List of usage examples for org.apache.mahout.math Vector nonZeroes

Introduction

On this page you can find example usages of org.apache.mahout.math Vector nonZeroes.

Prototype

Iterable<Element> nonZeroes();

Source Link

Usage

From source file:at.illecker.hama.rootbeer.examples.matrixmultiplication.compositeinput.cpu.MatrixMultiplicationBSPCpu.java

License:Apache License

/**
 * Consumes every (key, tuple) input record. For each record, the vector whose size
 * matches {@code outCardinality} handling below is scaled by every non-zero entry of
 * the other vector, and each scaled vector is sent to {@code masterTask} tagged with
 * the index of the entry that produced it. Finishes with a single {@code peer.sync()}.
 *
 * @param peer the BSP peer used for reading input, sending messages and syncing
 * @throws IOException propagated from reading, logging or sending
 * @throws SyncException propagated from {@code peer.sync()}
 * @throws InterruptedException propagated from the peer operations
 */
@Override
public void bsp(BSPPeer<IntWritable, TupleWritable, IntWritable, VectorWritable, MatrixRowMessage> peer)
        throws IOException, SyncException, InterruptedException {

    final IntWritable key = new IntWritable();
    final TupleWritable value = new TupleWritable();

    while (peer.readNext(key, value)) {

        // Dump every vector of the incoming tuple when debugging
        if (isDebuggingEnabled) {
            for (int pos = 0; pos < value.size(); pos++) {
                Vector current = ((VectorWritable) value.get(pos)).get();
                logger.writeChars("bsp,input,key=" + key + ",value=" + current.toString() + "\n");
            }
        }

        final Vector firstVector = ((VectorWritable) value.get(0)).get();
        final Vector secondVector = ((VectorWritable) value.get(1)).get();

        // outCardinality is the resulting column size n:
        // (l x m) * (m x n) = (l x n)
        final boolean firstIsOutFrag = secondVector.size() == outCardinality;

        // outFrag carries the resulting column cardinality (matrixB);
        // multiplier carries the resulting row count (transposed matrixA)
        final Vector outFrag;
        final Vector multiplier;
        if (firstIsOutFrag) {
            outFrag = secondVector;
            multiplier = firstVector;
        } else {
            outFrag = firstVector;
            multiplier = secondVector;
        }

        if (isDebuggingEnabled) {
            logger.writeChars("bsp,firstIsOutFrag=" + firstIsOutFrag + "\n");
            logger.writeChars("bsp,outFrag=" + outFrag + "\n");
            logger.writeChars("bsp,multiplier=" + multiplier + "\n");
        }

        for (Vector.Element entry : multiplier.nonZeroes()) {

            // Scalar multiplication (vector x element)
            VectorWritable scaledRow = new VectorWritable();
            scaledRow.set(outFrag.times(entry.get()));

            peer.send(masterTask, new MatrixRowMessage(entry.index(), scaledRow));

            if (isDebuggingEnabled) {
                logger.writeChars(
                        "bsp,send,key=" + entry.index() + ",value=" + scaledRow.get().toString() + "\n");
            }
        }

        if (isDebuggingEnabled) {
            logger.flush();
        }
    }

    peer.sync();
}

From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Copies the non-zero entries of {@code ratingVector}, in iteration order, into a
 * one-column array and wraps that array in a {@link DenseMatrix}.
 *
 * @param ratingVector ratings; must report sequential access
 * @return a matrix with one row per non-default element and a single column
 * @throws IllegalArgumentException if the vector is not sequential-access
 */
static Matrix createRiIiMaybeTransposed(Vector ratingVector) {
    Preconditions.checkArgument(ratingVector.isSequentialAccess(),
            "Ratings should be iterable in Index or Sequential Order");

    final int numRatings = ratingVector.getNumNondefaultElements();
    final double[][] ratingColumn = new double[numRatings][1];

    int row = 0;
    for (Vector.Element rating : ratingVector.nonZeroes()) {
        ratingColumn[row][0] = rating.get();
        row++;
    }

    return new DenseMatrix(ratingColumn, true);
}

From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Computes Y' (Cu - I) Y + lambda * I for the items rated in {@code userRatings}.
 *
 * @param userRatings the user's ratings; must report sequential access
 * @return a {@code numFeatures x numFeatures} matrix
 * @throws IllegalArgumentException if the ratings are not sequential-access
 */
private Matrix getYtransponseCuMinusIYPlusLambdaI(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    // (Cu - I) Y: the feature vector of every rated item, scaled by (confidence - 1)
    final OpenIntObjectHashMap<Vector> scaledItemFeatures = new OpenIntObjectHashMap<Vector>(
            userRatings.getNumNondefaultElements());
    for (Element rating : userRatings.nonZeroes()) {
        final int item = rating.index();
        scaledItemFeatures.put(item, Y.get(item).times(confidence(rating.get()) - 1));
    }

    final Matrix result = new DenseMatrix(numFeatures, numFeatures);

    // Y' (Cu - I) Y, accumulated row by row as a sum of outer products
    for (Element rating : userRatings.nonZeroes()) {
        final int item = rating.index();
        final Vector scaled = scaledItemFeatures.get(item);
        for (Vector.Element feature : Y.get(item).all()) {
            Vector contribution = scaled.times(feature.get());
            result.viewRow(feature.index()).assign(contribution, Functions.PLUS);
        }
    }

    // Add lambda to every diagonal entry
    for (int d = 0; d < numFeatures; d++) {
        double diagonal = result.getQuick(d, d);
        result.setQuick(d, d, diagonal + lambda);
    }

    return result;
}

From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Computes Y' Cu p(u): the sum, over all non-zero ratings, of the rated item's
 * feature vector scaled by the confidence of that rating.
 *
 * @param userRatings the user's ratings; must report sequential access
 * @return the accumulated vector converted by {@code columnVectorAsMatrix}
 * @throws IllegalArgumentException if the ratings are not sequential-access
 */
private Matrix getYtransponseCuPu(Vector userRatings) {
    Preconditions.checkArgument(userRatings.isSequentialAccess(), "need sequential access to ratings!");

    final Vector accumulated = new DenseVector(numFeatures);

    for (Element rating : userRatings.nonZeroes()) {
        Vector weightedItemFeatures = Y.get(rating.index()).times(confidence(rating.get()));
        accumulated.assign(weightedItemFeatures, Functions.PLUS);
    }

    return columnVectorAsMatrix(accumulated);
}

From source file:com.netease.news.classifier.naivebayes.AbstractNaiveBayesClassifier.java

License:Apache License

/**
 * Sums, over the non-zero entries of {@code instance}, the entry value multiplied by
 * the score of that entry's index for the given label.
 *
 * @param label the label to score against
 * @param instance the instance vector
 * @return the accumulated score; {@code 0.0} when the instance has no non-zero entries
 */
protected double getScoreForLabelInstance(int label, Vector instance) {
    double score = 0.0;
    for (Element feature : instance.nonZeroes()) {
        final double value = feature.get();
        score += value * getScoreForLabelFeature(label, feature.index());
    }
    return score;
}

From source file:com.netease.news.classifier.naivebayes.ComplementaryThetaTrainer.java

License:Apache License

/**
 * Feeds one complementary-classifier weight per non-zero entry of
 * {@code perLabelWeight} into the per-label theta normalizer of {@code label}.
 *
 * @param label the label being trained
 * @param perLabelWeight the weights observed for that label
 */
@Override
public void train(int label, Vector perLabelWeight) {
    final double labelWeight = labelWeight(label);

    for (Vector.Element weightEntry : perLabelWeight.nonZeroes()) {
        final int featureIndex = weightEntry.index();
        updatePerLabelThetaNormalizer(
                label,
                ComplementaryNaiveBayesClassifier.computeWeight(
                        featureWeight(featureIndex),
                        weightEntry.get(),
                        totalWeightSum(),
                        labelWeight,
                        alphaI(),
                        numFeatures()));
    }
}

From source file:com.netease.news.classifier.naivebayes.StandardThetaTrainer.java

License:Apache License

/**
 * Feeds one standard-classifier weight per non-zero entry of {@code perLabelWeight}
 * into the per-label theta normalizer of {@code label}.
 *
 * @param label the label being trained
 * @param perLabelWeight the weights observed for that label
 */
@Override
public void train(int label, Vector perLabelWeight) {
    final double labelWeight = labelWeight(label);

    for (Vector.Element weightEntry : perLabelWeight.nonZeroes()) {
        final double featureValue = weightEntry.get();
        updatePerLabelThetaNormalizer(
                label,
                StandardNaiveBayesClassifier.computeWeight(featureValue, labelWeight, alphaI(), numFeatures()));
    }
}

From source file:com.pocketx.gravity.recommender.cf.similarity.mapreduce.ToItemVectorsMapper.java

License:Apache License

/**
 * Optionally samples the incoming ratings vector, then writes one record per
 * non-zero rating: the key is the rating's index and the value is a sparse vector
 * holding the rating at the column derived from {@code rowIndex}. Also updates the
 * used / neglected rating counters.
 *
 * @param rowIndex id of the incoming row
 * @param vectorWritable the ratings of that row
 * @param ctx the mapper context written to
 * @throws IOException propagated from {@code ctx.write}
 * @throws InterruptedException propagated from {@code ctx.write}
 */
@Override
protected void map(VarLongWritable rowIndex, VectorWritable vectorWritable, Context ctx)
        throws IOException, InterruptedException {
    final Vector allRatings = vectorWritable.get();
    final int numElementsBeforeSampling = allRatings.getNumNondefaultElements();

    final Vector userRatings = Vectors.maybeSample(allRatings, sampleSize);
    final int numElementsAfterSampling = userRatings.getNumNondefaultElements();

    final int column = TasteHadoopUtils.idToIndex(rowIndex.get());

    // One writable is reused for every output record; only the entry at 'column' changes
    final VectorWritable itemVector = new VectorWritable(new RandomAccessSparseVector(Integer.MAX_VALUE, 1));
    itemVector.setWritesLaxPrecision(true);

    for (Vector.Element rating : userRatings.nonZeroes()) {
        itemVector.get().setQuick(column, rating.get());
        ctx.write(new IntWritable(rating.index()), itemVector);
    }

    final int numNeglected = numElementsBeforeSampling - numElementsAfterSampling;
    ctx.getCounter(Elements.USER_RATINGS_USED).increment(numElementsAfterSampling);
    ctx.getCounter(Elements.USER_RATINGS_NEGLECTED).increment(numNeglected);
}

From source file:com.scaleunlimited.classify.model.HashedFeaturesLibLinearModel.java

License:Apache License

/**
 * Given a map from term to count, generate a feature array using
 * _maxFeatureIndex as the max index, based on the hash of the term.
 * /*from ww w .  j a v  a2  s . c  om*/
 * @param terms
 * @return array of LibLinear features
 */

private Feature[] getFeatures(Map<String, Integer> terms) {

    // First create the vector, where each term's index is the hash
    // of the term, and the value is the term count.
    Map<Integer, Integer> collisionCount = new HashMap<>();
    Vector v = new RandomAccessSparseVector(_maxFeatureIndex);
    for (String term : terms.keySet()) {
        int index = calcHashJoaat(term, _maxFeatureIndex);
        double curValue = v.getQuick(index);
        if (_averageCollisions && (curValue != 0.0)) {
            Integer curCollisionCount = collisionCount.get(index);
            if (curCollisionCount == null) {
                // Number of values we'll need to divide by
                collisionCount.put(index, 2);
            } else {
                collisionCount.put(index, curCollisionCount + 1);
            }

            v.setQuick(index, curValue + terms.get(term));
        } else {
            v.setQuick(index, terms.get(term));
        }
    }

    // Now adjust the vector for collisions, if needed.
    if (_averageCollisions && !collisionCount.isEmpty()) {
        for (Integer index : collisionCount.keySet()) {
            double curValue = v.getQuick(index);
            v.setQuick(index, curValue / collisionCount.get(index));
        }
    }

    // Apply the term vector normalizer.
    getNormalizer().normalize(v);

    List<FeatureNode> features = new ArrayList<FeatureNode>(terms.size());
    for (Element e : v.nonZeroes()) {
        features.add(new FeatureNode(e.index() + 1, e.get()));
    }

    // We need to sort by increasing index.
    Collections.sort(features, new Comparator<FeatureNode>() {

        @Override
        public int compare(FeatureNode o1, FeatureNode o2) {
            return o1.index - o2.index;
        }
    });

    return features.toArray(new FeatureNode[features.size()]);
}

From source file:com.twitter.algebra.nmf.ErrDMJ.java

License:Apache License

/**
 * Validates the dimensions of {@code X}, {@code A} and {@code Yt}, runs the ErrDMJ job
 * unless its output path already exists, and then logs a per-column error plus the
 * maximum and average column error read back from the output directory.
 *
 * @param conf the configuration used for the file system and the job
 * @param X matrix whose row count must equal that of {@code A} and whose column
 *        count must equal the row count of {@code Yt}
 * @param xColSumVec per-column value used as the denominator of each column error
 * @param A matrix whose column count must equal that of {@code Yt}; its temp path
 *        hosts the output
 * @param Yt third input matrix of the job
 * @param label name of the output directory under {@code A}'s temp path
 * @return the value of the {@code Result/sumAbs} job counter, or {@code -1} when the
 *         output already existed and the job was skipped
 * @throws CardinalityException if any of the dimension checks fails
 * @throws IOException propagated from the file system or the job
 * @throws InterruptedException propagated from the job
 * @throws ClassNotFoundException propagated from the job
 */
public static long run(Configuration conf, DistributedRowMatrix X, Vector xColSumVec, DistributedRowMatrix A,
        DistributedRowMatrix Yt, String label)
        throws IOException, InterruptedException, ClassNotFoundException {
    log.info("running " + ErrDMJ.class.getName());
    if (X.numRows() != A.numRows()) {
        // Report the two sizes that actually disagree (previously A.numRows() twice)
        throw new CardinalityException(X.numRows(), A.numRows());
    }
    if (A.numCols() != Yt.numCols()) {
        throw new CardinalityException(A.numCols(), Yt.numCols());
    }
    if (X.numCols() != Yt.numRows()) {
        throw new CardinalityException(X.numCols(), Yt.numRows());
    }
    Path outPath = new Path(A.getOutputTempPath(), label);
    FileSystem fs = FileSystem.get(outPath.toUri(), conf);
    ErrDMJ job = new ErrDMJ();
    long totalErr = -1;
    if (!fs.exists(outPath)) {
        Job hJob = job.run(conf, X.getRowPath(), A.getRowPath(), Yt.getRowPath(), outPath, A.numRows(),
                Yt.numRows(), Yt.numCols());
        Counters counters = hJob.getCounters();
        // Keep the counter value; it was previously read and discarded, leaving -1
        totalErr = counters.findCounter("Result", "sumAbs").getValue();
        log.info("FINAL ERR is " + totalErr);
    } else {
        log.warn("----------- Skip already exists: " + outPath);
    }
    Vector sumErrVec = AlgebraCommon.mapDirToSparseVector(outPath, 1, X.numCols(), conf);
    // Column errors are square roots, hence never negative, so 0 is the right seed for
    // the maximum. Double.MIN_VALUE is the smallest POSITIVE double, not the most
    // negative one.
    double maxColErr = 0.0;
    double sumColErr = 0;
    int cntColErr = 0;
    for (Vector.Element el : sumErrVec.nonZeroes()) {
        double errP2 = el.get();
        double origP2 = xColSumVec.get(el.index());
        double colErr = Math.sqrt(errP2 / origP2);
        log.info("col: " + el.index() + " sum(err^2): " + errP2 + " sum(val^2): " + origP2 + " colErr: "
                + colErr);
        maxColErr = Math.max(colErr, maxColErr);
        sumColErr += colErr;
        cntColErr++;
    }
    log.info(" Max Col Err: " + maxColErr);
    // With no non-zero columns this logs NaN (0.0 / 0)
    log.info(" Avg Col Err: " + sumColErr / cntColErr);
    return totalErr;
}