Example usage for org.apache.mahout.math Vector getQuick

List of usage examples for org.apache.mahout.math Vector getQuick

Introduction

On this page you can find example usages of org.apache.mahout.math Vector getQuick.

Prototype

double getQuick(int index);

Source Link

Document

Returns the value at the given index, without checking bounds.

Usage

From source file:com.cloudera.science.ml.core.vectors.Vectors.java

License:Open Source License

/**
 * Copies every entry of a {@code Vector} into a freshly allocated {@code double[]}.
 *
 * @param v The vector to convert
 * @return An array of length {@code v.size()} whose slot {@code i} holds {@code v.getQuick(i)}
 */
public static double[] toArray(Vector v) {
    final int length = v.size();
    final double[] values = new double[length];
    int pos = 0;
    while (pos < length) {
        // getQuick skips bounds checking; pos is always within [0, size).
        values[pos] = v.getQuick(pos);
        pos++;
    }
    return values;
}

From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java

License:Open Source License

/**
 * Computes a bit signature for a vector from {@code projectionBits} dot products.
 *
 * <p>For each {@code j} in {@code [0, projectionBits)} the method accumulates
 * {@code sum_i vec[i] * projection[i + j * dimensions]} and sets bit {@code j} when that sum is
 * strictly positive. Dense vectors are scanned position by position (zeros are skipped);
 * other vectors are walked through their non-zero elements only. Both paths accumulate the
 * same sums, so the signature does not depend on the vector's storage format.
 *
 * @param vec the vector to index
 * @return a bit set in which bit {@code j} is set iff the j-th accumulated product is &gt; 0
 */
private BitSet index(Vector vec) {
    double[] prod = new double[projectionBits];
    if (vec.isDense()) {
        for (int i = 0; i < vec.size(); i++) {
            double v = vec.getQuick(i);
            if (v != 0.0) {
                for (int j = 0; j < projectionBits; j++) {
                    prod[j] += v * projection[i + j * dimensions];
                }
            }
        }
    } else {
        Iterator<Vector.Element> iter = vec.iterateNonZero();
        while (iter.hasNext()) {
            Vector.Element e = iter.next();
            double v = e.get();
            int i = e.index();
            for (int j = 0; j < projectionBits; j++) {
                // Accumulate, exactly as the dense branch does. A plain assignment here
                // would discard every element's contribution except the last one visited.
                prod[j] += v * projection[i + j * dimensions];
            }
        }
    }
    BitSet bitset = new BitSet(projectionBits);
    for (int i = 0; i < projectionBits; i++) {
        if (prod[i] > 0.0) {
            bitset.set(i);
        }
    }
    return bitset;
}

From source file:com.cloudera.science.ml.kmeans.parallel.CentersIndex.java

License:Open Source License

/**
 * Dot product of a vector with a plain array of coefficients.
 *
 * <p>For a dense vector the first {@code p.length} positions are multiplied pairwise with
 * {@code p}; otherwise only the vector's non-zero elements are visited and each is multiplied
 * by the coefficient at the element's index.
 *
 * @param vec the vector operand
 * @param p the coefficient array
 * @return the accumulated sum of products
 */
private static double dot(Vector vec, double[] p) {
    if (!vec.isDense()) {
        double sparseSum = 0;
        for (Iterator<Vector.Element> nonZeros = vec.iterateNonZero(); nonZeros.hasNext();) {
            Vector.Element entry = nonZeros.next();
            sparseSum += entry.get() * p[entry.index()];
        }
        return sparseSum;
    }

    double denseSum = 0;
    for (int idx = 0; idx < p.length; idx++) {
        denseSum += vec.getQuick(idx) * p[idx];
    }
    return denseSum;
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Re-estimates the topic distribution of a single document.
 *
 * <p>{@code docTopicModel} is overwritten with the document-weighted term/topic values and
 * {@code topics} is overwritten with the renormalized per-topic totals.
 *
 * @param original the document's term vector; only its non-zero entries are visited
 * @param topics the document's topic distribution; used as input to the first step and
 *        replaced on return
 * @param docTopicModel matrix with one row per topic, updated in place
 */
public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
    // Step 1: compute p(topic|term,document) for every term of the document and every topic
    // from p(term|topic) and p(topic|doc), then normalize by topic.
    pTopicGivenTerm(original, topics, docTopicModel);
    normalizeByTopic(docTopicModel);

    // Step 2: scale each term's column by the term's weight in the document, which yields the
    // weighted distribution of term-topic pairs for this document.
    for (Iterator<Vector.Element> terms = original.iterateNonZero(); terms.hasNext();) {
        Vector.Element term = terms.next();
        for (int topic = 0; topic < numTopics; topic++) {
            Vector row = docTopicModel.viewRow(topic);
            double current = row.getQuick(term.index());
            row.setQuick(term.index(), current * term.get());
        }
    }

    // Step 3: p(topic|doc) becomes the L1 norm of each topic's row.
    topics.assign(0.0);
    int topic = 0;
    while (topic < numTopics) {
        double mass = docTopicModel.viewRow(topic).norm(1);
        topics.set(topic, mass);
        topic++;
    }

    // Step 4: renormalize so that sum_x(p(x|doc)) = 1.
    double total = topics.norm(1);
    topics.assign(Functions.mult(1 / total));
}

From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java

License:Apache License

/**
 * Builds a term-frequency vector for one key and writes it to the context.
 *
 * <p>Only the first {@code StringTuple} of {@code values} is consumed; if there is none the
 * method returns without writing anything. Each term found in {@code dictionary} increments
 * the vector cell at the term's dictionary id by one. When {@code maxNGramSize >= 2} the terms
 * come from a {@code ShingleFilter} over the tuple's entries, otherwise from the entries
 * themselves. Vectors with no stored elements are not written; a counter is incremented
 * instead.
 *
 * @param key the key the vector is written under (also used as the name for named vectors)
 * @param values the tuples for this key; only the first one is read
 * @param context the context receiving the vector or the empty-vector counter update
 * @throws IOException declared by the overridden method
 * @throws InterruptedException declared by the overridden method
 */
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
        throws IOException, InterruptedException {
    Iterator<StringTuple> it = values.iterator();
    if (!it.hasNext()) {
        return;
    }
    // Any further tuples for this key are ignored.
    StringTuple value = it.next();

    Vector vector = new RandomAccessSparseVector(dimension, value.length()); // guess at initial size

    if (maxNGramSize >= 2) {
        ShingleFilter sf = new ShingleFilter(new IteratorTokenStream(value.getEntries().iterator()),
                maxNGramSize);
        try {
            // NOTE(review): the term attribute is read once before the first incrementToken()
            // call; presumably it is empty at that point and is skipped by the isEmpty() check
            // below - confirm against the token stream contract in use.
            do {
                String term = sf.getAttribute(CharTermAttribute.class).toString();
                if (!term.isEmpty() && dictionary.containsKey(term)) { // ngram
                    int termId = dictionary.get(term);
                    vector.setQuick(termId, vector.getQuick(termId) + 1);
                }
            } while (sf.incrementToken());

            sf.end();
        } finally {
            // Close the filter whether or not tokenization succeeded.
            Closeables.closeQuietly(sf);
        }
    } else {
        for (String term : value.getEntries()) {
            if (!term.isEmpty() && dictionary.containsKey(term)) { // unigram
                int termId = dictionary.get(term);
                vector.setQuick(termId, vector.getQuick(termId) + 1);
            }
        }
    }
    // Optionally re-wrap the vector in a sequential-access representation.
    if (sequentialAccess) {
        vector = new SequentialAccessSparseVector(vector);
    }

    // Optionally attach the key's string form as the vector's name.
    if (namedVector) {
        vector = new NamedVector(vector, key.toString());
    }

    // if the vector has no nonZero entries (nothing in the dictionary), let's not waste space sending it to disk.
    if (vector.getNumNondefaultElements() > 0) {
        VectorWritable vectorWritable = new VectorWritable(vector);
        context.write(key, vectorWritable);
    } else {
        context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
    }
}

From source file:com.ikanow.infinit.e.processing.custom.utils.HadoopUtils.java

License:Open Source License

/**
 * Converts a Mahout vector into a {@code BasicDBList}.
 *
 * <p>If the vector is a {@code NamedVector}, its name is stored on {@code element} under the
 * key {@code prefix + "Name"}. A dense vector becomes a flat list of its values in index
 * order; any other vector becomes a list of objects of the form {@code {k: index, v: value}},
 * one per non-zero element.
 *
 * @param vec the vector to convert
 * @param prefix prefix of the key used to record a named vector's name
 * @param element object that receives the name entry, when there is one
 * @return the list representation of the vector's contents
 */
private static BasicDBList listFromMahoutVector(Vector vec, String prefix, BasicDBObject element) {
    if (vec instanceof NamedVector) {
        NamedVector named = (NamedVector) vec;
        element.put(prefix + "Name", named.getName());
    }

    BasicDBList out = new BasicDBList();
    if (!vec.isDense()) {
        // sparse, write as a set in the format [{int:double}]
        for (Iterator<org.apache.mahout.math.Vector.Element> nonZeros = vec.iterateNonZero(); nonZeros
                .hasNext();) {
            org.apache.mahout.math.Vector.Element entry = nonZeros.next();
            BasicDBObject pair = new BasicDBObject();
            pair.put("k", entry.index());
            pair.put("v", entry.get());
            out.add(pair);
        }
        return out;
    }

    // dense, write every value in index order
    int count = vec.size();
    out.ensureCapacity(count);
    for (int pos = 0; pos < count; ++pos) {
        out.add(vec.getQuick(pos));
    }
    return out;
}

From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Packs feature vectors into a dense matrix, one vector per column.
 *
 * <p>The result has {@code numFeatures} rows and as many columns as there are feature vectors;
 * cell {@code (m, n)} holds feature {@code m} of the n-th vector in iteration order.
 *
 * @param featureVectors the vectors to pack; iterated once for counting and once for copying
 * @param numFeatures the number of leading entries read from every vector
 * @return a {@code DenseMatrix} built from the packed values
 */
static Matrix createMiIi(Iterable<Vector> featureVectors, int numFeatures) {
    int columnCount = Iterables.size(featureVectors);
    double[][] cells = new double[numFeatures][columnCount];

    int column = 0;
    for (Vector features : featureVectors) {
        for (int row = 0; row < numFeatures; row++) {
            cells[row][column] = features.getQuick(row);
        }
        column++;
    }
    return new DenseMatrix(cells, true);
}

From source file:com.innometrics.integration.app.recommender.ml.als.ImplicitFeedbackAlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Computes the symmetric {@code numFeatures x numFeatures} matrix Y'Y in parallel.
 *
 * <p>One task is submitted per pair {@code (i, j)} with {@code i <= j}. Each task sums
 * {@code row[i] * row[j]} over every vector stored in {@code Y} and writes the result to the
 * cells {@code [i][j]} and {@code [j][i]}, so no two tasks write the same cell. The method
 * blocks until all tasks have finished.
 *
 * @param Y the feature vectors, keyed by index
 * @return a {@code DenseMatrix} holding Y'Y
 * @throws RuntimeException if the calling thread is interrupted while waiting (the interrupt
 *         flag is restored and the {@code InterruptedException} is attached as the cause), or
 *         if the tasks do not finish within the wait limit
 */
public Matrix getYtransposeY(final OpenIntObjectHashMap<Vector> Y) {

    ExecutorService queue = Executors.newFixedThreadPool(numTrainingThreads);
    if (log.isInfoEnabled()) {
        log.info("Starting the computation of Y'Y");
    }
    long startTime = System.nanoTime();
    final IntArrayList indexes = Y.keys();
    final int numIndexes = indexes.size();

    final double[][] YtY = new double[numFeatures][numFeatures];

    try {
        // Compute Y'Y by dot products between the 'columns' of Y
        for (int i = 0; i < numFeatures; i++) {
            for (int j = i; j < numFeatures; j++) {

                final int ii = i;
                final int jj = j;
                queue.execute(new Runnable() {
                    @Override
                    public void run() {
                        double dot = 0;
                        for (int k = 0; k < numIndexes; k++) {
                            Vector row = Y.get(indexes.getQuick(k));
                            dot += row.getQuick(ii) * row.getQuick(jj);
                        }
                        YtY[ii][jj] = dot;
                        // The matrix is symmetric, so mirror the value below the diagonal.
                        if (ii != jj) {
                            YtY[jj][ii] = dot;
                        }
                    }
                });

            }
        }
        queue.shutdown();
        // A false return means tasks are still running; returning now would hand back a
        // partially filled matrix, so fail loudly instead.
        if (!queue.awaitTermination(1, TimeUnit.DAYS)) {
            throw new RuntimeException("Timed out waiting for the Y'Y computation to finish");
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can observe it.
        Thread.currentThread().interrupt();
        log.error("Error during Y'Y queue shutdown", e);
        throw new RuntimeException("Error during Y'Y queue shutdown", e);
    } finally {
        // Stops any still-running workers on failure; has nothing left to do after a normal
        // completion because the pool has already terminated.
        queue.shutdownNow();
    }
    if (log.isInfoEnabled()) {
        log.info("Computed Y'Y in " + (System.nanoTime() - startTime) / 1000000.0 + " ms");
    }
    return new DenseMatrix(YtY, true);
}

From source file:com.scaleunlimited.classify.model.HashedFeaturesLibLinearModel.java

License:Apache License

/**
 * Given a map from term to count, generate a feature array using
 * _maxFeatureIndex as the max index, based on the hash of the term.
 * /*from   w  w w .j  a  va  2 s .co m*/
 * @param terms
 * @return array of LibLinear features
 */

private Feature[] getFeatures(Map<String, Integer> terms) {

    // First create the vector, where each term's index is the hash
    // of the term, and the value is the term count.
    Map<Integer, Integer> collisionCount = new HashMap<>();
    Vector v = new RandomAccessSparseVector(_maxFeatureIndex);
    for (String term : terms.keySet()) {
        int index = calcHashJoaat(term, _maxFeatureIndex);
        double curValue = v.getQuick(index);
        if (_averageCollisions && (curValue != 0.0)) {
            Integer curCollisionCount = collisionCount.get(index);
            if (curCollisionCount == null) {
                // Number of values we'll need to divide by
                collisionCount.put(index, 2);
            } else {
                collisionCount.put(index, curCollisionCount + 1);
            }

            v.setQuick(index, curValue + terms.get(term));
        } else {
            v.setQuick(index, terms.get(term));
        }
    }

    // Now adjust the vector for collisions, if needed.
    if (_averageCollisions && !collisionCount.isEmpty()) {
        for (Integer index : collisionCount.keySet()) {
            double curValue = v.getQuick(index);
            v.setQuick(index, curValue / collisionCount.get(index));
        }
    }

    // Apply the term vector normalizer.
    getNormalizer().normalize(v);

    List<FeatureNode> features = new ArrayList<FeatureNode>(terms.size());
    for (Element e : v.nonZeroes()) {
        features.add(new FeatureNode(e.index() + 1, e.get()));
    }

    // We need to sort by increasing index.
    Collections.sort(features, new Comparator<FeatureNode>() {

        @Override
        public int compare(FeatureNode o1, FeatureNode o2) {
            return o1.index - o2.index;
        }
    });

    return features.toArray(new FeatureNode[features.size()]);
}

From source file:com.scaleunlimited.classify.model.RawFeaturesLibLinearModel.java

License:Apache License

/**
 * Converts a vector into an array of LibLinear feature nodes, one per non-zero value.
 *
 * <p>Nodes appear in increasing index order and carry 1-based indexes. The returned array
 * never contains {@code null} entries: it is trimmed to the number of non-zero values
 * actually found.
 *
 * @param vector the vector to convert
 * @return the feature nodes for the vector's non-zero values
 */
private FeatureNode[] vectorToFeatureNodes(Vector vector) {
    // Used only as the initial capacity; see the trimming step below.
    int featureCount = vector.getNumNondefaultElements();
    FeatureNode[] x = new FeatureNode[featureCount];
    int arrayIndex = 0;
    int cardinality = vector.size();
    for (int i = 0; i < cardinality; i++) {
        double value = vector.getQuick(i);
        if (value != 0.0) {
            // (At least) Linear.train assumes that FeatureNode.index
            // is 1-based, and we don't really have to map back to our
            // term indexes, so just add one. YUCK!
            x[arrayIndex++] = new FeatureNode(i + 1, value);
        }
    }
    // The loop stores only values that are != 0.0, while the capacity came from
    // getNumNondefaultElements(). NOTE(review): that count can presumably be larger than the
    // number of non-zero values (e.g. a dense vector that contains zeros) - confirm against
    // the Mahout version in use. Trim so that no trailing null entries are handed out.
    if (arrayIndex < x.length) {
        x = java.util.Arrays.copyOf(x, arrayIndex);
    }
    return x;
}