Example usage for org.apache.mahout.math Vector norm

List of usage examples for org.apache.mahout.math Vector norm

Introduction

On this page you can find example usage of the org.apache.mahout.math Vector norm method.

Prototype

double norm(double power);

Document

Return the k-norm of the vector, where k is the given power.
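
A minimal, self-contained sketch (not taken from the usage examples below; the values are invented for illustration) showing how the power argument selects the norm:

import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;

public class NormDemo {
    public static void main(String[] args) {
        Vector v = new DenseVector(new double[] { 3.0, -4.0, 0.0 });
        System.out.println(v.norm(1)); // 7.0 : sum of absolute values (1-norm)
        System.out.println(v.norm(2)); // 5.0 : Euclidean length (2-norm)
    }
}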

Usage

From source file: ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License: Apache License

public static void orthonormalizeColumns(Matrix mx) {

    // Note: despite the method name, this implementation orthonormalizes the
    // rows of mx (it iterates over numRows() and uses viewRow()).
    //int n = mx.numCols();
    int n = mx.numRows();

    for (int c = 0; c < n; c++) {
        System.out.println("col: " + c);
        Vector col = mx.viewRow(c);
        for (int c1 = 0; c1 < c; c1++) {
            Vector viewC1 = mx.viewRow(c1);
            col.assign(col.minus(viewC1.times(viewC1.dot(col))));

        }
        final double norm2 = col.norm(2);
        if (norm2 == 0) {
            System.out.println("zero");
        }
        col.assign(new DoubleFunction() {
            @Override
            public double apply(double x) {
                return x / norm2;
            }
        });
    }
}
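
When norm2 is zero the code above only prints "zero" and still performs the division, which fills the column with NaN values. A hypothetical guard (not part of the original snippet, assuming org.apache.mahout.math.function.Functions) that skips the scaling for numerically zero columns could look like this:

final double norm2 = col.norm(2);
if (norm2 > 1e-12) {
    // scale to unit length only when the column is non-degenerate
    col.assign(Functions.mult(1.0 / norm2));
}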

From source file: com.elex.dmp.core.TopicModel.java

License: Apache License

public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
    // first calculate p(topic|term,document) for all terms in original, and all topics,
    // using p(term|topic) and p(topic|doc)
    pTopicGivenTerm(original, topics, docTopicModel);
    normalizeByTopic(docTopicModel);
    // now multiply, term-by-term, by the document, to get the weighted distribution of
    // term-topic pairs from this document.
    Iterator<Vector.Element> it = original.iterateNonZero();
    while (it.hasNext()) {
        Vector.Element e = it.next();
        for (int x = 0; x < numTopics; x++) {
            Vector docTopicModelRow = docTopicModel.viewRow(x);
            docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get());
        }
    }
    // now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm
    topics.assign(0.0);
    for (int x = 0; x < numTopics; x++) {
        topics.set(x, docTopicModel.viewRow(x).norm(1));
    }
    // now renormalize so that sum_x(p(x|doc)) = 1
    topics.assign(Functions.mult(1 / topics.norm(1)));
}
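
The last two statements rely on norm(1) being the sum of the (non-negative) topic weights, so multiplying by 1 / topics.norm(1) rescales the vector into a distribution that sums to one. The same pattern in isolation (values invented for illustration):

Vector weights = new DenseVector(new double[] { 2.0, 6.0, 2.0 });
weights.assign(Functions.mult(1.0 / weights.norm(1))); // now 0.2, 0.6, 0.2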

From source file: com.elex.dmp.core.TopicModel.java

License: Apache License

public void updateTopic(int topic, Vector docTopicCounts) {
    topicTermCounts.viewRow(topic).assign(docTopicCounts, Functions.PLUS);
    topicSums.set(topic, topicSums.get(topic) + docTopicCounts.norm(1));
}

From source file: com.elex.dmp.core.TopicModel.java

License: Apache License

/**
 * sum_x sum_a (c_ai * log(p(x|i) * p(a|x)))
 */
public double perplexity(Vector document, Vector docTopics) {
    double perplexity = 0;
    double norm = docTopics.norm(1) + (docTopics.size() * alpha);
    Iterator<Vector.Element> it = document.iterateNonZero();
    while (it.hasNext()) {
        Vector.Element e = it.next();
        int term = e.index();
        double prob = 0;
        for (int x = 0; x < numTopics; x++) {
            double d = (docTopics.get(x) + alpha) / norm;
            double p = d * (topicTermCounts.viewRow(x).get(term) + eta) / (topicSums.get(x) + eta * numTerms);
            prob += p;
        }
        perplexity += e.get() * Math.log(prob);
    }
    return -perplexity;
}

From source file: com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java

License: Apache License

private void postInitCorpus() {
    totalCorpusWeight = 0;
    int numNonZero = 0;
    for (int i = 0; i < numDocuments; i++) {
        Vector v = corpusWeights.viewRow(i);
        double norm;
        if (v != null && (norm = v.norm(1)) != 0) {
            numNonZero += v.getNumNondefaultElements();
            totalCorpusWeight += norm;
        }
    }
    String s = "Initializing corpus with %d docs, %d terms, %d nonzero entries, total termWeight %f";
    log.info(String.format(s, numDocuments, numTerms, numNonZero, totalCorpusWeight));
}

From source file: com.elex.dmp.lda.ModelTrainer.java

License: Apache License

public double calculatePerplexity(VectorIterable matrix, VectorIterable docTopicCounts, double testFraction) {
    Iterator<MatrixSlice> docIterator = matrix.iterator();
    Iterator<MatrixSlice> docTopicIterator = docTopicCounts.iterator();
    double perplexity = 0;
    double matrixNorm = 0;
    while (docIterator.hasNext() && docTopicIterator.hasNext()) {
        MatrixSlice docSlice = docIterator.next();
        MatrixSlice topicSlice = docTopicIterator.next();
        int docId = docSlice.index();
        Vector document = docSlice.vector();
        Vector topicDist = topicSlice.vector();
        if (testFraction == 0 || docId % (1 / testFraction) == 0) {
            trainSync(document, topicDist, false, 10);
            perplexity += readModel.perplexity(document, topicDist);
            matrixNorm += document.norm(1);
        }
    }
    return perplexity / matrixNorm;
}

From source file: de.isabeldrostfromm.sof.util.VectorsTest.java

License: Open Source License

@Test
@Repeat(iterations = 10)
public void testAppendTwo() {
    Vector vecA = randomVector();
    Vector vecB = randomVector();
    Vector result = Vectors.append(vecA, vecB);
    double sum = Math.pow(vecA.norm(2), 2) + Math.pow(vecB.norm(2), 2);
    double length = Math.sqrt(sum);
    assertEquals("Appending two vectors should result in a vector of added length.", length, result.norm(2),
            0.00001);
}
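
The expected length used in the assertion follows from the fact that appending two vectors adds their squared 2-norms. A worked example with made-up values:

Vector a = new DenseVector(new double[] { 3.0, 4.0 }); // a.norm(2) == 5
Vector b = new DenseVector(new double[] { 12.0 });     // b.norm(2) == 12
double expected = Math.sqrt(5 * 5 + 12 * 12);          // 13.0, the 2-norm of (3, 4, 12)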

From source file: de.isabeldrostfromm.sof.util.VectorsTest.java

License: Open Source License

@Test
@Repeat(iterations = 10)
public void testCreation() {
    Vector vec = randomVector();
    double[] entries = new double[vec.getNumNondefaultElements()];
    int index = 0;
    for (Vector.Element e : vec) {
        entries[index] = e.get();
        index++;
    }
    Vector result = Vectors.newSequentialAccessSparseVector(entries);
    assertEquals("Original vector should have same length as the one created from its entries.", vec.norm(2),
            result.norm(2), 0.0001);
}

From source file: org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java

License: Apache License

/**
 * Compute alpha
 *
 * @param gradient of type Vector
 * @param conjugate of type Vector
 * @param messages of type Iterable
 * @return alpha of type double
 */
private double computeAlpha(Vector gradient, Vector conjugate, Iterable<MessageData4CFWritable> messages) {
    double alpha = 0d;
    if (conjugate.norm(1d) == 0d) {
        return alpha;
    }
    double predictSquared = 0d;
    int numTrain = 0;
    for (MessageData4CFWritable message : messages) {
        EdgeType et = message.getType();
        if (et == EdgeType.TRAIN) {
            Vector vector = message.getVector();
            double predict = conjugate.dot(vector);
            predictSquared += predict * predict;
            numTrain++;
        }
    }
    if (numTrain > 0) {
        alpha = -gradient.dot(conjugate) / (predictSquared / numTrain + lambda * conjugate.dot(conjugate));
    }
    return alpha;
}
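
Both computeAlpha above and computeBeta below use conjugate.norm(1d) == 0d as a cheap all-zeros check, since the 1-norm is zero exactly when every entry is zero. A minimal illustration (values invented):

Vector zero = new DenseVector(new double[] { 0.0, 0.0, 0.0 });
if (zero.norm(1) == 0.0) {
    // treat as the zero vector and return early
}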

From source file: org.trustedanalytics.atk.giraph.algorithms.cgd.ConjugateGradientDescentComputation.java

License: Apache License

/**
 * Compute beta according to Hestenes-Stiefel formula
 *
 * @param gradient of type Vector
 * @param conjugate of type Vector
 * @param gradientNext of type Vector
 * @return beta of type double
 */
private double computeBeta(Vector gradient, Vector conjugate, Vector gradientNext) {
    double beta = 0d;
    if (conjugate.norm(1d) == 0d) {
        return beta;
    }
    Vector deltaVector = gradientNext.minus(gradient);
    beta = -gradientNext.dot(deltaVector) / conjugate.dot(deltaVector);
    return beta;
}