Example usage for org.apache.mahout.math Matrix viewRow

List of usage examples for org.apache.mahout.math Matrix viewRow

Introduction

On this page you can find example usages of org.apache.mahout.math Matrix viewRow.

Prototype

Vector viewRow(int row);

Source Link

Document

Return a reference to a row.

Usage

From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License:Apache License

public static void orthonormalizeColumns(Matrix mx) {

    //int n = mx.numCols();
    int n = mx.numRows();

    for (int c = 0; c < n; c++) {
        System.out.println("col: " + c);
        Vector col = mx.viewRow(c);
        for (int c1 = 0; c1 < c; c1++) {
            Vector viewC1 = mx.viewRow(c1);
            col.assign(col.minus(viewC1.times(viewC1.dot(col))));

        }//ww  w  .j  a  v a2 s  .  c  o  m
        final double norm2 = col.norm(2);
        if (norm2 == 0) {
            System.out.println("zero");
        }
        col.assign(new DoubleFunction() {
            @Override
            public double apply(double x) {
                return x / norm2;
            }
        });
    }
}

From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License:Apache License

public static void main(String[] args) throws IOException {

    //final Configuration conf = new Configuration();
    //final FileSystem fs = FileSystem.get(conf);
    //final SequenceFile.Reader reader = new SequenceFile.Reader(fs,
    //   new Path("R1.dat"), conf);
    //IntWritable key = new IntWritable();
    //VectorWritable vec = new VectorWritable();
    Matrix mat = new SparseMatrix(1500, 100);
    //SparseRealMatrix mat2 = new OpenMapRealMatrix(12419,1500 );
    BufferedReader reader = new BufferedReader(new FileReader("R.3.csv"));
    String line = null;/*  w  ww.  j  a  v a2 s. co  m*/
    while ((line = reader.readLine()) != null) {
        String[] parts = line.split(",");

        mat.set(Integer.parseInt(parts[0]), Integer.parseInt(parts[1]), Double.parseDouble(parts[2]));
        /*
        Vector v = vec.get();
        int i=0;
        Iterator<Vector.Element> itr = v.iterateNonZero();
        while(itr.hasNext()){
           double elem = itr.next().get();
           if(elem !=0)
              mat2.setEntry(i, key.get(), elem);
           i++;
        }
        */
    }

    //mat = mat.transpose();
    System.out.println(mat.viewColumn(0).isDense());
    System.out.println(mat.viewRow(0).isDense());
    mat = mat.transpose();
    GramSchmidt.orthonormalizeColumns(mat);
    /*
    System.out.println("started QR");
    System.out.println(Runtime.getRuntime().maxMemory());
    System.out.println(Runtime.getRuntime().maxMemory()-Runtime.getRuntime().freeMemory());
    QRDecomposition qr = new QRDecomposition(mat2);
    System.out.println(qr.getQ().getColumnDimension());
    System.out.println(qr.getQ().getRowDimension());
    */
    //mat = mat.transpose();
    //storeSparseColumns(mat);
    //for (int i = 0; i < 10; i++) {
    //   System.out.println(mat.viewRow(i).getNumNondefaultElements());
    //}

}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

public TopicModel(Matrix topicTermCounts, Vector topicSums, double eta, double alpha, String[] dictionary,
        int numThreads, double modelWeight) {
    this.dictionary = dictionary;
    this.topicTermCounts = topicTermCounts;
    this.topicSums = topicSums;
    this.numTopics = topicSums.size();
    this.numTerms = topicTermCounts.numCols();
    this.eta = eta;
    this.alpha = alpha;
    this.sampler = new Sampler(RandomUtils.getRandom());
    this.numThreads = numThreads;
    if (modelWeight != 1) {
        topicSums.assign(Functions.mult(modelWeight));
        for (int x = 0; x < numTopics; x++) {
            topicTermCounts.viewRow(x).assign(Functions.mult(modelWeight));
        }//from ww w  .  j  a  v  a  2  s .  com
    }
    initializeThreadPool();
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

private static Pair<Matrix, Vector> randomMatrix(int numTopics, int numTerms, Random random) {
    Matrix topicTermCounts = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    if (random != null) {
        for (int x = 0; x < numTopics; x++) {
            for (int term = 0; term < numTerms; term++) {
                topicTermCounts.viewRow(x).set(term, random.nextDouble());
            }//from   www.  jav  a  2s . co  m
        }
    }
    for (int x = 0; x < numTopics; x++) {
        topicSums.set(x, random == null ? 1.0 : topicTermCounts.viewRow(x).norm(1));
    }
    return Pair.of(topicTermCounts, topicSums);
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException {
    int numTopics = -1;
    int numTerms = -1;
    List<Pair<Integer, Vector>> rows = Lists.newArrayList();
    for (Path modelPath : modelPaths) {
        for (Pair<Text, VectorWritable> row : new SequenceFileIterable<Text, VectorWritable>(modelPath, true,
                conf)) {/*from  w  ww  .  ja  va  2  s  .c  om*/
            rows.add(Pair.of(Integer.parseInt(row.getFirst().toString()), row.getSecond().get()));//keytext
            numTopics = Math.max(numTopics, Integer.parseInt(row.getFirst().toString()));//keytext
            if (numTerms < 0) {
                numTerms = row.getSecond().get().size();
            }
        }
    }
    if (rows.isEmpty()) {
        throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
    }
    numTopics++;
    Matrix model = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    for (Pair<Integer, Vector> pair : rows) {
        model.viewRow(pair.getFirst()).assign(pair.getSecond());
        topicSums.set(pair.getFirst(), pair.getSecond().norm(1));
    }
    return Pair.of(model, topicSums);
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

public void trainDocTopicModel(Vector original, Vector topics, Matrix docTopicModel) {
    // first calculate p(topic|term,document) for all terms in original, and all topics,
    // using p(term|topic) and p(topic|doc)
    pTopicGivenTerm(original, topics, docTopicModel);
    normalizeByTopic(docTopicModel);// w w w  .j a va  2s. c om
    // now multiply, term-by-term, by the document, to get the weighted distribution of
    // term-topic pairs from this document.
    Iterator<Vector.Element> it = original.iterateNonZero();
    while (it.hasNext()) {
        Vector.Element e = it.next();
        for (int x = 0; x < numTopics; x++) {
            Vector docTopicModelRow = docTopicModel.viewRow(x);
            docTopicModelRow.setQuick(e.index(), docTopicModelRow.getQuick(e.index()) * e.get());
        }
    }
    // now recalculate p(topic|doc) by summing contributions from all of pTopicGivenTerm
    topics.assign(0.0);
    for (int x = 0; x < numTopics; x++) {
        topics.set(x, docTopicModel.viewRow(x).norm(1));
    }
    // now renormalize so that sum_x(p(x|doc)) = 1
    topics.assign(Functions.mult(1 / topics.norm(1)));
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

public void update(Matrix docTopicCounts) {
    for (int x = 0; x < numTopics; x++) {
        updaters[x % updaters.length].update(x, docTopicCounts.viewRow(x));
    }//from  w  w  w  .  j a  va 2  s . c o m
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Computes {@code p(topic x|term a, document i)} distributions given input document {@code i}.
 * {@code pTGT[x][a]} is the (un-normalized) {@code p(x|a,i)}, or if docTopics is {@code null},
 * {@code p(a|x)} (also un-normalized).//from   ww w.  j  av a 2  s.  com
 *
 * @param document doc-term vector encoding {@code w(term a|document i)}.
 * @param docTopics {@code docTopics[x]} is the overall weight of topic {@code x} in given
 *          document. If {@code null}, a topic weight of {@code 1.0} is used for all topics.
 * @param termTopicDist storage for output {@code p(x|a,i)} distributions.
 */
private void pTopicGivenTerm(Vector document, Vector docTopics, Matrix termTopicDist) {
    // for each topic x
    for (int x = 0; x < numTopics; x++) {
        // get p(topic x | document i), or 1.0 if docTopics is null
        double topicWeight = docTopics == null ? 1.0 : docTopics.get(x);
        // get w(term a | topic x)
        Vector topicTermRow = topicTermCounts.viewRow(x);
        // get \sum_a w(term a | topic x)
        double topicSum = topicSums.get(x);
        // get p(topic x | term a) distribution to update
        Vector termTopicRow = termTopicDist.viewRow(x);

        // for each term a in document i with non-zero weight
        Iterator<Vector.Element> it = document.iterateNonZero();
        while (it.hasNext()) {
            Vector.Element e = it.next();
            int termIndex = e.index();

            // calc un-normalized p(topic x | term a, document i)
            double termTopicLikelihood = (topicTermRow.get(termIndex) + eta) * (topicWeight + alpha)
                    / (topicSum + eta * numTerms);
            termTopicRow.set(termIndex, termTopicLikelihood);
        }
    }
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

private void normalizeByTopic(Matrix perTopicSparseDistributions) {
    Iterator<Vector.Element> it = perTopicSparseDistributions.viewRow(0).iterateNonZero();
    // then make sure that each of these is properly normalized by topic: sum_x(p(x|t,d)) = 1
    while (it.hasNext()) {
        Vector.Element e = it.next();
        int a = e.index();
        double sum = 0;
        for (int x = 0; x < numTopics; x++) {
            sum += perTopicSparseDistributions.viewRow(x).get(a);
        }/*from  ww  w  . ja  va 2 s  . c  o  m*/
        for (int x = 0; x < numTopics; x++) {
            perTopicSparseDistributions.viewRow(x).set(a, perTopicSparseDistributions.viewRow(x).get(a) / sum);
        }
    }
}

From source file:com.elex.dmp.lda.TopicModel.java

License:Apache License

public static Pair<Matrix, Vector> loadModel(Configuration conf, Path... modelPaths) throws IOException {
    int numTopics = -1;
    int numTerms = -1;
    List<Pair<Integer, Vector>> rows = Lists.newArrayList();
    for (Path modelPath : modelPaths) {
        for (Pair<Text, VectorWritable> row : new SequenceFileIterable<Text, VectorWritable>(modelPath, true,
                conf)) {/*from  w  ww . ja va2 s .  co  m*/
            rows.add(Pair.of(Integer.parseInt(row.getFirst().toString()), row.getSecond().get()));//keytext
            numTopics = Math.max(numTopics, Integer.parseInt(row.getFirst().toString()));//keytext
            if (numTerms < 0) {
                numTerms = row.getSecond().get().size();
            }
        }
    }
    if (rows.isEmpty()) {
        throw new IOException(Arrays.toString(modelPaths) + " have no vectors in it");
    }
    numTopics++;
    Matrix model = new DenseMatrix(numTopics, numTerms);
    Vector topicSums = new DenseVector(numTopics);
    for (Pair<Integer, Vector> pair : rows) {
        model.viewRow(pair.getFirst()).assign(pair.getSecond());
        topicSums.set(pair.getFirst(), pair.getSecond().norm(1));
    }
    return Pair.of(model, topicSums);
}