Example usage for org.apache.mahout.math Vector getNumNondefaultElements

List of usage examples for org.apache.mahout.math Vector getNumNondefaultElements

Introduction

This page collects usage examples for the getNumNondefaultElements method of org.apache.mahout.math.Vector.

Prototype

int getNumNondefaultElements();

Source Link

Document

Return the number of values in the recipient which are not the default value.

Usage

From source file:Vectors.java

License:Apache License

/**
 * Down-samples a vector to a bounded number of entries.
 *
 * <p>If {@code original} reports no more than {@code sampleSize} non-default
 * elements it is returned as-is (the same instance, not a copy). Otherwise a
 * fresh vector obtained from {@code original.like()} is filled with the
 * entries that a {@code FixedSizeSamplingIterator} yields from the non-zero
 * elements of {@code original}.
 *
 * @param original   the vector to sample from
 * @param sampleSize the sample size handed to the sampling iterator
 * @return {@code original} itself, or a new vector holding the sampled entries
 */
public static Vector maybeSample(Vector original, int sampleSize) {
    if (sampleSize >= original.getNumNondefaultElements()) {
        return original;
    }
    Vector sampled = original.like();
    for (Iterator<Vector.Element> picks = new FixedSizeSamplingIterator<Vector.Element>(sampleSize,
            original.iterateNonZero()); picks.hasNext();) {
        Vector.Element picked = picks.next();
        sampled.setQuick(picked.index(), picked.get());
    }
    return sampled;
}

From source file:Vectors.java

License:Apache License

/**
 * Keeps only the entries selected by a {@code TopK} queue of capacity
 * {@code k} ordered by {@code BY_VALUE}.
 *
 * <p>When {@code original} reports at most {@code k} non-default elements it
 * is returned unchanged (same instance). Otherwise every non-zero element is
 * offered to the queue and the retained ones are written into a new vector
 * created with {@code original.like()}.
 *
 * @param k        capacity of the top-k queue
 * @param original the vector to prune
 * @return {@code original} itself, or a new vector with the retained entries
 */
public static Vector topKElements(int k, Vector original) {
    if (k >= original.getNumNondefaultElements()) {
        return original;
    }
    TopK<Vector.Element> best = new TopK<Vector.Element>(k, BY_VALUE);
    for (Iterator<Vector.Element> candidates = original.iterateNonZero(); candidates.hasNext();) {
        // Each element is wrapped before queuing; presumably the iterator may
        // reuse its Element instance between calls -- TODO confirm.
        best.offer(new Vectors.TemporaryElement(candidates.next()));
    }
    Vector pruned = original.like();
    for (Vector.Element kept : best.retrieve()) {
        pruned.setQuick(kept.index(), kept.get());
    }
    return pruned;
}

From source file:ca.uwaterloo.cpami.mahout.matrix.utils.GramSchmidt.java

License:Apache License

/**
 * Rewrites every column of {@code mat} through a
 * {@link RandomAccessSparseVector} that contains only the column's non-zero
 * entries, assigning the result back with {@code assignColumn}.
 *
 * <p>The previous version called {@code System.exit(1)} at the end of the
 * first loop iteration, which terminated the JVM before any further column
 * was processed. That leftover debugging stop has been removed so the whole
 * matrix is handled. The diagnostic output on standard out is kept: each
 * copied value, then the non-default element count of the sparse copy and of
 * the column after assignment.
 *
 * @param mat the matrix whose columns are rewritten in place
 */
public static void storeSparseColumns(Matrix mat) {
    int numCols = mat.numCols();
    int numRows = mat.numRows();
    for (int i = 0; i < numCols; i++) {
        Vector sparseVect = new RandomAccessSparseVector(numRows);
        Vector col = mat.viewColumn(i);
        Iterator<Vector.Element> itr = col.iterateNonZero();
        while (itr.hasNext()) {
            Vector.Element elem = itr.next();
            // Explicit zero check kept from the original, in addition to
            // using the non-zero iterator.
            if (elem.get() != 0) {
                System.out.println(elem.get());
                sparseVect.set(elem.index(), elem.get());
            }
        }
        System.out.println(sparseVect.getNumNondefaultElements());

        mat.assignColumn(i, sparseVect);
        System.out.println(mat.viewColumn(i).getNumNondefaultElements());
    }
}

From source file:com.chimpler.example.eigenface.Helper.java

License:Apache License

/**
 * Reads a sequence file of {@code IntWritable} / {@code VectorWritable}
 * records and returns one dense {@code double[]} per record, in file order.
 *
 * <p>Two defects of the earlier version are fixed:
 * <ul>
 *   <li>The reader is now closed in a {@code finally} block; it used to be
 *       leaked.</li>
 *   <li>Each row is filled by reading every index {@code 0..size-1}. The old
 *       loop visited only indices below {@code getNumNondefaultElements()},
 *       so for a sparse vector any value stored at a higher index was
 *       silently dropped.</li>
 * </ul>
 *
 * @param fileName path of the sequence file to read
 * @return the rows of the matrix; each row has the length of its vector
 * @throws Exception if the file system or the reader fails
 */
public static double[][] readMatrixSequenceFile(String fileName) throws Exception {
    Configuration configuration = new Configuration();
    FileSystem fs = FileSystem.get(configuration);
    Reader matrixReader = new SequenceFile.Reader(fs, new Path(fileName), configuration);

    List<double[]> rows = new ArrayList<double[]>();
    try {
        IntWritable key = new IntWritable();
        VectorWritable value = new VectorWritable();
        while (matrixReader.next(key, value)) {
            Vector vector = value.get();
            double[] row = new double[vector.size()];
            // Copy by position so the result is correct for dense and sparse vectors alike.
            for (int i = 0; i < row.length; i++) {
                row[i] = vector.getQuick(i);
            }
            rows.add(row);
        }
    } finally {
        matrixReader.close();
    }
    return rows.toArray(new double[rows.size()][]);
}

From source file:com.elex.dmp.core.TopicModel.java

License:Apache License

/**
 * Renders the largest entries of a vector as {@code {label:value,label:value}}.
 *
 * <p>All non-zero elements are collected, sorted by value in descending
 * order, and at most the first 25 are printed. The label of an element is
 * {@code dictionary[index]} when a dictionary is supplied, otherwise the
 * index itself.
 *
 * <p>Fix: a vector without non-zero elements used to produce the unbalanced
 * string {@code "{"}; it now produces {@code "{}"}.
 *
 * @param vector     the vector to render
 * @param dictionary optional index-to-label table; may be {@code null}
 * @return the formatted string, always starting with '{' and ending with '}'
 */
public static String vectorToSortedString(Vector vector, String[] dictionary) {
    List<Pair<String, Double>> vectorValues = new ArrayList<Pair<String, Double>>(
            vector.getNumNondefaultElements());
    Iterator<Vector.Element> it = vector.iterateNonZero();
    while (it.hasNext()) {
        Vector.Element e = it.next();
        String label = dictionary != null ? dictionary[e.index()] : String.valueOf(e.index());
        vectorValues.add(Pair.of(label, e.get()));
    }
    // Largest value first.
    Collections.sort(vectorValues, new Comparator<Pair<String, Double>>() {
        @Override
        public int compare(Pair<String, Double> x, Pair<String, Double> y) {
            return y.getSecond().compareTo(x.getSecond());
        }
    });
    Iterator<Pair<String, Double>> listIt = vectorValues.iterator();
    StringBuilder bldr = new StringBuilder(2048);
    bldr.append('{');
    int i = 0;
    while (listIt.hasNext() && i < 25) {
        i++;
        Pair<String, Double> p = listIt.next();
        bldr.append(p.getFirst());
        bldr.append(':');
        bldr.append(p.getSecond());
        bldr.append(',');
    }
    if (bldr.length() > 1) {
        // Turn the trailing comma into the closing brace.
        bldr.setCharAt(bldr.length() - 1, '}');
    } else {
        // Nothing was appended: close the brace so the output stays balanced.
        bldr.append('}');
    }
    return bldr.toString();
}

From source file:com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java

License:Apache License

/**
 * Recomputes {@code totalCorpusWeight} from the rows of {@code corpusWeights}
 * and logs a one-line summary of the corpus.
 *
 * <p>Rows that are {@code null} or whose 1-norm equals zero contribute
 * neither to the total weight nor to the logged non-zero entry count.
 */
private void postInitCorpus() {
    totalCorpusWeight = 0;
    int nonZeroEntries = 0;
    for (int doc = 0; doc < numDocuments; doc++) {
        Vector row = corpusWeights.viewRow(doc);
        if (row == null) {
            continue;
        }
        double l1Norm = row.norm(1);
        if (l1Norm == 0) {
            continue;
        }
        nonZeroEntries += row.getNumNondefaultElements();
        totalCorpusWeight += l1Norm;
    }
    log.info(String.format(
            "Initializing corpus with %d docs, %d terms, %d nonzero entries, total termWeight %f",
            numDocuments, numTerms, nonZeroEntries, totalCorpusWeight));
}

From source file:com.elex.dmp.lda.ModelTrainer.java

License:Apache License

/**
 * Walks {@code matrix} and {@code docTopicCounts} in lock-step and trains on
 * each (document, topic distribution) pair, bracketed by {@code start()} and
 * {@code stop()}. Iteration ends as soon as either input is exhausted.
 *
 * <p>When {@code isReadWrite} is set, pairs are collected into a batch and
 * handed to {@code batchTrain}; otherwise each pair is trained immediately.
 * Timing statistics are only gathered and logged when debug logging is on.
 *
 * @param matrix           the documents, one slice per document
 * @param docTopicCounts   the matching topic distributions, one slice per document
 * @param numDocTopicIters iteration count passed through to the trainers
 */
public void train(VectorIterable matrix, VectorIterable docTopicCounts, int numDocTopicIters) {
    start();
    final Iterator<MatrixSlice> docs = matrix.iterator();
    final Iterator<MatrixSlice> topicRows = docTopicCounts.iterator();
    final long trainingStart = System.nanoTime();
    // Ring buffer of per-token training times (ms) for the median report.
    final double[] perTokenMillis = new double[100];
    final Map<Vector, Vector> pending = Maps.newHashMap();
    int pendingTokens = 0;
    long batchStart = System.nanoTime();

    for (int seen = 1; docs.hasNext() && topicRows.hasNext(); seen++) {
        final Vector document = docs.next().vector();
        final Vector topicDist = topicRows.next().vector();

        if (!isReadWrite) {
            final long docStart = System.nanoTime();
            train(document, topicDist, true, numDocTopicIters);
            if (log.isDebugEnabled()) {
                final long elapsedNanos = System.nanoTime() - docStart;
                perTokenMillis[seen % perTokenMillis.length] =
                        elapsedNanos / (1.0e6 * document.getNumNondefaultElements());
                if (seen % 100 == 0) {
                    final long totalNanos = System.nanoTime() - trainingStart;
                    log.debug("trained " + seen + " documents in " + (totalNanos / 1.0e6) + "ms");
                    if (seen % 500 == 0) {
                        // Sorts the ring buffer in place before reading the middle slot.
                        Arrays.sort(perTokenMillis);
                        log.debug("training took median " + perTokenMillis[perTokenMillis.length / 2]
                                + "ms per token-instance");
                    }
                }
            }
            continue;
        }

        if (pending.size() < numTrainThreads) {
            pending.put(document, topicDist);
            if (log.isDebugEnabled()) {
                pendingTokens += document.getNumNondefaultElements();
            }
            continue;
        }

        // NOTE(review): in this branch the current document is not queued, and this
        // method neither clears the batch nor trains whatever is still queued when
        // the loop ends. Confirm that batchTrain (not visible here) accounts for this.
        batchTrain(pending, true, numDocTopicIters);
        final long now = System.nanoTime();
        log.debug("trained {} docs with {} tokens, start time {}, end time {}",
                new Object[] { numTrainThreads, pendingTokens, batchStart, now });
        batchStart = now;
        pendingTokens = 0;
    }
    stop();
}

From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java

License:Apache License

/**
 * Builds a term-frequency vector for {@code key} from the first
 * {@code StringTuple} in {@code values} and writes it to the context.
 *
 * <p>Only terms present in {@code dictionary} are counted. With
 * {@code maxNGramSize >= 2} the tokens are run through a {@code ShingleFilter};
 * otherwise each entry of the tuple is counted as-is. The vector is optionally
 * converted to a {@code SequentialAccessSparseVector} and/or wrapped in a
 * {@code NamedVector}. A vector with no non-default elements is not written;
 * a counter is incremented instead.
 */
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
        throws IOException, InterruptedException {
    final Iterator<StringTuple> tuples = values.iterator();
    if (!tuples.hasNext()) {
        return;
    }
    final StringTuple tokens = tuples.next();

    // The tuple length is only a guess at the initial size of the sparse vector.
    Vector termCounts = new RandomAccessSparseVector(dimension, tokens.length());

    if (maxNGramSize < 2) {
        // unigram
        for (String unigram : tokens.getEntries()) {
            countTerm(termCounts, unigram);
        }
    } else {
        // ngram
        ShingleFilter shingles = new ShingleFilter(new IteratorTokenStream(tokens.getEntries().iterator()),
                maxNGramSize);
        try {
            // The term attribute is read once before the first incrementToken() call.
            do {
                countTerm(termCounts, shingles.getAttribute(CharTermAttribute.class).toString());
            } while (shingles.incrementToken());

            shingles.end();
        } finally {
            Closeables.closeQuietly(shingles);
        }
    }

    if (sequentialAccess) {
        termCounts = new SequentialAccessSparseVector(termCounts);
    }
    if (namedVector) {
        termCounts = new NamedVector(termCounts, key.toString());
    }

    // Nothing from the dictionary was found: do not waste space sending the vector to disk.
    if (termCounts.getNumNondefaultElements() <= 0) {
        context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
        return;
    }
    context.write(key, new VectorWritable(termCounts));
}

/** Adds one to the count of {@code term} if it is non-empty and present in the dictionary. */
private void countTerm(Vector termCounts, String term) {
    if (term.isEmpty() || !dictionary.containsKey(term)) {
        return;
    }
    int termId = dictionary.get(term);
    termCounts.setQuick(termId, termCounts.getQuick(termId) + 1);
}

From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Validates the inputs, assembles the system {@code Ai * ui = Vi} and
 * delegates to {@code solve(Matrix, Matrix)}.
 *
 * <p>With {@code nui} being the number of non-default elements of
 * {@code ratingVector}:
 * <pre>
 *   Ai = MiIi * t(MiIi) + lambda * nui * E
 *   Vi = MiIi * t(R(i,Ii))
 * </pre>
 *
 * @param featureVectors feature vectors; must be non-null, non-empty, and as
 *                       many as there are non-default ratings
 * @param ratingVector   ratings; must be non-null with at least one
 *                       non-default element
 * @param lambda         factor applied to the {@code nui * E} term
 * @param numFeatures    number of features, passed to {@code createMiIi}
 * @return the result of {@code solve(Ai, Vi)}
 */
public static Vector solve(Iterable<Vector> featureVectors, Vector ratingVector, double lambda,
        int numFeatures) {

    Preconditions.checkNotNull(featureVectors, "Feature Vectors cannot be null");
    Preconditions.checkArgument(!Iterables.isEmpty(featureVectors));
    Preconditions.checkNotNull(ratingVector, "Rating Vector cannot be null");

    // Read only after the null check above.
    final int nui = ratingVector.getNumNondefaultElements();
    Preconditions.checkArgument(nui > 0, "Rating Vector cannot be empty");
    Preconditions.checkArgument(Iterables.size(featureVectors) == nui);

    final Matrix featureMatrix = createMiIi(featureVectors, numFeatures);
    final Matrix ratingColumn = createRiIiMaybeTransposed(ratingVector);

    final Matrix leftHandSide = miTimesMiTransposePlusLambdaTimesNuiTimesE(featureMatrix, lambda, nui);
    final Matrix rightHandSide = featureMatrix.times(ratingColumn);

    return solve(leftHandSide, rightHandSide);
}

From source file:com.innometrics.integration.app.recommender.ml.als.AlternatingLeastSquaresSolver.java

License:Apache License

/**
 * Copies the non-zero ratings, in iteration order, into a single-column
 * {@code DenseMatrix}.
 *
 * <p>The backing array has one row per non-default element reported by
 * {@code ratingVector}.
 *
 * @param ratingVector ratings; must report {@code isSequentialAccess()}
 * @return a matrix with one column holding the ratings
 */
static Matrix createRiIiMaybeTransposed(Vector ratingVector) {
    Preconditions.checkArgument(ratingVector.isSequentialAccess(),
            "Ratings should be iterable in Index or Sequential Order");

    final int ratingCount = ratingVector.getNumNondefaultElements();
    final double[][] column = new double[ratingCount][1];
    int row = 0;
    for (Vector.Element rating : ratingVector.nonZeroes()) {
        column[row][0] = rating.get();
        row++;
    }
    return new DenseMatrix(column, true);
}