Example usage for org.deeplearning4j.models.word2vec VocabWord setIndex

List of usage examples for org.deeplearning4j.models.word2vec VocabWord setIndex

Introduction

On this page you can find example usages of org.deeplearning4j.models.word2vec.VocabWord.setIndex.

Prototype

public void setIndex(int index) 

Source Link

Document

Sets index in Huffman tree

Usage

From source file: de.mpii.docsimilarity.mr.utils.io.WordVectorSerializer.java

License: Apache License

/**
 * Loads an in memory cache from the given path (sets syn0 and the vocab)
 *
 * @param vectorsFile/*from   ww  w  .  j  av  a  2  s  . c  o m*/
 *            the path of the file to load
 * @return
 * @throws FileNotFoundException
 */
public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException {
    BufferedReader write = new BufferedReader(new FileReader(vectorsFile));
    VocabCache cache = new InMemoryLookupCache();

    InMemoryLookupTable lookupTable;

    LineIterator iter = IOUtils.lineIterator(write);
    List<INDArray> arrays = new ArrayList<>();
    while (iter.hasNext()) {
        String line = iter.nextLine();
        String[] split = line.split(" ");
        String word = split[0];
        VocabWord word1 = new VocabWord(1.0, word);
        cache.addToken(word1);
        cache.addWordToIndex(cache.numWords(), word);
        word1.setIndex(cache.numWords());
        cache.putVocabWord(word);
        INDArray row = Nd4j.create(Nd4j.createBuffer(split.length - 1));
        for (int i = 1; i < split.length; i++) {
            row.putScalar(i - 1, Float.parseFloat(split[i]));
        }
        arrays.add(row);
    }

    INDArray syn = Nd4j.create(new int[] { arrays.size(), arrays.get(0).columns() });
    for (int i = 0; i < syn.rows(); i++) {
        syn.putRow(i, arrays.get(i));
    }

    lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder().vectorLength(arrays.get(0).columns())
            .useAdaGrad(false).cache(cache).build();
    Nd4j.clearNans(syn);
    lookupTable.setSyn0(syn);

    iter.close();

    return new Pair<>(lookupTable, cache);
}

From source file: edu.umd.umiacs.clip.tools.scor.WordVectorUtils.java

License: Apache License

/**
 * Loads word vectors from a space-delimited text file into a {@link WordVectors} model.
 *
 * <p>Each line is split on single spaces; lines with fewer than three fields are
 * skipped (presumably to drop a header or malformed line; confirm against the
 * expected file format). The first field is the word, the remaining fields are
 * parsed as floats, and the resulting rows are stacked into syn0 in file order.
 *
 * @param vectorsFile the text file to read
 * @param normalize optional flag; when the first value is {@code true}, syn0 is divided
 *            in place by the column vector {@code syn.norm2(1)} (row-wise L2 norms)
 * @return word vectors backed by an in-memory lookup table and vocabulary cache
 * @throws IllegalArgumentException if no line of the file yields a vector
 */
public static WordVectors loadTxt(File vectorsFile, boolean... normalize) {
    AbstractCache<VocabWord> cache = new AbstractCache<>();

    // NOTE(review): `lines` is a statically imported helper not visible here; if it returns a
    // stream backed by an open file handle, that stream should be closed after use. Confirm.
    // The mapping lambda assigns indices from the cache size, so it relies on the stream
    // being processed sequentially and in order.
    INDArray[] arrays = lines(vectorsFile.toPath()).map(line -> line.split(" "))
            .filter(fields -> fields.length > 2).map(fields -> {
                VocabWord word = new VocabWord(1.0, fields[0]);
                word.setIndex(cache.numWords());
                cache.addToken(word);
                cache.addWordToIndex(word.getIndex(), fields[0]);

                // A plain loop: a parallel stream per line only adds fork/join overhead here.
                float[] vector = new float[fields.length - 1];
                for (int i = 1; i < fields.length; i++) {
                    vector[i - 1] = parseFloat(fields[i]);
                }
                return Nd4j.create(vector);
            }).toArray(INDArray[]::new);

    if (arrays.length == 0) {
        throw new IllegalArgumentException("No word vectors found in " + vectorsFile);
    }

    INDArray syn = Nd4j.vstack(arrays);

    InMemoryLookupTable lookupTable = new InMemoryLookupTable.Builder().vectorLength(arrays[0].columns())
            .useAdaGrad(false).cache(cache).useHierarchicSoftmax(false).build();
    Nd4j.clearNans(syn);

    // Guard against an explicit null varargs array as well as an omitted flag.
    boolean shouldNormalize = normalize != null && normalize.length > 0 && normalize[0];
    if (shouldNormalize) {
        syn.diviColumnVector(syn.norm2(1));
    }

    lookupTable.setSyn0(syn);

    WordVectorsImpl vectors = new WordVectorsImpl();
    vectors.setLookupTable(lookupTable);
    vectors.setVocab(cache);
    return vectors;
}