Example usage for org.deeplearning4j.models.word2vec VocabWord getIndex

List of usage examples for org.deeplearning4j.models.word2vec VocabWord getIndex

Introduction

On this page you can find example usage of org.deeplearning4j.models.word2vec VocabWord getIndex.

Prototype

public int getIndex() 

Source Link

Document

Returns the index of this element in the Huffman tree.

Usage

From source file: edu.umd.umiacs.clip.tools.scor.WordVectorUtils.java

License:Apache License

/**
 * Loads word vectors from a space-separated text file into an in-memory model.
 *
 * <p>Every line is split on a single space. The first field is taken as the word and the
 * remaining fields are parsed as floats forming that word's vector. Lines with two or fewer
 * fields are skipped (for example a short header line, if the file has one). Each word is
 * registered in the vocabulary cache with an index equal to the cache's word count at the
 * moment it is added, and the vectors are stacked in the same order to form the syn0 matrix.
 *
 * @param vectorsFile text file holding one word and its vector components per line
 * @param normalize   optional flag; when the first value is {@code true} the stacked matrix
 *                    is divided in place by its per-row L2 norm ({@code norm2(1)})
 * @return word vectors backed by an in-memory lookup table and the populated vocabulary cache
 * @throws IllegalArgumentException if the file contains no line with more than two fields
 */
public static WordVectors loadTxt(File vectorsFile, boolean... normalize) {
    AbstractCache<VocabWord> cache = new AbstractCache<>();
    INDArray[] arrays;
    // The line stream holds an open file handle; close it as soon as all rows are consumed.
    try (java.util.stream.Stream<String> rows = lines(vectorsFile.toPath())) {
        arrays = rows.map(line -> line.split(" "))
                .filter(fields -> fields.length > 2)
                .map(fields -> {
                    VocabWord word = new VocabWord(1.0, fields[0]);
                    word.setIndex(cache.numWords());
                    cache.addToken(word);
                    cache.addWordToIndex(word.getIndex(), fields[0]);
                    // A plain loop is enough here: a vector is only a few hundred floats,
                    // so spinning up parallel tasks per line costs more than it saves.
                    float[] vector = new float[fields.length - 1];
                    for (int i = 1; i < fields.length; i++) {
                        vector[i - 1] = parseFloat(fields[i]);
                    }
                    return Nd4j.create(vector);
                })
                .toArray(INDArray[]::new);
    }

    // Fail with a clear message instead of an obscure error from vstack / arrays[0].
    if (arrays.length == 0) {
        throw new IllegalArgumentException("No word vectors found in " + vectorsFile);
    }

    INDArray syn = Nd4j.vstack(arrays);

    InMemoryLookupTable<VocabWord> lookupTable = new InMemoryLookupTable.Builder<VocabWord>()
            .vectorLength(arrays[0].columns())
            .useAdaGrad(false)
            .cache(cache)
            .useHierarchicSoftmax(false)
            .build();
    Nd4j.clearNans(syn);
    if (normalize.length > 0 && normalize[0]) {
        syn.diviColumnVector(syn.norm2(1));
    }

    lookupTable.setSyn0(syn);

    WordVectorsImpl<VocabWord> vectors = new WordVectorsImpl<>();
    vectors.setLookupTable(lookupTable);
    vectors.setVocab(cache);
    return vectors;
}