List of usage examples for org.deeplearning4j.models.word2vec VocabWord setIndex
public void setIndex(int index)
From source file:de.mpii.docsimilarity.mr.utils.io.WordVectorSerializer.java
License:Apache License
/** * Loads an in memory cache from the given path (sets syn0 and the vocab) * * @param vectorsFile/*from ww w . j av a 2 s . c o m*/ * the path of the file to load * @return * @throws FileNotFoundException */ public static Pair<InMemoryLookupTable, VocabCache> loadTxt(File vectorsFile) throws FileNotFoundException { BufferedReader write = new BufferedReader(new FileReader(vectorsFile)); VocabCache cache = new InMemoryLookupCache(); InMemoryLookupTable lookupTable; LineIterator iter = IOUtils.lineIterator(write); List<INDArray> arrays = new ArrayList<>(); while (iter.hasNext()) { String line = iter.nextLine(); String[] split = line.split(" "); String word = split[0]; VocabWord word1 = new VocabWord(1.0, word); cache.addToken(word1); cache.addWordToIndex(cache.numWords(), word); word1.setIndex(cache.numWords()); cache.putVocabWord(word); INDArray row = Nd4j.create(Nd4j.createBuffer(split.length - 1)); for (int i = 1; i < split.length; i++) { row.putScalar(i - 1, Float.parseFloat(split[i])); } arrays.add(row); } INDArray syn = Nd4j.create(new int[] { arrays.size(), arrays.get(0).columns() }); for (int i = 0; i < syn.rows(); i++) { syn.putRow(i, arrays.get(i)); } lookupTable = (InMemoryLookupTable) new InMemoryLookupTable.Builder().vectorLength(arrays.get(0).columns()) .useAdaGrad(false).cache(cache).build(); Nd4j.clearNans(syn); lookupTable.setSyn0(syn); iter.close(); return new Pair<>(lookupTable, cache); }
From source file:edu.umd.umiacs.clip.tools.scor.WordVectorUtils.java
License:Apache License
/**
 * Loads word vectors from a space-delimited text file.
 *
 * <p>Each line is split on single spaces. Lines with two or fewer fields are
 * skipped; for the rest, the first field is the word and the remaining fields
 * are parsed as the float components of its vector. Words are indexed in the
 * order their lines are accepted.
 *
 * @param vectorsFile the file to read
 * @param normalize optional flag; when the first value is {@code true}, the
 *        stacked matrix is divided in place by the column vector of norms
 *        computed with {@code norm2(1)}
 * @return word vectors backed by an in-memory lookup table and vocabulary cache
 * @throws IllegalArgumentException if no line of the file yields a vector
 */
public static WordVectors loadTxt(File vectorsFile, boolean... normalize) {
    AbstractCache<VocabWord> cache = new AbstractCache<>();
    INDArray[] arrays;

    // Close the line stream once consumed so a file-backed stream releases its handle.
    try (java.util.stream.Stream<String> lineStream = lines(vectorsFile.toPath())) {
        arrays = lineStream
                .map(line -> line.split(" "))
                .filter(fields -> fields.length > 2)
                .map(split -> {
                    // The index is taken before the word is added to the cache.
                    VocabWord word = new VocabWord(1.0, split[0]);
                    word.setIndex(cache.numWords());
                    cache.addToken(word);
                    cache.addWordToIndex(word.getIndex(), split[0]);

                    // A plain loop: a parallel stream per line costs more than
                    // parsing a single vector's worth of floats.
                    float[] vector = new float[split.length - 1];
                    for (int i = 1; i < split.length; i++) {
                        vector[i - 1] = parseFloat(split[i]);
                    }
                    return Nd4j.create(vector);
                })
                .toArray(INDArray[]::new);
    }

    if (arrays.length == 0) {
        throw new IllegalArgumentException("No word vectors found in " + vectorsFile);
    }

    INDArray syn = Nd4j.vstack(arrays);
    InMemoryLookupTable lookupTable = new InMemoryLookupTable.Builder()
            .vectorLength(arrays[0].columns())
            .useAdaGrad(false)
            .cache(cache)
            .useHierarchicSoftmax(false)
            .build();
    Nd4j.clearNans(syn);
    if (normalize.length > 0 && normalize[0]) {
        syn.diviColumnVector(syn.norm2(1));
    }
    lookupTable.setSyn0(syn);

    WordVectorsImpl vectors = new WordVectorsImpl();
    vectors.setLookupTable(lookupTable);
    vectors.setVocab(cache);
    return vectors;
}