Usage examples for the constructor org.deeplearning4j.text.sentenceiterator.CollectionSentenceIterator.CollectionSentenceIterator
public CollectionSentenceIterator(Collection<String> coll)
From source file: net.liaocy.ml4j.nlp.word2vec.Train.java
public void train(Collection<String> commaSentences, Language lang, String modelName) throws IOException { System.out.println("Load & Vectorize Sentences...."); SentenceIterator iter = new CollectionSentenceIterator(commaSentences); iter.setPreProcessor(new SentencePreProcessor() { @Override//from w ww . j a v a 2s . c om public String preProcess(String sentence) { return sentence; } }); MyTokenizerFactory t = new MyTokenizerFactory(lang); t.setTokenPreProcessor(new TokenPreProcess() { @Override public String preProcess(String token) { return token; } }); System.out.println("Building model...."); Word2Vec vec = new Word2Vec.Builder().minWordFrequency(0).iterations(1).layerSize(200).seed(42) .windowSize(5).learningRate(0.025).iterate(iter).tokenizerFactory(t).build(); System.out.println("Fitting Word2Vec model...."); vec.fit(); System.out.println("Save Model..."); this.saveModel(modelName, vec); }