Usage examples for the constructor of edu.stanford.nlp.process.WordToSentenceProcessor
public WordToSentenceProcessor(Set<String> boundaryToDiscard)
From source file: org.exist.xquery.corenlp.Tokenize.java
License:Open Source License
/**
 * Tokenizes {@code text}, groups the resulting tokens into sentences and hands
 * both the sentences and the flat token list to {@code createSpreadsheet}.
 *
 * <p>As a side effect, the tokenizer created for this call is stored in
 * {@code cachedTokenizer}.
 *
 * @param text         the raw text to tokenize
 * @param outputFormat the output document type, passed through unchanged to
 *                     {@code createSpreadsheet}
 */
private void tokenizeString(String text, final OutDocType outputFormat) {
    // NOTE(review): the third argument is presumably the "invertible" flag of
    // PTBTokenizer.newPTBTokenizer -- confirm against the CoreNLP API docs.
    PTBTokenizer<CoreLabel> tokenizer =
            PTBTokenizer.newPTBTokenizer(new StringReader(text), tokenizeNLs, true);
    cachedTokenizer = tokenizer;

    List<CoreLabel> tokens = tokenizer.tokenize();

    // Parameterize the processor explicitly: a raw WordToSentenceProcessor makes
    // wordsToSentences() return a raw List and forces an unchecked conversion.
    WordToSentenceProcessor<CoreLabel> sentenceSplitter =
            new WordToSentenceProcessor<CoreLabel>(
                    WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE);
    List<List<CoreLabel>> sentences = sentenceSplitter.wordsToSentences(tokens);

    createSpreadsheet(sentences, tokens, outputFormat);
}