Usage examples for edu.stanford.nlp.util.StringUtils.joinWithOriginalWhiteSpace
public static String joinWithOriginalWhiteSpace(List<CoreLabel> tokens)
From source file:weka.filters.unsupervised.attribute.PartOfSpeechTagging.java
License:Open Source License
/**
 * Splits the given document into its sentences.
 *
 * @param doc the document text to split
 * @return the sentences, in the order the preprocessor yields them
 */
protected List<String> getSentences(String doc) {
  DocumentPreprocessor splitter = new DocumentPreprocessor(new StringReader(doc));
  splitter.setTokenizerFactory(getTokenizerFactory());

  List<String> sentences = new ArrayList<String>();
  // The loop variable is deliberately a raw List, exactly as before: it is
  // handed straight to joinWithOriginalWhiteSpace without a cast.
  // NOTE(review): presumably the preprocessor's element type is not
  // List<CoreLabel>; confirm before adding a type argument here.
  for (List tokens : splitter) {
    sentences.add(StringUtils.joinWithOriginalWhiteSpace(tokens));
  }
  return sentences;
}
From source file:weka.gui.explorer.NLPParseTreePanel.java
License:Open Source License
/** * Obtains the sentences from the document. * * @param doc the document to turn into sentences. * @return the list of sentences/*from ww w.ja va2s . c o m*/ */ protected List<String> getSentences(String doc) { List<String> result; DocumentPreprocessor preProcessor; result = new ArrayList<String>(); try { preProcessor = new DocumentPreprocessor(new StringReader(doc)); preProcessor.setTokenizerFactory(PartOfSpeechTagging.getTokenizerFactory()); for (List sentence : preProcessor) result.add(StringUtils.joinWithOriginalWhiteSpace(sentence)); } catch (Exception e) { showErrorMessage("Parsing error", "Failed to split document into sentences!", e); } return result; }