Example usage for the edu.stanford.nlp.process.WordToSentenceProcessor constructor WordToSentenceProcessor()

List of usage examples for the edu.stanford.nlp.process.WordToSentenceProcessor constructor WordToSentenceProcessor()

Introduction

On this page you can find example usages of the WordToSentenceProcessor() constructor of edu.stanford.nlp.process.WordToSentenceProcessor.

Prototype

public WordToSentenceProcessor() 

Source Link

Document

Create a WordToSentenceProcessor using a sensible default list of tokens for sentence ending for English/Latin writing systems.

Usage

From source file:edu.iastate.airl.semtus.parser.Parser.java

License:Open Source License

/**
 * Get sentences from words/*from w ww.  j a  v  a2  s .  c o m*/
 *
 * @param theseWords
 *            words
 * @return list of sentences
 */
static public List<Sentence<Word>> getSentences(List<Word> theseWords) {
    WordToSentenceProcessor<Word, String, Word> thisSentenceProcessor = new WordToSentenceProcessor<Word, String, Word>();
    List<List<Word>> theseProtoSentences = thisSentenceProcessor.process(theseWords);
    List<Sentence<Word>> theseSentences = new ArrayList<Sentence<Word>>();
    for (List<Word> thisProtoSentence : theseProtoSentences)
        theseSentences.add(new Sentence<Word>(thisProtoSentence));
    return theseSentences;
}

From source file:wikiminer.Article.java

/**
 * Splits {@code text} into sentences and appends one {@code Phrase} per
 * sentence to {@code phrases}.
 *
 * <p>The text is tokenized with a {@code PTBTokenizer}, the tokens are grouped
 * into sentences by a {@code WordToSentenceProcessor}, and each phrase is then
 * cut out of the original {@code text} rather than rebuilt from the tokens.
 * An earlier variant (previously kept here as commented-out code) iterated a
 * {@code DocumentPreprocessor}, joined each sentence with
 * {@code Sentence.listToString}, and had to replace {@code -LRB-} /
 * {@code -RRB-} with brackets before creating the phrase.
 */
public void createPhrases() {
    // Step 1: tokenize the whole text (empty options string), collecting every token.
    PTBTokenizer<CoreLabel> ptb = new PTBTokenizer<>(new StringReader(text), new CoreLabelTokenFactory(), "");
    List<CoreLabel> allTokens = new ArrayList<>();
    while (ptb.hasNext()) {
        allTokens.add(ptb.next());
    }

    // Step 2: group the flat token list into sentences.
    WordToSentenceProcessor<CoreLabel> splitter = new WordToSentenceProcessor<>();
    List<List<CoreLabel>> grouped = splitter.process(allTokens);

    // Step 3: slice each sentence out of the source text. A slice runs from the
    // end of the previous sentence up to the endPosition() of this sentence's
    // last token; trim() strips the whitespace left between sentences.
    int sliceStart = 0;
    for (List<CoreLabel> group : grouped) {
        CoreLabel lastToken = group.get(group.size() - 1);
        int sliceEnd = lastToken.endPosition();
        String raw = text.substring(sliceStart, sliceEnd);
        phrases.add(new Phrase(raw.trim()));
        sliceStart = sliceEnd;
    }
}