Example usage for edu.stanford.nlp.process DocumentPreprocessor DocumentPreprocessor

List of usage examples for edu.stanford.nlp.process DocumentPreprocessor DocumentPreprocessor

Introduction

On this page you can find example usage of the DocumentPreprocessor() constructor of the class edu.stanford.nlp.process.DocumentPreprocessor.

Prototype

DocumentPreprocessor

Source Link

Usage

From source file:es.uniovi.aic.miex.run.Miex.java

License:Open Source License

/**
 * Splits the given text into sentences with a freshly constructed
 * {@code DocumentPreprocessor}.
 *
 * <p>The text is wrapped in a {@link StringReader} and handed to
 * {@code getSentencesFromText} together with {@code null, null, -1}
 * (presumably "no escaper, no sentence delimiter, no tag delimiter" --
 * confirm against the DocumentPreprocessor API in use).
 *
 * @param text the raw text to split
 * @return whatever {@code getSentencesFromText} produces for the text
 */
private static List<List<? extends HasWord>> chopSentences(String text) {
    DocumentPreprocessor splitter = new DocumentPreprocessor();
    StringReader input = new StringReader(text);
    return splitter.getSentencesFromText(input, null, null, -1);
}

From source file:reck.corpora.DocumentImpl.java

License:Open Source License

/**
 * Pre-processes the file named by {@code textFilename} into sentences and
 * then asks {@code ParserConstants.lp} to parse it.
 *
 * <p>The sentence list is stored in {@code document} (declared outside this
 * method) before being passed on to {@code parseFile}. If reading the file
 * raises an {@link IOException}, an error line is printed and parsing is
 * still attempted with whatever {@code document} currently holds.
 *
 * @return the list returned by {@code ParserConstants.lp.parseFile}
 */
public ArrayList getParseTreeList() {

    // Fixed parsing settings; every one of them is a hard-coded default here.
    TokenizerFactory tokFactory = null;
    DocumentPreprocessor preprocessor = new DocumentPreprocessor();
    boolean alreadyTokenized = false; // the input file is treated as not yet tokenized
    Function<List<HasWord>, List<HasWord>> wordEscaper = null;
    int tagDelim = -1;
    String sentenceDelim = null;
    String elementDelim = null;

    // Error output goes through the writer supplied by the parser's language-pack params.
    PrintWriter errOut = ParserConstants.lp.getOp().tlpParams.pw(System.err);

    try {
        if (elementDelim == null) {
            // Plain-text input: this is the path taken, since elementDelim is never set above.
            document = preprocessor.getSentencesFromText(textFilename, wordEscaper, sentenceDelim,
                    tagDelim);
        } else {
            // XML input, restricted to the given element.
            document = preprocessor.getSentencesFromXML(textFilename, wordEscaper, elementDelim,
                    sentenceDelim);
        }
    } catch (IOException e) {
        // NOTE(review): execution continues after this message, so parseFile below
        // receives the previous value of document -- confirm that is intended.
        errOut.println("ERROR: Couldn't open file: " + textFilename);
    }

    return ParserConstants.lp.parseFile(textFilename, noTaggedContent, startSentence, alreadyTokenized,
            tokFactory, document, preprocessor, wordEscaper, tagDelim);
}