Usage examples for the no-argument constructor DocumentPreprocessor() of edu.stanford.nlp.process.DocumentPreprocessor.
DocumentPreprocessor
From source file:es.uniovi.aic.miex.run.Miex.java
License:Open Source License
/**
 * Splits a block of raw text into sentences using a freshly constructed
 * {@code DocumentPreprocessor}.
 *
 * @param text the raw text to segment; it is wrapped in a {@link StringReader}
 * @return whatever {@code getSentencesFromText} yields for the text, typed as a
 *         list of sentences where each sentence is a list of {@code HasWord}
 */
private static List<List<? extends HasWord>> chopSentences(String text) {
    DocumentPreprocessor splitter = new DocumentPreprocessor();
    StringReader source = new StringReader(text);
    // The trailing arguments are presumably escaper, sentence delimiter and
    // tag delimiter, all left "unset" (null / -1) -- TODO confirm against the
    // DocumentPreprocessor version in use.
    return splitter.getSentencesFromText(source, null, null, -1);
}
From source file:reck.corpora.DocumentImpl.java
License:Open Source License
public ArrayList getParseTreeList() { // variables needed to process the files to be parsed TokenizerFactory tokenizerFactory = null; DocumentPreprocessor documentPreprocessor = new DocumentPreprocessor(); boolean tokenized = false; // whether or not the input file has already been tokenized Function<List<HasWord>, List<HasWord>> escaper = null; int tagDelimiter = -1; String sentenceDelimiter = null; String elementDelimiter = null; Options op = ParserConstants.lp.getOp(); PrintWriter pwErr = op.tlpParams.pw(System.err); try {//from w w w . j a v a 2s . c om if (elementDelimiter != null) { document = documentPreprocessor.getSentencesFromXML(textFilename, escaper, elementDelimiter, sentenceDelimiter); } else { document = documentPreprocessor.getSentencesFromText(textFilename, escaper, sentenceDelimiter, tagDelimiter); } } catch (IOException e) { pwErr.println("ERROR: Couldn't open file: " + textFilename); } ArrayList treeList = ParserConstants.lp.parseFile(textFilename, noTaggedContent, startSentence, tokenized, tokenizerFactory, document, documentPreprocessor, escaper, tagDelimiter); return treeList; }