List of usage examples for the edu.stanford.nlp.pipeline.WordsToSentencesAnnotator constructor
public WordsToSentencesAnnotator(boolean verbose, String boundaryTokenRegex, Set<String> boundaryToDiscard, Set<String> htmlElementsToDiscard, String newlineIsSentenceBreak, String boundaryMultiTokenRegex, Set<String> tokenRegexesToDiscard)
From source file: de.tudarmstadt.ukp.dkpro.core.corenlp.CoreNlpSegmenter.java
License: Open Source License
@Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); tokenizerAnnotator = new ModelProviderBase<TokenizerAnnotator>(this, "corenlp", "tokenizer") { {// w w w . ja va2s .c o m setDefault(LOCATION, NOT_REQUIRED); } @Override protected TokenizerAnnotator produceResource(URL aUrl) throws IOException { Properties props = getAggregatedProperties(); Properties coreNlpProps = new Properties(); coreNlpProps.setProperty("tokenize.language", props.getProperty(LANGUAGE)); //coreNlpProps.setProperty("tokenize.class", null); //coreNlpProps.setProperty("tokenize.whitespace", "false"); //coreNlpProps.setProperty("tokenize.options", null); //coreNlpProps.setProperty("tokenize.keepeol", "false"); String extraOptions = null; TokenizerAnnotator annotator = new TokenizerAnnotator(verbose, coreNlpProps, extraOptions); return annotator; } }; sentenceAnnotator = new ModelProviderBase<WordsToSentencesAnnotator>(this, "corenlp", "sentence") { { setDefault(LOCATION, NOT_REQUIRED); } @Override protected WordsToSentencesAnnotator produceResource(URL aUrl) throws IOException { WordsToSentencesAnnotator annotator = new WordsToSentencesAnnotator(verbose, boundaryTokenRegex, boundaryToDiscard, htmlElementsToDiscard, newlineIsSentenceBreak, boundaryMultiTokenRegex, tokenRegexesToDiscard); return annotator; } }; }