Example usage for the edu.stanford.nlp.pipeline.WordsToSentencesAnnotator constructor

List of usage examples for the edu.stanford.nlp.pipeline.WordsToSentencesAnnotator constructor

Introduction

On this page you can find example usage of the edu.stanford.nlp.pipeline.WordsToSentencesAnnotator constructor.

Prototype

public WordsToSentencesAnnotator(boolean verbose, String boundaryTokenRegex, Set<String> boundaryToDiscard,
            Set<String> htmlElementsToDiscard, String newlineIsSentenceBreak, String boundaryMultiTokenRegex,
            Set<String> tokenRegexesToDiscard) 

Source Link

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.CoreNlpSegmenter.java

License:Open Source License

/**
 * Initializes this component and creates the providers for the CoreNLP tokenizer
 * and sentence-splitting annotators.
 *
 * <p>After delegating to {@code super.initialize}, this assigns
 * {@code tokenizerAnnotator} and {@code sentenceAnnotator} to anonymous
 * {@code ModelProviderBase} subclasses. Each one sets the default for
 * {@code LOCATION} to {@code NOT_REQUIRED} and overrides {@code produceResource}
 * to construct the annotator from this component's fields.
 *
 * @param aContext the UIMA context, passed on to {@code super.initialize}
 * @throws ResourceInitializationException declared by the overridden method;
 *         nothing in this body throws it directly
 */
@Override
public void initialize(UimaContext aContext) throws ResourceInitializationException {
    super.initialize(aContext);

    tokenizerAnnotator = new ModelProviderBase<TokenizerAnnotator>(this, "corenlp", "tokenizer") {
        // Instance initializer of the anonymous subclass.
        {
            setDefault(LOCATION, NOT_REQUIRED);
        }

        /**
         * Builds the tokenizer annotator. The URL argument is not used; the only
         * setting handed to CoreNLP is the language read from the aggregated
         * provider properties.
         */
        @Override
        protected TokenizerAnnotator produceResource(URL aUrl) throws IOException {
            final String language = getAggregatedProperties().getProperty(LANGUAGE);

            final Properties tokenizerSettings = new Properties();
            tokenizerSettings.setProperty("tokenize.language", language);

            // No additional tokenizer options are supplied; the typed local keeps
            // the constructor call resolving against the String overload.
            final String noExtraOptions = null;

            return new TokenizerAnnotator(verbose, tokenizerSettings, noExtraOptions);
        }
    };

    sentenceAnnotator = new ModelProviderBase<WordsToSentencesAnnotator>(this, "corenlp", "sentence") {
        // Instance initializer of the anonymous subclass.
        {
            setDefault(LOCATION, NOT_REQUIRED);
        }

        /**
         * Builds the sentence splitter directly from the enclosing component's
         * configuration fields. The URL argument is not used.
         */
        @Override
        protected WordsToSentencesAnnotator produceResource(URL aUrl) throws IOException {
            return new WordsToSentencesAnnotator(
                    verbose,
                    boundaryTokenRegex,
                    boundaryToDiscard,
                    htmlElementsToDiscard,
                    newlineIsSentenceBreak,
                    boundaryMultiTokenRegex,
                    tokenRegexesToDiscard);
        }
    };
}