Example usage for edu.stanford.nlp.pipeline TokenizerAnnotator TokenizerAnnotator

List of usage examples for edu.stanford.nlp.pipeline TokenizerAnnotator TokenizerAnnotator

Introduction

On this page you can find example usage of the edu.stanford.nlp.pipeline.TokenizerAnnotator constructor.

Prototype

public TokenizerAnnotator(boolean verbose, Properties props) 

Source Link

Usage

From source file: shef.mt.tools.ParsingProcessor.java

/**
 * Builds the Stanford CoreNLP pipeline used by this processor.
 *
 * <p>The pipeline is first created from a property set listing the annotators
 * ("tokenize, ssplit, pos", plus "parse" when dependency counts are required), and
 * then a whitespace tokenizer, a POS tagger and (optionally) a parser annotator are
 * created and added to it. A failure while creating any of these three annotators is
 * reported on standard output and does not abort construction.
 *
 * @param lang         language name used to pick default models when {@code pm} or
 *                     {@code dm} is {@code null}; "spanish" and "chinese" select their
 *                     own models, anything else (including {@code null}) selects English
 * @param pm           path of the POS tagger model, or {@code null} to use the default
 *                     for {@code lang}
 * @param dm           path of the parser model, or {@code null} to use the default for
 *                     {@code lang}
 * @param requirements set of required resources; "postags" and "depcounts" are the
 *                     entries consulted here
 */
public ParsingProcessor(String lang, String pm, String dm, HashSet<String> requirements) {
    //Store required resources:
    this.requiresPOSTags = requirements.contains("postags");
    this.requiresDepCounts = requirements.contains("depcounts");

    //Setup model paths (constant-first comparisons so a null language falls back to English):
    String posModel;
    if (pm != null) {
        posModel = pm;
    } else if ("spanish".equals(lang)) {
        posModel = "edu/stanford/nlp/models/pos-tagger/spanish/spanish-distsim.tagger";
    } else if ("chinese".equals(lang)) {
        posModel = "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger";
    } else {
        //English, and the fallback for any unrecognised language:
        posModel = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
    }

    String depModel;
    if (dm != null) {
        depModel = dm;
    } else if ("spanish".equals(lang)) {
        depModel = "edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz";
    } else if ("chinese".equals(lang)) {
        depModel = "edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz";
    } else {
        //English, and the fallback for any unrecognised language:
        depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz";
    }

    //Create base properties:
    Properties props = new Properties();
    if (this.requiresDepCounts) {
        props.setProperty("annotators", "tokenize, ssplit, pos, parse");
    } else {
        props.setProperty("annotators", "tokenize, ssplit, pos");
    }

    //Create base pipeline:
    //NOTE(review): the model paths chosen above are not placed in props; they are only
    //passed to the annotators created below - confirm this is intended.
    pipeline = new StanfordCoreNLP(props);

    try {
        //Create pipeline object:
        tokenizer = new TokenizerAnnotator(true, TokenizerAnnotator.TokenizerType.Whitespace);

        //Add objects to the pipeline:
        pipeline.addAnnotator(tokenizer);
    } catch (Exception ex) {
        System.out.println("ERROR: Problem while creating Stanford tokenizer.");
        //Report the underlying cause instead of discarding it:
        System.out.println("Cause: " + ex);
    }

    try {
        //Create pipeline object:
        tagger = new POSTaggerAnnotator(posModel, false);

        //Add object to the pipeline:
        pipeline.addAnnotator(tagger);
    } catch (Exception ex) {
        System.out.println(
                "ERROR: Problem while creating Stanford POS tagger. Please review the model paths and check for library availability.");
        //Report the underlying cause instead of discarding it:
        System.out.println("Cause: " + ex);
    }

    //If dependency counts are required:
    if (this.requiresDepCounts) {
        try {
            //Create pipeline object:
            parser = new ParserAnnotator(depModel, false, 300, StringUtils.EMPTY_STRING_ARRAY);

            //Add object to the pipeline:
            pipeline.addAnnotator(parser);
        } catch (Exception ex) {
            System.out.println(
                    "ERROR: Problem while creating Stanford dependency parser. Please review the model paths and check for library availability.");
            //Report the underlying cause instead of discarding it:
            System.out.println("Cause: " + ex);
        }
    }
}