List of usage examples for the edu.stanford.nlp.pipeline.TokenizerAnnotator constructor
public TokenizerAnnotator(boolean verbose, Properties props)
From source file: shef.mt.tools.ParsingProcessor.java
public ParsingProcessor(String lang, String pm, String dm, HashSet<String> requirements) { //Store required resources: this.requiresPOSTags = requirements.contains("postags"); this.requiresDepCounts = requirements.contains("depcounts"); //Create model path objects: String posModel = null;/*w w w.j av a2s . co m*/ String depModel = null; //Setup model paths: if (pm == null) { if (lang.equals("english")) { posModel = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; } else if (lang.equals("spanish")) { posModel = "edu/stanford/nlp/models/pos-tagger/spanish/spanish-distsim.tagger"; } else if (lang.equals("chinese")) { posModel = "edu/stanford/nlp/models/pos-tagger/chinese-distsim/chinese-distsim.tagger"; } else { posModel = "edu/stanford.nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger"; } } else { posModel = pm; } if (dm == null) { if (lang.equals("english")) { depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; } else if (lang.equals("spanish")) { depModel = "edu/stanford/nlp/models/lexparser/spanishPCFG.ser.gz"; } else if (lang.equals("chinese")) { depModel = "edu/stanford/nlp/models/lexparser/chinesePCFG.ser.gz"; } else { depModel = "edu/stanford/nlp/models/lexparser/englishRNN.ser.gz"; } } else { depModel = dm; } //Create base properties: Properties props = new Properties(); if (this.requiresDepCounts) { props.setProperty("annotators", "tokenize, ssplit, pos, parse"); } else { props.setProperty("annotators", "tokenize, ssplit, pos"); } //Create base pipeline: pipeline = new StanfordCoreNLP(props); try { //Create pipeline object: tokenizer = new TokenizerAnnotator(true, TokenizerAnnotator.TokenizerType.Whitespace); //Add objects to the pipeline: pipeline.addAnnotator(tokenizer); } catch (Exception ex) { System.out.println("ERROR: Problem while creating Stanford tokenizer."); } try { //Create pipeline object: tagger = new POSTaggerAnnotator(posModel, false); //Add object to the 
pipeline: pipeline.addAnnotator(tagger); } catch (Exception ex) { System.out.println( "ERROR: Problem while creating Stanford POS tagger. Please review the model paths and check for library availability."); } //If dependency counts are required: if (this.requiresDepCounts) { try { //Create pipeline object: parser = new ParserAnnotator(depModel, false, 300, StringUtils.EMPTY_STRING_ARRAY); //Add object to the pipeline: pipeline.addAnnotator(parser); } catch (Exception ex) { System.out.println( "ERROR: Problem while creating Stanford dependency parser. Please review the model paths and check for library availability."); } } }