Usage examples for the edu.stanford.nlp.pipeline.StanfordCoreNLP constructor
public StanfordCoreNLP(String propsFileNamePrefix, boolean enforceRequirements)
From source file:be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java
License:Open Source License
public void initPipeline() { String dmode = props.getProperty(DEP_PROPERTY); if (dmode != null) { depMode = props.getProperty(DEP_PROPERTY); }//from w w w . j av a2s . co m pipeline = new StanfordCoreNLP(props, false); }
From source file:com.epictodo.controller.nlp.NLPLoadEngine.java
License:Open Source License
public NLPLoadEngine() { this.mute();//from ww w . j a v a 2 s .com Properties _properties = new Properties(); _properties.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment"); try { CLASSIFIER = CRFClassifier.getClassifierNoExceptions(CLASSIFIER_MODEL); LEXICAL_PARSER = LexicalizedParser.loadModel(ENGLISHPCFG_MODEL); _pipeline = new StanfordCoreNLP(_properties, true); _pipeline.addAnnotator(new TimeAnnotator("sutime", _properties)); _logger.log(Level.INFO, "Successfully loaded models."); } catch (RuntimeException ex) { _logger.log(Level.SEVERE, "Error loading models."); throw ex; } }
From source file:gr.aueb.cs.nlp.bioasq.classifiers.Baseline.java
public static ArrayList<String> lemmatize(String documentText) { ArrayList<String> lemmas = new ArrayList<String>(); Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); String text = documentText;/*from w ww. j a v a 2s . com*/ Annotation document = pipeline.process(text); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String lemma = token.get(LemmaAnnotation.class); lemmas.add(lemma); } } return lemmas; }
From source file:gr.aueb.cs.nlp.bioasq.classifiers.Features.java
public static ArrayList<String> lemmatize(String documentText) { ArrayList<String> lemmas = new ArrayList<String>(); Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); String text = documentText;// w w w .j a v a2 s . c o m Annotation document = pipeline.process(text); for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { String word = token.get(CoreAnnotations.TextAnnotation.class); String lemma = token.get(CoreAnnotations.LemmaAnnotation.class); lemmas.add(lemma); } } return lemmas; }
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/** Load Stanford Processor: skip unnecessary annotator */ protected static StanfordCoreNLP loadStanfordProcessor(Properties props) { boolean replicateCoNLL = Boolean.parseBoolean(props.getProperty(Constants.REPLICATECONLL_PROP, "false")); Properties pipelineProps = new Properties(props); StringBuilder annoSb = new StringBuilder(""); if (!Constants.USE_GOLD_POS && !replicateCoNLL) { annoSb.append("pos, lemma"); } else {// w w w . j av a 2 s .c om annoSb.append("lemma"); } if (Constants.USE_TRUECASE) { annoSb.append(", truecase"); } if (!Constants.USE_GOLD_NE && !replicateCoNLL) { annoSb.append(", ner"); } if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) { annoSb.append(", parse"); } String annoStr = annoSb.toString(); SieveCoreferenceSystem.logger .info("MentionExtractor ignores specified annotators, using annotators=" + annoStr); pipelineProps.put("annotators", annoStr); return new StanfordCoreNLP(pipelineProps, false); }
From source file:nlpOperations.MyStemmer.java
public static void main(String[] args) { String text = "this is a test question"; Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); Annotation document = pipeline.process(text); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String lemma = token.get(LemmaAnnotation.class); System.out.println("lemmatized version :" + lemma); }// w ww . java 2 s . c o m } }
From source file:nlpOperations.MyStemmer.java
public static String sentLemma(String inputSent) { String stemmedSent = ""; Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); Annotation document = pipeline.process(inputSent); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String lemma = token.get(LemmaAnnotation.class); System.out.println("lemmatized version :" + lemma); stemmedSent += " " + lemma; }//from www .j a v a2s .co m } return stemmedSent; }
From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java
License:Open Source License
public StanfordCorefThrift() { // This works, as opposed to creating a // edu.stanford.nlp.pipeline.DeterministicCorefAnnotator // object directly, because the coreference code runs the // parse tree a few times on its own, despite it having // been run (and parse trees having been stored) as part // of the mandatory NER. Creating the object this way, // the coreference system can create new org.ets.research.nlp.stanford_thrift.parser objects // on-the-fly, despite the fact that they're never // initialized here. Very strange. These parsers // seem to use the default PCFG model. Properties props = new Properties(); props.put("annotators", "dcoref"); coref = new StanfordCoreNLP(props, false); }
From source file:SentimentAnalysis.SentiWordNet.java
License:Open Source License
public SentiWordNet(String pathToSWN) throws IOException { parser = new TypedDependency(); // This is our main dictionary representation dictionary = new HashMap<String, Double>(); props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); pipeline = new StanfordCoreNLP(props, false); // From String to list of doubles. HashMap<String, HashMap<Integer, Double>> tempDictionary = new HashMap<String, HashMap<Integer, Double>>(); BufferedReader csv = null;//w w w . jav a 2 s. c o m try { csv = new BufferedReader(new FileReader(pathToSWN)); int lineNumber = 0; String line; while ((line = csv.readLine()) != null) { lineNumber++; // If it's a comment, skip this line. if (!line.trim().startsWith("#")) { // We use tab separation String[] data = line.split("\t"); String wordTypeMarker = data[0]; // Example line: // POS ID PosS NegS SynsetTerm#sensenumber Desc // a 00009618 0.5 0.25 spartan#4 austere#3 ascetical#2 // ascetic#2 practicing great self-denial;...etc // Is it a valid line? Otherwise, through exception. if (data.length != 6) { throw new IllegalArgumentException( "Incorrect tabulation format in file, line: " + lineNumber); } // Calculate synset score as score = PosS - NegS Double synsetScore = Double.parseDouble(data[2]) - Double.parseDouble(data[3]); // Get all Synset terms String[] synTermsSplit = data[4].split(" "); // Go through all terms of current synset. 
for (String synTermSplit : synTermsSplit) { // Get synterm and synterm rank String[] synTermAndRank = synTermSplit.split("#"); String synTerm = synTermAndRank[0] + "#" + wordTypeMarker; int synTermRank = Integer.parseInt(synTermAndRank[1]); // What we get here is a map of the type: // term -> {score of synset#1, score of synset#2...} // Add map to term if it doesn't have one if (!tempDictionary.containsKey(synTerm)) { tempDictionary.put(synTerm, new HashMap<Integer, Double>()); } // Add synset link to synterm tempDictionary.get(synTerm).put(synTermRank, synsetScore); } } } // Go through all the terms. for (Map.Entry<String, HashMap<Integer, Double>> entry : tempDictionary.entrySet()) { String word = entry.getKey(); Map<Integer, Double> synSetScoreMap = entry.getValue(); // Calculate weighted average. Weigh the synsets according to // their rank. // Score= 1/2*first + 1/3*second + 1/4*third ..... etc. // Sum = 1/1 + 1/2 + 1/3 ... double score = 0.0; double sum = 0.0; for (Map.Entry<Integer, Double> setScore : synSetScoreMap.entrySet()) { score += setScore.getValue() / (double) setScore.getKey(); sum += 1.0 / (double) setScore.getKey(); } score /= sum; dictionary.put(word, score); } } catch (Exception e) { e.printStackTrace(); } finally { if (csv != null) { csv.close(); } } }
From source file:uk.co.jassoft.markets.utils.lingual.NamedEntityRecognizer.java
public NamedEntityRecognizer(String propertiesFilePrefix) { this.pipeline = new StanfordCoreNLP(propertiesFilePrefix, false); }