Usage examples for the edu.stanford.nlp.pipeline.StanfordCoreNLP constructor
public StanfordCoreNLP(String propsFileNamePrefix, boolean enforceRequirements)
From source file:be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java
License:Open Source License
public void initPipeline() { String dmode = props.getProperty(DEP_PROPERTY); if (dmode != null) { depMode = props.getProperty(DEP_PROPERTY); }//from w w w . j av a2s . co m pipeline = new StanfordCoreNLP(props, false); }
From source file:com.epictodo.controller.nlp.NLPLoadEngine.java
License:Open Source License
public NLPLoadEngine() { this.mute();//from ww w . j a v a 2 s .com Properties _properties = new Properties(); _properties.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref, sentiment"); try { CLASSIFIER = CRFClassifier.getClassifierNoExceptions(CLASSIFIER_MODEL); LEXICAL_PARSER = LexicalizedParser.loadModel(ENGLISHPCFG_MODEL); _pipeline = new StanfordCoreNLP(_properties, true); _pipeline.addAnnotator(new TimeAnnotator("sutime", _properties)); _logger.log(Level.INFO, "Successfully loaded models."); } catch (RuntimeException ex) { _logger.log(Level.SEVERE, "Error loading models."); throw ex; } }
From source file:gr.aueb.cs.nlp.bioasq.classifiers.Baseline.java
public static ArrayList<String> lemmatize(String documentText) { ArrayList<String> lemmas = new ArrayList<String>(); Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); String text = documentText;/*from w ww. j a v a 2s . com*/ Annotation document = pipeline.process(text); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String lemma = token.get(LemmaAnnotation.class); lemmas.add(lemma); } } return lemmas; }
From source file:gr.aueb.cs.nlp.bioasq.classifiers.Features.java
public static ArrayList<String> lemmatize(String documentText) { ArrayList<String> lemmas = new ArrayList<String>(); Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); String text = documentText;// w w w .j a v a2 s . c o m Annotation document = pipeline.process(text); for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { String word = token.get(CoreAnnotations.TextAnnotation.class); String lemma = token.get(CoreAnnotations.LemmaAnnotation.class); lemmas.add(lemma); } } return lemmas; }
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/** Load Stanford Processor: skip unnecessary annotator */ protected static StanfordCoreNLP loadStanfordProcessor(Properties props) { boolean replicateCoNLL = Boolean.parseBoolean(props.getProperty(Constants.REPLICATECONLL_PROP, "false")); Properties pipelineProps = new Properties(props); StringBuilder annoSb = new StringBuilder(""); if (!Constants.USE_GOLD_POS && !replicateCoNLL) { annoSb.append("pos, lemma"); } else {// w w w . j av a 2 s .c om annoSb.append("lemma"); } if (Constants.USE_TRUECASE) { annoSb.append(", truecase"); } if (!Constants.USE_GOLD_NE && !replicateCoNLL) { annoSb.append(", ner"); } if (!Constants.USE_GOLD_PARSES && !replicateCoNLL) { annoSb.append(", parse"); } String annoStr = annoSb.toString(); SieveCoreferenceSystem.logger .info("MentionExtractor ignores specified annotators, using annotators=" + annoStr); pipelineProps.put("annotators", annoStr); return new StanfordCoreNLP(pipelineProps, false); }
From source file:nlpOperations.MyStemmer.java
public static void main(String[] args) { String text = "this is a test question"; Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); Annotation document = pipeline.process(text); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String lemma = token.get(LemmaAnnotation.class); System.out.println("lemmatized version :" + lemma); }// w ww . java 2 s . c o m } }
From source file:nlpOperations.MyStemmer.java
public static String sentLemma(String inputSent) { String stemmedSent = ""; Properties props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); StanfordCoreNLP pipeline = new StanfordCoreNLP(props, false); Annotation document = pipeline.process(inputSent); for (CoreMap sentence : document.get(SentencesAnnotation.class)) { for (CoreLabel token : sentence.get(TokensAnnotation.class)) { String word = token.get(TextAnnotation.class); String lemma = token.get(LemmaAnnotation.class); System.out.println("lemmatized version :" + lemma); stemmedSent += " " + lemma; }//from www .j a v a2s .co m } return stemmedSent; }
From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java
License:Open Source License
public StanfordCorefThrift() { // This works, as opposed to creating a // edu.stanford.nlp.pipeline.DeterministicCorefAnnotator // object directly, because the coreference code runs the // parse tree a few times on its own, despite it having // been run (and parse trees having been stored) as part // of the mandatory NER. Creating the object this way, // the coreference system can create new org.ets.research.nlp.stanford_thrift.parser objects // on-the-fly, despite the fact that they're never // initialized here. Very strange. These parsers // seem to use the default PCFG model. Properties props = new Properties(); props.put("annotators", "dcoref"); coref = new StanfordCoreNLP(props, false); }
From source file:SentimentAnalysis.SentiWordNet.java
License:Open Source License
public SentiWordNet(String pathToSWN) throws IOException { parser = new TypedDependency(); // This is our main dictionary representation dictionary = new HashMap<String, Double>(); props = new Properties(); props.put("annotators", "tokenize, ssplit, pos, lemma"); pipeline = new StanfordCoreNLP(props, false); // From String to list of doubles. HashMap<String, HashMap<Integer, Double>> tempDictionary = new HashMap<String, HashMap<Integer, Double>>(); BufferedReader csv = null;//w w w . jav a 2 s. c o m try { csv = new BufferedReader(new FileReader(pathToSWN)); int lineNumber = 0; String line; while ((line = csv.readLine()) != null) { lineNumber++; // If it's a comment, skip this line. if (!line.trim().startsWith("#")) { // We use tab separation String[] data = line.split("\t"); String wordTypeMarker = data[0]; // Example line: // POS ID PosS NegS SynsetTerm#sensenumber Desc // a 00009618 0.5 0.25 spartan#4 austere#3 ascetical#2 // ascetic#2 practicing great self-denial;...etc // Is it a valid line? Otherwise, through exception. if (data.length != 6) { throw new IllegalArgumentException( "Incorrect tabulation format in file, line: " + lineNumber); } // Calculate synset score as score = PosS - NegS Double synsetScore = Double.parseDouble(data[2]) - Double.parseDouble(data[3]); // Get all Synset terms String[] synTermsSplit = data[4].split(" "); // Go through all terms of current synset. 
for (String synTermSplit : synTermsSplit) { // Get synterm and synterm rank String[] synTermAndRank = synTermSplit.split("#"); String synTerm = synTermAndRank[0] + "#" + wordTypeMarker; int synTermRank = Integer.parseInt(synTermAndRank[1]); // What we get here is a map of the type: // term -> {score of synset#1, score of synset#2...} // Add map to term if it doesn't have one if (!tempDictionary.containsKey(synTerm)) { tempDictionary.put(synTerm, new HashMap<Integer, Double>()); } // Add synset link to synterm tempDictionary.get(synTerm).put(synTermRank, synsetScore); } } } // Go through all the terms. for (Map.Entry<String, HashMap<Integer, Double>> entry : tempDictionary.entrySet()) { String word = entry.getKey(); Map<Integer, Double> synSetScoreMap = entry.getValue(); // Calculate weighted average. Weigh the synsets according to // their rank. // Score= 1/2*first + 1/3*second + 1/4*third ..... etc. // Sum = 1/1 + 1/2 + 1/3 ... double score = 0.0; double sum = 0.0; for (Map.Entry<Integer, Double> setScore : synSetScoreMap.entrySet()) { score += setScore.getValue() / (double) setScore.getKey(); sum += 1.0 / (double) setScore.getKey(); } score /= sum; dictionary.put(word, score); } } catch (Exception e) { e.printStackTrace(); } finally { if (csv != null) { csv.close(); } } }
From source file:uk.co.jassoft.markets.utils.lingual.NamedEntityRecognizer.java
public NamedEntityRecognizer(String propertiesFilePrefix) { this.pipeline = new StanfordCoreNLP(propertiesFilePrefix, false); }