Example usage for edu.stanford.nlp.ling IndexedWord lemma

List of usage examples for edu.stanford.nlp.ling IndexedWord lemma

Introduction

On this page you can find example usages of the lemma() method of edu.stanford.nlp.ling.IndexedWord.

Prototype

@Override
    public String lemma() 

Source Link

Usage

From source file:ca.ualberta.exemplar.core.RelationExtraction.java

License:Open Source License

/**
 * Builds a relation name from a verb phrase followed by a noun phrase.
 *
 * <p>Each phrase starts with its head word (when non-null). Modifiers are collected through
 * {@code addModifiers} only when {@code shouldNormalize} is false. Both phrases are passed
 * through {@code sortWordsByIndex} before being rendered.
 *
 * @param verb head of the verb phrase, may be null
 * @param noun head of the noun phrase, may be null
 * @param dependencies dependency graph handed to {@code addModifiers}
 * @param shouldNormalize when true, lemmas are emitted and no modifiers are added; otherwise
 *        the surface words (including modifiers) are emitted
 * @return the space-separated words, verb phrase first, trimmed
 */
private static String produceRelationName(IndexedWord verb, IndexedWord noun, SemanticGraph dependencies,
        boolean shouldNormalize) {
    List<IndexedWord> verbPhrase = new ArrayList<IndexedWord>();
    List<IndexedWord> nounPhrase = new ArrayList<IndexedWord>();

    if (verb != null) {
        verbPhrase.add(verb);
    }
    if (noun != null) {
        nounPhrase.add(noun);
    }

    // Modifiers are only attached to the un-normalized form; noun first, then verb.
    if (!shouldNormalize) {
        if (noun != null) {
            addModifiers(nounPhrase, noun, dependencies);
        }
        if (verb != null) {
            addModifiers(verbPhrase, verb, dependencies);
        }
    }

    sortWordsByIndex(verbPhrase);
    sortWordsByIndex(nounPhrase);

    // Render the verb phrase followed by the noun phrase in a single pass.
    List<IndexedWord> ordered = new ArrayList<IndexedWord>(verbPhrase);
    ordered.addAll(nounPhrase);

    StringBuilder relation = new StringBuilder();
    for (IndexedWord token : ordered) {
        relation.append(shouldNormalize ? token.lemma() : token.word()).append(' ');
    }
    return relation.toString().trim();
}

From source file:ca.ualberta.exemplar.core.RelationExtraction.java

License:Open Source License

/**
 * Builds a relation name from an already rendered verb string followed by a noun phrase.
 *
 * <p>The noun phrase starts with {@code noun} (when non-null); its modifiers are collected
 * through {@code addModifiers} only when {@code shouldNormalize} is false. The phrase is passed
 * through {@code sortWordsByIndex} before being rendered.
 *
 * @param verb text placed at the start of the relation name
 * @param noun head of the noun phrase, may be null
 * @param dependencies dependency graph handed to {@code addModifiers}
 * @param shouldNormalize when true, noun lemmas are emitted and no modifiers are added;
 *        otherwise the surface words (including modifiers) are emitted
 * @return the verb followed by the space-separated noun phrase, trimmed
 */
private static String produceRelationName(String verb, IndexedWord noun, SemanticGraph dependencies,
        boolean shouldNormalize) {
    StringBuilder relation = new StringBuilder();
    relation.append(verb).append(' ');

    List<IndexedWord> nounPhrase = new ArrayList<IndexedWord>();
    if (noun != null) {
        nounPhrase.add(noun);
        if (!shouldNormalize) {
            addModifiers(nounPhrase, noun, dependencies);
        }
    }

    sortWordsByIndex(nounPhrase);

    for (IndexedWord token : nounPhrase) {
        String text = shouldNormalize ? token.lemma() : token.word();
        relation.append(text);
        relation.append(' ');
    }

    return relation.toString().trim();
}

From source file:edu.anu.spice.SemanticConcept.java

License:Open Source License

/**
 * Creates a concept from a word's lemma, trimmed and lower-cased, together with its synsets.
 *
 * <p>Lower-casing uses {@link java.util.Locale#ROOT} so the resulting key does not depend on
 * the JVM's default locale (for example, "I" would otherwise become a dotless "ı" under a
 * Turkish locale).
 *
 * @param word the word whose lemma names the concept; its lemma must be non-null
 * @param synsets synset identifiers forwarded unchanged to the delegating constructor
 */
public SemanticConcept(IndexedWord word, HashSet<Integer> synsets) {
    this(word.lemma().trim().toLowerCase(java.util.Locale.ROOT), synsets);
}

From source file:edu.anu.spice.SpiceParser.java

License:Open Source License

/**
 * Returns the predicate text for {@code mainPred}, attaching its phrasal-verb particle if any.
 *
 * <p>Without a particle child the lemma of {@code mainPred} is returned. With one, the result
 * is {@code "<lemma> <particle>"}, where the lemma part is left empty when it is "be".
 *
 * @param sg graph in which the particle child is looked up
 * @param mainPred the main predicate node
 * @return the predicate string
 */
protected String getPredicate(SemanticGraph sg, IndexedWord mainPred) {
    if (!sg.hasChildWithReln(mainPred, UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE)) {
        return mainPred.lemma();
    }

    IndexedWord particle = sg.getChildWithReln(mainPred,
            UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE);
    // The copula itself is dropped, leaving only the particle after the separating space.
    String verbPart = mainPred.lemma().equals("be") ? "" : mainPred.lemma();
    return String.format("%s %s", verbPart, particle.value());
}

From source file:edu.anu.spice.SpiceParser.java

License:Open Source License

/**
 * Extracts proposed tuples from every sentence of an annotation.
 *
 * <p>For each sentence the CollapsedCCProcessed dependency graph is fetched, preprocessed with
 * several {@code SemanticGraphEnhancer} steps, and then matched against a fixed sequence of
 * Semgrex patterns. Every match is recorded in the returned {@code ProposedTuples} through one
 * of its {@code addTuple} overloads.
 *
 * @param ann the annotation whose sentences are parsed
 * @return the tuples collected over all sentences
 */
protected ProposedTuples parseAnnotation(Annotation ann) {
    ProposedTuples tuples = new ProposedTuples();
    // First pass: collect one dependency graph per sentence.
    ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph sg = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
        sgs.add(sg);
    }
    // Second pass: preprocess each graph and run the extraction patterns over it.
    for (SemanticGraph sg : sgs) {
        // Everything from RuleBasedParser except resolvePlurals(sg);
        SemanticGraphEnhancer.processQuanftificationModifiers(sg);
        SemanticGraphEnhancer.collapseCompounds(sg);
        SemanticGraphEnhancer.collapseParticles(sg);
        SemanticGraphEnhancer.resolvePronouns(sg);

        // Subject / predicate / object triples. For nmod:* object relations other than
        // nmod:poss and nmod:agent, the relation suffix is appended to the predicate.
        SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            String reln = matcher.getRelnString("objreln");
            String predicate = getPredicate(sg, pred);
            if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
                predicate += reln.replace("nmod:", " ").replace("_", " ");
            }
            tuples.addTuple(subj, obj, predicate);
        }

        // Same triple handling as above, applied to ACL_PATTERN matches.
        matcher = ACL_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            String reln = matcher.getRelnString("objreln");
            String predicate = getPredicate(sg, pred);
            if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
                predicate += reln.replace("nmod:", " ").replace("_", " ");
            }
            tuples.addTuple(subj, obj, predicate);
        }

        // Subject / predicate pairs. When the predicate has a case marker, the marker text
        // (plus any multi-word-expression children) becomes the relation between the two;
        // otherwise a two-element tuple is added unless the predicate lemma is "be".
        SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN };
        for (SemgrexPattern p : subjPredPatterns) {
            matcher = p.matcher(sg);
            while (matcher.find()) {
                IndexedWord subj = matcher.getNode("subj");
                IndexedWord pred = matcher.getNode("pred");
                if (sg.hasChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
                    IndexedWord caseMarker = sg.getChildWithReln(pred,
                            UniversalEnglishGrammaticalRelations.CASE_MARKER);
                    String prep = caseMarker.value();
                    if (sg.hasChildWithReln(caseMarker,
                            UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                        for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker,
                                UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                            prep = prep + " " + additionalCaseMarker.value();
                        }
                    }
                    tuples.addTuple(subj, pred, prep);
                } else {
                    if (!pred.lemma().equals("be")) {
                        tuples.addTuple(subj, pred);
                    }
                }
            }
        }

        // Object / adjective pairs from ADJ_MOD_PATTERN.
        matcher = ADJ_MOD_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord adj = matcher.getNode("adj");
            tuples.addTuple(obj, adj);
        }

        // Object / adjective pairs from ADJ_PRED_PATTERN.
        matcher = ADJ_PRED_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord adj = matcher.getNode("adj");
            tuples.addTuple(obj, adj);
        }

        // Governor / modifier pairs; the relation name with "nmod:" stripped and underscores
        // turned into spaces is used as the predicate. "poss" and "agent" are skipped here.
        matcher = PP_MOD_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord gov = matcher.getNode("gov");
            IndexedWord mod = matcher.getNode("mod");
            String reln = matcher.getRelnString("reln");
            String predicate = reln.replace("nmod:", "").replace("_", " ");
            if (predicate.equals("poss") || predicate.equals("agent")) {
                continue;
            }
            tuples.addTuple(gov, mod, predicate);
        }

        // POSS_PATTERN matches are recorded as (mod, gov, "have").
        matcher = POSS_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord gov = matcher.getNode("gov");
            IndexedWord mod = matcher.getNode("mod");
            tuples.addTuple(mod, gov, "have");
        }

        // AGENT_PATTERN matches are recorded with the predicate text from getPredicate.
        matcher = AGENT_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            tuples.addTuple(subj, obj, getPredicate(sg, pred));
        }

        // The three plural patterns below hand their matched nodes to
        // checkForNumericAttribute; note they iterate with findNextMatchingNode(), not find().
        matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            checkForNumericAttribute(tuples, sg, subj);
            checkForNumericAttribute(tuples, sg, obj);
        }

        matcher = PLURAL_SUBJECT_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord subj = matcher.getNode("subj");
            checkForNumericAttribute(tuples, sg, subj);
        }

        matcher = PLURAL_OTHER_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord word = matcher.getNode("word");
            checkForNumericAttribute(tuples, sg, word);
        }

        // Compound nouns: record (tail, head) and remember both nodes so the final
        // NOUN_PATTERN pass does not add them again as single-word tuples.
        matcher = COMPOUND_NOUN_PATTERN.matcher(sg);
        Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>();
        while (matcher.find()) {
            IndexedWord tail = matcher.getNode("tail");
            IndexedWord head = matcher.getNode("head");
            compoundNouns.add(tail);
            compoundNouns.add(head);
            tuples.addTuple(tail, head);
        }

        // Must happen last, since it will reuse existing parts of the scene
        // graph
        matcher = NOUN_CONJ_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord tail = matcher.getNode("tail");
            IndexedWord head = matcher.getNode("head");
            // Only tuples present before this match are scanned; tuples appended inside the
            // loop are not revisited.
            int original_length = tuples.tuples.size();
            for (int i = 0; i < original_length; ++i) {
                ArrayList<String> prop = tuples.tuples.get(i);
                // NOTE(review): prop holds Strings while head/tail are IndexedWord, so these
                // equals() checks look like they can never be true, which would make both
                // addTuple calls unreachable. Confirm against IndexedWord.equals and how
                // ProposedTuples stores its entries.
                if (prop.size() == 3 && prop.get(0).equals(head)) {
                    tuples.addTuple(head, prop.get(1), prop.get(2));
                }
                // NOTE(review): this branch tests prop.get(1) against tail and then passes
                // prop.get(1) again as the second argument - verify this is intended.
                if (prop.size() == 3 && prop.get(1).equals(tail)) {
                    tuples.addTuple(tail, prop.get(1), prop.get(2));
                }
            }
        }

        // Remaining nouns that were not part of a compound become single-element tuples.
        matcher = NOUN_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord word = matcher.getNode("word");
            if (!compoundNouns.contains(word)) {
                tuples.addTuple(word);
            }
        }
    }
    return tuples;
}

From source file:eu.ubipol.opinionmining.nlp_engine.Sentence.java

License:Open Source License

/**
 * Builds the token tree of one sentence from its dependency graph.
 *
 * <p>The first root of {@code dependencies} becomes {@code sentenceRoot}; its descendants are
 * attached through {@code addChildTokens}. Scores are then transferred on the root and, when
 * the root is a keyword, its own score is added as an aspect score.
 *
 * <p>{@code indexStart} is passed by value, so this constructor cannot advance the caller's
 * offset. A caller that keeps a running token offset across sentences has to add
 * {@code dependencies.size()} itself.
 *
 * @param dependencies dependency graph of the sentence
 * @param indexStart offset added to every word index of this sentence
 * @param adp database adapter forwarded to every created {@code Token}
 * @param beginPosition position value forwarded to every created {@code Token}
 */
protected Sentence(SemanticGraph dependencies, int indexStart, DatabaseAdapter adp, int beginPosition) {
    IndexedWord rootWord = dependencies.getFirstRoot();
    // The root token has no parent token and no incoming relation, hence the two nulls.
    sentenceRoot = new Token(rootWord.originalText(), rootWord.lemma(), rootWord.tag(), null, null,
            rootWord.index() + indexStart, rootWord.beginPosition(), rootWord.endPosition(), adp,
            beginPosition);
    addChildTokens(sentenceRoot, rootWord, dependencies, indexStart, adp, beginPosition);
    sentenceRoot.transferScores();
    if (sentenceRoot.isAKeyword()) {
        sentenceRoot.addAspectScore(sentenceRoot.getScore(), sentenceRoot.getWeight(),
                sentenceRoot.getAspect());
    }
}

From source file:eu.ubipol.opinionmining.nlp_engine.Sentence.java

License:Open Source License

/**
 * Recursively mirrors the dependency subtree below {@code currentRoot} as child tokens of
 * {@code rootToken}.
 *
 * <p>Each child word becomes a {@code Token} labelled with the string form of the edge from
 * {@code currentRoot} to that child, is attached to {@code rootToken}, and is then expanded in
 * the same way.
 *
 * @param rootToken token that receives the created children
 * @param currentRoot graph node corresponding to {@code rootToken}
 * @param dependencies dependency graph being walked
 * @param indexStart offset added to every word index
 * @param adp database adapter forwarded to every created {@code Token}
 * @param beginPosition position value forwarded to every created {@code Token}
 */
private void addChildTokens(Token rootToken, IndexedWord currentRoot, SemanticGraph dependencies,
        int indexStart, DatabaseAdapter adp, int beginPosition) {
    for (IndexedWord node : dependencies.getChildren(currentRoot)) {
        String relation = dependencies.getEdge(currentRoot, node).toString();
        int shiftedIndex = node.index() + indexStart;

        Token nodeToken = new Token(node.originalText(), node.lemma(), node.tag(), rootToken, relation,
                shiftedIndex, node.beginPosition(), node.endPosition(), adp, beginPosition);
        rootToken.addChildToken(nodeToken);

        // Depth-first: expand this child's own subtree before moving to its next sibling.
        addChildTokens(nodeToken, node, dependencies, indexStart, adp, beginPosition);
    }
}

From source file:featureExtractor.NLPFeatures.java

/**
 * Annotates one line of text and appends one record for it to each feature writer.
 *
 * <p>Every record starts with {@code lineId} and ends with a newline; fields are
 * tab-separated:
 * <ul>
 *   <li>{@code bw_root}: {@code NER:text} for each root word of each sentence (the lemma when
 *       the NER tag is "O", the original text otherwise);</li>
 *   <li>{@code bw_underRoot}: {@code NER:text} for each direct child of a root whose NER tag
 *       is not "O";</li>
 *   <li>{@code bw_subj}: {@code NER:text} for each nominal (passive) subject found below a
 *       descendant of a root;</li>
 *   <li>{@code bw_nerType}: the number of JJS/RBS tokens followed by one count per entry of
 *       {@code ners}.</li>
 * </ul>
 *
 * <p>Annotation is best-effort: any exception raised while processing the text is reported on
 * standard output and the record is completed with whatever was collected so far. The writers
 * are flushed on every 25th line id.
 *
 * @param text the text to annotate
 * @param lineId identifier written at the start of every record
 * @throws IOException if writing to one of the writers fails outside the annotation step
 */
static void processLine(String text, int lineId) throws IOException {
    String id = Integer.toString(lineId);
    bw_root.write(id);
    bw_subj.write(id);
    bw_underRoot.write(id);
    bw_nerType.write(id);

    HashMap<String, Integer> nerCount = new HashMap<>();
    int superlativePOS = 0;

    try {
        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        // nsubj | nsubjpass; built once because it is the same for every word.
        List<GrammaticalRelation> subjectRelations = Arrays.asList(
                EnglishGrammaticalRelations.NOMINAL_SUBJECT,
                EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT);

        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
        for (CoreMap sentence : sentences) {
            SemanticGraph dependencies = sentence
                    .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

            for (IndexedWord rword : dependencies.getRoots()) {
                // Root word: lemma for non-entities, surface text for named entities.
                if (rword.ner().equals("O")) {
                    bw_root.write("\t" + rword.ner() + ":" + rword.lemma());
                } else {
                    bw_root.write("\t" + rword.ner() + ":" + rword.originalText());
                }

                // Named entities directly under the root.
                for (IndexedWord child : dependencies.getChildren(rword)) {
                    if (!child.ner().equals("O")) {
                        bw_underRoot.write("\t" + child.ner() + ":" + child.originalText());
                    }
                }

                // Subjects attached to any descendant of the root.
                for (IndexedWord current : dependencies.descendants(rword)) {
                    for (IndexedWord nsubWord : dependencies.getChildrenWithRelns(current,
                            subjectRelations)) {
                        if (!nsubWord.ner().equals("O")) {
                            bw_subj.write("\t" + nsubWord.ner() + ":" + nsubWord.originalText());
                        } else {
                            bw_subj.write("\t" + nsubWord.ner() + ":" + nsubWord.lemma());
                        }
                    }
                }
            }

            // NER type frequencies and superlative count over all tokens of the sentence.
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

                if (pos.equals("JJS") || pos.equals("RBS")) {
                    superlativePOS++;
                }

                nerCount.merge(ne, 1, Integer::sum);
            }
        }
    } catch (Exception e) {
        // Deliberately best-effort: a line that cannot be annotated must not abort the run,
        // but report which line failed and why instead of discarding the exception.
        System.out.println("IGNORED: line " + lineId + ": " + e);
    }

    bw_nerType.write("\t" + superlativePOS);
    for (String ne : ners) {
        bw_nerType.write("\t" + nerCount.getOrDefault(ne, 0));
    }

    bw_root.write("\n");
    bw_underRoot.write("\n");
    bw_nerType.write("\n");
    bw_subj.write("\n");

    if (lineId % 25 == 0) {
        bw_root.flush();
        bw_underRoot.flush();
        bw_nerType.flush();
        bw_subj.flush();
    }
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Counts the modifiers attached to the head word of this mention.
 *
 * <p>A child of the head word adds to the count when its relation is an adjectival, verbal or
 * relative-clause modifier or its relation name starts with "prep_"; when it is a noun
 * compound modifier and {@code nerString} is "O"; and when it is a possession modifier whose
 * lemma is not in {@code dict.determiners}.
 *
 * @param dict dictionaries providing the determiner list
 * @return the modifier count, or 0 when there is no head word
 */
public int getModifiers(Dictionaries dict) {
    int modifierCount = 0;

    if (headIndexedWord != null) {
        for (Pair<GrammaticalRelation, IndexedWord> edge : dependency.childPairs(headIndexedWord)) {
            GrammaticalRelation relation = edge.first;

            if (relation == EnglishGrammaticalRelations.ADJECTIVAL_MODIFIER
                    || relation == EnglishGrammaticalRelations.VERBAL_MODIFIER
                    || relation == EnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER
                    || relation.toString().startsWith("prep_")) {
                modifierCount += 1;
            }

            // Noun modifiers only count when the mention is not a named entity.
            if (nerString.equals("O") && relation == EnglishGrammaticalRelations.NOUN_COMPOUND_MODIFIER) {
                modifierCount += 1;
            }

            // Possessives count unless the possessor is a personal determiner.
            if (relation == EnglishGrammaticalRelations.POSSESSION_MODIFIER
                    && !dict.determiners.contains(edge.second.lemma())) {
                modifierCount += 1;
            }
        }
    }

    return modifierCount;
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Reports whether the head word of this mention is negated.
 *
 * <p>Checks, in order: a child of the head word whose lemma is in {@code dict.negations}; a
 * sibling with such a lemma, provided the head word has no parent via a nominal-subject
 * relation; and finally whether the relation to the first parent is listed in
 * {@code dict.neg_relations}.
 *
 * @param dict dictionaries providing the negation words and negative relations
 * @return 1 when a negation is found, 0 otherwise (including when there is no head word)
 */
public int getNegation(Dictionaries dict) {
    if (headIndexedWord == null) {
        return 0;
    }

    // Direct negation in a child.
    for (IndexedWord kid : dependency.getChildren(headIndexedWord)) {
        if (dict.negations.contains(kid.lemma())) {
            return 1;
        }
    }

    // Negation in a sibling, unless the head word hangs off its parent as a nominal subject.
    for (IndexedWord peer : dependency.getSiblings(headIndexedWord)) {
        if (dict.negations.contains(peer.lemma())
                && !dependency.hasParentWithReln(headIndexedWord, EnglishGrammaticalRelations.NOMINAL_SUBJECT)) {
            return 1;
        }
    }

    // Negative preposition on the edge to the first parent.
    List<Pair<GrammaticalRelation, IndexedWord>> parents = dependency.parentPairs(headIndexedWord);
    if (parents.isEmpty()) {
        return 0;
    }
    GrammaticalRelation parentRelation = parents.get(0).first;
    return dict.neg_relations.contains(parentRelation.toString()) ? 1 : 0;
}