Example usage for edu.stanford.nlp.semgraph SemanticGraph outgoingEdgeList

Introduction

On this page you can find example usage of edu.stanford.nlp.semgraph SemanticGraph outgoingEdgeList.

Prototype

public List<SemanticGraphEdge> outgoingEdgeList(IndexedWord v)

Source Link

Usage

From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java

License:Creative Commons License

/**
 * Gets the candidate hypernyms from the provided subdef.
 *
 * <p>Every root of {@code dependencies} is run through a series of
 * heuristics.  Each heuristic that fires records a lemma (or surface token)
 * in the result together with a label naming the heuristic.
 *
 * @param dependencies the dependency graph whose roots and outgoing edges
 *        are examined (presumably the parse of {@code subdef})
 * @param subdef the sub-definition text; only consulted by Heuristic 3,
 *        which searches it with the {@code WHO} pattern
 * @param spos_ the part of speech of the sense being defined; used for
 *        dictionary lookups and, via {@code toChar}, for comparing against
 *        the first letter of each word's POS tag
 * @return a mapping from the candidate to the heuristics that generated it
 */
MultiMap<String, String> getCandidates(SemanticGraph dependencies, String subdef, POS spos_) {

    MultiMap<String, String> candidates = new HashMultiMap<String, String>();
    char sensePos = toChar(spos_);

    next_root: for (IndexedWord root : dependencies.getRoots()) {
        String lemma = root.get(LemmaAnnotation.class);
        char lemmaPos = posInitial(root);

        // The graph is not modified in this method, so the root's outgoing
        // edges are fetched once and shared by all heuristics below.
        List<SemanticGraphEdge> rootEdges = dependencies.outgoingEdgeList(root);

        // If the lemma is a verb, check for phrasal verbal particle (e.g.,
        // "lead on", "edge out") and if present, add them to the lemma
        if (lemmaPos == 'v') {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("prt")) {
                    lemma = lemma + " " + e.getDependent().get(LemmaAnnotation.class);
                    break;
                }
            }
        }

        // Heuristic 1: root matches exact POS
        if (lemmaPos == sensePos) {

            // Edge case for Heuristic 7: If the lemma is a noun and is
            // saying that this is an instance (e.g., "An instance of ..."),
            // then we take the dependent noun from instance
            //
            // Terrible example:
            //   The second of the two Books of Chronicles and the
            //   fourteenth book of the Old Testament of the Bible.
            //
            boolean foundExistentialDependent = false;
            if (lemma.equals("instance") || lemma.equals("example") || lemma.equals("first")
                    || lemma.equals("second") || lemma.equals("third") || lemma.equals("fourth")
                    || lemma.equals("fifth") || lemma.equals("sixth") || lemma.equals("series")) {
                // Check that there's actually a prepositional phrase
                // attached
                for (SemanticGraphEdge e : rootEdges) {
                    if (e.getRelation().getShortName().equals("prep")) {
                        IndexedWord dep = e.getDependent();
                        String depLemma = dep.get(LemmaAnnotation.class);
                        if (posInitial(dep) == sensePos) {
                            candidates.put(depLemma, "Heuristic-7");
                            addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-7");
                            foundExistentialDependent = true;
                        }
                    }
                }
            }
            // Heuristic 7 supersedes every other heuristic for this root
            if (foundExistentialDependent) {
                continue next_root;
            }

            // Heuristic 10: In the case of noun phrases, take the last noun
            // in the phrase, e.g., "Molten material", "springtime snow
            // runoff"
            boolean foundDependent = false;
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);
                    if (posInitial(dep) == sensePos) {
                        foundDependent = true;
                        candidates.put(depLemma, "Heuristic-10");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-10");
                    }
                }
            }

            // Only fall back to the root itself when Heuristic 10 found
            // nothing
            if (!foundDependent) {
                candidates.put(lemma, "Heuristic-1");
                addSiblings(root, candidates, sensePos, dependencies, "Heuristic-1");
            }
        }

        // Heuristic 2: subdef is either (1) one word or (2) two or more
        // words that *must* be connected by a conjunction, and (3) the lemma
        // has the wrong part of speech, but could have the same POS (i.e.,
        // the lemma was probably POS-tagged incorrectly).
        if (sensePos != lemmaPos) {

            // Only one word in the subdef, which can manifest itself as the
            // graph having no vertices! (size == 0)
            // NOTE(review): if an empty graph also reports no roots, this
            // branch is never reached from inside the root loop -- confirm.
            if (dependencies.size() < 1) {
                addKnownForm(candidates, lemma, root, spos_, "Heuristic-2a");
            } else {
                // Collect the root plus its conjuncts, but only if the
                // root has at least one conjunct
                Set<IndexedWord> tmp = new HashSet<IndexedWord>();
                for (SemanticGraphEdge e : rootEdges) {
                    if (e.getRelation().getShortName().equals("conj")) {
                        if (tmp.isEmpty()) {
                            tmp.add(root);
                        }
                        tmp.add(e.getDependent());
                    }
                }
                for (IndexedWord iw : tmp) {
                    // Uses each word's own lemma annotation (for the root
                    // this is the lemma without any appended particle)
                    addKnownForm(candidates, iw.get(LemmaAnnotation.class), iw, spos_, "Heuristic-2b");
                }
            }
        }

        // Heuristic 3: the subdef is phrased as an overly-general description
        // of a person using "one", e.g., "one who does X".  Replace this with
        // "person"
        if (sensePos == 'n' && (lemma.equals("one") || lemma.equals("someone"))) {
            // TODO: check the dependency graph for a "who" attachment

            // ... or be lazy and just check for the token
            Matcher m = WHO.matcher(subdef);
            if (m.find()) {
                candidates.put("person", "Heuristic-3: Person");
            }
        }

        // Heuristic 4: if the root lemma is an adjective and the target
        // sense is a noun, look for a modified noun or set of nouns and
        // report those
        //
        // Example: "a small, arched passageway"
        if (sensePos == 'n' && lemmaPos == 'j') {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("appos")
                        || e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);
                    if (posInitial(dep) == sensePos) {
                        candidates.put(depLemma, "Heuristic-4: Head Noun");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-4: Head Noun");
                    }
                }
            }
        }

        // Heuristic 5: if the root lemma is a verb and the target sense is
        // a noun, look for a subject noun
        if (sensePos == 'n' && lemmaPos == 'v') {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("nsubj")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);
                    if (posInitial(dep) == sensePos) {
                        candidates.put(depLemma, "Heuristic-5: Subject Noun");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-5: Subject Noun");
                    }
                    // Only the first subject edge is considered
                    break;
                }
            }
        }

        // Heuristic 6: if the root lemma is an existential quantifier or
        // something like it (e.g., "Any of ...") and
        // the target sense is a noun, look for a subject noun
        if (sensePos == 'n' && lemmaPos == 'd') {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("prep")
                        || e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);

                    // This should be the common case
                    if (posInitial(dep) == sensePos) {
                        candidates.put(depLemma, "Heuristic-6: Existential Example");
                        addSiblings(dep, candidates, sensePos, dependencies,
                                "Heuristic-6: Existential Example");
                    }
                    // This is for some really (really) unusually parsed
                    // edge cases: look one level deeper for a relative
                    // clause modifier with the right POS
                    else {
                        for (SemanticGraphEdge e2 : dependencies.outgoingEdgeList(dep)) {
                            if (e2.getRelation().getShortName().equals("rcmod")) {
                                IndexedWord dep2 = e2.getDependent();
                                String depLemma2 = dep2.get(LemmaAnnotation.class);
                                if (posInitial(dep2) == sensePos) {
                                    candidates.put(depLemma2, "Heuristic-6: Existential Example");
                                    addSiblings(dep2, candidates, sensePos, dependencies,
                                            "Heuristic-6: Existential Example");
                                }
                            }
                        }
                    }
                }
            }
        }

        // Heuristic 8: if the root lemma is a verb and the sense is an
        // adjective, but the verb is modified by an adverb, this catches
        // the cases that Heuristic 2 does not
        if (sensePos == 'j' && lemmaPos == 'v') {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("advmod")) {
                    addKnownForm(candidates, lemma, root, spos_, "Heuristic-8: Adv-modified Verb");
                }
            }
        }

        // Heuristic 9: if the sense is an adjective and the root lemma
        // begins with a negative *and* the gloss contains something
        // like "not [x]", then pull out the "x" and use it as the hypernym
        if (sensePos == 'j' && lemma.equals("not")) {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("dep")) {
                    IndexedWord dep = e.getDependent();
                    String depLemma = dep.get(LemmaAnnotation.class);
                    if (posInitial(dep) == sensePos) {
                        candidates.put(depLemma, "Heuristic-9: negated adj");
                        addSiblings(dep, candidates, sensePos, dependencies, "Heuristic-9: negated adj");
                    }
                    // Only the first "dep" edge is considered
                    break;
                }
            }
        }

        // Heuristic 11: if the sense is a verb and the root lemma
        // is "to", this is probably a case of mistaken POS-tagging
        if (sensePos == 'v' && lemma.equals("to")) {
            for (SemanticGraphEdge e : rootEdges) {
                if (e.getRelation().getShortName().equals("pobj")) {
                    // The candidate is the object of "to", so look up the
                    // dependent's lemma (not the root lemma, which is
                    // always "to" here) before falling back to its token.
                    IndexedWord dep = e.getDependent();
                    addKnownForm(candidates, dep.get(LemmaAnnotation.class), dep, spos_,
                            "Heuristic-11: verbal infinitive");
                }
            }
        }

    }
    return candidates;
}

/**
 * Returns the lower-cased first character of the word's part-of-speech tag.
 *
 * @param word the word whose {@code PartOfSpeechAnnotation} is read
 * @return the first character of the POS tag, lower-cased
 */
private char posInitial(IndexedWord word) {
    return word.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);
}

/**
 * Records {@code lemma} as a candidate if {@code dict} has an entry for it
 * with the given part of speech; otherwise tries the surface token of
 * {@code source} the same way.  Sometimes a word gets lemmatized to a form
 * with the wrong part of speech, in which case the token may still match.
 * Nothing is recorded when neither form is found.
 *
 * @param candidates the map that receives the candidate
 * @param lemma the lemma to look up first
 * @param source the word whose {@code TextAnnotation} is the fallback
 * @param pos the part of speech passed to the dictionary lookup
 * @param reason the heuristic label stored with the candidate
 */
private void addKnownForm(MultiMap<String, String> candidates, String lemma, IndexedWord source, POS pos,
        String reason) {
    if (dict.getIndexWord(lemma, pos) != null) {
        candidates.put(lemma, reason);
        return;
    }
    String token = source.get(TextAnnotation.class);
    if (dict.getIndexWord(token, pos) != null) {
        candidates.put(token, reason);
    }
}

From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java

License:Creative Commons License

/**
 * If we know we want {@code toAdd}, get all of its siblings that are joined
 * by conjunctions as candidates too.
 *
 * <p>A sibling is the dependent of a {@code conj} edge leaving
 * {@code toAdd}.  It is recorded only when the lower-cased first character
 * of its POS tag equals {@code targetPos}.  For verbs, a phrasal particle
 * ({@code prt} dependent of the sibling) is appended to the sibling's lemma
 * before it is recorded.
 *
 * @param toAdd the word whose conjuncts are examined
 * @param candidates the map that receives each accepted sibling
 * @param targetPos the POS initial a sibling must have to be accepted
 * @param parse the dependency graph supplying the outgoing edges
 * @param reason the heuristic label; stored with the suffix
 *        {@code " (In conjunction)"}
 */
void addSiblings(IndexedWord toAdd, MultiMap<String, String> candidates, char targetPos, SemanticGraph parse,
        String reason) {
    for (SemanticGraphEdge e : parse.outgoingEdgeList(toAdd)) {
        if (!e.getRelation().getShortName().equals("conj")) {
            continue;
        }
        IndexedWord dep = e.getDependent();
        String depLemma = dep.get(LemmaAnnotation.class);
        char depPos = dep.get(PartOfSpeechAnnotation.class).substring(0, 1).toLowerCase().charAt(0);
        if (targetPos != depPos) {
            continue;
        }

        // Check for phrasal verb particles
        if (targetPos == 'v') {
            for (SemanticGraphEdge e2 : parse.outgoingEdgeList(dep)) {
                if (e2.getRelation().getShortName().equals("prt")) {
                    // The particle is the dependent of the inner edge e2;
                    // reading it from the outer edge e would append the
                    // conjunct's own lemma to itself.
                    IndexedWord particle = e2.getDependent();
                    depLemma = depLemma + " " + particle.get(LemmaAnnotation.class);
                    break;
                }
            }
        }

        // Record the sibling for every matching POS, verbs included, so
        // the phrasal lemma built above is not discarded.
        candidates.put(depLemma, reason + " (In conjunction)");
    }
}