Example usage for edu.stanford.nlp.semgraph SemanticGraph size

List of usage examples for edu.stanford.nlp.semgraph SemanticGraph size

Introduction

On this page you can find example usage for edu.stanford.nlp.semgraph SemanticGraph size.

Prototype

public int size() 

Source Link

Document

Returns the number of nodes in the graph

Usage

From source file:ca.mcgill.cs.crown.procedure.ParseExtractor.java

License:Creative Commons License

/**
 * Gets the candidate hypernyms from the provided subdef by running a series
 * of heuristics over every root of the dependency graph.
 *
 * <p>Each heuristic inspects the root's lemma, the first letter of its
 * part-of-speech tag and its outgoing dependency edges, and records any
 * candidate it finds together with a label naming the heuristic.  A root
 * that has no lemma or no part-of-speech tag is skipped.
 *
 * @param dependencies the dependency graph whose roots and outgoing edges
 *        are inspected
 * @param subdef the subdefinition text; only Heuristic 3 reads it, by
 *        matching it against {@code WHO}
 * @param spos_ the part of speech of the sense; used for dictionary
 *        lookups and, via {@code toChar}, for comparison with tag letters
 *
 * @return a mapping from the candidate to the heuristics that generated it
 */
MultiMap<String, String> getCandidates(SemanticGraph dependencies, String subdef, POS spos_) {

    MultiMap<String, String> candidates = new HashMultiMap<String, String>();
    char sensePos = toChar(spos_);

    for (IndexedWord root : dependencies.getRoots()) {
        String lemma = root.get(LemmaAnnotation.class);
        String pos = root.get(PartOfSpeechAnnotation.class);

        // Every heuristic below needs both the lemma and the tag letter, so
        // a root lacking either cannot produce a candidate
        if (lemma == null || pos == null || pos.isEmpty()) {
            continue;
        }

        // Character.toLowerCase is locale-independent, unlike
        // String.toLowerCase() with the default locale
        char lemmaPos = Character.toLowerCase(pos.charAt(0));

        // If the lemma is a verb, check for phrasal verbal particle (e.g.,
        // "lead on", "edge out") and if present, add them to the lemma
        if (lemmaPos == 'v') {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                if (e.getRelation().getShortName().equals("prt")) {
                    lemma = lemma + " " + e.getDependent().get(LemmaAnnotation.class);
                    break;
                }
            }
        }

        // Heuristic 1: root matches exact POS
        if (lemmaPos == sensePos) {

            // Edge case for Heuristic 7: If the lemma is a noun and is
            // saying that this is an instance (e.g., "An instance of ..."),
            // then we take the dependent noun from instance
            //
            // Terrible example:
            //   The second of the two Books of Chronicles and the
            //   fourteenth book of the Old Testament of the Bible.
            //
            boolean foundExistentialDependent = false;
            if (lemma.equals("instance") || lemma.equals("example") || lemma.equals("first")
                    || lemma.equals("second") || lemma.equals("third") || lemma.equals("fourth")
                    || lemma.equals("fifth") || lemma.equals("sixth") || lemma.equals("series")) {
                // Check that there's actually a prepositional phrase
                // attached
                for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                    if (e.getRelation().getShortName().equals("prep")
                            && putIfPosMatches(e.getDependent(), sensePos, candidates, dependencies,
                                    "Heuristic-7")) {
                        foundExistentialDependent = true;
                    }
                }
            }
            // Heuristic 7 replaces every other heuristic for this root
            if (foundExistentialDependent) {
                continue;
            }

            // Heuristic 10: In the case of noun phrases, take the last noun
            // in the phrase, e.g., "Molten material", "Springtime snow
            // runoff"
            boolean foundDependent = false;
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                if (e.getRelation().getShortName().equals("dep")
                        && putIfPosMatches(e.getDependent(), sensePos, candidates, dependencies,
                                "Heuristic-10")) {
                    foundDependent = true;
                }
            }

            // Only fall back to the root itself when Heuristic 10 found
            // nothing
            if (!foundDependent) {
                candidates.put(lemma, "Heuristic-1");
                addSiblings(root, candidates, sensePos, dependencies, "Heuristic-1");
            }
        }

        // Heuristic 2: subdef is either (1) one word or (2) two or more
        // words that *must* be connected by a conjunction, and (3) the lemma
        // has the wrong part of speech, but could have the same POS (i.e.,
        // the lemma was probably POS-tagged incorrectly).
        if (sensePos != lemmaPos) {

            // Only one word in the subdef, which can manifest itself as the
            // graph having no vertices! (size == 0)
            if (dependencies.size() < 1) {
                putIfInDictionary(lemma, root, spos_, candidates, "Heuristic-2a");
            } else {
                // The root is only included when it has at least one
                // conjunct
                Set<IndexedWord> conjuncts = new HashSet<IndexedWord>();
                for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                    if (e.getRelation().getShortName().equals("conj")) {
                        if (conjuncts.isEmpty()) {
                            conjuncts.add(root);
                        }
                        conjuncts.add(e.getDependent());
                    }
                }
                for (IndexedWord iw : conjuncts) {
                    // Uses each word's own lemma, so the root is looked up
                    // without any particle appended above
                    putIfInDictionary(iw.get(LemmaAnnotation.class), iw, spos_, candidates,
                            "Heuristic-2b");
                }
            }
        }

        // Heuristic 3: the subdef is phrased as an overly-general description
        // of a person using "one", e.g., "one who does X".  Replace this with
        // "person"
        if (sensePos == 'n' && (lemma.equals("one") || lemma.equals("someone"))) {
            // TODO: check the dependency graph for a "who" attachment
            // instead of just checking for the token
            Matcher m = WHO.matcher(subdef);
            if (m.find()) {
                candidates.put("person", "Heuristic-3: Person");
            }
        }

        // Heuristic 4: if the root lemma is an adjective and the target
        // sense is a noun, look for a modified noun or set of nouns and
        // report those
        //
        // Example: "a small, arched passageway"
        if (sensePos == 'n' && lemmaPos == 'j') {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                String relation = e.getRelation().getShortName();
                if (relation.equals("appos") || relation.equals("dep")) {
                    putIfPosMatches(e.getDependent(), sensePos, candidates, dependencies,
                            "Heuristic-4: Head Noun");
                }
            }
        }

        // Heuristic 5: if the root lemma is a verb and the target sense is
        // a noun, look for a subject noun
        if (sensePos == 'n' && lemmaPos == 'v') {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                if (e.getRelation().getShortName().equals("nsubj")) {
                    putIfPosMatches(e.getDependent(), sensePos, candidates, dependencies,
                            "Heuristic-5: Subject Noun");
                    // Only the first subject edge is considered
                    break;
                }
            }
        }

        // Heuristic 6: if the root lemma is an existential quantifier or
        // something like it (e.g., "Any of ...") and
        // the target sense is a noun, look for a subject noun
        if (sensePos == 'n' && lemmaPos == 'd') {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                String relation = e.getRelation().getShortName();
                if (!relation.equals("prep") && !relation.equals("dep")) {
                    continue;
                }
                IndexedWord dep = e.getDependent();

                // This should be the common case
                if (putIfPosMatches(dep, sensePos, candidates, dependencies,
                        "Heuristic-6: Existential Example")) {
                    continue;
                }

                // This is for some really (really) unusually parsed
                // edge cases
                for (SemanticGraphEdge e2 : dependencies.outgoingEdgeList(dep)) {
                    if (e2.getRelation().getShortName().equals("rcmod")) {
                        putIfPosMatches(e2.getDependent(), sensePos, candidates, dependencies,
                                "Heuristic-6: Existential Example");
                    }
                }
            }
        }

        // Heuristic 8: if the root lemma is a verb and the sense is an
        // adjective, but the verb is modified by an adverb, this catches
        // the cases that Heuristic 2 does not
        if (sensePos == 'j' && lemmaPos == 'v') {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                if (e.getRelation().getShortName().equals("advmod")) {
                    putIfInDictionary(lemma, root, spos_, candidates,
                            "Heuristic-8: Adv-modified Verb");
                }
            }
        }

        // Heuristic 9: if the sense is an adjective and the root lemma
        // begins with a negative *and* the gloss contains something
        // like "not [x]", then pull out the "x" and use it as the hypernym
        if (sensePos == 'j' && lemma.equals("not")) {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                if (e.getRelation().getShortName().equals("dep")) {
                    putIfPosMatches(e.getDependent(), sensePos, candidates, dependencies,
                            "Heuristic-9: negated adj");
                    // Only the first "dep" edge is considered
                    break;
                }
            }
        }

        // Heuristic 11: if the sense is a verb and the root lemma
        // is "to", this is probably a case of mistaken POS-tagging
        if (sensePos == 'v' && lemma.equals("to")) {
            for (SemanticGraphEdge e : dependencies.outgoingEdgeList(root)) {
                if (e.getRelation().getShortName().equals("pobj")) {
                    // The candidate is the object of "to", so look up the
                    // dependent's lemma rather than the root lemma (which
                    // is always "to" here)
                    IndexedWord dep = e.getDependent();
                    putIfInDictionary(dep.get(LemmaAnnotation.class), dep, spos_, candidates,
                            "Heuristic-11: verbal infinitive");
                }
            }
        }

    }
    return candidates;
}

/**
 * Records {@code dep}'s lemma, and then its siblings via
 * {@code addSiblings}, as candidates when the lower-cased first letter of
 * its part-of-speech tag equals {@code sensePos}.
 *
 * @return {@code true} if the dependent matched and was recorded
 */
private boolean putIfPosMatches(IndexedWord dep, char sensePos, MultiMap<String, String> candidates,
        SemanticGraph dependencies, String heuristic) {
    char depPos = Character.toLowerCase(dep.get(PartOfSpeechAnnotation.class).charAt(0));
    if (depPos != sensePos) {
        return false;
    }
    candidates.put(dep.get(LemmaAnnotation.class), heuristic);
    addSiblings(dep, candidates, sensePos, dependencies, heuristic);
    return true;
}

/**
 * Records {@code lemma} as a candidate if {@code dict} has an index word
 * for it with the given part of speech; otherwise tries the surface token
 * of {@code word} in the same way.  The fallback exists because words are
 * sometimes lemmatized to a form with the wrong part of speech.  A
 * {@code null} form is skipped rather than passed to the dictionary.
 */
private void putIfInDictionary(String lemma, IndexedWord word, POS pos,
        MultiMap<String, String> candidates, String heuristic) {
    if (lemma != null && dict.getIndexWord(lemma, pos) != null) {
        candidates.put(lemma, heuristic);
        return;
    }
    String token = word.get(TextAnnotation.class);
    if (token != null && dict.getIndexWord(token, pos) != null) {
        candidates.put(token, heuristic);
    }
}