Example usage for edu.stanford.nlp.semgraph.semgrex SemgrexMatcher findNextMatchingNode

List of usage examples for edu.stanford.nlp.semgraph.semgrex SemgrexMatcher findNextMatchingNode

Introduction

In this page you can find the example usage for edu.stanford.nlp.semgraph.semgrex SemgrexMatcher findNextMatchingNode.

Prototype

public boolean findNextMatchingNode() 

Source Link

Document

Find the next match of the pattern in the graph such that the matching node (that is, the node matching the root node of the pattern) differs from the previous matching node.

Usage

From source file:edu.anu.spice.SpiceParser.java

License:Open Source License

protected ProposedTuples parseAnnotation(Annotation ann) {
    ProposedTuples tuples = new ProposedTuples();
    ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph sg = sentence/*from   w  w  w.j  av a2  s .  co m*/
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
        sgs.add(sg);
    }
    for (SemanticGraph sg : sgs) {
        // Everything from RuleBasedParser except resolvePlurals(sg);
        SemanticGraphEnhancer.processQuanftificationModifiers(sg);
        SemanticGraphEnhancer.collapseCompounds(sg);
        SemanticGraphEnhancer.collapseParticles(sg);
        SemanticGraphEnhancer.resolvePronouns(sg);

        SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            String reln = matcher.getRelnString("objreln");
            String predicate = getPredicate(sg, pred);
            if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
                predicate += reln.replace("nmod:", " ").replace("_", " ");
            }
            tuples.addTuple(subj, obj, predicate);
        }

        matcher = ACL_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            String reln = matcher.getRelnString("objreln");
            String predicate = getPredicate(sg, pred);
            if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
                predicate += reln.replace("nmod:", " ").replace("_", " ");
            }
            tuples.addTuple(subj, obj, predicate);
        }

        SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN };
        for (SemgrexPattern p : subjPredPatterns) {
            matcher = p.matcher(sg);
            while (matcher.find()) {
                IndexedWord subj = matcher.getNode("subj");
                IndexedWord pred = matcher.getNode("pred");
                if (sg.hasChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
                    IndexedWord caseMarker = sg.getChildWithReln(pred,
                            UniversalEnglishGrammaticalRelations.CASE_MARKER);
                    String prep = caseMarker.value();
                    if (sg.hasChildWithReln(caseMarker,
                            UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                        for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker,
                                UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                            prep = prep + " " + additionalCaseMarker.value();
                        }
                    }
                    tuples.addTuple(subj, pred, prep);
                } else {
                    if (!pred.lemma().equals("be")) {
                        tuples.addTuple(subj, pred);
                    }
                }
            }
        }

        matcher = ADJ_MOD_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord adj = matcher.getNode("adj");
            tuples.addTuple(obj, adj);
        }

        matcher = ADJ_PRED_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord adj = matcher.getNode("adj");
            tuples.addTuple(obj, adj);
        }

        matcher = PP_MOD_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord gov = matcher.getNode("gov");
            IndexedWord mod = matcher.getNode("mod");
            String reln = matcher.getRelnString("reln");
            String predicate = reln.replace("nmod:", "").replace("_", " ");
            if (predicate.equals("poss") || predicate.equals("agent")) {
                continue;
            }
            tuples.addTuple(gov, mod, predicate);
        }

        matcher = POSS_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord gov = matcher.getNode("gov");
            IndexedWord mod = matcher.getNode("mod");
            tuples.addTuple(mod, gov, "have");
        }

        matcher = AGENT_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            tuples.addTuple(subj, obj, getPredicate(sg, pred));
        }

        matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            checkForNumericAttribute(tuples, sg, subj);
            checkForNumericAttribute(tuples, sg, obj);
        }

        matcher = PLURAL_SUBJECT_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord subj = matcher.getNode("subj");
            checkForNumericAttribute(tuples, sg, subj);
        }

        matcher = PLURAL_OTHER_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord word = matcher.getNode("word");
            checkForNumericAttribute(tuples, sg, word);
        }

        matcher = COMPOUND_NOUN_PATTERN.matcher(sg);
        Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>();
        while (matcher.find()) {
            IndexedWord tail = matcher.getNode("tail");
            IndexedWord head = matcher.getNode("head");
            compoundNouns.add(tail);
            compoundNouns.add(head);
            tuples.addTuple(tail, head);
        }

        // Must happen last, since it will reuse existing parts of the scene
        // graph
        matcher = NOUN_CONJ_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord tail = matcher.getNode("tail");
            IndexedWord head = matcher.getNode("head");
            int original_length = tuples.tuples.size();
            for (int i = 0; i < original_length; ++i) {
                ArrayList<String> prop = tuples.tuples.get(i);
                if (prop.size() == 3 && prop.get(0).equals(head)) {
                    tuples.addTuple(head, prop.get(1), prop.get(2));
                }
                if (prop.size() == 3 && prop.get(1).equals(tail)) {
                    tuples.addTuple(tail, prop.get(1), prop.get(2));
                }
            }
        }

        matcher = NOUN_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord word = matcher.getNode("word");
            if (!compoundNouns.contains(word)) {
                tuples.addTuple(word);
            }
        }
    }
    return tuples;
}