List of usage examples for edu.stanford.nlp.semgraph.semgrex SemgrexPattern matcher
public SemgrexMatcher matcher(SemanticGraph sg)
From source file:edu.anu.spice.SpiceParser.java
License:Open Source License
protected ProposedTuples parseAnnotation(Annotation ann) { ProposedTuples tuples = new ProposedTuples(); ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>(); for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) { SemanticGraph sg = sentence/*from w ww.ja v a2 s . c o m*/ .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); sgs.add(sg); } for (SemanticGraph sg : sgs) { // Everything from RuleBasedParser except resolvePlurals(sg); SemanticGraphEnhancer.processQuanftificationModifiers(sg); SemanticGraphEnhancer.collapseCompounds(sg); SemanticGraphEnhancer.collapseParticles(sg); SemanticGraphEnhancer.resolvePronouns(sg); SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } matcher = ACL_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN }; for (SemgrexPattern p : subjPredPatterns) { matcher = p.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord pred = matcher.getNode("pred"); if (sg.hasChildWithReln(pred, 
UniversalEnglishGrammaticalRelations.CASE_MARKER)) { IndexedWord caseMarker = sg.getChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER); String prep = caseMarker.value(); if (sg.hasChildWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { prep = prep + " " + additionalCaseMarker.value(); } } tuples.addTuple(subj, pred, prep); } else { if (!pred.lemma().equals("be")) { tuples.addTuple(subj, pred); } } } } matcher = ADJ_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = ADJ_PRED_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = PP_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); String reln = matcher.getRelnString("reln"); String predicate = reln.replace("nmod:", "").replace("_", " "); if (predicate.equals("poss") || predicate.equals("agent")) { continue; } tuples.addTuple(gov, mod, predicate); } matcher = POSS_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); tuples.addTuple(mod, gov, "have"); } matcher = AGENT_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); tuples.addTuple(subj, obj, getPredicate(sg, pred)); } matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); checkForNumericAttribute(tuples, sg, subj); 
checkForNumericAttribute(tuples, sg, obj); } matcher = PLURAL_SUBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); checkForNumericAttribute(tuples, sg, subj); } matcher = PLURAL_OTHER_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord word = matcher.getNode("word"); checkForNumericAttribute(tuples, sg, word); } matcher = COMPOUND_NOUN_PATTERN.matcher(sg); Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>(); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); compoundNouns.add(tail); compoundNouns.add(head); tuples.addTuple(tail, head); } // Must happen last, since it will reuse existing parts of the scene // graph matcher = NOUN_CONJ_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); int original_length = tuples.tuples.size(); for (int i = 0; i < original_length; ++i) { ArrayList<String> prop = tuples.tuples.get(i); if (prop.size() == 3 && prop.get(0).equals(head)) { tuples.addTuple(head, prop.get(1), prop.get(2)); } if (prop.size() == 3 && prop.get(1).equals(tail)) { tuples.addTuple(tail, prop.get(1), prop.get(2)); } } } matcher = NOUN_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord word = matcher.getNode("word"); if (!compoundNouns.contains(word)) { tuples.addTuple(word); } } } return tuples; }
From source file:org.wso2.extension.siddhi.gpl.execution.nlp.RelationshipByVerbStreamProcessor.java
License:Open Source License
private void findMatchingEvents(CoreMap sentence, SemgrexPattern pattern, Set<Event> eventSet) { SemanticGraph graph = sentence//from w w w. j a v a 2 s . c o m .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); SemgrexMatcher matcher = pattern.matcher(graph); while (matcher.find()) { Event event = new Event(); event.verb = matcher.getNode(Constants.VERB) == null ? null : matcher.getNode(Constants.VERB).word(); event.subject = matcher.getNode(Constants.SUBJECT) == null ? null : matcher.getNode(Constants.SUBJECT).word(); event.object = matcher.getNode(Constants.OBJECT) == null ? null : matcher.getNode(Constants.OBJECT).word(); eventSet.add(event); } }
From source file:org.wso2.gpl.siddhi.extension.nlp.RelationshipByVerbStreamProcessor.java
License:Open Source License
/**
 * Collects an {@code Event} for every match of {@code pattern} in the sentence's collapsed
 * CC-processed dependency graph.
 *
 * <p>The verb, subject and object of an event are taken from the words of the nodes named
 * {@code Constants.verb}, {@code Constants.subject} and {@code Constants.object}. A field is set
 * to {@code null} when the match has no node under the corresponding name.
 *
 * @param sentence sentence providing the dependency graph
 * @param pattern  Semgrex pattern to run over that graph
 * @param eventSet destination for the events that are found
 */
private void findMatchingEvents(CoreMap sentence, SemgrexPattern pattern, Set<Event> eventSet) {
    SemanticGraph graph = sentence
            .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);

    for (SemgrexMatcher matcher = pattern.matcher(graph); matcher.find();) {
        Event found = new Event();

        if (matcher.getNode(Constants.verb) != null) {
            found.verb = matcher.getNode(Constants.verb).word();
        } else {
            found.verb = null;
        }

        if (matcher.getNode(Constants.subject) != null) {
            found.subject = matcher.getNode(Constants.subject).word();
        } else {
            found.subject = null;
        }

        if (matcher.getNode(Constants.object) != null) {
            found.object = matcher.getNode(Constants.object).word();
        } else {
            found.object = null;
        }

        eventSet.add(found);
    }
}