Usage examples for edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher.getRelnString
public String getRelnString(String name)
From source file:edu.anu.spice.SpiceParser.java
License:Open Source License
protected ProposedTuples parseAnnotation(Annotation ann) { ProposedTuples tuples = new ProposedTuples(); ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>(); for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) { SemanticGraph sg = sentence//ww w.j ava2 s . c o m .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); sgs.add(sg); } for (SemanticGraph sg : sgs) { // Everything from RuleBasedParser except resolvePlurals(sg); SemanticGraphEnhancer.processQuanftificationModifiers(sg); SemanticGraphEnhancer.collapseCompounds(sg); SemanticGraphEnhancer.collapseParticles(sg); SemanticGraphEnhancer.resolvePronouns(sg); SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } matcher = ACL_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN }; for (SemgrexPattern p : subjPredPatterns) { matcher = p.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord pred = matcher.getNode("pred"); if (sg.hasChildWithReln(pred, 
UniversalEnglishGrammaticalRelations.CASE_MARKER)) { IndexedWord caseMarker = sg.getChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER); String prep = caseMarker.value(); if (sg.hasChildWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { prep = prep + " " + additionalCaseMarker.value(); } } tuples.addTuple(subj, pred, prep); } else { if (!pred.lemma().equals("be")) { tuples.addTuple(subj, pred); } } } } matcher = ADJ_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = ADJ_PRED_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = PP_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); String reln = matcher.getRelnString("reln"); String predicate = reln.replace("nmod:", "").replace("_", " "); if (predicate.equals("poss") || predicate.equals("agent")) { continue; } tuples.addTuple(gov, mod, predicate); } matcher = POSS_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); tuples.addTuple(mod, gov, "have"); } matcher = AGENT_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); tuples.addTuple(subj, obj, getPredicate(sg, pred)); } matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); checkForNumericAttribute(tuples, sg, subj); 
checkForNumericAttribute(tuples, sg, obj); } matcher = PLURAL_SUBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); checkForNumericAttribute(tuples, sg, subj); } matcher = PLURAL_OTHER_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord word = matcher.getNode("word"); checkForNumericAttribute(tuples, sg, word); } matcher = COMPOUND_NOUN_PATTERN.matcher(sg); Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>(); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); compoundNouns.add(tail); compoundNouns.add(head); tuples.addTuple(tail, head); } // Must happen last, since it will reuse existing parts of the scene // graph matcher = NOUN_CONJ_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); int original_length = tuples.tuples.size(); for (int i = 0; i < original_length; ++i) { ArrayList<String> prop = tuples.tuples.get(i); if (prop.size() == 3 && prop.get(0).equals(head)) { tuples.addTuple(head, prop.get(1), prop.get(2)); } if (prop.size() == 3 && prop.get(1).equals(tail)) { tuples.addTuple(tail, prop.get(1), prop.get(2)); } } } matcher = NOUN_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord word = matcher.getNode("word"); if (!compoundNouns.contains(word)) { tuples.addTuple(word); } } } return tuples; }
From source file:opendial.datastructs.Template.java
License:Open Source License
@Override public List<MatchResult> find(String str, int maxResults) { Value v = ValueFactory.create(str); List<MatchResult> results = new ArrayList<MatchResult>(); if (v instanceof RelationalVal) { RelationalVal rv = (RelationalVal) v; SemgrexMatcher m = pattern.matcher(rv.getGraph()); while (m.find()) { MatchResult result = new MatchResult(true); for (String slot : slots) { if (m.getNode(slot) != null) { RelationalVal subgraph = rv.getSubGraph(m.getNode(slot).index()); result.addPair(slot, subgraph); } else { result.addPair(slot, m.getRelnString(slot)); }//from www .j ava2 s. c o m } results.add(result); } } return results; }
From source file:org.wso2.extension.siddhi.gpl.execution.nlp.SemgrexPatternStreamProcessor.java
License:Open Source License
/**
 * Runs the Semgrex pattern over the text of each event in the chunk and replaces the
 * event with one derived event per match.
 *
 * <p>For every incoming event the text produced by the second attribute expression
 * executor is sent through the pipeline. Each sentence's collapsed, CC-processed
 * dependency graph is matched against {@code regexPattern}; every match produces a copy
 * of the incoming event, populated with the extracted data and inserted before the
 * current position. The incoming event itself is then removed from the chunk. The whole
 * loop runs while holding this object's monitor; the chunk is handed to
 * {@code nextProcessor} afterwards.
 *
 * @param streamEventChunk the chunk to transform in place
 * @param nextProcessor receives the chunk once it has been transformed
 * @param streamEventCloner used to copy the incoming event for each match
 * @param complexEventPopulater used to write the extracted data into each copy
 */
@Override
protected void process(ComplexEventChunk<StreamEvent> streamEventChunk, Processor nextProcessor,
        StreamEventCloner streamEventCloner, ComplexEventPopulater complexEventPopulater) {
    synchronized (this) {
        while (streamEventChunk.hasNext()) {
            StreamEvent incoming = streamEventChunk.next();
            Annotation annotated = pipeline
                    .process(attributeExpressionExecutors[1].execute(incoming).toString());

            for (CoreMap sentence : annotated.get(CoreAnnotations.SentencesAnnotation.class)) {
                SemanticGraph dependencies = sentence
                        .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
                SemgrexMatcher hits = regexPattern.matcher(dependencies);

                while (hits.find()) {
                    Object[] row = new Object[attributeCount];
                    // Slot 0 always carries the value of the matched node.
                    row[0] = hits.getMatch().value();

                    // Named nodes: store the word, or null when the name is unbound.
                    for (String node : hits.getNodeNames()) {
                        if (!namedElementParamPositions.containsKey(node)) {
                            continue;
                        }
                        if (hits.getNode(node) == null) {
                            row[namedElementParamPositions.get(node)] = null;
                        } else {
                            row[namedElementParamPositions.get(node)] = hits.getNode(node).word();
                        }
                    }

                    // Named relations: store the relation string.
                    for (String relation : hits.getRelationNames()) {
                        if (!namedElementParamPositions.containsKey(relation)) {
                            continue;
                        }
                        row[namedElementParamPositions.get(relation)] = hits.getRelnString(relation);
                    }

                    StreamEvent derived = streamEventCloner.copyStreamEvent(incoming);
                    complexEventPopulater.populateComplexEvent(derived, row);
                    streamEventChunk.insertBeforeCurrent(derived);
                }
            }

            // Drop the incoming event; only the derived events stay in the chunk.
            streamEventChunk.remove();
        }
    }
    nextProcessor.process(streamEventChunk);
}
From source file:org.wso2.siddhi.extension.nlp.SemgrexPatternTransformProcessor.java
License:Open Source License
@Override protected InStream processEvent(InEvent inEvent) { if (logger.isDebugEnabled()) { logger.debug(String.format("Event received. Regex:%s Event:%s", regexPattern.pattern(), inEvent)); }//from w ww . j a v a 2 s . com Object[] inStreamData = inEvent.getData(); Annotation document = pipeline.process((String) inEvent.getData(inStreamParamPosition)); InListEvent transformedListEvent = new InListEvent(); for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) { SemanticGraph graph = sentence .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); SemgrexMatcher matcher = regexPattern.matcher(graph); while (matcher.find()) { Object[] outStreamData = new Object[inStreamData.length + attributeCount]; outStreamData[0] = matcher.getMatch().value(); for (String nodeName : matcher.getNodeNames()) { if (namedElementParamPositions.containsKey(nodeName)) { outStreamData[namedElementParamPositions.get(nodeName)] = matcher.getNode(nodeName) == null ? null : matcher.getNode(nodeName).word(); } } for (String relationName : matcher.getRelationNames()) { if (namedElementParamPositions.containsKey(relationName)) { outStreamData[namedElementParamPositions.get(relationName)] = matcher .getRelnString(relationName); } } System.arraycopy(inStreamData, 0, outStreamData, attributeCount, inStreamData.length); transformedListEvent .addEvent(new InEvent(inEvent.getStreamId(), System.currentTimeMillis(), outStreamData)); } } return transformedListEvent; }