List of usage examples for edu.stanford.nlp.semgraph.SemanticGraph#hasChildWithReln
public boolean hasChildWithReln(IndexedWord vertex, GrammaticalRelation reln)
From source file: edu.anu.spice.SpiceParser.java
License: Open Source License
/**
 * Builds the predicate string for a verb, attaching its phrasal-verb particle if it has one.
 *
 * <p>Without a particle child the result is simply the lemma of {@code mainPred}. With a
 * particle the result is {@code "<lemma> <particle value>"}; when the lemma is {@code "be"}
 * the lemma part is left empty, so the returned string then begins with a space.
 *
 * @param sg       dependency graph that contains {@code mainPred}
 * @param mainPred the predicate word to render
 * @return the lemma, optionally followed by a space and the particle's value
 */
protected String getPredicate(SemanticGraph sg, IndexedWord mainPred) {
    boolean hasParticle =
            sg.hasChildWithReln(mainPred, UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE);
    if (!hasParticle) {
        return mainPred.lemma();
    }

    IndexedWord particle =
            sg.getChildWithReln(mainPred, UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE);

    // The copula contributes nothing to the predicate text; only the particle is kept.
    String verbPart;
    if (mainPred.lemma().equals("be")) {
        verbPart = "";
    } else {
        verbPart = mainPred.lemma();
    }
    return String.format("%s %s", verbPart, particle.value());
}
From source file: edu.anu.spice.SpiceParser.java
License: Open Source License
/**
 * Checks whether a word has a numeric modifier and, if so, records the pair as an
 * object-with-attribute tuple.
 *
 * <p>A numeric modifier is only recorded when it appears before the word in the sentence.
 * The quantification-modifier relation is consulted only when the word has no numeric
 * modifier at all.
 *
 * @param tuples collector that receives the (word, modifier) tuple
 * @param sg     dependency graph that contains {@code word}
 * @param word   the word whose modifiers are inspected
 */
protected void checkForNumericAttribute(ProposedTuples tuples, SemanticGraph sg, IndexedWord word) {
    if (sg.hasChildWithReln(word, UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER)) {
        IndexedWord numeral =
                sg.getChildWithReln(word, UniversalEnglishGrammaticalRelations.NUMERIC_MODIFIER);
        /* Prevent things like "number 5" */
        boolean numeralComesFirst = numeral.index() < word.index();
        if (numeralComesFirst) {
            tuples.addTuple(word, numeral);
        }
        // A numeric modifier was present, so the quantifier branch is skipped either way.
        return;
    }

    if (sg.hasChildWithReln(word, SemanticGraphEnhancer.QMOD_RELATION)) {
        IndexedWord quantifier = sg.getChildWithReln(word, SemanticGraphEnhancer.QMOD_RELATION);
        tuples.addTuple(word, quantifier);
    }
}
From source file: edu.anu.spice.SpiceParser.java
License: Open Source License
protected ProposedTuples parseAnnotation(Annotation ann) { ProposedTuples tuples = new ProposedTuples(); ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>(); for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) { SemanticGraph sg = sentence .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); sgs.add(sg);//from w w w. j a v a 2s . c o m } for (SemanticGraph sg : sgs) { // Everything from RuleBasedParser except resolvePlurals(sg); SemanticGraphEnhancer.processQuanftificationModifiers(sg); SemanticGraphEnhancer.collapseCompounds(sg); SemanticGraphEnhancer.collapseParticles(sg); SemanticGraphEnhancer.resolvePronouns(sg); SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } matcher = ACL_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); String reln = matcher.getRelnString("objreln"); String predicate = getPredicate(sg, pred); if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) { predicate += reln.replace("nmod:", " ").replace("_", " "); } tuples.addTuple(subj, obj, predicate); } SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN }; for (SemgrexPattern p : subjPredPatterns) { matcher = p.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord pred = matcher.getNode("pred"); if (sg.hasChildWithReln(pred, 
UniversalEnglishGrammaticalRelations.CASE_MARKER)) { IndexedWord caseMarker = sg.getChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER); String prep = caseMarker.value(); if (sg.hasChildWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker, UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) { prep = prep + " " + additionalCaseMarker.value(); } } tuples.addTuple(subj, pred, prep); } else { if (!pred.lemma().equals("be")) { tuples.addTuple(subj, pred); } } } } matcher = ADJ_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = ADJ_PRED_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord obj = matcher.getNode("obj"); IndexedWord adj = matcher.getNode("adj"); tuples.addTuple(obj, adj); } matcher = PP_MOD_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); String reln = matcher.getRelnString("reln"); String predicate = reln.replace("nmod:", "").replace("_", " "); if (predicate.equals("poss") || predicate.equals("agent")) { continue; } tuples.addTuple(gov, mod, predicate); } matcher = POSS_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord gov = matcher.getNode("gov"); IndexedWord mod = matcher.getNode("mod"); tuples.addTuple(mod, gov, "have"); } matcher = AGENT_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); IndexedWord pred = matcher.getNode("pred"); tuples.addTuple(subj, obj, getPredicate(sg, pred)); } matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); IndexedWord obj = matcher.getNode("obj"); checkForNumericAttribute(tuples, sg, subj); 
checkForNumericAttribute(tuples, sg, obj); } matcher = PLURAL_SUBJECT_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord subj = matcher.getNode("subj"); checkForNumericAttribute(tuples, sg, subj); } matcher = PLURAL_OTHER_PATTERN.matcher(sg); while (matcher.findNextMatchingNode()) { IndexedWord word = matcher.getNode("word"); checkForNumericAttribute(tuples, sg, word); } matcher = COMPOUND_NOUN_PATTERN.matcher(sg); Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>(); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); compoundNouns.add(tail); compoundNouns.add(head); tuples.addTuple(tail, head); } // Must happen last, since it will reuse existing parts of the scene // graph matcher = NOUN_CONJ_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord tail = matcher.getNode("tail"); IndexedWord head = matcher.getNode("head"); int original_length = tuples.tuples.size(); for (int i = 0; i < original_length; ++i) { ArrayList<String> prop = tuples.tuples.get(i); if (prop.size() == 3 && prop.get(0).equals(head)) { tuples.addTuple(head, prop.get(1), prop.get(2)); } if (prop.size() == 3 && prop.get(1).equals(tail)) { tuples.addTuple(tail, prop.get(1), prop.get(2)); } } } matcher = NOUN_PATTERN.matcher(sg); while (matcher.find()) { IndexedWord word = matcher.getNode("word"); if (!compoundNouns.contains(word)) { tuples.addTuple(word); } } } return tuples; }