List of usage examples for edu.stanford.nlp.trees Tree parent
public Tree parent(Tree root)
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * // w ww . j a v a2 s. co m */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.objectIndexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.objectIndexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString()); if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *///from www . ja v a 2s. c om private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(this.hf).label().toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(this.hf); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).objectIndexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { //if(GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: "+copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy); Tree gparent = parent.parent(copy);/*from ww w . jav a 2 s. c o m*/ int parentIdx = gparent.objectIndexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }
From source file:edu.cmu.ark.Question.java
License:Open Source License
public List<Tree> findLogicalWordsAboveIntermediateTree() { List<Tree> res = new ArrayList<Tree>(); Tree pred = intermediateTree.getChild(0).headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); String lemma = AnalysisUtilities.getInstance().getLemma(pred.yield().toString(), pred.label().toString()); String tregexOpStr;//from w w w . j a v a 2s.c om TregexPattern matchPattern; TregexMatcher matcher; Tree sourcePred = null; for (Tree leaf : sourceTree.getLeaves()) { Tree tmp = leaf.parent(sourceTree); String sourceLemma = AnalysisUtilities.getInstance().getLemma(leaf.label().toString(), tmp.label().toString()); if (sourceLemma.equals(lemma)) { sourcePred = tmp; break; } } tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(sourceTree); Tree command; while (matcher.find() && sourcePred != null) { command = matcher.getNode("command"); if (AnalysisUtilities.cCommands(sourceTree, command, sourcePred) && command.parent(sourceTree) != sourcePred.parent(sourceTree)) { res.add(command); } } return res; }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * //from w w w . ja v a2 s . c o m */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.indexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.indexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString()); if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//*w ww . j a va 2 s . c o m*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label() .toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = AnalysisUtilities.getInstance().getLemma( verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).indexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { if (GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: " + copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy); Tree gparent = parent.parent(copy);// w w w . ja v a 2 s. co m int parentIdx = gparent.indexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }
From source file:elkfed.coref.features.pairs.FE_AppositiveParse.java
License:Apache License
public static Boolean getAppositivePrs(PairInstance inst) { // should be in the same sentence if (inst.getAnaphor().getSentId() != inst.getAntecedent().getSentId()) return false; // exclude pairs where anaphor is an NE -- this might be a bad idea though.. if (inst.getAnaphor().isEnamex()) return false; if (inst.getAntecedent().isEnamex() && inst.getAnaphor().isEnamex()) { // exclude pairs of NE that have different type if (!(inst.getAntecedent().getEnamexType().equals(inst.getAnaphor().getEnamexType()))) return false; // exclude pairs of LOC-ne if (inst.getAntecedent().getEnamexType().toLowerCase().startsWith("gpe")) return false; if (inst.getAntecedent().getEnamexType().toLowerCase().startsWith("loc")) return false; }//www . j a v a 2 s . c o m // should have not-null maxnp-trees (otherwise -- problematic mentions) Tree sentenceTree = inst.getAnaphor().getSentenceTree(); Tree AnaTree = inst.getAnaphor().getMaxNPParseTree(); Tree AnteTree = inst.getAntecedent().getMaxNPParseTree(); if (sentenceTree == null) return false; if (AnaTree == null) return false; if (AnteTree == null) return false; // the structure should be ( * (,) (ANA)) or ( * (,) (ANTE)) -- depends on the ordering, annotation, mention extraction etc if (AnteTree.parent(sentenceTree) == AnaTree) { Tree[] chlds = AnaTree.children(); Boolean lastcomma = false; for (int i = 0; i < chlds.length && chlds[i] != AnteTree; i++) { lastcomma = false; if (chlds[i].value().equalsIgnoreCase(",")) lastcomma = true; } return lastcomma; } if (AnaTree.parent(sentenceTree) == AnteTree) { Tree[] chlds = AnteTree.children(); Boolean lastcomma = false; for (int i = 0; i < chlds.length && chlds[i] != AnaTree; i++) { lastcomma = false; if (chlds[i].value().equalsIgnoreCase(",")) lastcomma = true; } return lastcomma; } return false; }
From source file:elkfed.coref.features.pairs.FE_CCommand.java
License:Apache License
public static Boolean getCCommand(PairInstance inst) { // should be in the same sentence if (inst.getAnaphor().getSentId() != inst.getAntecedent().getSentId()) return false; //Ana should not be reflexive or reciprocal pronoun if (inst.getAnaphor().getReflPronoun()) return false; // should have not-null maxnp-trees (otherwise -- problematic mentions) Tree sentenceTree = inst.getAnaphor().getSentenceTree(); Tree AnaTree = inst.getAnaphor().getMaxNPParseTree(); Tree AnteTree = inst.getAntecedent().getMaxNPParseTree(); if (sentenceTree == null) return false; if (AnaTree == null) return false; if (AnteTree == null) return false; // should not dominate each other if (AnaTree.dominates(AnteTree)) return false; if (AnteTree.dominates(AnaTree)) return false; //the first branching node for ante should dominate ana (but not via S-node) AnteTree = AnteTree.parent(sentenceTree); while (AnteTree != null) { if (AnteTree.children().length > 1) { if (!AnteTree.dominates(AnaTree)) return false; while (AnaTree != null && AnaTree != AnteTree) { if (AnaTree.value().toLowerCase().startsWith("s")) return false; AnaTree = AnaTree.parent(sentenceTree); }/*from ww w . j a v a 2 s . c om*/ return true; } AnteTree = AnteTree.parent(sentenceTree); } return false; }
From source file:elkfed.coref.features.pairs.FE_Copula.java
License:Apache License
public static Boolean getCopula(PairInstance inst) { // should be in the same sentence if (inst.getAnaphor().getSentId() != inst.getAntecedent().getSentId()) { return false; }//from ww w . j av a2 s.com // should have not-null maxnp-trees (otherwise -- problematic mentions) Tree sentenceTree = inst.getAnaphor().getSentenceTree(); Tree AnaTree = inst.getAnaphor().getMaxNPParseTree(); Tree AnteTree = inst.getAntecedent().getMaxNPParseTree(); if (sentenceTree == null) { return false; } if (AnaTree == null) { return false; } if (AnteTree == null) { return false; } // exclude "there is .." (ToDo: exclude other expletives!) if (inst.getAntecedent().getMarkableString().toLowerCase().matches(NONREF_NP)) { return false; } //exclude date and time if (inst.getAnaphor().getSemanticClass() == SemanticClass.TIME) { return false; } if (inst.getAnaphor().getSemanticClass() == SemanticClass.DATE) { return false; } if (inst.getAntecedent().getSemanticClass() == SemanticClass.TIME) { return false; } if (inst.getAntecedent().getSemanticClass() == SemanticClass.DATE) { return false; } // should be subj-obj of the same verb Tree vp = AnaTree.parent(sentenceTree); if (vp == null) { return false; } if (!vp.value().equalsIgnoreCase("vp")) { return false; } while (vp.parent(sentenceTree) != null && vp.parent(sentenceTree).value().equalsIgnoreCase("vp")) { vp = vp.parent(sentenceTree); } if (vp.parent(sentenceTree) == null) { return false; } Boolean foundante = false; Tree[] chldsup = vp.parent(sentenceTree).children(); for (int i = 0; i < chldsup.length; i++) { if (chldsup[i] == AnteTree) { foundante = true; } if (chldsup[i] == vp && foundante == false) { return false; } } vp = AnaTree.parent(sentenceTree); // we do not want to go higher here -- "S is *ing O" fires otherwise // should not contain a modal verb Tree[] chlds = vp.children(); for (int i = 0; i < chlds.length; i++) { if (chlds[i].value().equalsIgnoreCase("rb")) { return false; } if (chlds[i].value().equalsIgnoreCase("md") && chlds[i].getLeaves().get(0).value().toLowerCase().matches(MODAL_VERB)) { return false; } } // the verb should be one of the copula verbs for (int i = 0; i < chlds.length; i++) { if (chlds[i].value().equalsIgnoreCase("vbd") || chlds[i].value().equalsIgnoreCase("aux") || chlds[i].value().equalsIgnoreCase("vbn") || chlds[i].value().equalsIgnoreCase("vb") || chlds[i].value().equalsIgnoreCase("vbd") || chlds[i].value().equalsIgnoreCase("vbp") || chlds[i].value().equalsIgnoreCase("vbz") || chlds[i].value().equalsIgnoreCase("vbg")) { if (chlds[i].getLeaves().get(0).value().toLowerCase().matches(COPULA_VERB)) { /* System.out.println("Found positive copula verb (" +chlds[i].getLeaves().get(0).value() + ") for ("+ inst.getAnaphor().getMarkableString()+ "),("+ inst.getAntecedent().getMarkableString()+ ") "); */ return true; } } } return false; }