List of usage examples for edu.stanford.nlp.trees.Tree.addChild
public void addChild(int i, Tree t)
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * //from w w w .ja v a2s . c om */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.objectIndexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); 
i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.objectIndexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString()); if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. */// w w w . j ava2 s .c om private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(this.hf).label().toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(this.hf); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).objectIndexOf(verbPreterminal); Tree verbParent = 
verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * //from ww w.j a v a 2 s. c o m * John studied, hoping to get a good grade. -> John hoped to get a good grade. * * @param extracted * @param input */ private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) " + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { String verbPOS = findTense(matcher.getNode("tense")); Tree p = matcher.getNode("participial").deepCopy(); Tree verb = matcher.getNode("verb"); String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = p.objectIndexOf(verb); p.removeChild(verbIndex); p.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))"; Tree newTree = QuestionUtil.readTreeFromString(treeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0); if (this.getComputeFeatures) System.err.println("extractVerbParticipialModifiers: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * /*from ww w .ja v a2 s. com*/ */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deepCopy(); relclause = relclause.deepCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deepCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deepCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = QuestionUtil.readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && missingArgumentTree.getChild(i).label().toString().equals(",")) { 
missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; } if (subjectMovement) { //subject newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree); } 
else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); QuestionUtil.addPeriodIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade. * Walking to the store, John saw Susan -> John was walking to the store. * //w ww. j av a2 s .co m * NOTE: This method produces false positives for sentences like, * "Broadly speaking, the project was successful." * where the participial phrase does not modify the subject. * * @param extracted * @param input */ private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company... + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan. + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan. 
+ " <<# /^VB.*$/=maintense "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree nountree = matcher.getNode("subj").deepCopy(); Tree vptree = matcher.getNode("modifier"); Tree verb = matcher.getNode("tense"); makeDeterminerDefinite(nountree); if (vptree.label().toString().equals("PP")) vptree.label().setValue("VP"); String verbPOS = findTense(matcher.getNode("maintense")); if (vptree == null || nountree == null) return; String newTreeStr; if (verb.label().toString().equals("VBG")) { //for present partcipials, change the tense to the tense of the main verb //e.g., walking to the store -> walked to the store String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = vptree.objectIndexOf(verb); vptree = vptree.deepCopy(); vptree.removeChild(verbIndex); vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString() + " (. .)))"; } else { //for past participials, add a copula //e.g., John, exhausted, -> John was exhausted //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out) String auxiliary; if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) { if (isPlural(nountree)) auxiliary = "(VBD were)"; else auxiliary = "(VBD was)"; } else { if (isPlural(nountree)) auxiliary = "(VB are)"; else auxiliary = "(VBZ is)"; } newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. 
.)))"; } Tree newTree = QuestionUtil.readTreeFromString(newTreeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0); extracted.add(newTreeWithFeatures); } }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * *//* ww w.ja v a2 s . c o m*/ private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); //copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
// ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); // ps.add(Tsurgeon.parseOperation("prune answer")); // ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); // ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); // p = Tsurgeon.collectOperations(ps); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(matchPattern,p)); // Tsurgeon.processPatternsOnTree(ops, copyTree); //copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = inputTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); // if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:"+whPhraseSubtree); // tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; // matchPattern = TregexPatternFactory.getPattern(tregexOpStr); // matcher = matchPattern.matcher(copyTree2); // if(!matcher.find()){ // continue; // } matcher = matchPattern.matcher(copyTree2); matcher.find(); mainclauseNode = matcher.getNode("answer"); if (mainclauseNode == null) continue; //replace the wh placeholder with a wh phrase int cc = mainclauseNode.numChildren(); for (int c = 0; c < cc; c++) mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); copyTree2 = removeMarkersFromTree(copyTree2); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
// prepPlaceholderParent = matcher.getNode("ph2Parent"); // int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); // if(leftOverPreposition != null && leftOverPreposition.length()>0){ // prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); // } // //now remove the left-over-preposition placeholder // ps.clear(); // ps.add(Tsurgeon.parseOperation("prune ph2")); // p = Tsurgeon.collectOperations(ps); // ops.clear(); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"),p)); // Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * */// ww w . j ava2s.c o m private List<Tree> moveWHPhrase(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree; Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; Tree prepPlaceholderParent; String marker = "/^(NP|PP|SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; String leftOverPreposition; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); ps.add(Tsurgeon.parseOperation("prune answer")); ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = copyTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); leftOverPreposition = leftOverPrepositions.get(j); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:" + whPhraseSubtree); tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree2); if (!matcher.find()) { continue; } mainclauseNode = matcher.getNode("mainclause"); //replace the wh placeholder with a wh phrase mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
prepPlaceholderParent = matcher.getNode("ph2Parent"); int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); if (leftOverPreposition != null && leftOverPreposition.length() > 0) { prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); } //now remove the left-over-preposition placeholder ps.clear(); ps.add(Tsurgeon.parseOperation("prune ph2")); p = Tsurgeon.collectOperations(ps); ops.clear(); ops.add(new Pair<TregexPattern, TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"), p)); Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * This method decomposes the main verb of the sentence * for yes-no questions and WH questions where the answer * phrase is not the subject./*from ww w .j a va2s . c om*/ * * e.g., I met John -> I did meet John. * (which would later become "Who did I meet?") * */ private Tree decomposePredicate(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); String tregexOpStr; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); TregexPattern matchPattern; TsurgeonPattern p; TregexMatcher matcher; Tree tmpNode; //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < /VB.?/=tensedverb !< (VP < /VB.?/)))"; //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did)))"; //This rather complex rule identifies predicates to decompose. //There are two cases, separated by a disjunction. //One could break it apart into separate rules to make it simpler... // //The first part of the disjunction //(i.e., < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) ) //is for handling basic sentences //(e.g., John bought an apple -> What did John buy?), //sentences with auxiliaries //(e.g., John had bought an apple -> Had John bought an apple?), //and sentences with participial phrases //(e.g., John seemed finished with the apple -> What did John seem finished with?). // //The second part of the disjunction //(i.e., < /VB.?/=tensedverb !< VP ) //is for handling sentences that have predicates //that can also be auxiliaries (e.g., I have a book). //In these cases, we do want to decompose have, has, had, etc. //(e.g., What did I have?) 
tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase [ < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) | < /VB.?/=tensedverb !< VP ]))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { Tree subtree = matcher.getNode("tensedverb"); String lemma = AnalysisUtilities.getInstance().getLemma(subtree.getChild(0).label().toString(), subtree.label().toString()); String aux = getAuxiliarySubtree(subtree); if (!lemma.equals("be")) { ps.add(Tsurgeon.parseOperation("replace predphrase (MAINVP=newpred PLACEHOLDER)")); ps.add(Tsurgeon.parseOperation("insert predphrase >-1 newpred")); ps.add(Tsurgeon.parseOperation("insert (VBLEMMA PLACEHOLDER) $+ tensedverb")); ps.add(Tsurgeon.parseOperation("delete tensedverb")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); matchPattern = TregexPatternFactory.getPattern("MAINVP=mainvp"); matcher = matchPattern.matcher(copyTree); matcher.find(); tmpNode = matcher.getNode("mainvp"); tmpNode.removeChild(0); tmpNode.label().setValue("VP"); tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(aux)); matchPattern = TregexPatternFactory.getPattern("VBLEMMA=vblemma"); matcher = matchPattern.matcher(copyTree); matcher.find(); tmpNode = matcher.getNode("vblemma"); tmpNode.removeChild(0); tmpNode.label().setValue("VB"); tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(lemma)); } } if (GlobalProperties.getDebug()) System.err.println("decomposePredicate: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Changes the inflection of the main verb for questions with * first and second person pronouns are the subject. * Note: this probably isn't necessary for most applications. * * E.g.,/*ww w.j ava 2s .co m*/ * Affects: * I walk -> Who walks? (rather than, Who walk?) * * Does not affect: * He walks -> Who walks? * */ private void ensureVerbAgreementForSubjectWH(Tree inputTree) { String tregexOpStr; TregexMatcher matcher; TregexPattern matchPattern; Tree subjectTree; String subjectString; tregexOpStr = "/^(NP|PP|SBAR)-" + 0 + "$/"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); if (matcher.find()) { subjectTree = matcher.getMatch(); subjectString = subjectTree.yield().toString(); if (subjectString.equalsIgnoreCase("I") || subjectString.equalsIgnoreCase("you")) { tregexOpStr = "ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); if (matcher.find()) { Tree verbSubtree = matcher.getNode("tensedverb"); Tree vpSubtree = matcher.getNode("verbphrase"); Tree singularFormSubtree = AnalysisUtilities.getInstance() .readTreeFromString(getSingularFormSubtree(verbSubtree)); int index = vpSubtree.indexOf(verbSubtree); vpSubtree.removeChild(index); vpSubtree.addChild(index, singularFormSubtree); if (GlobalProperties.getDebug()) System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString()); } } } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * /*from www . j av a2 s . co m*/ */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.indexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { 
newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.indexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString()); if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }