Usage examples for the numChildren() method of edu.stanford.nlp.trees.Tree
public int numChildren()
From source file:KleinBilingualParser.java
/**
 * Scores whether two tree nodes have the same number of children.
 *
 * @param nodeF first node to compare
 * @param nodeE second node to compare
 * @return {@code 1.0 / 10} when both nodes report the same child count, {@code 0.0} otherwise
 */
private static double numChildren(Tree nodeF, Tree nodeE) {
    boolean sameArity = nodeF.numChildren() == nodeE.numChildren();
    return sameArity ? 1.0 / 10 : 0.0;
}
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double validateBinarizedTree(Tree tree, int start) { if (tree.isLeaf()) { return 0.0; }//from w ww . ja va 2 s. c o m float epsilon = 0.0001f; if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); float score = lex.score(iTW, start, wordStr, null); float bound = iScore[start][start + 1][stateIndex.indexOf(tree.label().value())]; if (score > bound + epsilon) { System.out.println("Invalid tagging:"); System.out.println(" Tag: " + tree.label().value()); System.out.println(" Word: " + tree.children()[0].label().value()); System.out.println(" Score: " + score); System.out.println(" Bound: " + bound); } return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); double score = SloppyMath.max(ug.scoreRule(ur), -10000.0) + validateBinarizedTree(tree.children()[0], start); double bound = iScore[start][start + tree.yield().size()][parent]; if (score > bound + epsilon) { System.out.println("Invalid unary:"); System.out.println(" Parent: " + tree.label().value()); System.out.println(" Child: " + tree.children()[0].label().value()); System.out.println(" Start: " + start); System.out.println(" End: " + (start + tree.yield().size())); System.out.println(" Score: " + score); System.out.println(" Bound: " + bound); } return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); double score = SloppyMath.max(bg.scoreRule(br), -10000.0) + validateBinarizedTree(tree.children()[0], start) + validateBinarizedTree(tree.children()[1], start + tree.children()[0].yield().size()); double bound = iScore[start][start + tree.yield().size()][parent]; if (score > bound + 
epsilon) { System.out.println("Invalid binary:"); System.out.println(" Parent: " + tree.label().value()); System.out.println(" LChild: " + tree.children()[0].label().value()); System.out.println(" RChild: " + tree.children()[1].label().value()); System.out.println(" Start: " + start); System.out.println(" End: " + (start + tree.yield().size())); System.out.println(" Score: " + score); System.out.println(" Bound: " + bound); } return score; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) { if (tree.isLeaf()) { return 0.0; }//from www. ja v a2s .c om if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); // if (lex.score(iTW,(leftmost ? 0 : 1)) == Double.NEGATIVE_INFINITY) { // System.out.println("NO SCORE FOR: "+iTW); // } float score = lex.score(iTW, start, wordStr, null); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); //+ DEBUG // if (ug.scoreRule(ur) < -10000) { // System.out.println("Grammar doesn't have rule: " + ur); // } // return SloppyMath.max(ug.scoreRule(ur), -10000.0) + scoreBinarizedTree(tree.children()[0], leftmost); double score = ug.scoreRule(ur) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + lex.score(ur, start, start + tree.children()[0].yield().size()); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); //+ DEBUG // if (bg.scoreRule(br) < -10000) { // System.out.println("Grammar doesn't have rule: " + br); // } // return SloppyMath.max(bg.scoreRule(br), -10000.0) + // scoreBinarizedTree(tree.children()[0], leftmost) + // scoreBinarizedTree(tree.children()[1], false); int sz0 = tree.children()[0].yield().size(); double score = bg.scoreRule(br) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + scoreBinarizedTree(tree.children()[1], start + sz0, debugLvl) + lex.score(br, start, start + sz0 + tree.children()[1].yield().size(), 
start + sz0); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan() + " " + (sz0 + start)); return score; }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static boolean cCommands(Tree root, Tree n1, Tree n2) { if (n1.dominates(n2)) return false; Tree n1Parent = n1.parent(root); while (n1Parent != null && n1Parent.numChildren() == 1) { n1Parent = n1Parent.parent(root); }//w w w . jav a 2s. co m if (n1Parent != null && n1Parent.dominates(n2)) return true; return false; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * //www .j av a2 s.com */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.objectIndexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { 
newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.objectIndexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString()); if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * /*from w w w . j a v a 2s . co m*/ */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deepCopy(); relclause = relclause.deepCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deepCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deepCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = QuestionUtil.readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && missingArgumentTree.getChild(i).label().toString().equals(",")) { 
missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; } if (subjectMovement) { //subject newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree); } 
else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); QuestionUtil.addPeriodIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * */// w w w . jav a 2s .com private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); //copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
// ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); // ps.add(Tsurgeon.parseOperation("prune answer")); // ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); // ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); // p = Tsurgeon.collectOperations(ps); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(matchPattern,p)); // Tsurgeon.processPatternsOnTree(ops, copyTree); //copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = inputTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); // if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:"+whPhraseSubtree); // tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; // matchPattern = TregexPatternFactory.getPattern(tregexOpStr); // matcher = matchPattern.matcher(copyTree2); // if(!matcher.find()){ // continue; // } matcher = matchPattern.matcher(copyTree2); matcher.find(); mainclauseNode = matcher.getNode("answer"); if (mainclauseNode == null) continue; //replace the wh placeholder with a wh phrase int cc = mainclauseNode.numChildren(); for (int c = 0; c < cc; c++) mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); copyTree2 = removeMarkersFromTree(copyTree2); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
// prepPlaceholderParent = matcher.getNode("ph2Parent"); // int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); // if(leftOverPreposition != null && leftOverPreposition.length()>0){ // prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); // } // //now remove the left-over-preposition placeholder // ps.clear(); // ps.add(Tsurgeon.parseOperation("prune ph2")); // p = Tsurgeon.collectOperations(ps); // ops.clear(); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"),p)); // Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * /* w ww . java 2s .c o m*/ */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.indexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { 
newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.indexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString()); if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * //from w w w . jav a 2 s . c o m */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deeperCopy(); relclause = relclause.deeperCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deeperCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deeperCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && 
missingArgumentTree.getChild(i).label().toString().equals(",")) { missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp); ppRelativeClause = true; } if 
(subjectMovement) { //subject newparenttree.addChild(newparenttree.indexOf(verbtree), missingArgumentTree); } else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.indexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); AnalysisUtilities.addPeriodIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: " + newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }
From source file:edu.jhu.hlt.concrete.stanford.PreNERCoreMapWrapper.java
License:Open Source License
/**
 * Recursively converts a parse tree into {@link Constituent} objects appended to the
 * constituent list of {@code p}.
 *
 * The constituent for {@code root} is added to {@code p} before its children are visited,
 * and takes the current size of that list as its ID. When {@code hf} cannot determine a
 * head for a non-leaf node, the rightmost child is used instead.
 *
 * @param root             the (sub)tree to convert
 * @param left             start value stored on the constituent; also the start of the first child
 * @param right            ending value stored on the constituent
 * @param n                the length of the sentence in tokens; only passed on to recursive calls
 * @param p                the parse that collects the constituents
 * @param tokenizationUUID only passed on to recursive calls
 * @param hf               head finder used to pick the head child of non-leaf nodes
 * @return the ID of the constituent created for {@code root}
 * @throws AnalyticException declared by this method; not thrown directly in its body
 */
private static int constructConstituent(Tree root, int left, int right, int n, Parse p,
        UUID tokenizationUUID, HeadFinder hf) throws AnalyticException {
    Constituent node = new Constituent();
    node.setId(p.getConstituentListSize());
    node.setTag(root.value());
    node.setStart(left);
    node.setEnding(right);
    p.addToConstituentList(node);

    Tree head = null;
    if (!root.isLeaf()) {
        try {
            head = hf.determineHead(root);
        } catch (IllegalArgumentException iae) {
            LOGGER.warn("Failed to find head, falling back on rightmost constituent.");
            head = root.children()[root.numChildren() - 1];
        }
    }

    int headPosition = -1;
    int position = 0;
    int cursor = left;
    for (Tree child : root.getChildrenAsList()) {
        // Each child covers as many tokens as it has leaves.
        int span = child.getLeaves().size();
        int childId = constructConstituent(child, cursor, cursor + span, n, p, tokenizationUUID, hf);
        node.addToChildList(childId);
        cursor += span;

        // Identity comparison: the head is one of the child objects themselves.
        if (head != null && child == head) {
            assert (headPosition < 0);
            headPosition = position;
        }
        position++;
    }

    if (headPosition >= 0) {
        node.setHeadChildIndex(headPosition);
    }
    // Leaves never had a child added; give them an explicit empty list.
    if (!node.isSetChildList()) {
        node.setChildList(new ArrayList<Integer>());
    }
    return node.getId();
}