List of usage examples for edu.stanford.nlp.trees Tree toString
@Override
public String toString()
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/**
 * Strips the answer-phrase markers from the labels of a tree.
 *
 * <p>The tree is rendered with {@code toString()}, every {@code UNMOVABLE-} occurrence is deleted,
 * every {@code -<digits>} run that is followed by a space is replaced by a single space, and the
 * resulting string is parsed back into a tree via {@code AnalysisUtilities.readTreeFromString}.
 *
 * @param inputTree the tree whose labels should be cleaned; may be {@code null}
 * @return the tree parsed from the cleaned string, or {@code null} when {@code inputTree} is {@code null}
 */
private Tree removeMarkersFromTree(Tree inputTree) {
    if (inputTree == null) {
        return null;
    }
    String unmarked = inputTree.toString()
            .replaceAll("UNMOVABLE-", "")
            .replaceAll("-\\d+ ", " ");
    return AnalysisUtilities.getInstance().readTreeFromString(unmarked);
}
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Thsi method returns the node for the ith possible answer phrase in this sentence * (after potential answer phrases have been identified by marking unmovable ones) * * @param inputTree//www . ja v a 2s .c o m * @param i * @return */ private Tree getAnswerPhrase(Tree inputTree, int i) { Tree answerPhrase; String tregexOpStr; TregexPattern matchPattern; String marker = "/^(NP|PP|SBAR)-" + i + "$/"; tregexOpStr = marker + "=answer"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); //if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); answerPhrase = matcher.getNode("answer"); return answerPhrase; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Thsi method returns the node for the ith possible answer phrase in this sentence * (after potential answer phrases have been identified by marking unmovable ones) * * @param inputTree//w w w . j a va 2s. c o m * @param i * @return */ private Tree getAnswerPhraseUnmovable(Tree inputTree, int i) { Tree answerPhrase; String tregexOpStr; TregexPattern matchPattern; String marker = "/^(UNMOVABLE-NP|PP|SBAR)-" + i + "$/"; tregexOpStr = marker + "=answer"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); //if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); answerPhrase = matcher.getNode("answer"); return answerPhrase; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * *//*from w w w. ja v a 2 s . com*/ private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); //copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
// ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); // ps.add(Tsurgeon.parseOperation("prune answer")); // ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); // ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); // p = Tsurgeon.collectOperations(ps); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(matchPattern,p)); // Tsurgeon.processPatternsOnTree(ops, copyTree); //copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = inputTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); // if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:"+whPhraseSubtree); // tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; // matchPattern = TregexPatternFactory.getPattern(tregexOpStr); // matcher = matchPattern.matcher(copyTree2); // if(!matcher.find()){ // continue; // } matcher = matchPattern.matcher(copyTree2); matcher.find(); mainclauseNode = matcher.getNode("answer"); if (mainclauseNode == null) continue; //replace the wh placeholder with a wh phrase int cc = mainclauseNode.numChildren(); for (int c = 0; c < cc; c++) mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); copyTree2 = removeMarkersFromTree(copyTree2); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
// prepPlaceholderParent = matcher.getNode("ph2Parent"); // int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); // if(leftOverPreposition != null && leftOverPreposition.length()>0){ // prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); // } // //now remove the left-over-preposition placeholder // ps.clear(); // ps.add(Tsurgeon.parseOperation("prune ph2")); // p = Tsurgeon.collectOperations(ps); // ops.clear(); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"),p)); // Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * */// www. j av a 2 s . c o m private List<Tree> moveWHPhrase(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree; Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; Tree prepPlaceholderParent; String marker = "/^(NP|PP|SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; String leftOverPreposition; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); ps.add(Tsurgeon.parseOperation("prune answer")); ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = copyTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); leftOverPreposition = leftOverPrepositions.get(j); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:" + whPhraseSubtree); tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree2); if (!matcher.find()) { continue; } mainclauseNode = matcher.getNode("mainclause"); //replace the wh placeholder with a wh phrase mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
prepPlaceholderParent = matcher.getNode("ph2Parent"); int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); if (leftOverPreposition != null && leftOverPreposition.length() > 0) { prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); } //now remove the left-over-preposition placeholder ps.clear(); ps.add(Tsurgeon.parseOperation("prune ph2")); p = Tsurgeon.collectOperations(ps); ops.clear(); ops.add(new Pair<TregexPattern, TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"), p)); Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * This method moves adjunct phrases that appear prior to the first possible subject. * e.g., in order to produce "WHILE I WAS AT THE STORE, who did I meet?" * from "WHILE I WAS AT THE STORE, I met him." * * This operation is not actually used in the full system because * leading modifiers are either moved or removed by the simplified * factual statement extraction step in stage 1. * *//*from ww w . j a v a2 s . c om*/ private Tree moveLeadingAdjuncts(Tree inputTree) { if (GlobalProperties.getDebug()) System.err.println("moveLeadingAdjuncts:" + inputTree.toString()); Tree copyTree = inputTree.deeperCopy(); String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; boolean matchFound = true; List<Pair<TregexPattern, TsurgeonPattern>> ops; List<TsurgeonPattern> ps; TsurgeonPattern p; while (true) { ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "TMPROOT=root"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); matchFound = matcher.find(); ps.add(Tsurgeon.parseOperation("relabel root ROOT")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps = new ArrayList<TsurgeonPattern>(); //for yes/no questions, find any phrases that precede the first possible subject (NP|SBAR) // and move them to the front of the question clause. 
tregexOpStr = "ROOT=root < (SQ=mainclause < (/,|ADVP|ADJP|SBAR|S|PP/=mover $,, /MD|VB.*/=pivot $ NP=subject))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); matchFound = matcher.find(); if (!matchFound) { //for WH questions, move any phrases that precede the first potential subject //--or verb phrase for when the original subject is the answer phrase tregexOpStr = "ROOT=root < (SBARQ=mainclause < WHNP|WHPP|WHADJP|WHADVP=pivot < (SQ=invertedclause < (/,|S|ADVP|ADJP|SBAR|PP/=mover !$,, /\\*/ $.. /^VP|VB.*/)))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); matchFound = matcher.find(); } if (!matchFound) { break; } //need to relabel as TMPROOT so things are moved one at a time, to preserve their order ps.add(Tsurgeon.parseOperation("move mover $+ pivot")); ps.add(Tsurgeon.parseOperation("relabel root TMPROOT")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); //System.err.println("moving..."+copyTree.toString()); } //remove extra commas for sentences like "Bill, while walking, saw John." Tree firstChild = copyTree.getChild(0); if (firstChild.getChild(0).label().toString().equals(",")) { firstChild.removeChild(0); } if (GlobalProperties.getDebug()) System.err.println("moveLeadingAdjuncts(out):" + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * This method decomposes the main verb of the sentence * for yes-no questions and WH questions where the answer * phrase is not the subject./*from w w w.ja v a 2 s . co m*/ * * e.g., I met John -> I did meet John. * (which would later become "Who did I meet?") * */ private Tree decomposePredicate(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); String tregexOpStr; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); TregexPattern matchPattern; TsurgeonPattern p; TregexMatcher matcher; Tree tmpNode; //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < /VB.?/=tensedverb !< (VP < /VB.?/)))"; //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did)))"; //This rather complex rule identifies predicates to decompose. //There are two cases, separated by a disjunction. //One could break it apart into separate rules to make it simpler... // //The first part of the disjunction //(i.e., < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) ) //is for handling basic sentences //(e.g., John bought an apple -> What did John buy?), //sentences with auxiliaries //(e.g., John had bought an apple -> Had John bought an apple?), //and sentences with participial phrases //(e.g., John seemed finished with the apple -> What did John seem finished with?). // //The second part of the disjunction //(i.e., < /VB.?/=tensedverb !< VP ) //is for handling sentences that have predicates //that can also be auxiliaries (e.g., I have a book). //In these cases, we do want to decompose have, has, had, etc. //(e.g., What did I have?) 
tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase [ < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) | < /VB.?/=tensedverb !< VP ]))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { Tree subtree = matcher.getNode("tensedverb"); String lemma = AnalysisUtilities.getInstance().getLemma(subtree.getChild(0).label().toString(), subtree.label().toString()); String aux = getAuxiliarySubtree(subtree); if (!lemma.equals("be")) { ps.add(Tsurgeon.parseOperation("replace predphrase (MAINVP=newpred PLACEHOLDER)")); ps.add(Tsurgeon.parseOperation("insert predphrase >-1 newpred")); ps.add(Tsurgeon.parseOperation("insert (VBLEMMA PLACEHOLDER) $+ tensedverb")); ps.add(Tsurgeon.parseOperation("delete tensedverb")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); matchPattern = TregexPatternFactory.getPattern("MAINVP=mainvp"); matcher = matchPattern.matcher(copyTree); matcher.find(); tmpNode = matcher.getNode("mainvp"); tmpNode.removeChild(0); tmpNode.label().setValue("VP"); tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(aux)); matchPattern = TregexPatternFactory.getPattern("VBLEMMA=vblemma"); matcher = matchPattern.matcher(copyTree); matcher.find(); tmpNode = matcher.getNode("vblemma"); tmpNode.removeChild(0); tmpNode.label().setValue("VB"); tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(lemma)); } } if (GlobalProperties.getDebug()) System.err.println("decomposePredicate: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Returns the singular present tense form of a tensed verb. * This only affects the output when generating from sentences where * first and second person pronouns are the subject. * * E.g.,/*www .j av a 2 s . c o m*/ * Affects: * I walk -> Who walks? (rather than, Who walk?) * * Does not affect: * He walks -> Who walks? * */ private String getSingularFormSubtree(Tree tensedVerbSubtree) { String res = ""; String lemma = AnalysisUtilities.getInstance().getLemma(tensedVerbSubtree.getChild(0).label().toString(), tensedVerbSubtree.label().toString()); String pos = tensedVerbSubtree.value(); if (pos.equals("VBD")) { res = tensedVerbSubtree.toString(); } else { res = "(VBZ " + AnalysisUtilities.getInstance().getSurfaceForm(lemma, "VBZ") + ")"; } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/**
 * Returns the "do" auxiliary that carries the tense of the given verb; used to decompose the
 * main verb.
 *
 * <p>e.g., input: {@code (VBD walked)}, output: {@code (VBD did)}.
 *
 * <p>Note: another method would extract the base form of the verb "(VB walk)".
 *
 * <p>The tag is read from the first {@code (TAG ...)} group found in the verb's string form.
 *
 * @param tensedverb the tensed verb node; may be {@code null}
 * @return {@code (VBD did)}, {@code (VBZ does)} or {@code (VBP do)} for those tags,
 *         {@code (VB do)} for any other tag, or the empty string when {@code tensedverb}
 *         is {@code null}
 */
private String getAuxiliarySubtree(Tree tensedverb) {
    if (tensedverb == null) {
        return "";
    }

    Matcher tagMatcher = Pattern.compile("\\((\\S+) [^\\)]*\\)").matcher(tensedverb.toString());
    tagMatcher.find();
    String tag = tagMatcher.group(1);

    if (tag.equals("VBD")) {
        return "(VBD did)";
    }
    if (tag.equals("VBZ")) {
        return "(VBZ does)";
    }
    if (tag.equals("VBP")) {
        return "(VBP do)";
    }
    return "(VB do)";
}
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Moves an auxiliary verb to the front of the main clause (i.e., before the subject). * This is used in yes-no questions and WH questions where the answer phrase * is not the subject/*from w ww . j ava 2s . c om*/ * * E.g., * John did meet Paul -> Did John meet Paul (which will then become "Who did John meet?") * */ private Tree subjectAuxiliaryInversion(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; //auxilaries tregexOpStr = "ROOT=root < (S=mainclause <+(/VP.*/) (VP < /(MD|VB.?)/=aux < (VP < /VB.?/=baseform)))"; ps.add(Tsurgeon.parseOperation("relabel root TMPROOT")); ps.add(Tsurgeon.parseOperation("prune aux")); ps.add(Tsurgeon.parseOperation("insert aux >0 mainclause")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); //copula ops.clear(); ps.clear(); tregexOpStr = "ROOT=root < (S=mainclause <+(/VP.*/) (VP < (/VB.?/=copula < is|are|was|were|am) !< VP))"; ps.add(Tsurgeon.parseOperation("relabel root TMPROOT")); ps.add(Tsurgeon.parseOperation("prune copula\n")); ps.add(Tsurgeon.parseOperation("insert copula >0 mainclause")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); ops.clear(); ps.clear(); tregexOpStr = "TMPROOT=root"; ps.add(Tsurgeon.parseOperation("relabel root ROOT")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); if 
(GlobalProperties.getDebug()) System.err.println("subjectAuxiliaryInversion: " + copyTree.toString()); return copyTree; }