List of usage examples for the edu.stanford.nlp.trees.Tree method yield()
public ArrayList<Label> yield()
From source file:edu.cmu.ark.nlp.question.Question.java
License:Open Source License
public List<Tree> findLogicalWordsAboveIntermediateTree() { List<Tree> res = new ArrayList<Tree>(); Tree pred = intermediateTree.getChild(0).headPreTerminal(this.hf); String lemma = QuestionUtil.getLemma(pred.yield().toString(), pred.label().toString()); String tregexOpStr;//from w ww .j ava 2s. c o m TregexPattern matchPattern; TregexMatcher matcher; Tree sourcePred = null; for (Tree leaf : sourceTree.getLeaves()) { Tree tmp = leaf.parent(sourceTree); String sourceLemma = QuestionUtil.getLemma(leaf.label().toString(), tmp.label().toString()); if (sourceLemma.equals(lemma)) { sourcePred = tmp; break; } } tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(sourceTree); Tree command; while (matcher.find() && sourcePred != null) { command = matcher.getNode("command"); if (QuestionUtil.cCommands(sourceTree, command, sourcePred) && command.parent(sourceTree) != sourcePred.parent(sourceTree)) { res.add(command); } } return res; }
From source file:edu.cmu.ark.nlp.question.QuestionUtil.java
License:Open Source License
public static String getCleanedUpYield(Tree inputTree) { Tree copyTree = inputTree.deepCopy(); //if(GlobalProperties.getDebug()) System.err.println("yield:"+copyTree.toString()); return cleanUpSentenceString(copyTree.yield().toString()); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//*from w w w. j a va 2 s . c om*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(this.hf).label().toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(this.hf); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).objectIndexOf(verbPreterminal); Tree verbParent = 
verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/**
 * Returns the tense tag to use for {@code node}.
 *
 * <p>A node labeled {@code MD} whose yield is "would" or "could" is reported as
 * {@code "VBD"}; every other node is reported under its own label.
 *
 * @param node the node to inspect
 * @return {@code "VBD"} for the modals "would"/"could", otherwise the node's label string
 */
private String findTense(Tree node) {
    // Compare the label's string form: label() returns a Label object, and
    // Label.equals(String) is never true, which made the modal branch unreachable.
    String tag = node.label().toString();
    if (tag.equals("MD")) {
        // NOTE(review): if yield() returns a plain ArrayList, toString() is "[would]" and this
        // still cannot match -- confirm against the Stanford version in use.
        String words = node.yield().toString();
        if (words.equals("would") || words.equals("could")) {
            return "VBD";
        }
    }
    return tag;
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void extractComplementClauses(Collection<Question> extracted, Question input) { Tree subord;/*from w w w. ja v a 2 s. c o m*/ String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; //TODO should also address infinitive complements tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " !> NP|PP " + //not part of a noun phrase or PP (other methods for those) " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); Tree verb = matcher.getNode("verb"); String verbLemma = QuestionUtil.getLemma(verb.yield().toString(), verb.label().toString()); if (!verbImpliesComplement(verbLemma)) { continue; } newTree.addChild(subord.deepCopy()); QuestionUtil.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0); //if(GlobalProperties.getDebug()) System.err.println("extractComplementClauses: "+newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.Question.java
License:Open Source License
public List<Tree> findLogicalWordsAboveIntermediateTree() { List<Tree> res = new ArrayList<Tree>(); Tree pred = intermediateTree.getChild(0).headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); String lemma = AnalysisUtilities.getInstance().getLemma(pred.yield().toString(), pred.label().toString()); String tregexOpStr;//from www .ja v a 2 s . c o m TregexPattern matchPattern; TregexMatcher matcher; Tree sourcePred = null; for (Tree leaf : sourceTree.getLeaves()) { Tree tmp = leaf.parent(sourceTree); String sourceLemma = AnalysisUtilities.getInstance().getLemma(leaf.label().toString(), tmp.label().toString()); if (sourceLemma.equals(lemma)) { sourcePred = tmp; break; } } tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(sourceTree); Tree command; while (matcher.find() && sourcePred != null) { command = matcher.getNode("command"); if (AnalysisUtilities.cCommands(sourceTree, command, sourcePred) && command.parent(sourceTree) != sourcePred.parent(sourceTree)) { res.add(command); } } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * */// w w w.jav a 2 s. co m private List<Tree> moveWHPhraseUnmovable(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; String marker = "/^(UNMOVABLE-NP|UNMOVABLE-PP|UNMOVABLE-SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); //copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
// ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); // ps.add(Tsurgeon.parseOperation("prune answer")); // ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); // ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); // p = Tsurgeon.collectOperations(ps); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(matchPattern,p)); // Tsurgeon.processPatternsOnTree(ops, copyTree); //copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = inputTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); // if(GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:"+whPhraseSubtree); // tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; // matchPattern = TregexPatternFactory.getPattern(tregexOpStr); // matcher = matchPattern.matcher(copyTree2); // if(!matcher.find()){ // continue; // } matcher = matchPattern.matcher(copyTree2); matcher.find(); mainclauseNode = matcher.getNode("answer"); if (mainclauseNode == null) continue; //replace the wh placeholder with a wh phrase int cc = mainclauseNode.numChildren(); for (int c = 0; c < cc; c++) mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); copyTree2 = removeMarkersFromTree(copyTree2); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
// prepPlaceholderParent = matcher.getNode("ph2Parent"); // int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); // if(leftOverPreposition != null && leftOverPreposition.length()>0){ // prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); // } // //now remove the left-over-preposition placeholder // ps.clear(); // ps.add(Tsurgeon.parseOperation("prune ph2")); // p = Tsurgeon.collectOperations(ps); // ops.clear(); // ops.add(new Pair<TregexPattern,TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"),p)); // Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * * This method removes the answer phrase from its original position * and places it at the front of the main clause. * * Note: Tsurgeon operations are perhaps not optimal here. * Using the Stanford API to move nodes directly might be simpler... * *//* w ww . ja v a 2 s . c o m*/ private List<Tree> moveWHPhrase(Tree inputTree, Tree intermediateTree, int i, boolean subjectMovement) { Tree copyTree; Tree copyTree2; List<Tree> res = new ArrayList<Tree>(); Tree mainclauseNode; Tree prepPlaceholderParent; String marker = "/^(NP|PP|SBAR)-" + i + "$/"; List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; //extract the "answer" phrase and generate a WH phrase from it tregexOpStr = "ROOT=root < (SQ=qclause << " + marker + "=answer < VP=predicate)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: inputTree:" + inputTree.toString()); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: tregexOpStr:" + tregexOpStr); TregexMatcher matcher = matchPattern.matcher(inputTree); matcher.find(); Tree phraseToMove = matcher.getNode("answer"); String whPhraseSubtree; String leftOverPreposition; if (printExtractedPhrases) System.out.println("EXTRACTED\t" + phraseToMove.yield().toString()); whGen.generateWHPhraseSubtrees(removeMarkersFromTree(phraseToMove), intermediateTree.yield().toString()); List<String> whPhraseSubtrees = whGen.getWHPhraseSubtrees(); List<String> leftOverPrepositions = whGen.getLeftOverPrepositions(); copyTree = inputTree.deeperCopy(); //The placeholder is necessary because tsurgeon will complain //if an added node has no children. This placeholder is removed below. 
ps.add(Tsurgeon.parseOperation("insert (PREPPLACEHOLDER dummy) $+ answer")); ps.add(Tsurgeon.parseOperation("prune answer")); ps.add(Tsurgeon.parseOperation("insert (SBARQ=mainclause PLACEHOLDER=placeholder) >0 root")); ps.add(Tsurgeon.parseOperation("move qclause >-1 mainclause")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); copyTree = removeMarkersFromTree(copyTree); //Now put each WH phrase into the tree and remove the original answer. //Operate on the tree directly rather than using tsurgeon //because tsurgeon can't parse operations that insert trees with special characters (e.g., ":") for (int j = 0; j < whPhraseSubtrees.size(); j++) { copyTree2 = copyTree.deeperCopy(); whPhraseSubtree = whPhraseSubtrees.get(j); leftOverPreposition = leftOverPrepositions.get(j); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: whPhraseSubtree:" + whPhraseSubtree); tregexOpStr = "ROOT < (SBARQ=mainclause < PLACEHOLDER=ph1) << (__=ph2Parent < PREPPLACEHOLDER=ph2)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree2); if (!matcher.find()) { continue; } mainclauseNode = matcher.getNode("mainclause"); //replace the wh placeholder with a wh phrase mainclauseNode.removeChild(0); mainclauseNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(whPhraseSubtree)); //Replace the pp placeholder with the left over preposition. //This may happen when the answer phrase was a PP. //e.g., John went to the game. -> What did John go to? 
prepPlaceholderParent = matcher.getNode("ph2Parent"); int index = prepPlaceholderParent.indexOf(matcher.getNode("ph2")); if (leftOverPreposition != null && leftOverPreposition.length() > 0) { prepPlaceholderParent.addChild(index, AnalysisUtilities.getInstance().readTreeFromString(leftOverPreposition)); } //now remove the left-over-preposition placeholder ps.clear(); ps.add(Tsurgeon.parseOperation("prune ph2")); p = Tsurgeon.collectOperations(ps); ops.clear(); ops.add(new Pair<TregexPattern, TsurgeonPattern>(TregexPatternFactory.getPattern("PREPPLACEHOLDER=ph2"), p)); Tsurgeon.processPatternsOnTree(ops, copyTree2); copyTree2 = moveLeadingAdjuncts(copyTree2); if (GlobalProperties.getDebug()) System.err.println("moveWHPhrase: " + copyTree2.toString()); res.add(copyTree2); } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/**
 * Changes the inflection of the main verb for questions in which a first or second
 * person pronoun is the subject.
 * Note: this probably isn't necessary for most applications.
 *
 * <p>E.g.,
 * Affects:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * <p>Does not affect:
 * He walks -> Who walks?
 *
 * @param inputTree tree to adjust in place; left untouched unless the phrase marked with
 *                  index 0 yields "I" or "you" (ignoring case) and a tensed verb is found
 */
private void ensureVerbAgreementForSubjectWH(Tree inputTree) {
    // Find the phrase carrying answer-marker index 0.
    TregexPattern subjectPattern = TregexPatternFactory.getPattern("/^(NP|PP|SBAR)-" + 0 + "$/");
    TregexMatcher subjectMatcher = subjectPattern.matcher(inputTree);
    if (!subjectMatcher.find()) {
        return;
    }

    // NOTE(review): if yield() returns a plain ArrayList, toString() is "[I]" and neither
    // comparison can match -- confirm against the Stanford version in use.
    String subjectString = subjectMatcher.getMatch().yield().toString();
    boolean isFirstOrSecondPerson = subjectString.equalsIgnoreCase("I")
            || subjectString.equalsIgnoreCase("you");
    if (!isFirstOrSecondPerson) {
        return;
    }

    TregexPattern verbPattern = TregexPatternFactory
            .getPattern("ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))");
    TregexMatcher verbMatcher = verbPattern.matcher(inputTree);
    if (!verbMatcher.find()) {
        return;
    }

    Tree verbSubtree = verbMatcher.getNode("tensedverb");
    Tree vpSubtree = verbMatcher.getNode("verbphrase");
    Tree singularFormSubtree = AnalysisUtilities.getInstance()
            .readTreeFromString(getSingularFormSubtree(verbSubtree));

    // Replace the tensed verb with its singular form at the same child position.
    int verbPosition = vpSubtree.indexOf(verbSubtree);
    vpSubtree.removeChild(verbPosition);
    vpSubtree.addChild(verbPosition, singularFormSubtree);

    if (GlobalProperties.getDebug())
        System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString());
}
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//*from www .j a v a2 s . co m*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label() .toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = AnalysisUtilities.getInstance().getLemma( verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, 
newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).indexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }