List of usage examples for edu.stanford.nlp.trees Tree toString
@Override
public String toString()
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Changes the inflection of the main verb for questions with * first and second person pronouns are the subject. * Note: this probably isn't necessary for most applications. * * E.g.,/*ww w .ja v a 2 s.c om*/ * Affects: * I walk -> Who walks? (rather than, Who walk?) * * Does not affect: * He walks -> Who walks? * */ private void ensureVerbAgreementForSubjectWH(Tree inputTree) { String tregexOpStr; TregexMatcher matcher; TregexPattern matchPattern; Tree subjectTree; String subjectString; tregexOpStr = "/^(NP|PP|SBAR)-" + 0 + "$/"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); if (matcher.find()) { subjectTree = matcher.getMatch(); subjectString = subjectTree.yield().toString(); if (subjectString.equalsIgnoreCase("I") || subjectString.equalsIgnoreCase("you")) { tregexOpStr = "ROOT=root < (S=mainclause < (VP=verbphrase < (/VB.?/=tensedverb)))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); if (matcher.find()) { Tree verbSubtree = matcher.getNode("tensedverb"); Tree vpSubtree = matcher.getNode("verbphrase"); Tree singularFormSubtree = AnalysisUtilities.getInstance() .readTreeFromString(getSingularFormSubtree(verbSubtree)); int index = vpSubtree.indexOf(verbSubtree); vpSubtree.removeChild(index); vpSubtree.addChild(index, singularFormSubtree); if (GlobalProperties.getDebug()) System.err.println("ensureVerbAgreementForSubjectWH: " + inputTree.toString()); } } } }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Also mark UNMOVABLE phrases as possible answers * * @param inputTree//from w w w .j a va 2 s .c om * @return */ private Tree markPossibleAnswerPhrasesUnmovable(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); numWHPhrasesUnmovable = 0; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; Tree tmp; //find and mark the main clause subject tregexOpStr = "ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))"; ps.add(Tsurgeon.parseOperation("relabel subj NP-0")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { tmp = matcher.getNode("subj"); tmp.label().setValue(tmp.label().toString() + "-0"); numWHPhrasesUnmovable++; } //noun phrases tregexOpStr = "ROOT=root << UNMOVABLE-NP=np"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); while (matcher.find()) { tmp = matcher.getNode("np"); tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrasesUnmovable); numWHPhrasesUnmovable++; } if (GlobalProperties.getDebug()) System.err.println("markPossibleAnswerPhrases: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Marks possible answer phrase nodes with indexes for later processing. * This step might be easier with the Stanford Parser API's Tree class methods * than with Tsurgeon...//from w w w. j a va 2 s . c o m * * @param inputTree * @return */ private Tree markPossibleAnswerPhrases(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); numWHPhrases = 0; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; Tree tmp; //find and mark the main clause subject tregexOpStr = "ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))"; ps.add(Tsurgeon.parseOperation("relabel subj NP-0")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { tmp = matcher.getNode("subj"); tmp.label().setValue(tmp.label().toString() + "-0"); numWHPhrases++; } //noun phrases tregexOpStr = "ROOT=root << NP|PP|SBAR=np"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); while (matcher.find()) { tmp = matcher.getNode("np"); tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrases); numWHPhrases++; } if (GlobalProperties.getDebug()) System.err.println("markPossibleAnswerPhrases: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John ran and Bill walked. -> John ran. Bill walked. * //from w ww . ja va 2 s . c o m */ private void extractConjoinedPhrases(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; TregexMatcher matcher; //Tree newTree = copy.getIntermediateTree(); Tree newTree; int nodeindex; tregexOpStr = "__ " + " [ < (VP < (/VB.*/=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + //get the first conjunction, to avoid spurious duplicate matches " | < (VP < (VP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + // verb phrases may be conjoined by commas and adverbs (e.g., "John ran, then walked.") " | < (S|SINV < (S|SINV=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + " | < (S|SINV < (S|SINV=child $ (/:/ < /;/ !$++ /:/))) " + //" | < (ADJP < (JJ|JJR|ADJP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + //" | < (ADVP < (RB|RBR|ADVP=child $ RB|RBR|ADVP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) "+ //" | < (PP < (PP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + " | < (SBAR < (SBAR=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) ] " + " !$ (CC|CONJP !< or|nor)" + //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) " !< (CC|CONJP !< or|nor) " + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { conjoinedNode = matcher.getNode("child"); nodeindex = conjoinedNode.nodeNumber(input.getIntermediateTree()); //make a copy of the input for this iteration newTree = input.getIntermediateTree().deeperCopy(); removeConjoinedSiblingsHelper(newTree, nodeindex); //for conjoined main clauses, add punctuation if necessary AnalysisUtilities.addPeriodIfNeeded(newTree); //make a new Question object and add it addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedPhrases: " + newTree.toString()); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromConjoined", 1.0); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { if (GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: " + copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy);/*w w w .j ava 2 s . c o m*/ Tree gparent = parent.parent(copy); int parentIdx = gparent.indexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * // w ww .ja v a2 s. c o m * John studied, hoping to get a good grade. -> John hoped to get a good grade. * * @param extracted * @param input */ private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) " + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { String verbPOS = findTense(matcher.getNode("tense")); Tree p = matcher.getNode("participial").deeperCopy(); Tree verb = matcher.getNode("verb"); String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, verbPOS); int verbIndex = p.indexOf(verb); p.removeChild(verbIndex); p.addChild(verbIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + verbPOS + " " + newVerb + ")")); String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))"; Tree newTree = AnalysisUtilities.getInstance().readTreeFromString(treeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0); if (GlobalProperties.getDebug()) System.err.println("extractVerbParticipialModifiers: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., As John slept, I studied. -> John slept. * //ww w .j a v a 2s. c o m */ private void extractSubordinateClauses(Collection<Question> extracted, Question input) { Tree subord; String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = " SBAR [ > VP < IN | > S|SINV ] " + //not a complement " !< (IN < if|unless|that)" + //not a conditional antecedent " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " >S|SINV|VP "; //not part of a noun phrase or PP (other methods for those) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); newTree.addChild(subord.deeperCopy()); AnalysisUtilities.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromSubordinateClause", 1.0); if (GlobalProperties.getDebug()) System.err.println("extractSubordinateClauses: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
private void extractComplementClauses(Collection<Question> extracted, Question input) { Tree subord;//from w w w. j av a2 s . c om String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; //TODO should also address infinitive complements tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " !> NP|PP " + //not part of a noun phrase or PP (other methods for those) " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); Tree verb = matcher.getNode("verb"); String verbLemma = AnalysisUtilities.getInstance().getLemma(verb.yield().toString(), verb.label().toString()); if (!verbImpliesComplement(verbLemma)) { continue; } newTree.addChild(subord.deeperCopy()); AnalysisUtilities.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0); if (GlobalProperties.getDebug()) System.err.println("extractComplementClauses: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., Lincoln, the 16th president, was tall. -> Lincoln was the 16th president. * The meeting, in 1984, was important. -> The meeting was in 1984. *///from www . j a v a 2s.c o m private void extractAppositives(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "NP < (NP=noun !$-- NP $+ (/,/ $++ NP|PP=appositive !$ CC|CONJP)) " + " >> (ROOT <<# /^VB.*/=mainverb) "; //extract the main verb to capture the verb tense matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree verbtree = matcher.getNode("mainverb"); Tree nountree = matcher.getNode("noun").deeperCopy(); Tree appositivetree = matcher.getNode("appositive"); makeDeterminerDefinite(nountree); //if both are proper nouns, do not extract because this is not an appositive(e.g., "Pittsburgh, PA") /*if(nountree.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label().toString().equals("NNP") && appositivetree.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label().toString().equals("NNP")) { continue; }*/ //make a new tree for a copula sentence with the noun and appositive String pos = verbtree.label().toString(); String copula; if (pos.equals("VBD")) { if (isPlural(nountree)) { copula = "(VBD were)"; } else { copula = "(VBD was)"; } } else { if (isPlural(nountree)) { copula = "(VBD are)"; } else { copula = "(VBD is)"; } } Tree newTree = AnalysisUtilities.getInstance().readTreeFromString( "(ROOT (S " + nountree + " (VP " + copula + " " + appositivetree + ") (. .)))"); addQuotationMarksIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractAppositives: " + newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromAppositive", 1.0); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * //from w w w. j a v a2 s . c o m */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deeperCopy(); relclause = relclause.deeperCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deeperCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deeperCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && missingArgumentTree.getChild(i).label().toString().equals(",")) { missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = AnalysisUtilities.getInstance().readTreeFromString(tmp); ppRelativeClause = true; } if (subjectMovement) { //subject newparenttree.addChild(newparenttree.indexOf(verbtree), missingArgumentTree); } else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.indexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); AnalysisUtilities.addPeriodIfNeeded(newTree); if (GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: " + newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }