Usage examples for the label() method of edu.stanford.nlp.trees.Tree
@Override
public Label label()
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., Lincoln, the 16th president, was tall. -> Lincoln was the 16th president. * The meeting, in 1984, was important. -> The meeting was in 1984. *///from ww w . jav a 2 s . co m private void extractAppositives(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "NP < (NP=noun !$-- NP $+ (/,/ $++ NP|PP=appositive !$ CC|CONJP)) " + " >> (ROOT <<# /^VB.*/=mainverb) "; //extract the main verb to capture the verb tense matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree verbtree = matcher.getNode("mainverb"); Tree nountree = matcher.getNode("noun").deepCopy(); Tree appositivetree = matcher.getNode("appositive"); makeDeterminerDefinite(nountree); //if both are proper nouns, do not extract because this is not an appositive(e.g., "Pittsburgh, PA") /*if(nountree.headPreTerminal(this.hf).label().toString().equals("NNP") && appositivetree.headPreTerminal(this.hf).label().toString().equals("NNP")) { continue; }*/ //make a new tree for a copula sentence with the noun and appositive String pos = verbtree.label().toString(); String copula; if (pos.equals("VBD")) { if (isPlural(nountree)) { copula = "(VBD were)"; } else { copula = "(VBD was)"; } } else { if (isPlural(nountree)) { copula = "(VBD are)"; } else { copula = "(VBD is)"; } } Tree newTree = QuestionUtil.readTreeFromString( "(ROOT (S " + nountree + " (VP " + copula + " " + appositivetree + ") (. .)))"); addQuotationMarksIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractAppositives: "+ newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromAppositive", 1.0); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade. * Walking to the store, John saw Susan -> John was walking to the store. * // ww w . j a v a 2s . c o m * NOTE: This method produces false positives for sentences like, * "Broadly speaking, the project was successful." * where the participial phrase does not modify the subject. * * @param extracted * @param input */ private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company... + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan. + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan. 
+ " <<# /^VB.*$/=maintense "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree nountree = matcher.getNode("subj").deepCopy(); Tree vptree = matcher.getNode("modifier"); Tree verb = matcher.getNode("tense"); makeDeterminerDefinite(nountree); if (vptree.label().toString().equals("PP")) vptree.label().setValue("VP"); String verbPOS = findTense(matcher.getNode("maintense")); if (vptree == null || nountree == null) return; String newTreeStr; if (verb.label().toString().equals("VBG")) { //for present partcipials, change the tense to the tense of the main verb //e.g., walking to the store -> walked to the store String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = vptree.objectIndexOf(verb); vptree = vptree.deepCopy(); vptree.removeChild(verbIndex); vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString() + " (. .)))"; } else { //for past participials, add a copula //e.g., John, exhausted, -> John was exhausted //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out) String auxiliary; if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) { if (isPlural(nountree)) auxiliary = "(VBD were)"; else auxiliary = "(VBD was)"; } else { if (isPlural(nountree)) auxiliary = "(VB are)"; else auxiliary = "(VBZ is)"; } newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. 
.)))"; } Tree newTree = QuestionUtil.readTreeFromString(newTreeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0); extracted.add(newTreeWithFeatures); } }
From source file:edu.cmu.ark.Question.java
License:Open Source License
public List<Tree> findLogicalWordsAboveIntermediateTree() { List<Tree> res = new ArrayList<Tree>(); Tree pred = intermediateTree.getChild(0).headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); String lemma = AnalysisUtilities.getInstance().getLemma(pred.yield().toString(), pred.label().toString()); String tregexOpStr;//www . j a v a 2 s . co m TregexPattern matchPattern; TregexMatcher matcher; Tree sourcePred = null; for (Tree leaf : sourceTree.getLeaves()) { Tree tmp = leaf.parent(sourceTree); String sourceLemma = AnalysisUtilities.getInstance().getLemma(leaf.label().toString(), tmp.label().toString()); if (sourceLemma.equals(lemma)) { sourcePred = tmp; break; } } tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(sourceTree); Tree command; while (matcher.find() && sourcePred != null) { command = matcher.getNode("command"); if (AnalysisUtilities.cCommands(sourceTree, command, sourcePred) && command.parent(sourceTree) != sourcePred.parent(sourceTree)) { res.add(command); } } return res; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/**
 * Prefixes the label of every node that the pattern binds to the name "unmovable"
 * with "UNMOVABLE-". The tree is modified in place.
 *
 * Note: this relabels nodes directly through Tregex matches; the original author's
 * note says an earlier version used a Tsurgeon operation for the same purpose.
 *
 * @param inputTree   tree whose matching nodes are relabeled
 * @param tregexOpStr Tregex pattern; it must name the target node "unmovable"
 */
private void markNodesAsUnmovableUsingPattern(Tree inputTree, String tregexOpStr) {
    TregexMatcher matcher = TregexPatternFactory.getPattern(tregexOpStr).matcher(inputTree);
    while (matcher.find()) {
        Tree node = matcher.getNode("unmovable");
        String oldLabel = node.label().toString();
        node.label().setValue("UNMOVABLE-" + oldLabel);
    }
}
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * This method decomposes the main verb of the sentence * for yes-no questions and WH questions where the answer * phrase is not the subject./*from w ww . j a va 2s . co m*/ * * e.g., I met John -> I did meet John. * (which would later become "Who did I meet?") * */ private Tree decomposePredicate(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); String tregexOpStr; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); TregexPattern matchPattern; TsurgeonPattern p; TregexMatcher matcher; Tree tmpNode; //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < /VB.?/=tensedverb !< (VP < /VB.?/)))"; //tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did)))"; //This rather complex rule identifies predicates to decompose. //There are two cases, separated by a disjunction. //One could break it apart into separate rules to make it simpler... // //The first part of the disjunction //(i.e., < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) ) //is for handling basic sentences //(e.g., John bought an apple -> What did John buy?), //sentences with auxiliaries //(e.g., John had bought an apple -> Had John bought an apple?), //and sentences with participial phrases //(e.g., John seemed finished with the apple -> What did John seem finished with?). // //The second part of the disjunction //(i.e., < /VB.?/=tensedverb !< VP ) //is for handling sentences that have predicates //that can also be auxiliaries (e.g., I have a book). //In these cases, we do want to decompose have, has, had, etc. //(e.g., What did I have?) 
tregexOpStr = "ROOT < (S=mainclause < (VP=predphrase [ < (/VB.?/=tensedverb !< is|was|were|am|are|has|have|had|do|does|did) | < /VB.?/=tensedverb !< VP ]))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { Tree subtree = matcher.getNode("tensedverb"); String lemma = AnalysisUtilities.getInstance().getLemma(subtree.getChild(0).label().toString(), subtree.label().toString()); String aux = getAuxiliarySubtree(subtree); if (!lemma.equals("be")) { ps.add(Tsurgeon.parseOperation("replace predphrase (MAINVP=newpred PLACEHOLDER)")); ps.add(Tsurgeon.parseOperation("insert predphrase >-1 newpred")); ps.add(Tsurgeon.parseOperation("insert (VBLEMMA PLACEHOLDER) $+ tensedverb")); ps.add(Tsurgeon.parseOperation("delete tensedverb")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, copyTree); matchPattern = TregexPatternFactory.getPattern("MAINVP=mainvp"); matcher = matchPattern.matcher(copyTree); matcher.find(); tmpNode = matcher.getNode("mainvp"); tmpNode.removeChild(0); tmpNode.label().setValue("VP"); tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(aux)); matchPattern = TregexPatternFactory.getPattern("VBLEMMA=vblemma"); matcher = matchPattern.matcher(copyTree); matcher.find(); tmpNode = matcher.getNode("vblemma"); tmpNode.removeChild(0); tmpNode.label().setValue("VB"); tmpNode.addChild(0, AnalysisUtilities.getInstance().readTreeFromString(lemma)); } } if (GlobalProperties.getDebug()) System.err.println("decomposePredicate: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/**
 * Returns, as a bracketed tree string, the third-person singular present (VBZ) form
 * of a tensed verb. A verb tagged VBD is returned unchanged.
 *
 * This only affects the output when generating from sentences where
 * first and second person pronouns are the subject.
 *
 * E.g.,
 * Affects:
 * I walk -> Who walks? (rather than, Who walk?)
 *
 * Does not affect:
 * He walks -> Who walks?
 *
 * @param tensedVerbSubtree preterminal of the tensed verb (its first child is the word)
 * @return the subtree's own string for VBD, otherwise "(VBZ <surface form>)"
 */
private String getSingularFormSubtree(Tree tensedVerbSubtree) {
    String lemma = AnalysisUtilities.getInstance().getLemma(
            tensedVerbSubtree.getChild(0).label().toString(),
            tensedVerbSubtree.label().toString());

    // Past tense has no separate singular form, so it is passed through as is.
    if (tensedVerbSubtree.value().equals("VBD")) {
        return tensedVerbSubtree.toString();
    }
    return "(VBZ " + AnalysisUtilities.getInstance().getSurfaceForm(lemma, "VBZ") + ")";
}
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Also mark UNMOVABLE phrases as possible answers * * @param inputTree//from www. ja v a 2s . c om * @return */ private Tree markPossibleAnswerPhrasesUnmovable(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); numWHPhrasesUnmovable = 0; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; Tree tmp; //find and mark the main clause subject tregexOpStr = "ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))"; ps.add(Tsurgeon.parseOperation("relabel subj NP-0")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { tmp = matcher.getNode("subj"); tmp.label().setValue(tmp.label().toString() + "-0"); numWHPhrasesUnmovable++; } //noun phrases tregexOpStr = "ROOT=root << UNMOVABLE-NP=np"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); while (matcher.find()) { tmp = matcher.getNode("np"); tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrasesUnmovable); numWHPhrasesUnmovable++; } if (GlobalProperties.getDebug()) System.err.println("markPossibleAnswerPhrases: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.QuestionTransducer.java
License:Open Source License
/** * Marks possible answer phrase nodes with indexes for later processing. * This step might be easier with the Stanford Parser API's Tree class methods * than with Tsurgeon...//from w w w . j av a 2s . co m * * @param inputTree * @return */ private Tree markPossibleAnswerPhrases(Tree inputTree) { Tree copyTree = inputTree.deeperCopy(); numWHPhrases = 0; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; Tree tmp; //find and mark the main clause subject tregexOpStr = "ROOT < (S < (NP|SBAR=subj $+ /,/ !$++ NP|SBAR))"; ps.add(Tsurgeon.parseOperation("relabel subj NP-0")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); if (matcher.find()) { tmp = matcher.getNode("subj"); tmp.label().setValue(tmp.label().toString() + "-0"); numWHPhrases++; } //noun phrases tregexOpStr = "ROOT=root << NP|PP|SBAR=np"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(copyTree); while (matcher.find()) { tmp = matcher.getNode("np"); tmp.label().setValue(tmp.label().toString() + "-" + numWHPhrases); numWHPhrases++; } if (GlobalProperties.getDebug()) System.err.println("markPossibleAnswerPhrases: " + copyTree.toString()); return copyTree; }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. */// w ww . j av a 2 s. c o m private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()).label() .toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(AnalysisUtilities.getInstance().getHeadFinder()); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = AnalysisUtilities.getInstance().getLemma( verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = AnalysisUtilities.getInstance().getSurfaceForm(verbLemma, newVerbPOS); 
} tmpIndex = verbPreterminal.parent(uncle).indexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, AnalysisUtilities.getInstance().readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { if (GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: " + copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy);/*from w w w .jav a 2 s . c o m*/ Tree gparent = parent.parent(copy); int parentIdx = gparent.indexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }