List of usage examples for edu.stanford.nlp.trees Tree label
@Override
public Label label()
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static void downcaseFirstToken(Tree inputTree) { Tree firstWordTree = inputTree.getLeaves().get(0); if (firstWordTree == null) return;// ww w .ja va 2 s.c o m Tree preterm = firstWordTree.parent(inputTree); String firstWord = firstWordTree.yield().toString(); if (!preterm.label().toString().matches("^NNP.*") && !firstWord.equals("I")) { //if(firstWord.indexOf('-') == -1 && !firstWord.equals("I")){ firstWord = firstWord.substring(0, 1).toLowerCase() + firstWord.substring(1); firstWordTree.label().setValue(firstWord); } //if(QuestionTransducer.DEBUG) System.err.println("downcaseFirstToken: "+inputTree.toString()); }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public static void upcaseFirstToken(Tree inputTree) { Tree firstWordTree = inputTree.getLeaves().get(0); if (firstWordTree == null) return;// w w w. j a va 2 s . c o m String firstWord = firstWordTree.yield().toString(); firstWord = firstWord.substring(0, 1).toUpperCase() + firstWord.substring(1); firstWordTree.label().setValue(firstWord); //if(QuestionTransducer.DEBUG) System.err.println("upcaseFirstToken: "+inputTree.toString()); }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/** * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree. * * @param inputTree/*from w w w.j av a2 s .co m*/ */ public void normalizeTree(Tree inputTree) { inputTree.label().setFromString("ROOT"); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; TregexMatcher matcher; tregexOpStr = "/\\-NONE\\-/=emptynode"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); ps.add(Tsurgeon.parseOperation("prune emptynode")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, inputTree); Label nonterminalLabel; tregexOpStr = "/.+\\-.+/=nonterminal < __"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); while (matcher.find()) { nonterminalLabel = matcher.getNode("nonterminal"); if (nonterminalLabel == null) continue; nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value())); } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
public String getContentWords(String sentence) { String content = ""; ParseResult result = this.parseSentence(sentence); List<Tree> leaves = result.parse.getLeaves(); for (int i = 0; i < leaves.size(); i++) { String word = leaves.get(i).label().toString(); Tree preterm = leaves.get(i).parent(result.parse); String pos = preterm.label().toString(); if (pos.equals("DT") || pos.equals("CD") || pos.equals("IN")) // we don't want determiner, number and prepositions. continue; content += " " + word; }/*from w ww. ja v a 2 s . c o m*/ if (content.length() > 0) return content.substring(1); else return sentence; }
From source file:edu.cmu.ark.nlp.question.Question.java
License:Open Source License
public List<Tree> findLogicalWordsAboveIntermediateTree() { List<Tree> res = new ArrayList<Tree>(); Tree pred = intermediateTree.getChild(0).headPreTerminal(this.hf); String lemma = QuestionUtil.getLemma(pred.yield().toString(), pred.label().toString()); String tregexOpStr;//from ww w . j av a 2 s .c om TregexPattern matchPattern; TregexMatcher matcher; Tree sourcePred = null; for (Tree leaf : sourceTree.getLeaves()) { Tree tmp = leaf.parent(sourceTree); String sourceLemma = QuestionUtil.getLemma(leaf.label().toString(), tmp.label().toString()); if (sourceLemma.equals(lemma)) { sourcePred = tmp; break; } } tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(sourceTree); Tree command; while (matcher.find() && sourcePred != null) { command = matcher.getNode("command"); if (QuestionUtil.cCommands(sourceTree, command, sourcePred) && command.parent(sourceTree) != sourcePred.parent(sourceTree)) { res.add(command); } } return res; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and Mary like Bill. -> John LIKES Bill. Mary LIKES Bill. * John and I like Bill -> John LIKES Bill. I LIKE Bill. * John and I are old. -> I IS old. John IS old. *//*from w w w.j a v a 2s .co m*/ private void correctTense(Tree subject, Tree clause) { int tmpIndex; //correct verb tense when modifying subjects for (Tree uncle : clause.getChildrenAsList()) { String newVerbPOS = null; Tree verbPreterminal = null; boolean needToModifyVerb = false; //if the node is a subject (i.e., its uncle is a VP), then check //to see if its tense needs to be changed String headPOS = subject.headPreTerminal(this.hf).label().toString(); if (uncle.label().toString().equals("VP") && !headPOS.endsWith("S")) { verbPreterminal = uncle.headPreTerminal(this.hf); //original main verb was plural but the conjoined subject word is singular //e.g., John (and Mary) like Bill. -> John like Bill. if ((verbPreterminal.label().toString().equals("VB") || verbPreterminal.label().toString().equals("VBP"))) { //the parser confuses VBP with VB if (subject.yield().toString().equals("I") || subject.yield().toString().equals("you")) { newVerbPOS = "VBP"; } else { newVerbPOS = "VBZ"; } needToModifyVerb = true; } else if (verbPreterminal.label().toString().equals("VBD")) { newVerbPOS = "VBD"; needToModifyVerb = true; } } //if needed, change the tense of the verb if (needToModifyVerb) { String verbLemma = QuestionUtil.getLemma(verbPreterminal.getChild(0).label().toString(), verbPreterminal.label().toString()); String newVerb; //special cases if (verbLemma.equals("be") && newVerbPOS.equals("VBD")) { if (subject.label().toString().endsWith("S")) newVerb = "were"; else newVerb = "was"; } else if (verbLemma.equals("be") && subject.yield().toString().equals("I") && newVerbPOS.equals("VBP")) { newVerb = "am"; } else { //default newVerb = this.conjugator.getSurfaceForm(verbLemma, newVerbPOS); } tmpIndex = verbPreterminal.parent(uncle).objectIndexOf(verbPreterminal); Tree verbParent = verbPreterminal.parent(uncle); verbParent.removeChild(tmpIndex); verbParent.addChild(tmpIndex, QuestionUtil.readTreeFromString("(" + newVerbPOS + " " + newVerb + ")")); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { //if(GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: "+copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy);//from w w w. j a va2 s . c om Tree gparent = parent.parent(copy); int parentIdx = gparent.objectIndexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * //from w ww . j a va 2 s .c om * John studied, hoping to get a good grade. -> John hoped to get a good grade. * * @param extracted * @param input */ private void extractVerbParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "S=sub $- /,/ !< NP < (VP=participial < VBG=verb) " + " >+(VP) (S|SINV < NP=subj) " + " >> (ROOT <<# /VB.*/=tense) "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { String verbPOS = findTense(matcher.getNode("tense")); Tree p = matcher.getNode("participial").deepCopy(); Tree verb = matcher.getNode("verb"); String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = p.objectIndexOf(verb); p.removeChild(verbIndex); p.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); String treeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + p.toString() + " (. .)))"; Tree newTree = QuestionUtil.readTreeFromString(treeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromVerbParticipial", 1.0); if (this.getComputeFeatures) System.err.println("extractVerbParticipialModifiers: " + newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private String findTense(Tree node) { if (node.label().equals("MD")) { if (node.yield().toString().matches("^(would|could)$")) { return "VBD"; }//from w w w. j a va2 s.co m } return node.label().toString(); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void extractComplementClauses(Collection<Question> extracted, Question input) { Tree subord;/*from w w w . ja v a 2 s . c o m*/ String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; //TODO should also address infinitive complements tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " !> NP|PP " + //not part of a noun phrase or PP (other methods for those) " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); Tree verb = matcher.getNode("verb"); String verbLemma = QuestionUtil.getLemma(verb.yield().toString(), verb.label().toString()); if (!verbImpliesComplement(verbLemma)) { continue; } newTree.addChild(subord.deepCopy()); QuestionUtil.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0); //if(GlobalProperties.getDebug()) System.err.println("extractComplementClauses: "+newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }