Usage examples for edu.stanford.nlp.trees.Tree.deepCopy()
public Tree deepCopy()
From source file:CollapseUnaryTransformer.java
License:Apache License
/**
 * Builds a new tree from {@code tree} in which chains of unary (single-child)
 * internal nodes below each kept node are skipped.
 *
 * <p>Leaves and preterminals are returned as deep copies. For any other node,
 * a new label is created from the node's own label factory, the method descends
 * through its children for as long as there is exactly one child and that child
 * is not a leaf, and the children found at the end of that descent are
 * transformed recursively and attached to a new node.
 *
 * @param tree the tree to transform
 * @return a newly built tree; the input tree is not modified by this method
 */
public Tree transformTree(Tree tree) {
  // Nothing to collapse at the bottom of the tree: hand back an independent copy.
  if (tree.isPreTerminal() || tree.isLeaf()) {
    return tree.deepCopy();
  }

  Label rootLabel = tree.label().labelFactory().newLabel(tree.label());

  // Walk down the unary chain: keep descending while the only child is an internal node.
  Tree[] kids = tree.children();
  while (kids.length == 1 && !kids[0].isLeaf()) {
    kids = kids[0].children();
  }

  // Recurse into whatever children remain once the chain has been skipped.
  List<Tree> collapsedKids = Generics.newArrayList();
  for (int i = 0; i < kids.length; i++) {
    collapsedKids.add(transformTree(kids[i]));
  }

  return tree.treeFactory().newTreeNode(rootLabel, collapsedKids);
}
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
private List<String> decompose(String documentText) { List<Tree> trees = new ArrayList<Tree>(); for (String sentence : AnalysisUtilities.getSentences(documentText)) { trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse); }//from w w w . j a v a 2s . com List<String> result = new ArrayList<String>(); for (Tree t : trees) { TregexPattern p = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) "); TregexMatcher m = p.matcher(t); while (m.find()) { Tree np = m.getNode("np"); Tree vp = m.getNode("vp"); Tree np2 = np.deepCopy(); TregexPattern p2 = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)"); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); ps.add(Tsurgeon.parseOperation("prune sbarq")); ps.add(Tsurgeon.parseOperation("prune c1")); ps.add(Tsurgeon.parseOperation("prune c2")); Tsurgeon.processPattern(p2, Tsurgeon.collectOperations(ps), np2); np = np2; Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))"); result.add(AnalysisUtilities.orginialSentence(newTree.yield())); } } return result; }
From source file:edu.albany.cubism.sentiment.Sandbox.java
public void addMapping(int recursion_level, Tree tree) { if (!this.recusion_mapping.containsKey(recursion_level)) { this.recusion_mapping.put(recursion_level, new ArrayList()); this.recusion_mapping.get(recursion_level).add(tree.deepCopy()); } else {//from w w w . ja va 2s .c o m this.recusion_mapping.get(recursion_level).add(tree.deepCopy()); } }
From source file:edu.cmu.ark.nlp.question.QuestionUtil.java
License:Open Source License
public static String getCleanedUpYield(Tree inputTree) { Tree copyTree = inputTree.deepCopy(); //if(GlobalProperties.getDebug()) System.err.println("yield:"+copyTree.toString()); return cleanUpSentenceString(copyTree.yield().toString()); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * Primary method for simplifying sentences. * Takes an input sentence in the form of a tree * and returns a list of Question objects, which help * to track what operations were performed. * /*from w w w . j a v a 2 s . c o m*/ * @param sentence * @param fixCapitalization * @return */ public List<Question> simplify(Tree sentence, boolean fixCapitalization) { List<Question> treeList = new ArrayList<Question>(); numSimplifyHelperCalls = 0; //if(GlobalProperties.getDebug()) System.err.println("simplify input:"+ sentence); //add original tree Question orig = new Question(this.props); orig.setSourceTree(sentence); orig.setIntermediateTree(sentence.deepCopy()); //if the input contains any UCP or other odd nodes, then just return the original sentence //such nodes indicate that the parse failed, or at least that our system will likely produce bad output if (uglyParse(sentence)) { treeList.add(orig); return treeList; } QuestionUtil.downcaseFirstToken(orig.getIntermediateTree()); //treeSet.add(originalWithFeatures); Question current = orig.deeperCopy(); List<Question> extracted = new ArrayList<Question>(); //for each nested element in the INPUT... (nested elements include finite verbs, non-restrictive relative clauses, appositives, conjunction of VPs, conjunction of clauses, participial phrases) //transform the nested element into a declarative sentence (preserving tense), removing conjunctions, etc. 
extracted.add(current); extractSubordinateClauses(extracted, orig); extractNounParticipialModifiers(extracted, orig); extractNonRestrictiveRelativeClauses(extracted, orig); extractAppositives(extracted, orig); extractVerbParticipialModifiers(extracted, orig); //extractWITHPartcipialPhrases(extracted, orig); //too rare to worry about if (extractFromVerbComplements) extractComplementClauses(extracted, orig); for (Question q : extracted) { addAllIfNovel(treeList, simplifyHelper(q)); } //make sure there is at least one output if (treeList.size() == 0) { addIfNovel(treeList, current); } if (fixCapitalization) { //upcase the first tokens of all output trees. for (Question q : treeList) { QuestionUtil.upcaseFirstToken(q.getIntermediateTree()); } } //clean up the output for (Question q : treeList) { QuestionUtil.removeExtraQuotes(q.getIntermediateTree()); } if (this.getComputeFeatures) { Question t = treeList.get(0); boolean fromMainClause = true; String[] nestedExtractionFeatureNames = { "extractedFromParticipial", "extractedFromVerbParticipial", "extractedFromFiniteClause", "extractedFromSubordinateClause", "extractedFromComplementClause", "extractedFromAppositive", "extractedFromRelativeClause", "extractedFromParticipial", "extractedFromNounParticipial" }; for (String name : nestedExtractionFeatureNames) { if (t.getFeatureValue(name) != 0) { fromMainClause = false; break; } } if (fromMainClause) t.setFeatureValue("extractedFromLeftMostMainClause", 1.0); //t.setFeatureValue("extractedFromLeftMostMainClause", 1.0); } //if(GlobalProperties.getDebug()) System.err.println("simplifyHelperCalls:\t"+numSimplifyHelperCalls); if (mainClauseOnly) { Question tmp = treeList.get(0); treeList.clear(); if (tmp.getFeatureValue("extractedFromLeftMostMainClause") == 1.0) treeList.add(tmp); } return treeList; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., As John slept, I studied. -> John slept. * //from www . ja v a 2 s . com */ private void extractSubordinateClauses(Collection<Question> extracted, Question input) { Tree subord; String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = " SBAR [ > VP < IN | > S|SINV ] " + //not a complement " !< (IN < if|unless|that)" + //not a conditional antecedent " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " >S|SINV|VP "; //not part of a noun phrase or PP (other methods for those) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); newTree.addChild(subord.deepCopy()); QuestionUtil.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromSubordinateClause", 1.0); //if(GlobalProperties.getDebug()) System.err.println("extractSubordinateClauses: "+newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void extractComplementClauses(Collection<Question> extracted, Question input) { Tree subord; String tregexOpStr;/* w ww. j av a 2 s. com*/ TregexPattern matchPattern; TregexMatcher matcher; //TODO should also address infinitive complements tregexOpStr = "SBAR " + " < (S=sub !< (VP < VBG)) " + //+ //not a participial phrase " !> NP|PP " + //not part of a noun phrase or PP (other methods for those) " [ $- /^VB.*/=verb | >+(SBAR) (SBAR $- /^VB.*/=verb) ] "; //complement of a VP (follows the verb) matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); subord = matcher.getNode("sub"); Tree verb = matcher.getNode("verb"); String verbLemma = QuestionUtil.getLemma(verb.yield().toString(), verb.label().toString()); if (!verbImpliesComplement(verbLemma)) { continue; } newTree.addChild(subord.deepCopy()); QuestionUtil.addPeriodIfNeeded(newTree); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromFiniteClause", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromComplementClause", 1.0); //if(GlobalProperties.getDebug()) System.err.println("extractComplementClauses: "+newTree.toString()); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, who is a friend of mine, likes Susan. -> John is a friend of mine. * //from ww w.j ava2s . co m */ private void extractNonRestrictiveRelativeClauses(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; TregexMatcher matcherclause; tregexOpStr = "NP=np < (SBAR=sbar [ < (WHADVP=wherecomp < (WRB < where)) " + " | < (WHNP !< /WP\\$/) " + " | < (WHNP=possessive < /WP\\$/)" //John, whose car was + " | < (WHPP < IN|TO=preposition) ] $-- NP $- /,/ " + " < S=relclause !< WHADJP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); //iterate over all the relative clauses in the input //and create an output sentence for each one. while (matcher.find()) { Tree missingArgumentTree = matcher.getNode("np"); Tree relclause = matcher.getNode("relclause"); if (missingArgumentTree == null || relclause == null) continue; missingArgumentTree = missingArgumentTree.deepCopy(); relclause = relclause.deepCopy(); Tree possessive = matcher.getNode("possessive"); Tree sbar = matcher.getNode("sbar").deepCopy(); makeDeterminerDefinite(missingArgumentTree); if (possessive != null) { possessive = possessive.deepCopy(); possessive.removeChild(0); String newTree = "(NP (NP " + missingArgumentTree.toString() + " (POS 's))"; for (int i = 0; i < possessive.numChildren(); i++) newTree += possessive.getChild(i).toString() + " "; newTree += ")"; missingArgumentTree = QuestionUtil.readTreeFromString(newTree); } //remove the relative clause and the commas surrounding it from the missing argument tree for (int i = 0; i < missingArgumentTree.numChildren(); i++) { if (missingArgumentTree.getChild(i).equals(sbar)) { //remove the relative clause missingArgumentTree.removeChild(i); //remove the comma after the relative clause if (i < missingArgumentTree.numChildren() && missingArgumentTree.getChild(i).label().toString().equals(",")) { 
missingArgumentTree.removeChild(i); } //remove the comma before the relative clause if (i > 0 && missingArgumentTree.getChild(i - 1).label().toString().equals(",")) { missingArgumentTree.removeChild(i - 1); i--; } i--; } } //put the noun in the clause at the topmost place with an opening for a noun. //Note that this may mess up if there are noun phrase adjuncts like "The man I met Tuesday". //specifically: //the parent of the noun can be either a clause (S) as in "The man who met me" //or a verb phrase as in "The man who I met". //for verb phrases, add the noun to the end since it will be an object. //for clauses, add the noun to the beginning since it will be the subject. tregexOpStr = "S|VP=newparent !< NP < (VP=verb !< TO !$ TO)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); boolean subjectMovement = true; if (!matcherclause.find()) { tregexOpStr = "VP=newparent !< VP < /VB.*/=verb !>> (S !< NP) !<< (VP !< VP !< NP)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcherclause = matchPattern.matcher(relclause); subjectMovement = false; } //reset (so the first match isn't skipped) matcherclause = matchPattern.matcher(relclause); if (matcherclause.find()) { Tree newparenttree = matcherclause.getNode("newparent"); Tree verbtree = matcherclause.getNode("verb"); boolean ppRelativeClause = false; if (matcher.getNode("wherecomp") != null) { String tmp = "(PP (IN at) " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; subjectMovement = false; } else if (matcher.getNode("preposition") != null) { String tmp = "(PP (IN " + matcher.getNode("preposition").yield().toString() + ") " + missingArgumentTree.toString() + ")"; missingArgumentTree = QuestionUtil.readTreeFromString(tmp); ppRelativeClause = true; } if (subjectMovement) { //subject newparenttree.addChild(newparenttree.objectIndexOf(verbtree), missingArgumentTree); } 
else { // newparentlabel is VP if (ppRelativeClause) newparenttree.addChild(newparenttree.numChildren(), missingArgumentTree); else newparenttree.addChild(newparenttree.objectIndexOf(verbtree) + 1, missingArgumentTree); } //create a new tree with punctuation Tree newTree = factory.newTreeNode("ROOT", new ArrayList<Tree>()); newTree.addChild(relclause); QuestionUtil.addPeriodIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractRelativeClauses: "+ newTree.toString()); addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromRelativeClause", 1.0); addIfNovel(extracted, newTreeWithFeatures); } } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John, hoping to get a good grade, studied. -> John hoped to get a good grade. * Walking to the store, John saw Susan -> John was walking to the store. * // w ww . j av a 2 s . c om * NOTE: This method produces false positives for sentences like, * "Broadly speaking, the project was successful." * where the participial phrase does not modify the subject. * * @param extracted * @param input */ private void extractNounParticipialModifiers(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "ROOT < (S " + " [ << (NP < (NP=subj $++ (/,/ $+ (VP=modifier <# VBN|VBG|VP=tense )))) " //modifiers that appear after nouns + " | < (S !< NP|SBAR < (VP=modifier <# VBN|VBG|VP=tense) $+ (/,/ $+ NP=subj)) " //modifiers before the subject. e.g., Founded by John, the company... + " | < (SBAR < (S !< NP|SBAR < (VP=modifier <# VBN|VBG=tense)) $+ (/,/ $+ NP=subj)) " //e.g., While walking to the store, John saw Susan. + " | < (PP=modifier !< NP <# VBG=tense $+ (/,/ $+ NP=subj)) ] ) " // e.g., Walking to the store, John saw Susan. 
+ " <<# /^VB.*$/=maintense "; //tense determined by top-most verb matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { Tree nountree = matcher.getNode("subj").deepCopy(); Tree vptree = matcher.getNode("modifier"); Tree verb = matcher.getNode("tense"); makeDeterminerDefinite(nountree); if (vptree.label().toString().equals("PP")) vptree.label().setValue("VP"); String verbPOS = findTense(matcher.getNode("maintense")); if (vptree == null || nountree == null) return; String newTreeStr; if (verb.label().toString().equals("VBG")) { //for present partcipials, change the tense to the tense of the main verb //e.g., walking to the store -> walked to the store String verbLemma = QuestionUtil.getLemma(verb.getChild(0).label().toString(), verb.label().toString()); String newVerb = this.conjugator.getSurfaceForm(verbLemma, verbPOS); int verbIndex = vptree.objectIndexOf(verb); vptree = vptree.deepCopy(); vptree.removeChild(verbIndex); vptree.addChild(verbIndex, QuestionUtil.readTreeFromString("(" + verbPOS + " " + newVerb + ")")); newTreeStr = "(ROOT (S " + matcher.getNode("subj").toString() + " " + vptree.toString() + " (. .)))"; } else { //for past participials, add a copula //e.g., John, exhausted, -> John was exhausted //(or for conjunctions, just add the copula---kind of a hack to make the moby dick sentence work out) String auxiliary; if (verbPOS.equals("VBP") || verbPOS.equals("VBD")) { if (isPlural(nountree)) auxiliary = "(VBD were)"; else auxiliary = "(VBD was)"; } else { if (isPlural(nountree)) auxiliary = "(VB are)"; else auxiliary = "(VBZ is)"; } newTreeStr = "(ROOT (S " + nountree + " (VP " + auxiliary + " " + vptree + ") (. 
.)))"; } Tree newTree = QuestionUtil.readTreeFromString(newTreeStr); correctTense(newTree.getChild(0).getChild(0), newTree.getChild(0)); addQuotationMarksIfNeeded(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractNounParticipialModifiers: "+ newTree.toString()); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromParticipial", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromNounParticipial", 1.0); extracted.add(newTreeWithFeatures); } }
From source file:elkfed.coref.mentions.Mention.java
License:Apache License
/** * Takes the sentence tree of this mention and replaces the unlexicalized * tokens in the leaves with their discourse ids. Usefull to derive spans * based on syntax./*from ww w .j a va 2s. c om*/ * * @author samuel * @param sentTree * original sentence tree * @return Tree with discourse ids in leaves */ private Tree getSentenceTreeWithDiscurseIdsInLeaves(Tree sentTree) { Tree result = sentTree.deepCopy(); List<Tree> leaves = result.getLeaves(); String[] discElements = getSentenceDiscourseElements(); String[] discIDs = getSentenceDiscourseElementIDs(); if (discElements.length != leaves.size() || discIDs.length != leaves.size()) { throw new RuntimeException("SentenceTree leaves and discourse elements do not match for mention " + this.getMarkable().getID()); } /* * sometimes tueba-d/z punctuations in the parse tree are out of context * appended to the root of the tree hence the corrector part * * yv: ... and we need to unescape those words for the semeval2mmax * output. duh. */ int corrector = 0; for (int i = 0; i < discElements.length; i++) { String leafWord = leaves.get(i - corrector).value().toLowerCase(); leafWord = leafWord.replaceAll("-lrb-", "("); leafWord = leafWord.replaceAll("-rrb-", ")"); leafWord = leafWord.replaceAll("\\\\", ""); String discWord = discElements[i].replaceAll("\\\\", ""); if (discWord.equalsIgnoreCase(leafWord)) { leaves.get(i - corrector).setValue(discIDs[i]); } else { System.err.format("non-matching: %s vs %s\n", leafWord, discElements[i]); corrector++; } } return result; }