List of usage examples for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon processPatternsOnTree
@SuppressWarnings("StringContatenationInLoop") public static Tree processPatternsOnTree(List<Pair<TregexPattern, TsurgeonPattern>> ops, Tree t)
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/**
 * Appends a period node to the main clause of a tree when it lacks one.
 *
 * <p>The tree is searched for a {@code ROOT} whose immediate {@code S} child has no
 * child matching {@code /\./}. If such a clause is found, a {@code (. .)} node is
 * inserted as its last child. The tree is modified in place.
 *
 * @param input the parse tree to check and, if needed, modify
 */
public static void addPeriodIfNeeded(Tree input) {
    TregexPattern missingPeriod = TregexPatternFactory.getPattern("ROOT < (S=mainclause !< /\\./)");
    if (!missingPeriod.matcher(input).find()) {
        // Nothing to do: no main clause without final punctuation was found.
        return;
    }

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("insert (. .) >-1 mainclause"));

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(missingPeriod, Tsurgeon.collectOperations(edits)));
    Tsurgeon.processPatternsOnTree(ops, input);
}
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/** * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree. * * @param inputTree/*www .j av a 2 s .c om*/ */ public void normalizeTree(Tree inputTree) { inputTree.label().setFromString("ROOT"); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; TregexMatcher matcher; tregexOpStr = "/\\-NONE\\-/=emptynode"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); ps.add(Tsurgeon.parseOperation("prune emptynode")); matchPattern = TregexPatternFactory.getPattern(tregexOpStr); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, inputTree); Label nonterminalLabel; tregexOpStr = "/.+\\-.+/=nonterminal < __"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(inputTree); while (matcher.find()) { nonterminalLabel = matcher.getNode("nonterminal"); if (nonterminalLabel == null) continue; nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value())); } }
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/**
 * Removes unbalanced quotation marks from a tree
 * (a hack due to PTB conventions by which quote marks are not in the same constituent).
 *
 * <p>An opening-quote node that is not followed by a closing-quote node, or a
 * closing-quote node that is not preceded by an opening-quote node, is pruned.
 * The tree is modified in place.
 *
 * @param input the tree to clean up
 */
public static void removeExtraQuotes(Tree input) {
    TregexPattern unmatchedQuote = TregexPatternFactory.getPattern(
            "ROOT [ << (``=quote < `` !.. ('' < '')) | << (''=quote < '' !,, (`` < ``)) ] ");

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("prune quote"));
    TsurgeonPattern pruneQuote = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(unmatchedQuote, pruneQuote));
    Tsurgeon.processPatternsOnTree(ops, input);
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * /* www .j ava2 s . c o m*/ * e.g., However, John did not study. -> John did not study. * * @param q * @return */ private boolean removeClauseLevelModifiers(Question q) { List<Pair<TregexPattern, TsurgeonPattern>> ops; String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; List<TsurgeonPattern> ps; boolean modified = false; //remove subordinate clauses and various phrases //leave conditional antecedents (i.e., with "if" or "unless" as complementizers. punt on "even if") tregexOpStr = "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) !$ `` $++ NP=subject))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); TregexMatcher matcher = matchPattern.matcher(q.getIntermediateTree()); if (matcher.find()) { ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "ROOT=root < (S=mainclause < (/[,:]/=comma $ (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject)))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune comma")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune fronted")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); addQuotationMarksIfNeeded(q.getIntermediateTree()); if (this.getComputeFeatures) q.setFeatureValue("removedClauseLevelModifiers", 1.0); modified = true; } return modified; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * /* w w w. j a v a 2 s . c o m*/ * e.g., John studied, hoping to get a good grade. -> John studied. * * @param input * @return whether or not a change was made */ private boolean removeVerbalModifiersAfterCommas(Question q) { List<Pair<TregexPattern, TsurgeonPattern>> ops; String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; List<TsurgeonPattern> ps; ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); tregexOpStr = "ROOT=root << (VP !< VP < (/,/=comma $+ /[^`].*/=modifier))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); //remove modifiers ps = new ArrayList<TsurgeonPattern>(); if (matchPattern.matcher(q.getIntermediateTree()).find()) { ps.add(Tsurgeon.parseOperation("prune modifier")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); //now remove the comma ops.clear(); ps.clear(); tregexOpStr = "ROOT=root << (VP !< VP < /,/=comma)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune comma")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); addQuotationMarksIfNeeded(q.getIntermediateTree()); if (this.getComputeFeatures) q.setFeatureValue("removedVerbalModifiersAfterCommas", 1.0); return true; } else { return false; } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/**
 * Inserts a closing-quote node where an opening quote has lost its partner.
 *
 * <p>Looks for any node with a child whose label matches {@code /`/} that is not
 * followed by a node whose label matches {@code /'/}. When found, a {@code ('' '')}
 * node is inserted as the last child of that parent. The tree is modified in place.
 *
 * @param input the tree to check and, if needed, modify
 */
private void addQuotationMarksIfNeeded(Tree input) {
    TregexPattern unclosedQuote = TregexPatternFactory.getPattern("__=parent < (/`/ !.. /'/)");
    if (!unclosedQuote.matcher(input).find()) {
        return;
    }

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("insert ('' '') >-1 parent"));
    TsurgeonPattern insertClosingQuote = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(unclosedQuote, insertClosingQuote));
    Tsurgeon.processPatternsOnTree(ops, input);
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * Convert a non-definite determiner to "the". * Used when extracting from noun modifiers such as relative clauses. * E.g., "A tall man, who was named Bob, entered the store." * -> "A tall man was named Bob."/* w ww.j a v a2s.c o m*/ * -> "THE tall man was named Bob." * * @param np */ private void makeDeterminerDefinite(Tree np) { String tregexOpStr = "NP !> __ <+(NP) (DT=det !< the)"; TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr); TsurgeonPattern p; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps.add(Tsurgeon.parseOperation("replace det (DT the)")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, np); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/**
 * Removes non-restrictive relative clauses and participial modifiers set off by commas.
 *
 * <p>e.g., John, who hoped to get a good grade, studied. -> John studied.
 *
 * <p>Two passes are made over the intermediate tree of {@code q}: the first handles a
 * modifier preceded by a comma but not immediately followed by one, the second a
 * modifier with a comma on both sides. The tree is modified in place.
 *
 * @param q the question whose intermediate tree is simplified
 * @return whether either pass found a match and applied its operations
 */
private boolean removeNonRestrRelClausesAndParticipials(Question q) {
    boolean modified = false;
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    List<TsurgeonPattern> ps;
    TregexPattern matchPattern;

    // Pass 1: ", modifier" with no comma directly after it.
    matchPattern = TregexPatternFactory.getPattern("NP < (VP|SBAR=mod $- /,/=punc !$+ /,/ !$ CC|CONJP)");
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    // Pass 2: ", modifier ," with commas on both sides.
    matchPattern = TregexPatternFactory.getPattern("NP < (VP|SBAR=mod $- /,/=punc $+ /,/=punc2 !$ CC|CONJP)");
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        ps.add(Tsurgeon.parseOperation("prune punc2"));
        // Use a list holding only this pass's operation so the pass-1 operation
        // is not submitted a second time.
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    return modified;
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/**
 * Removes parenthetical material from the intermediate tree of a question.
 *
 * <p>e.g., John Smith (1931-1992) was a fireman. -> John Smith was a fireman.
 *
 * <p>Two passes are made: the first prunes a node together with the punctuation that
 * brackets it (parentheses, commas, or dashes); the second prunes any {@code PRN}
 * node. The tree is modified in place.
 *
 * @param q the question whose intermediate tree is simplified
 * @return whether or not a change was made
 */
private boolean removeParentheticals(Question q) {
    boolean res = false;
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    List<TsurgeonPattern> ps;
    String tregexOpStr;
    TregexPattern matchPattern;

    // Pass 1: material bracketed by punctuation.
    tregexOpStr = "__=parenthetical [ $- /-LRB-/=leadingpunc $+ /-RRB-/=trailingpunc "
            + " | $+ /,/=leadingpunc $- /,/=trailingpunc !$ CC|CONJP "
            + " | $+ (/:/=leadingpunc < --) $- (/:/=trailingpunc < /--/) ]";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune leadingpunc"));
        ps.add(Tsurgeon.parseOperation("prune parenthetical"));
        ps.add(Tsurgeon.parseOperation("prune trailingpunc"));
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        // NOTE(review): res is still false whenever this line is reached, so this call
        // never executes. Kept as-is to preserve behavior; confirm whether the quotation
        // marks were meant to be repaired unconditionally here.
        if (res) {
            addQuotationMarksIfNeeded(q.getIntermediateTree());
        }

        if (this.getComputeFeatures) {
            q.setFeatureValue("removedParentheticals", 1.0);
        }
        res = true;
    }

    // Pass 2: any remaining PRN constituent.
    tregexOpStr = "PRN=parenthetical";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune parenthetical"));
        // Use a list holding only this pass's operation so the pass-1 operation
        // is not submitted a second time.
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        if (this.getComputeFeatures) {
            q.setFeatureValue("removedParentheticals", 1.0);
        }
        res = true;
    }

    return res;
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/**
 * Removes an appositive that follows a noun phrase and a comma.
 *
 * <p>e.g., John, the painter, knew Susan. -> John knew Susan.
 *
 * <p>When the pattern matches, the leading child {@code NP} is moved out next to its
 * parent {@code NP} and the parent is then pruned. The intermediate tree of
 * {@code q} is modified in place.
 *
 * @param q the question whose intermediate tree is simplified
 * @return whether or not a change was made
 */
private boolean removeAppositives(Question q) {
    TregexPattern appositivePattern = TregexPatternFactory.getPattern(
            "NP=parent < (NP=child $++ (/,/ $++ NP|PP=appositive) !$-- /,/) !< CC|CONJP");
    if (!appositivePattern.matcher(q.getIntermediateTree()).find()) {
        return false;
    }

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("move child $+ parent"));
    edits.add(Tsurgeon.parseOperation("prune parent"));
    TsurgeonPattern liftAndPrune = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(appositivePattern, liftAndPrune));
    Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

    addQuotationMarksIfNeeded(q.getIntermediateTree());

    if (this.getComputeFeatures) {
        q.setFeatureValue("removedAppositives", 1.0);
    }
    return true;
}