List of usage examples for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon collectOperations
public static TsurgeonPattern collectOperations(List<TsurgeonPattern> patterns)
From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java
License:Open Source License
/**
 * Breaks a document into simple "noun phrase + verb phrase" sentences.
 *
 * <p>Every sentence of the document is parsed. For each match of an NP that has a
 * VP as a later sister somewhere below ROOT, a deep copy of the NP is taken, nodes
 * whose label starts with "S" that the copy dominates are pruned together with the
 * optional commas named c1/c2 in the pattern, and a new tree
 * {@code (S <np> <vp> (. .))} is built. The surface string of that tree is added
 * to the result.
 *
 * @param documentText raw text of the document to decompose
 * @return one string per NP/VP match, in the order the matches were found
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // Both patterns are constants: compile them once instead of once per tree / per match.
    TregexPattern subjectWithPredicate = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern clauseInsideNp = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");

    List<String> result = new ArrayList<String>();
    for (Tree t : trees) {
        TregexMatcher m = subjectWithPredicate.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");
            // Work on a copy so that pruning does not alter the tree being matched.
            Tree np = m.getNode("np").deepCopy();

            List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
            ps.add(Tsurgeon.parseOperation("prune sbarq"));
            ps.add(Tsurgeon.parseOperation("prune c1"));
            ps.add(Tsurgeon.parseOperation("prune c2"));
            Tsurgeon.processPattern(clauseInsideNp, Tsurgeon.collectOperations(ps), np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }
    return result;
}
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/**
 * Inserts a period node {@code (. .)} as the last child of the main clause when
 * the S directly under ROOT has no child whose label matches a period.
 *
 * @param input the tree to check; the insertion is applied to this tree
 */
public static void addPeriodIfNeeded(Tree input) {
    TregexPattern missingPeriod = TregexPatternFactory.getPattern("ROOT < (S=mainclause !< /\\./)");
    if (!missingPeriod.matcher(input).find()) {
        return; // main clause already has a period (or there is no main clause)
    }

    List<TsurgeonPattern> insertions = new ArrayList<TsurgeonPattern>();
    insertions.add(Tsurgeon.parseOperation("insert (. .) >-1 mainclause"));
    TsurgeonPattern insertPeriod = Tsurgeon.collectOperations(insertions);

    List<Pair<TregexPattern, TsurgeonPattern>> edits = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    edits.add(new Pair<TregexPattern, TsurgeonPattern>(missingPeriod, insertPeriod));
    Tsurgeon.processPatternsOnTree(edits, input);
}
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a
 * Penn Treebank-style tree. The root label is also reset to "ROOT".
 *
 * @param inputTree the tree to normalize; labels are rewritten and pruning is applied
 *                  directly to this tree
 */
public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    // Pass 1: prune every node whose label contains "-NONE-" (empty elements / traces).
    TregexPattern emptyNodePattern = TregexPatternFactory.getPattern("/\\-NONE\\-/=emptynode");
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(emptyNodePattern, Tsurgeon.collectOperations(ps)));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    // Pass 2: reduce decorated non-terminal labels ("X-Y" with at least one child)
    // to their basic category.
    TregexPattern decoratedPattern = TregexPatternFactory.getPattern("/.+\\-.+/=nonterminal < __");
    TregexMatcher matcher = decoratedPattern.matcher(inputTree);
    while (matcher.find()) {
        Label nonterminalLabel = matcher.getNode("nonterminal");
        if (nonterminalLabel == null) {
            continue;
        }
        nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }
}
From source file:edu.cmu.ark.AnalysisUtilities.java
License:Open Source License
/** * remove extra quotation marks/*from www. j av a 2s . c o m*/ * (a hack due to annoying PTB conventions by which quote marks aren't in the same consituent) * * @param input */ public static void removeExtraQuotes(Tree input) { List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; List<TsurgeonPattern> ps; ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "ROOT [ << (``=quote < `` !.. ('' < '')) | << (''=quote < '' !,, (`` < ``)) ] "; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune quote")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, input); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * // w w w . j a va2 s . c o m * e.g., However, John did not study. -> John did not study. * * @param q * @return */ private boolean removeClauseLevelModifiers(Question q) { List<Pair<TregexPattern, TsurgeonPattern>> ops; String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; List<TsurgeonPattern> ps; boolean modified = false; //remove subordinate clauses and various phrases //leave conditional antecedents (i.e., with "if" or "unless" as complementizers. punt on "even if") tregexOpStr = "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) !$ `` $++ NP=subject))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); TregexMatcher matcher = matchPattern.matcher(q.getIntermediateTree()); if (matcher.find()) { ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "ROOT=root < (S=mainclause < (/[,:]/=comma $ (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject)))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune comma")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune fronted")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); addQuotationMarksIfNeeded(q.getIntermediateTree()); if (this.getComputeFeatures) q.setFeatureValue("removedClauseLevelModifiers", 1.0); modified = true; } return modified; }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * /*from w w w . j a v a 2 s . c o m*/ * e.g., John studied, hoping to get a good grade. -> John studied. * * @param input * @return whether or not a change was made */ private boolean removeVerbalModifiersAfterCommas(Question q) { List<Pair<TregexPattern, TsurgeonPattern>> ops; String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; List<TsurgeonPattern> ps; ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); tregexOpStr = "ROOT=root << (VP !< VP < (/,/=comma $+ /[^`].*/=modifier))"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); //remove modifiers ps = new ArrayList<TsurgeonPattern>(); if (matchPattern.matcher(q.getIntermediateTree()).find()) { ps.add(Tsurgeon.parseOperation("prune modifier")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); //now remove the comma ops.clear(); ps.clear(); tregexOpStr = "ROOT=root << (VP !< VP < /,/=comma)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); ps.add(Tsurgeon.parseOperation("prune comma")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); addQuotationMarksIfNeeded(q.getIntermediateTree()); if (this.getComputeFeatures) q.setFeatureValue("removedVerbalModifiersAfterCommas", 1.0); return true; } else { return false; } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void addQuotationMarksIfNeeded(Tree input) { String tregexOpStr;//w w w .jav a2 s .c o m TregexPattern matchPattern; TregexMatcher matcher; tregexOpStr = "__=parent < (/`/ !.. /'/)"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input); if (matcher.find()) { TsurgeonPattern p; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps.add(Tsurgeon.parseOperation("insert ('' '') >-1 parent")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, input); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * Convert a non-definite determiner to "the". * Used when extracting from noun modifiers such as relative clauses. * E.g., "A tall man, who was named Bob, entered the store." * -> "A tall man was named Bob."/*from w w w . j ava 2 s. c om*/ * -> "THE tall man was named Bob." * * @param np */ private void makeDeterminerDefinite(Tree np) { String tregexOpStr = "NP !> __ <+(NP) (DT=det !< the)"; TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr); TsurgeonPattern p; List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>(); List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); ps.add(Tsurgeon.parseOperation("replace det (DT the)")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, np); }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/**
 * Removes non-restrictive relative clauses and participial modifiers set off by commas.
 * e.g., John, who hoped to get a good grade, studied. -> John studied.
 *
 * <p>Two passes are made over the intermediate tree: the first handles a modifier that
 * is preceded by a comma but not followed by one, the second a modifier with a comma on
 * both sides. Each pass runs only its own operations.
 *
 * @param q the question whose intermediate tree is simplified
 * @return {@code true} if either pass matched and its prune operations were run
 */
private boolean removeNonRestrRelClausesAndParticipials(Question q) {
    boolean modified = false;

    // Pass 1: ", modifier" with no comma after the modifier.
    TregexPattern matchPattern = TregexPatternFactory.getPattern(
            "NP < (VP|SBAR=mod $- /,/=punc !$+ /,/ !$ CC|CONJP)");
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    // Pass 2: ", modifier ," with commas on both sides.
    matchPattern = TregexPatternFactory.getPattern(
            "NP < (VP|SBAR=mod $- /,/=punc $+ /,/=punc2 !$ CC|CONJP)");
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        ps.add(Tsurgeon.parseOperation("prune punc2"));
        // Fresh list: the pass-1 operation must not be queued and re-run here.
        List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    return modified;
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * /*from w w w .j a v a 2s .c o m*/ * e.g., John Smith (1931-1992) was a fireman. -> John Smith was a Fireman. * * @return whether or not a change was made */ private boolean removeParentheticals(Question q) { List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>(); String tregexOpStr; TregexPattern matchPattern; TsurgeonPattern p; List<TsurgeonPattern> ps; boolean res = false; ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "__=parenthetical [ $- /-LRB-/=leadingpunc $+ /-RRB-/=trailingpunc " + " | $+ /,/=leadingpunc $- /,/=trailingpunc !$ CC|CONJP " + " | $+ (/:/=leadingpunc < --) $- (/:/=trailingpunc < /--/) ]"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (matchPattern.matcher(q.getIntermediateTree()).find()) { ps.add(Tsurgeon.parseOperation("prune leadingpunc")); ps.add(Tsurgeon.parseOperation("prune parenthetical")); ps.add(Tsurgeon.parseOperation("prune trailingpunc")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); if (res) addQuotationMarksIfNeeded(q.getIntermediateTree()); if (this.getComputeFeatures) q.setFeatureValue("removedParentheticals", 1.0); res = true; } ps = new ArrayList<TsurgeonPattern>(); tregexOpStr = "PRN=parenthetical"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); if (matchPattern.matcher(q.getIntermediateTree()).find()) { ps.add(Tsurgeon.parseOperation("prune parenthetical")); p = Tsurgeon.collectOperations(ps); ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p)); Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree()); if (this.getComputeFeatures) q.setFeatureValue("removedParentheticals", 1.0); res = true; } return res; }