Example usage for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon collectOperations

List of usage examples for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon collectOperations

Introduction

On this page you can find example usage for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon collectOperations.

Prototype

public static TsurgeonPattern collectOperations(List<TsurgeonPattern> patterns) 

Source Link

Document

Collects a list of operation patterns into a sequence of operations to be applied.

Usage

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Parses every sentence of {@code documentText} and, for each match of the
 * clause pattern {@code ROOT << (NP=np $++ VP=vp)}, builds a new sentence
 * from a pruned copy of the {@code np} node followed by the {@code vp} node
 * and a final period.
 *
 * @param documentText raw text; it is split into sentences by
 *                     {@code AnalysisUtilities.getSentences}
 * @return one string per pattern match, in the order the matches were found
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // The patterns and the operation sequence do not depend on the tree being
    // processed, so they are built once instead of once per match.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern embeddedClausePattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneSteps = new ArrayList<TsurgeonPattern>();
    pruneSteps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c1"));
    pruneSteps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneEmbeddedClause = Tsurgeon.collectOperations(pruneSteps);

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Prune on a copy so the parse tree that is still being matched stays untouched.
            Tree np = m.getNode("np").deepCopy();
            Tsurgeon.processPattern(embeddedClausePattern, pruneEmbeddedClause, np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Inserts a {@code (. .)} node into the {@code mainclause} node of
 * {@code input} when the pattern {@code ROOT < (S=mainclause !< /\./)}
 * matches; does nothing otherwise.
 *
 * @param input tree handed to Tsurgeon for the insertion
 */
public static void addPeriodIfNeeded(Tree input) {
    TregexPattern missingPeriod = TregexPatternFactory.getPattern("ROOT < (S=mainclause !< /\\./)");
    if (!missingPeriod.matcher(input).find()) {
        return;
    }

    List<TsurgeonPattern> steps = new ArrayList<TsurgeonPattern>();
    steps.add(Tsurgeon.parseOperation("insert (. .) >-1 mainclause"));
    TsurgeonPattern insertPeriod = Tsurgeon.collectOperations(steps);

    List<Pair<TregexPattern, TsurgeonPattern>> edits = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    edits.add(new Pair<TregexPattern, TsurgeonPattern>(missingPeriod, insertPeriod));
    Tsurgeon.processPatternsOnTree(edits, input);
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree.
 * The root label is also reset to "ROOT".
 *
 * @param inputTree the tree to normalize; the result of the Tsurgeon call is
 *                  not stored, so the method relies on the tree being edited in place
 */
public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    // Step 1: prune every node whose label matches -NONE-.
    TregexPattern emptyNodePattern = TregexPatternFactory.getPattern("/\\-NONE\\-/=emptynode");
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(emptyNodePattern, Tsurgeon.collectOperations(ps)));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    // Step 2: relabel every matched node that has a child and a label of the
    // form "X-Y" with the basic category computed by tlp.
    TregexPattern decoratedPattern = TregexPatternFactory.getPattern("/.+\\-.+/=nonterminal < __");
    TregexMatcher matcher = decoratedPattern.matcher(inputTree);
    while (matcher.find()) {
        Label nonterminalLabel = matcher.getNode("nonterminal");
        if (nonterminalLabel == null) {
            continue;
        }
        nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * remove extra quotation marks/*from   www. j av  a  2s .  c  o  m*/
 * (a hack due to annoying PTB conventions by which quote marks aren't in the same consituent)
 *
 * @param input
 */
public static void removeExtraQuotes(Tree input) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    List<TsurgeonPattern> ps;

    ps = new ArrayList<TsurgeonPattern>();
    tregexOpStr = "ROOT [ << (``=quote < `` !.. ('' < '')) | << (''=quote < '' !,, (`` < ``)) ] ";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    ps.add(Tsurgeon.parseOperation("prune quote"));
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, input);

}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Drops a fronted clause-level modifier and the punctuation next to it from
 * the question's intermediate tree,
 * e.g., However, John did not study. -> John did not study.
 *
 * @param q question whose intermediate tree is inspected and handed to Tsurgeon
 * @return true if the trigger pattern matched and the pruning steps were run,
 *         false otherwise
 */
private boolean removeClauseLevelModifiers(Question q) {
    //remove subordinate clauses and various phrases
    //leave conditional antecedents (i.e., with "if" or "unless" as complementizers.  punt on "even if")
    TregexPattern trigger = TregexPatternFactory.getPattern(
            "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) !$ ``  $++ NP=subject))");
    if (!trigger.matcher(q.getIntermediateTree()).find()) {
        return false;
    }

    // First pass: prune the node bound to "comma".
    TregexPattern commaPattern = TregexPatternFactory.getPattern(
            "ROOT=root < (S=mainclause < (/[,:]/=comma $ (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject)))");
    List<TsurgeonPattern> commaSteps = new ArrayList<TsurgeonPattern>();
    commaSteps.add(Tsurgeon.parseOperation("prune comma"));
    List<Pair<TregexPattern, TsurgeonPattern>> commaOps = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    commaOps.add(new Pair<TregexPattern, TsurgeonPattern>(commaPattern, Tsurgeon.collectOperations(commaSteps)));
    Tsurgeon.processPatternsOnTree(commaOps, q.getIntermediateTree());

    // Second pass: prune the node bound to "fronted".
    TregexPattern frontedPattern = TregexPatternFactory.getPattern(
            "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject))");
    List<TsurgeonPattern> frontedSteps = new ArrayList<TsurgeonPattern>();
    frontedSteps.add(Tsurgeon.parseOperation("prune fronted"));
    List<Pair<TregexPattern, TsurgeonPattern>> frontedOps = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    frontedOps.add(new Pair<TregexPattern, TsurgeonPattern>(frontedPattern, Tsurgeon.collectOperations(frontedSteps)));
    Tsurgeon.processPatternsOnTree(frontedOps, q.getIntermediateTree());

    addQuotationMarksIfNeeded(q.getIntermediateTree());
    if (this.getComputeFeatures) {
        q.setFeatureValue("removedClauseLevelModifiers", 1.0);
    }
    return true;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * /*from  w w w . j  a  v  a 2 s .  c o  m*/
 * e.g., John studied, hoping to get a good grade. -> John studied.
 * 
 * @param input
 * @return whether or not a change was made
 */
private boolean removeVerbalModifiersAfterCommas(Question q) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    List<TsurgeonPattern> ps;

    ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();

    tregexOpStr = "ROOT=root << (VP !< VP < (/,/=comma $+ /[^`].*/=modifier))";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);

    //remove modifiers
    ps = new ArrayList<TsurgeonPattern>();
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps.add(Tsurgeon.parseOperation("prune modifier"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        //now remove the comma
        ops.clear();
        ps.clear();
        tregexOpStr = "ROOT=root << (VP !< VP < /,/=comma)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        ps.add(Tsurgeon.parseOperation("prune comma"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        addQuotationMarksIfNeeded(q.getIntermediateTree());

        if (this.getComputeFeatures)
            q.setFeatureValue("removedVerbalModifiersAfterCommas", 1.0);
        return true;
    } else {
        return false;
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Inserts a {@code ('' '')} node into the node bound to {@code parent} when
 * the pattern {@code __=parent < (/`/ !.. /'/)} matches; does nothing otherwise.
 *
 * @param input tree handed to Tsurgeon for the insertion
 */
private void addQuotationMarksIfNeeded(Tree input) {
    TregexPattern unclosedQuote = TregexPatternFactory.getPattern("__=parent < (/`/ !.. /'/)");
    if (!unclosedQuote.matcher(input).find()) {
        return;
    }

    List<TsurgeonPattern> steps = new ArrayList<TsurgeonPattern>();
    steps.add(Tsurgeon.parseOperation("insert ('' '') >-1 parent"));
    TsurgeonPattern insertClosingQuote = Tsurgeon.collectOperations(steps);

    List<Pair<TregexPattern, TsurgeonPattern>> edits = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    edits.add(new Pair<TregexPattern, TsurgeonPattern>(unclosedQuote, insertClosingQuote));
    Tsurgeon.processPatternsOnTree(edits, input);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Convert a non-definite determiner to "the".
 * Used when extracting from noun modifiers such as relative clauses.
 * E.g., "A tall man, who was named Bob, entered the store." 
 * -> "A tall man was named Bob."/*from  w w  w  .  j ava 2  s.  c om*/
 * -> "THE tall man was named Bob."
 *  
 * @param np
 */
private void makeDeterminerDefinite(Tree np) {
    String tregexOpStr = "NP !> __ <+(NP) (DT=det !< the)";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);

    TsurgeonPattern p;
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();

    ps.add(Tsurgeon.parseOperation("replace det (DT the)"));
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, np);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Prunes comma-delimited VP/SBAR modifiers of an NP from the question's
 * intermediate tree,
 * e.g., John, who hoped to get a good grade, studied. -> John studied.
 *
 * <p>Two stages run in order: the first handles a modifier with a comma
 * bound only on its left ({@code punc}), the second a modifier with commas
 * bound on both sides ({@code punc} and {@code punc2}).
 *
 * @param q question whose intermediate tree is inspected and handed to Tsurgeon
 * @return true if either stage's pattern matched and its pruning was run
 */
private boolean removeNonRestrRelClausesAndParticipials(Question q) {
    boolean modified = false;

    TregexPattern matchPattern = TregexPatternFactory
            .getPattern("NP < (VP|SBAR=mod $- /,/=punc !$+ /,/ !$ CC|CONJP)");
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    matchPattern = TregexPatternFactory.getPattern("NP < (VP|SBAR=mod $- /,/=punc $+ /,/=punc2 !$ CC|CONJP)");
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        ps.add(Tsurgeon.parseOperation("prune punc2"));
        // A fresh list per stage keeps the first stage's operation from being
        // handed to Tsurgeon a second time.
        List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    return modified;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * /*from   w w w .j a v  a 2s  .c  o m*/
 * e.g., John Smith (1931-1992) was a fireman. -> John Smith was a Fireman.
 * 
 * @return whether or not a change was made
 */
private boolean removeParentheticals(Question q) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    List<TsurgeonPattern> ps;
    boolean res = false;

    ps = new ArrayList<TsurgeonPattern>();
    tregexOpStr = "__=parenthetical [ $- /-LRB-/=leadingpunc $+ /-RRB-/=trailingpunc "
            + " | $+ /,/=leadingpunc $- /,/=trailingpunc !$ CC|CONJP "
            + " | $+ (/:/=leadingpunc < --) $- (/:/=trailingpunc < /--/) ]";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps.add(Tsurgeon.parseOperation("prune leadingpunc"));
        ps.add(Tsurgeon.parseOperation("prune parenthetical"));
        ps.add(Tsurgeon.parseOperation("prune trailingpunc"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        if (res)
            addQuotationMarksIfNeeded(q.getIntermediateTree());
        if (this.getComputeFeatures)
            q.setFeatureValue("removedParentheticals", 1.0);
        res = true;
    }

    ps = new ArrayList<TsurgeonPattern>();
    tregexOpStr = "PRN=parenthetical";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps.add(Tsurgeon.parseOperation("prune parenthetical"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures)
            q.setFeatureValue("removedParentheticals", 1.0);
        res = true;
    }

    return res;
}