Example usage for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon processPatternsOnTree

List of usage examples for edu.stanford.nlp.trees.tregex.tsurgeon Tsurgeon processPatternsOnTree

Introduction

This page collects usage examples for the method edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon.processPatternsOnTree.

Prototype

@SuppressWarnings("StringContatenationInLoop")
    public static Tree processPatternsOnTree(List<Pair<TregexPattern, TsurgeonPattern>> ops, Tree t) 

Source Link

Usage

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Appends a sentence-final period to the main clause of a parse tree when none is present.
 *
 * <p>The tree is searched for a {@code ROOT} node immediately dominating an {@code S} node
 * that has no child matching {@code /\./}. If one is found, a {@code (. .)} node is inserted
 * as the last child of that clause through a Tsurgeon {@code insert} operation.
 *
 * <p>The tree returned by {@code processPatternsOnTree} is discarded, so callers only observe
 * the change if Tsurgeon edits {@code input} in place.
 *
 * @param input the parse tree to inspect and, if needed, extend with a period
 */
public static void addPeriodIfNeeded(Tree input) {
    TregexPattern missingPeriod = TregexPatternFactory.getPattern("ROOT < (S=mainclause !< /\\./)");

    // Nothing to do when no main clause lacks punctuation.
    if (!missingPeriod.matcher(input).find()) {
        return;
    }

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("insert (. .) >-1 mainclause"));
    TsurgeonPattern combinedEdit = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> rewrites = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    rewrites.add(new Pair<TregexPattern, TsurgeonPattern>(missingPeriod, combinedEdit));
    Tsurgeon.processPatternsOnTree(rewrites, input);
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree.
 *
 * @param inputTree/*www  .j  av  a 2 s .c om*/
 */
public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;

    tregexOpStr = "/\\-NONE\\-/=emptynode";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    Label nonterminalLabel;

    tregexOpStr = "/.+\\-.+/=nonterminal < __";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    while (matcher.find()) {
        nonterminalLabel = matcher.getNode("nonterminal");
        if (nonterminalLabel == null)
            continue;
        nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }

}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Prunes unbalanced quotation marks from a parse tree.
 *
 * <p>This is a workaround for the Penn Treebank convention under which opening and closing
 * quote marks are not placed in the same constituent. A quote node is pruned when it is an
 * opening {@code ``} that does not precede any closing {@code ''}, or a closing {@code ''}
 * that does not follow any opening {@code ``}.
 *
 * @param input the parse tree to clean up
 */
public static void removeExtraQuotes(Tree input) {
    TregexPattern unmatchedQuote = TregexPatternFactory.getPattern(
            "ROOT [ << (``=quote < `` !.. ('' < '')) | << (''=quote < '' !,, (`` < ``)) ] ");

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("prune quote"));
    TsurgeonPattern combinedEdit = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> rewrites = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    rewrites.add(new Pair<TregexPattern, TsurgeonPattern>(unmatchedQuote, combinedEdit));
    Tsurgeon.processPatternsOnTree(rewrites, input);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * /* www .j  ava2 s .  c  o m*/
 * e.g., However, John did not study. -> John did not study.
 * 
 * @param q
 * @return
 */
private boolean removeClauseLevelModifiers(Question q) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    List<TsurgeonPattern> ps;

    boolean modified = false;

    //remove subordinate clauses and various phrases
    //leave conditional antecedents (i.e., with "if" or "unless" as complementizers.  punt on "even if")
    tregexOpStr = "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) !$ ``  $++ NP=subject))";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(q.getIntermediateTree());
    if (matcher.find()) {
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ps = new ArrayList<TsurgeonPattern>();
        tregexOpStr = "ROOT=root < (S=mainclause < (/[,:]/=comma $ (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject)))";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        ps.add(Tsurgeon.parseOperation("prune comma"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ps = new ArrayList<TsurgeonPattern>();
        tregexOpStr = "ROOT=root < (S=mainclause < (/SBAR|ADVP|ADJP|CC|PP|S|NP/=fronted !< (IN < if|unless) $++ NP=subject))";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        ps.add(Tsurgeon.parseOperation("prune fronted"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        addQuotationMarksIfNeeded(q.getIntermediateTree());
        if (this.getComputeFeatures)
            q.setFeatureValue("removedClauseLevelModifiers", 1.0);
        modified = true;
    }

    return modified;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * /*  w  w  w.  j a v  a 2  s . c  o m*/
 * e.g., John studied, hoping to get a good grade. -> John studied.
 * 
 * @param input
 * @return whether or not a change was made
 */
private boolean removeVerbalModifiersAfterCommas(Question q) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    List<TsurgeonPattern> ps;

    ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();

    tregexOpStr = "ROOT=root << (VP !< VP < (/,/=comma $+ /[^`].*/=modifier))";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);

    //remove modifiers
    ps = new ArrayList<TsurgeonPattern>();
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps.add(Tsurgeon.parseOperation("prune modifier"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        //now remove the comma
        ops.clear();
        ps.clear();
        tregexOpStr = "ROOT=root << (VP !< VP < /,/=comma)";
        matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
        ps.add(Tsurgeon.parseOperation("prune comma"));
        p = Tsurgeon.collectOperations(ps);
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        addQuotationMarksIfNeeded(q.getIntermediateTree());

        if (this.getComputeFeatures)
            q.setFeatureValue("removedVerbalModifiersAfterCommas", 1.0);
        return true;
    } else {
        return false;
    }
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Adds a closing quotation mark where an opening one has been left unbalanced.
 *
 * <p>Looks for any node that has a child whose label contains a backtick and that does not
 * precede a node whose label contains an apostrophe. If such a node exists, a
 * {@code ('' '')} node is inserted as its last child.
 *
 * @param input the parse tree to inspect and, if needed, extend with a closing quote
 */
private void addQuotationMarksIfNeeded(Tree input) {
    TregexPattern unclosedQuote = TregexPatternFactory.getPattern("__=parent < (/`/ !.. /'/)");

    // Every opening quote already has a closing partner: leave the tree alone.
    if (!unclosedQuote.matcher(input).find()) {
        return;
    }

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("insert ('' '') >-1 parent"));
    TsurgeonPattern combinedEdit = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> rewrites = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    rewrites.add(new Pair<TregexPattern, TsurgeonPattern>(unclosedQuote, combinedEdit));
    Tsurgeon.processPatternsOnTree(rewrites, input);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Convert a non-definite determiner to "the".
 * Used when extracting from noun modifiers such as relative clauses.
 * E.g., "A tall man, who was named Bob, entered the store." 
 * -> "A tall man was named Bob."/* w ww.j  a v  a2s.c  o m*/
 * -> "THE tall man was named Bob."
 *  
 * @param np
 */
private void makeDeterminerDefinite(Tree np) {
    String tregexOpStr = "NP !> __ <+(NP) (DT=det !< the)";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);

    TsurgeonPattern p;
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();

    ps.add(Tsurgeon.parseOperation("replace det (DT the)"));
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, np);
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Removes non-restrictive relative clauses and participial phrases set off by commas.
 * e.g., John, who hoped to get a good grade, studied. -> John studied.
 *
 * <p>Two passes are run over the question's intermediate tree:
 * <ol>
 *   <li>a VP or SBAR child of an NP that immediately follows a comma, is not immediately
 *       followed by a comma, and has no CC/CONJP sister: the leading comma and the modifier
 *       are pruned;</li>
 *   <li>the same configuration but with a comma on both sides: both commas and the modifier
 *       are pruned.</li>
 * </ol>
 * Each pass builds its own operation list so that the second pass does not re-submit the
 * first pass's operation.
 *
 * @param q the question whose intermediate tree is examined and edited
 * @return {@code true} if either pass found a match and ran its pruning operations
 */
private boolean removeNonRestrRelClausesAndParticipials(Question q) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    List<TsurgeonPattern> ps;
    String tregexOpStr;
    TregexPattern matchPattern;
    boolean modified = false;

    // Pass 1: modifier preceded by a comma only.
    tregexOpStr = "NP < (VP|SBAR=mod $- /,/=punc !$+ /,/ !$ CC|CONJP)";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    // Pass 2: modifier enclosed by commas on both sides.
    tregexOpStr = "NP < (VP|SBAR=mod $- /,/=punc $+ /,/=punc2 !$ CC|CONJP)";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune punc"));
        ps.add(Tsurgeon.parseOperation("prune mod"));
        ps.add(Tsurgeon.parseOperation("prune punc2"));
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedNonRestrRelClausesAndParticipials", 1.0);
        }
        modified = true;
    }

    return modified;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Removes parenthetical material from the question's intermediate tree.
 * e.g., John Smith (1931-1992) was a fireman. -> John Smith was a Fireman.
 *
 * <p>Two passes are run:
 * <ol>
 *   <li>a node enclosed by -LRB-/-RRB-, by commas (with no CC/CONJP sister), or by dashes is
 *       pruned together with its enclosing punctuation, after which quotation marks are
 *       re-balanced;</li>
 *   <li>every node labeled {@code PRN} is pruned.</li>
 * </ol>
 * Each pass builds its own operation list so that the second pass does not re-submit the
 * first pass's operation.
 *
 * @param q the question whose intermediate tree is examined and edited
 * @return whether or not a change was made
 */
private boolean removeParentheticals(Question q) {
    List<Pair<TregexPattern, TsurgeonPattern>> ops;
    List<TsurgeonPattern> ps;
    String tregexOpStr;
    TregexPattern matchPattern;
    boolean res = false;

    // Pass 1: punctuation-delimited parentheticals.
    tregexOpStr = "__=parenthetical [ $- /-LRB-/=leadingpunc $+ /-RRB-/=trailingpunc "
            + " | $+ /,/=leadingpunc $- /,/=trailingpunc !$ CC|CONJP "
            + " | $+ (/:/=leadingpunc < --) $- (/:/=trailingpunc < /--/) ]";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune leadingpunc"));
        ps.add(Tsurgeon.parseOperation("prune parenthetical"));
        ps.add(Tsurgeon.parseOperation("prune trailingpunc"));
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());

        // Previously guarded by "if (res)", which was always false at this point and so
        // never ran; the quote repair is now applied after pruning, as the other
        // simplification steps do.
        addQuotationMarksIfNeeded(q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedParentheticals", 1.0);
        }
        res = true;
    }

    // Pass 2: constituents explicitly labeled PRN.
    tregexOpStr = "PRN=parenthetical";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    if (matchPattern.matcher(q.getIntermediateTree()).find()) {
        ps = new ArrayList<TsurgeonPattern>();
        ps.add(Tsurgeon.parseOperation("prune parenthetical"));
        ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
        ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, Tsurgeon.collectOperations(ps)));
        Tsurgeon.processPatternsOnTree(ops, q.getIntermediateTree());
        if (this.getComputeFeatures) {
            q.setFeatureValue("removedParentheticals", 1.0);
        }
        res = true;
    }

    return res;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Strips appositive phrases from noun phrases in the question's intermediate tree.
 * Example: "John, the painter, knew Susan." becomes "John knew Susan."
 *
 * <p>The trigger is an NP without a CC/CONJP child that has an NP child which is not
 * preceded by a comma sister and is followed by a comma that is in turn followed by an NP
 * or PP. On a match, the NP child is moved to sit immediately before the enclosing NP, the
 * enclosing NP is pruned, and quotation marks are re-balanced.
 *
 * @param q the question whose intermediate tree is examined and edited
 * @return {@code true} when the pattern matched and the edits were run, {@code false} otherwise
 */
private boolean removeAppositives(Question q) {
    TregexPattern appositivePattern = TregexPatternFactory.getPattern(
            "NP=parent < (NP=child $++ (/,/ $++ NP|PP=appositive) !$-- /,/) !< CC|CONJP");

    // Guard clause: bail out early when there is no appositive to remove.
    if (!appositivePattern.matcher(q.getIntermediateTree()).find()) {
        return false;
    }

    List<TsurgeonPattern> edits = new ArrayList<TsurgeonPattern>();
    edits.add(Tsurgeon.parseOperation("move child $+ parent"));
    edits.add(Tsurgeon.parseOperation("prune parent"));
    TsurgeonPattern combinedEdit = Tsurgeon.collectOperations(edits);

    List<Pair<TregexPattern, TsurgeonPattern>> rewrites = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    rewrites.add(new Pair<TregexPattern, TsurgeonPattern>(appositivePattern, combinedEdit));

    Tsurgeon.processPatternsOnTree(rewrites, q.getIntermediateTree());
    addQuotationMarksIfNeeded(q.getIntermediateTree());

    if (this.getComputeFeatures) {
        q.setFeatureValue("removedAppositives", 1.0);
    }
    return true;
}