Example usage for edu.stanford.nlp.trees.tregex TregexMatcher find

List of usage examples for edu.stanford.nlp.trees.tregex TregexMatcher find

Introduction

On this page you can find example usages of the find() method of edu.stanford.nlp.trees.tregex.TregexMatcher.

Prototype

public boolean find() 

Source Link

Document

Find the next match of the pattern on the tree.

Usage

From source file:Ceist.CeistView.java

License:Open Source License

/**
 * Begin a search/*  w  w w  .j av a2s.  com*/
 */
private void runSearch() {
    //setTregexState(true); Disable buttons while searching

    Thread searchThread = new Thread() {
        @Override
        public void run() {
            lblSearchStatus.setText("Searching...");

            // Initialise search patterns
            final TregexPattern primary = MatchPattern.getMatchPattern(txtCurrentPattern);

            if (primary == null) {
                lblSearchStatus.setText("Bad Pattern!");
                return;
            }

            SwingUtilities.invokeLater(new Runnable() {
                public void run() {

                    Treebank treebank = new MemoryTreebank();

                    // Add the test data set if selected and loaded
                    if (dataSet.testData.isLoaded() && btnUseTestData.isSelected())
                        treebank.addAll(dataSet.testData.getTreebank());

                    // Add the development data set if selected and loaded
                    if (dataSet.testData.isLoaded() && btnUseDevelopmentData.isSelected())
                        treebank.addAll(dataSet.devData.getTreebank());

                    int treeCount = treebank.size();
                    int count = 0;

                    // Copy the current matches
                    diffTrees.clear();
                    diffTrees.addAll(matchedTrees);

                    if (!chkShowPreview.isSelected()) {
                        matchedTrees.clear();

                        // Clear the table
                        ((DefaultTableModel) tableMatches.getModel()).setRowCount(0);
                    }

                    for (Tree testTree : treebank) {
                        count++;
                        lblSearchStatus.setText(String.format("Searching %d of %d", count, treeCount));
                        TregexMatcher m = primary.matcher(testTree);

                        //Tree lastMatchingRootNode = null;
                        boolean bMatchFound = false;

                        while (m.find() && !bMatchFound) {

                            matchedTrees.add(testTree);

                            if (chkShowTagged.isSelected())
                                ((DefaultTableModel) tableMatches.getModel())
                                        .addRow(getMatcherTableRow(m, testTree, true));
                            else
                                ((DefaultTableModel) tableMatches.getModel())
                                        .addRow(getMatcherTableRow(m, testTree, false));
                            bMatchFound = true;
                        }
                    }

                    if (matchedTrees.size() > 0)
                        lblSearchStatus.setText(String.format("Found %d matches.", matchedTrees.size()));
                    else
                        lblSearchStatus.setText(String.format("No matches found!"));
                }
            });
        }
    };

    searchThread.start();
}

From source file:com.github.kutschkem.Qgen.annotators.SimpleSentenceDecompositionAnnotator.java

License:Open Source License

/**
 * Breaks a document into simple "NP VP ." sentences.
 *
 * Every sentence returned by {@code AnalysisUtilities.getSentences} is parsed;
 * for each match of an NP followed by a sister VP somewhere below ROOT, a copy
 * of the NP is stripped of the nodes named {@code sbarq}, {@code c1} and
 * {@code c2} (an S* descendant and the optional commas adjacent to it), and
 * the pruned NP, the VP and a final period are assembled into a new S tree.
 *
 * @param documentText the raw text to decompose
 * @return one string per NP/VP match, produced by
 *         {@code AnalysisUtilities.orginialSentence} from the new tree's yield
 */
private List<String> decompose(String documentText) {
    List<Tree> trees = new ArrayList<Tree>();
    for (String sentence : AnalysisUtilities.getSentences(documentText)) {
        trees.add(AnalysisUtilities.getInstance().parseSentence(sentence).parse);
    }

    // The patterns and the prune script do not depend on the tree being
    // processed, so build them once instead of once per match.
    TregexPattern clausePattern = TregexPattern.compile("ROOT << (NP=np $++ VP=vp) ");
    TregexPattern modifierPattern = TregexPattern.compile("NP << (/^S.*/=sbarq ?. /,/=c1 ?, /,/=c2)");
    List<TsurgeonPattern> pruneOps = new ArrayList<TsurgeonPattern>();
    pruneOps.add(Tsurgeon.parseOperation("prune sbarq"));
    pruneOps.add(Tsurgeon.parseOperation("prune c1"));
    pruneOps.add(Tsurgeon.parseOperation("prune c2"));
    TsurgeonPattern pruneModifiers = Tsurgeon.collectOperations(pruneOps);

    List<String> result = new ArrayList<String>();

    for (Tree t : trees) {
        TregexMatcher m = clausePattern.matcher(t);
        while (m.find()) {
            Tree vp = m.getNode("vp");

            // Work on a copy so the parsed tree (still being matched) is untouched.
            Tree np = m.getNode("np").deepCopy();
            Tsurgeon.processPattern(modifierPattern, pruneModifiers, np);

            Tree newTree = Tree.valueOf("(S " + np + vp + "(. .))");
            result.add(AnalysisUtilities.orginialSentence(newTree.yield()));
        }
    }

    return result;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Appends a "(. .)" node as the last child of the main clause when the tree
 * matches {@code ROOT < (S=mainclause !< /\./)}, i.e. when the S directly
 * under ROOT has no child whose label matches a period. Trees that do not
 * match are left untouched.
 *
 * @param input the tree to modify in place
 */
public static void addPeriodIfNeeded(Tree input) {
    TregexPattern missingPeriod = TregexPatternFactory.getPattern("ROOT < (S=mainclause !< /\\./)");

    // Nothing to do unless the main clause lacks a period.
    if (!missingPeriod.matcher(input).find()) {
        return;
    }

    List<TsurgeonPattern> insertSteps = new ArrayList<TsurgeonPattern>();
    List<Pair<TregexPattern, TsurgeonPattern>> edits = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();

    insertSteps.add(Tsurgeon.parseOperation("insert (. .) >-1 mainclause"));
    TsurgeonPattern insertPeriod = Tsurgeon.collectOperations(insertSteps);
    edits.add(new Pair<TregexPattern, TsurgeonPattern>(missingPeriod, insertPeriod));

    Tsurgeon.processPatternsOnTree(edits, input);
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Counts how many times a Tregex expression matches in a tree.
 *
 * @param tregexExpression the pattern, resolved through {@code TregexPatternFactory.getPattern}
 * @param t the tree to search
 * @return the number of successive successful {@code find()} calls
 */
public static int getNumberOfMatchesInTree(String tregexExpression, Tree t) {
    TregexPattern pattern = TregexPatternFactory.getPattern(tregexExpression);
    TregexMatcher finder = pattern.matcher(t);

    int total = 0;
    for (boolean found = finder.find(); found; found = finder.find()) {
        total++;
    }
    return total;
}

From source file:edu.cmu.ark.AnalysisUtilities.java

License:Open Source License

/**
 * Remove traces and non-terminal decorations (e.g., "-SUBJ" in "NP-SUBJ") from a Penn Treebank-style tree.
 *
 * @param inputTree/*from  ww  w  .  j  av  a2 s . co m*/
 */
public void normalizeTree(Tree inputTree) {
    inputTree.label().setFromString("ROOT");

    List<Pair<TregexPattern, TsurgeonPattern>> ops = new ArrayList<Pair<TregexPattern, TsurgeonPattern>>();
    List<TsurgeonPattern> ps = new ArrayList<TsurgeonPattern>();
    String tregexOpStr;
    TregexPattern matchPattern;
    TsurgeonPattern p;
    TregexMatcher matcher;

    tregexOpStr = "/\\-NONE\\-/=emptynode";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    ps.add(Tsurgeon.parseOperation("prune emptynode"));
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    p = Tsurgeon.collectOperations(ps);
    ops.add(new Pair<TregexPattern, TsurgeonPattern>(matchPattern, p));
    Tsurgeon.processPatternsOnTree(ops, inputTree);

    Label nonterminalLabel;

    tregexOpStr = "/.+\\-.+/=nonterminal < __";
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(inputTree);
    while (matcher.find()) {
        nonterminalLabel = matcher.getNode("nonterminal");
        if (nonterminalLabel == null)
            continue;
        nonterminalLabel.setFromString(tlp.basicCategory(nonterminalLabel.value()));
    }

}

From source file:edu.cmu.ark.nlp.question.Question.java

License:Open Source License

/**
 * Collects the nodes of the source tree labelled with one of
 * RB, VB, VBD, VBP, VBZ, IN, MD, WRB, WDT or CC that
 * {@code QuestionUtil.cCommands} reports as c-commanding the source
 * predicate and that do not share the predicate's parent.
 *
 * The source predicate is the parent of the first leaf of {@code sourceTree}
 * whose lemma equals the lemma of the head pre-terminal of the intermediate
 * tree's first child.
 *
 * @return the matching nodes in match order; empty when no source predicate is found
 */
public List<Tree> findLogicalWordsAboveIntermediateTree() {
    List<Tree> res = new ArrayList<Tree>();

    Tree pred = intermediateTree.getChild(0).headPreTerminal(this.hf);
    String lemma = QuestionUtil.getLemma(pred.yield().toString(), pred.label().toString());

    // Locate the pre-terminal in the source tree that carries the same lemma.
    Tree sourcePred = null;
    for (Tree leaf : sourceTree.getLeaves()) {
        Tree tmp = leaf.parent(sourceTree);
        String sourceLemma = QuestionUtil.getLemma(leaf.label().toString(), tmp.label().toString());
        if (sourceLemma.equals(lemma)) {
            sourcePred = tmp;
            break;
        }
    }

    // Without a source predicate nothing can qualify, so skip the pattern
    // search entirely instead of re-testing the null check on every match.
    if (sourcePred == null) {
        return res;
    }

    // The predicate's parent does not change between matches; look it up once.
    Tree sourcePredParent = sourcePred.parent(sourceTree);

    String tregexOpStr = "RB|VB|VBD|VBP|VBZ|IN|MD|WRB|WDT|CC=command";
    TregexPattern matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    TregexMatcher matcher = matchPattern.matcher(sourceTree);

    while (matcher.find()) {
        Tree command = matcher.getNode("command");
        // Identity comparison is intentional: both sides are nodes of sourceTree.
        if (QuestionUtil.cCommands(sourceTree, command, sourcePred)
                && command.parent(sourceTree) != sourcePredParent) {
            res.add(command);
        }
    }

    return res;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Simple hack that filters out a few special cases producing bad output.
 * Phrases built on "according" and "including" look like participial
 * phrases syntactically, and we do not want to extract from them.
 *
 * @param input the question whose intermediate tree is checked
 * @return false when the tree contains a verb (reached from ROOT through a
 *         chain of VP/S nodes) whose child matches "accord..." or "includ...",
 *         true otherwise
 */
private boolean mainVerbOK(Question input) {
    // e.g. "...according to X..."
    final String unwantedMainVerb = "ROOT <+(VP|S) (/VB.*/ < /(accord.*|includ.*)/)";

    TregexPattern unwanted = TregexPatternFactory.getPattern(unwantedMainVerb);
    return !unwanted.matcher(input.getIntermediateTree()).find();
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Tells whether the intermediate tree of the input contains a conjunction
 * that can be split. Three patterns are tried in order, stopping at the first
 * one that matches: conjoined VPs/clauses, clauses joined by semi-colons,
 * and (only when {@code breakNPs} is set) conjoined NPs.
 *
 * @param input the question whose intermediate tree is inspected
 * @return true if any of the applicable patterns matches
 */
private boolean hasBreakableConjunction(Question input) {
    // Conjoined VPs, clauses, etc. Conjoined SBARs may be broken,
    // but nothing else underneath an SBAR node.
    final String conjoinedClauses = "CONJP|CC !< either|or|neither|nor > S|SBAR|VP" + " [ $ SBAR|S | !>> SBAR ] ";
    if (TregexPatternFactory.getPattern(conjoinedClauses).matcher(input.getIntermediateTree()).find()) {
        return true;
    }

    // Clauses conjoined by semi-colons.
    final String semicolonClauses = " S < (S=child $ (/:/ < /;/) !$++ (/:/ < /;/) ) ";
    if (TregexPatternFactory.getPattern(semicolonClauses).matcher(input.getIntermediateTree()).find()) {
        return true;
    }

    if (!breakNPs) {
        return false;
    }

    // Conjoined NPs. The trailing "!> (NP < ...)" clause handles flat NPs in
    // the treebank such as "(NP (JJ eastern) (CC and) (JJ western) (NNS coasts))",
    // which are allowed through because they cannot easily be split.
    final String conjoinedNPs = "CONJP|CC !< either|or|neither|nor > NP !>> SBAR "
            + " !> (NP < (/^(N.*|SBAR|PRP)$/ !$ /^(N.*|SBAR|PRP)$/))";
    return TregexPatternFactory.getPattern(conjoinedNPs).matcher(input.getIntermediateTree()).find();
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * Returns whether the input sentence has a subject and a finite main verb.
 * If it does not, then we do not want to add it to the output. 
 * /*ww w  . j a va2s . co  m*/
 * @param input
 * @return
 */
private boolean hasSubjectAndFiniteMainVerb(Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    TregexMatcher matcher;
    tregexOpStr = "ROOT " + //main clause dominates...
            " <+(S) NP|SBAR  <+(VP|S) VB|VBD|VBP|VBZ  !<+(VP) TO"; //AND also dominates a finite, non-participle verb
    //allowing VBN would allow participial phrases like "founded by Bill Gates"

    //" [ < /^(PRP|N.*|SBAR|PP)$/ " + //either PRP for pronoun, N for NP|NN|NNS...
    //" | < (S < (VP < TO|VBG)) ] " + // or a non-finite verb phrase (e.g., "walking")

    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    boolean res = matcher.find();
    return res;
}

From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java

License:Open Source License

/**
 * e.g., John and James like Susan.  ->  John likes Susan.
 * /*from   w  ww .j  ava  2 s . c  o m*/
 */
private void extractConjoinedNPs(Collection<Question> extracted, Question input) {
    String tregexOpStr;
    TregexPattern matchPattern;
    Tree conjoinedNode;
    Tree parent;

    TregexMatcher matcher;
    Question newQuestion;

    //only extract conjoined NPs that are arguments or adjuncts of the main verb
    // in the tree, this means the closest S will be the one under the root
    tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ "
            + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction
            + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form"
            + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) 
            + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR ";
    //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.")
    matchPattern = TregexPatternFactory.getPattern(tregexOpStr);
    matcher = matchPattern.matcher(input.getIntermediateTree());
    List<Integer> nodeIndexes = new ArrayList<Integer>();
    List<Integer> parentIDs = new ArrayList<Integer>();

    while (matcher.find()) {
        //store the parents' IDs (in the tree)
        parent = matcher.getNode("parent");
        parentIDs.add(parent.nodeNumber(input.getIntermediateTree()));

        conjoinedNode = matcher.getNode("child");
        //store the conjoined nodes' index into their parent's list of children
        int idx = parent.objectIndexOf(conjoinedNode);
        if (!nodeIndexes.contains(idx))
            nodeIndexes.add(idx);
    }

    //for each of the conjoined children,
    //create a new tree by removing all the nodes they are conjoined with
    Collections.sort(nodeIndexes);//sort, just to keep them in the original order
    for (int i = 0; i < nodeIndexes.size(); i++) {
        newQuestion = input.deeperCopy();

        Tree t = newQuestion.getIntermediateTree();
        parent = t.getNodeNumber(parentIDs.get(i));
        Tree gparent = parent.parent(t);
        conjoinedNode = parent.getChild(nodeIndexes.get(i));
        String siblingLabel;

        //Remove all the nodes that are conjoined
        //with the selected noun (or are conjunctions, commas).
        //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons
        for (int j = 0; j < parent.numChildren(); j++) {
            if (parent.getChild(j) == conjoinedNode)
                continue;
            siblingLabel = parent.getChild(j).label().toString();
            if (siblingLabel.matches("^[NCP,:S].*")) {
                parent.removeChild(j);
                j--;
            }
        }

        //if there is an trivial unary "NP -> NP",
        //remove the parent and put the child in its place
        if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) {
            int tmpIndex = gparent.objectIndexOf(parent);
            gparent.removeChild(tmpIndex);
            gparent.addChild(tmpIndex, parent.getChild(0));
        }

        correctTense(conjoinedNode, gparent);
        addQuotationMarksIfNeeded(newQuestion.getIntermediateTree());

        //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString());
        if (this.getComputeFeatures)
            newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name
        if (this.getComputeFeatures)
            newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0);
        extracted.add(newQuestion);
    }
}