Example usage for edu.stanford.nlp.trees.tregex TregexMatcher findAt

List of usage examples for edu.stanford.nlp.trees.tregex TregexMatcher findAt

Introduction

On this page you can find example usages of edu.stanford.nlp.trees.tregex TregexMatcher findAt.

Prototype

public boolean findAt(Tree node) 

Source Link

Document

Similar to find(), but matches only if node is the root of the match.

Usage

From source file:org.lambda3.graphene.core.relation_extraction.impl.HeadRelationExtractor.java

License:Open Source License

/**
 * Extracts binary relations from the top-level clause(s) of a parse tree.
 *
 * <p>For every match of the clause pattern (an {@code S} that is not dominated by another
 * {@code S} and has an {@code NP} child followed by a sibling {@code VP}) one extraction is
 * produced. The {@code NP} supplies the first argument. If the lowest {@code VP} of the verb
 * chain has a PP/NP/S/SBAR child that is not preceded by another such sibling, that child is
 * used as the anchor for the relation words and the second argument; otherwise the whole
 * {@code VP} becomes the relation and the second argument is empty.
 *
 * @param parseTree the tree that is searched
 * @return one extraction per clause match, in match order; empty if nothing matched
 */
@Override
public List<BinaryExtraction> doExtraction(Tree parseTree) {
    final List<BinaryExtraction> result = new ArrayList<>();

    // Alternative clause pattern kept for reference:
    //   S !>> S < (NP=arg1 $.. (VP=vp [ <+(S|VP) (VP=lowestvp !< VP !< S) | ==(VP=lowestvp !< VP !< S) ]))
    // It generates nicer predicates that conform to matrix predicates (e.g. "failed to increase"),
    // but leads to a minor decrease in P and R in the benchmark test.
    final TregexPattern clausePattern = TregexPattern
            .compile("S !>> S < (NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))");

    for (TregexMatcher clause = clausePattern.matcher(parseTree); clause.find();) {
        final Tree subject = clause.getNode("arg1");
        final Tree predicate = clause.getNode("vp");
        final Tree lowestVp = clause.getNode("lowestvp");

        // Probe the lowest VP itself (findAt anchors the match at that node) for a second argument.
        final TregexMatcher objectProbe = TregexPattern
                .compile(lowestVp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))")
                .matcher(lowestVp);
        final boolean hasObject = objectProbe.findAt(lowestVp);

        final List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(subject);
        final List<Word> relationWords;
        final List<Word> arg2Words;
        if (hasObject) {
            final Tree object = objectProbe.getNode("arg2");
            relationWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, predicate, object, true,
                    false);
            arg2Words = ParseTreeExtractionUtils.getFollowingWords(predicate, object, true);
        } else {
            // no second argument: the complete VP is the relation
            relationWords = ParseTreeExtractionUtils.getContainingWords(predicate);
            arg2Words = new ArrayList<>();
        }

        result.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords),
                WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words)));
    }

    return result;
}

From source file:org.lambda3.graphene.core.relation_extraction.impl.NestedRelationExtractor.java

License:Open Source License

/**
 * Extracts binary relations from every {@code NP} that is followed by a sibling {@code VP},
 * at any depth of the parse tree.
 *
 * <p>Each pattern match yields one extraction whose first argument is the matched {@code NP}.
 * When the lowest {@code VP} of the verb chain has a PP/NP/S/SBAR child that is not preceded
 * by another such sibling, that child anchors the relation words and the second argument;
 * otherwise the whole {@code VP} is the relation and the second argument is empty.
 *
 * @param parseTree the tree that is searched
 * @return one extraction per match, in match order; empty if nothing matched
 */
@Override
public List<BinaryExtraction> doExtraction(Tree parseTree) {
    final List<BinaryExtraction> found = new ArrayList<>();

    // Alternative pattern kept for reference:
    //   NP=arg1 $.. (VP=vp [ <+(S|VP) (VP=lowestvp !< VP !< S) | ==(VP=lowestvp !< VP !< S) ])
    // It generates nicer predicates that conform to matrix predicates (e.g. "failed to increase"),
    // but leads to a minor decrease in P and R in the benchmark test.
    final TregexMatcher npVp = TregexPattern
            .compile("NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ])")
            .matcher(parseTree);

    while (npVp.find()) {
        final Tree firstArg = npVp.getNode("arg1");
        final Tree verbPhrase = npVp.getNode("vp");
        final Tree innermostVp = npVp.getNode("lowestvp");

        // Does the innermost VP (and only that node, hence findAt) offer a second argument?
        final TregexPattern secondArgPattern = TregexPattern
                .compile(innermostVp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))");
        final TregexMatcher secondArgMatcher = secondArgPattern.matcher(innermostVp);
        final boolean hasSecondArg = secondArgMatcher.findAt(innermostVp);

        final List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(firstArg);
        final List<Word> relationWords;
        final List<Word> arg2Words;
        if (!hasSecondArg) {
            // nothing to attach: the complete VP is the relation
            relationWords = ParseTreeExtractionUtils.getContainingWords(verbPhrase);
            arg2Words = new ArrayList<>();
        } else {
            final Tree secondArg = secondArgMatcher.getNode("arg2");
            relationWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, verbPhrase, secondArg,
                    true, false);
            arg2Words = ParseTreeExtractionUtils.getFollowingWords(verbPhrase, secondArg, true);
        }

        found.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords),
                WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words)));
    }

    return found;
}

From source file:org.lambda3.graphene.core.relation_extraction.RelationExtractionRunner.java

License:Open Source License

/**
 * Converts one simple context of {@code element} into output, appending either to
 * {@code newExtractions} or to {@code simpleContexts}.
 *
 * <ol>
 * <li>If {@code exploitContexts} is set, every non-core extraction found in the context's
 * parse tree is added through {@code createYieldedExtraction}.</li>
 * <li>Then exactly one of the following applies:
 * <ul>
 * <li>{@code NOUN_BASED} relation with {@code separateNounBased}: each core extraction of the
 * parse tree becomes a {@code NOUN_BASED} extraction on the element's own context layer.</li>
 * <li>{@code PURPOSE} relation with {@code separatePurposes}: each pattern match in the phrase
 * becomes a {@code VERB_BASED} extraction one layer deeper, with the element's text as the
 * first argument.</li>
 * <li>{@code ATTRIBUTION} relation with {@code separateAttributions}: each clause match in the
 * phrase becomes a {@code VERB_BASED} extraction one layer deeper, with the element's text as
 * the second argument.</li>
 * <li>Otherwise the phrase is added to {@code simpleContexts} as a plain context, carrying the
 * time information over when present.</li>
 * </ul></li>
 * </ol>
 *
 * @param element the element the context belongs to
 * @param simpleContext the context to convert
 * @param newExtractions receives any extraction created here
 * @param simpleContexts receives the context when it is not rephrased as an extraction
 */
private void processSimpleContext(Element element,
        org.lambda3.text.simplification.discourse.model.SimpleContext simpleContext,
        List<NewExtraction> newExtractions, List<SimpleContext> simpleContexts) {

    // yield additional extractions
    if (exploitContexts) {
        extractor.extract(simpleContext.getParseTree()).stream()
                .filter(ex -> !ex.isCoreExtraction())
                .forEach(ex -> newExtractions.add(createYieldedExtraction(element.getSentenceIdx(), ex)));
    }

    // rephrase as separate extractions
    if (simpleContext.getRelation().equals(Relation.NOUN_BASED) && separateNounBased) {

        // NOUN BASED
        for (BinaryExtraction ex : extractor.extract(simpleContext.getParseTree())) {
            if (!ex.isCoreExtraction()) {
                continue;
            }
            newExtractions.add(new NewExtraction(true, simpleContext.getRelation(),
                    new Extraction(ExtractionType.NOUN_BASED, ex.getConfidence().orElse(null),
                            element.getSentenceIdx(), element.getContextLayer(), ex.getRelation(),
                            ex.getArg1(), ex.getArg2())));
        }

    } else if (simpleContext.getRelation().equals(Relation.PURPOSE) && separatePurposes) {

        // PURPOSES
        final TregexMatcher purpose = TregexPattern.compile(
                "VP=vp !>> VP [ <+(VP) (VP !< VP < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))) | ==(VP !< VP < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))) ]")
                .matcher(simpleContext.getPhrase());
        while (purpose.find()) {
            final Tree verbPhrase = purpose.getNode("vp");
            final Tree secondArg = purpose.getNode("arg2");

            final List<Word> relationWords = ParseTreeExtractionUtils
                    .getWordsInBetween(simpleContext.getPhrase(), verbPhrase, secondArg, true, false);
            final List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(verbPhrase, secondArg,
                    true);

            // the element's text takes the first-argument slot
            newExtractions.add(new NewExtraction(true, simpleContext.getRelation(),
                    new Extraction(ExtractionType.VERB_BASED, null, element.getSentenceIdx(),
                            element.getContextLayer() + 1, WordsUtils.wordsToString(relationWords),
                            element.getText(), WordsUtils.wordsToString(arg2Words))));
        }

    } else if (simpleContext.getRelation().equals(Relation.ATTRIBUTION) && separateAttributions) {

        // ATTRIBUTION
        final TregexMatcher clause = TregexPattern.compile(
                "S !>> S < (NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))")
                .matcher(simpleContext.getPhrase());
        while (clause.find()) {
            final Tree firstArg = clause.getNode("arg1");
            final Tree verbPhrase = clause.getNode("vp");
            final Tree innermostVp = clause.getNode("lowestvp");

            // has arg2 ? (checked at the innermost VP only, hence findAt)
            final TregexMatcher secondArgMatcher = TregexPattern
                    .compile(innermostVp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))")
                    .matcher(innermostVp);
            final boolean hasSecondArg = secondArgMatcher.findAt(innermostVp);

            final List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(firstArg);
            final List<Word> relationWords;
            if (hasSecondArg) {
                relationWords = ParseTreeExtractionUtils.getWordsInBetween(simpleContext.getPhrase(),
                        verbPhrase, secondArgMatcher.getNode("arg2"), true, false);
            } else {
                relationWords = ParseTreeExtractionUtils.getContainingWords(verbPhrase);
            }

            // the element's text takes the second-argument slot
            newExtractions.add(new NewExtraction(true, simpleContext.getRelation(),
                    new Extraction(ExtractionType.VERB_BASED, null, element.getSentenceIdx(),
                            element.getContextLayer() + 1, WordsUtils.wordsToString(relationWords),
                            WordsUtils.wordsToString(arg1Words), element.getText())));
        }

    } else {

        // add as simple context
        final List<Word> phraseWords = ParseTreeExtractionUtils.getContainingWords(simpleContext.getPhrase());
        final SimpleContext plain = new SimpleContext(WordsUtils.wordsToString(phraseWords),
                simpleContext.getRelation());
        simpleContext.getTimeInformation().ifPresent(plain::setTimeInformation);
        simpleContexts.add(plain);
    }
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java

License:Open Source License

/**
 * Looks up the head verb of a verb phrase.
 *
 * <p>The pattern is anchored at {@code vp} itself and selects the lowest {@code VP} in the
 * chain of nested {@code VP}s (or {@code vp} itself when it has no {@code VP} child); the
 * first word below that node's {@code /V../} child is returned.
 *
 * <p>NOTE(review): {@code /V../} needs two characters after the {@code V}, so a bare
 * {@code VB} tag would not be matched - confirm this is intended.
 *
 * @param vp the verb phrase to inspect
 * @return the head verb, or an empty {@code Optional} if the pattern does not match at {@code vp}
 */
protected static Optional<Word> getHeadVerb(Tree vp) {
    final String query = vp.value()
            + " [ <+(VP) (VP=lowestvp !< VP < /V../=v) | ==(VP=lowestvp !< VP < /V../=v) ]";
    final TregexMatcher matcher = TregexPattern.compile(query).matcher(vp);

    // only the first match is ever used
    if (!matcher.findAt(vp)) {
        return Optional.empty();
    }

    final Tree verbNode = matcher.getNode("v");
    return Optional.of(ParseTreeExtractionUtils.getContainingWords(verbNode).get(0));
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java

License:Open Source License

/**
 * Returns a new list holding {@code words} followed by the words of {@code tree}.
 *
 * <p>The first word of {@code tree} is passed through {@code WordsUtils.lowercaseWord} unless
 * the leftmost descendant of {@code tree} is tagged {@code NNP} or {@code NNPS}; all other
 * words are appended unchanged. The input list is not modified.
 *
 * @param words the words to start with
 * @param tree the tree whose yield is appended
 * @return a fresh list with the combined words
 */
private static List<Word> appendWordsFromTree(List<Word> words, Tree tree) {
    final List<Word> combined = new ArrayList<Word>(words);

    final TregexMatcher startsWithProperNoun = TregexPattern.compile(tree.value() + " <<, NNP|NNPS")
            .matcher(tree);

    int position = 0;
    for (Word word : tree.yieldWords()) {
        // the proper-noun check is only consulted for the very first word
        final boolean lowercase = position == 0 && !startsWithProperNoun.findAt(tree);
        if (lowercase) {
            combined.add(WordsUtils.lowercaseWord(word));
        } else {
            combined.add(word);
        }
        position++;
    }

    return combined;
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java

License:Open Source License

/**
 * Rephrases an attribution as "This is/was what &lt;NP&gt; &lt;VP&gt; [&lt;PP&gt;]".
 *
 * <p>The words are turned into a sentence string and parsed. If the parse contains a
 * top-level {@code S} with an {@code NP} followed by a {@code VP} (optionally preceded by a
 * {@code PP}), the result starts with "This is what" when the tense of the {@code VP} is
 * {@code PRESENT} and with "This was what" otherwise, followed by the words of the
 * {@code NP}, the {@code VP} and, if one was matched, the {@code PP} - in that order.
 *
 * @param words the words of the attribution
 * @return the rephrased words; an empty list if the pattern does not match; the unchanged
 *         input if a {@code ParseTreeException} occurs
 */
protected static List<Word> rephraseIntraSententialAttribution(List<Word> words) {
    try {
        final Tree parseTree = ParseTreeParser.parse(WordsUtils.wordsToProperSentenceString(words));

        final TregexMatcher matcher = TregexPattern
                .compile("ROOT << (S !> S < (NP=np ?$,, PP=pp $.. VP=vp))")
                .matcher(parseTree);
        if (!matcher.findAt(parseTree)) {
            return new ArrayList<>();
        }

        final Tree pp = matcher.getNode("pp"); // optional, null when absent
        final Tree np = matcher.getNode("np");
        final Tree vp = matcher.getNode("vp");

        final Tense tense = getTense(vp);
        final String copula = tense.equals(Tense.PRESENT) ? "is" : "was";

        List<Word> rephrased = new ArrayList<>();
        rephrased.add(new Word("This"));
        rephrased.add(new Word(copula));
        rephrased.add(new Word("what"));

        rephrased = appendWordsFromTree(rephrased, np);
        rephrased = appendWordsFromTree(rephrased, vp);
        if (pp != null) {
            rephrased = appendWordsFromTree(rephrased, pp);
        }
        return rephrased;
    } catch (ParseTreeException e) {
        // parsing failed: hand the input back untouched
        return words;
    }
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java

License:Open Source License

/**
 * Builds the finite-verb rephrasing of a participial clause.
 *
 * <p>Three patterns are tried in order, each anchored at {@code s}:
 * <ol>
 * <li>"having been VBN": result starts with "has"/"have" + "been";</li>
 * <li>"having VBN": result starts with "has"/"have";</li>
 * <li>"being VBN": result starts with "is"/"are" (tense {@code PRESENT}) or "was"/"were".</li>
 * </ol>
 * In these cases the words following the matched {@code VBN} node are appended. If none
 * matches, the result starts with "is"/"are" or "was"/"were" and the words following
 * {@code vbgn} are appended. The singular form is used when the number of {@code np} is
 * {@code SINGULAR}. The first appended word is passed through
 * {@code WordsUtils.lowercaseWord}.
 *
 * <p>NOTE(review): every pattern's root description is the label of {@code vbgn}, yet the
 * matchers are anchored with {@code findAt(s)}; unless {@code s} carries that same label the
 * three patterns presumably never match and the default branch is always taken - confirm
 * against the callers.
 *
 * @param np the noun phrase that determines the grammatical number
 * @param vp the verb phrase that determines the tense
 * @param s the tree the patterns are matched at and the following words are taken from
 * @param vbgn the participle node
 * @return the rephrased words
 */
protected static List<Word> getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tree vbgn) {
    final Number number = getNumber(np);
    final Tense tense = getTense(vp);

    final String participleLabel = vbgn.value();
    final TregexPattern havingBeen = TregexPattern
            .compile(participleLabel + " <<: (having . (been . VBN=vbn))");
    final TregexPattern having = TregexPattern.compile(participleLabel + " <<: (having . VBN=vbn)");
    final TregexPattern being = TregexPattern.compile(participleLabel + " <<: (being . VBN=vbn)");

    final List<Word> rephrased = new ArrayList<>();
    final Tree anchor; // node after which the remaining words are taken

    TregexMatcher matcher = havingBeen.matcher(s);
    if (matcher.findAt(s)) {
        rephrased.add(new Word(number.equals(Number.SINGULAR) ? "has" : "have"));
        rephrased.add(new Word("been"));
        anchor = matcher.getNode("vbn");
    } else {
        matcher = having.matcher(s);
        if (matcher.findAt(s)) {
            rephrased.add(new Word(number.equals(Number.SINGULAR) ? "has" : "have"));
            anchor = matcher.getNode("vbn");
        } else {
            // "being VBN" and the default case share the same auxiliary, only the anchor differs
            matcher = being.matcher(s);
            final boolean beingMatched = matcher.findAt(s);

            final String auxiliary;
            if (tense.equals(Tense.PRESENT)) {
                auxiliary = number.equals(Number.SINGULAR) ? "is" : "are";
            } else {
                auxiliary = number.equals(Number.SINGULAR) ? "was" : "were";
            }
            rephrased.add(new Word(auxiliary));
            anchor = beingMatched ? matcher.getNode("vbn") : vbgn;
        }
    }

    final List<Word> remainder = ParseTreeExtractionUtils.getFollowingWords(s, anchor, true);
    if (!remainder.isEmpty()) {
        remainder.set(0, WordsUtils.lowercaseWord(remainder.get(0)));
    }
    rephrased.addAll(remainder);

    return rephrased;
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.CoordinationExtractor.java

License:Open Source License

/**
 * Tells whether {@code s} has an {@code NP} child that is followed by a sibling {@code VP}.
 *
 * @param s the node to test; the pattern is anchored at this node
 * @return {@code true} if the pattern matches at {@code s}
 */
private static boolean isNPVPClause(Tree s) {
    final String query = s.value() + " < (NP $.. VP)";
    return TregexPattern.compile(query).matcher(s).findAt(s);
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.CoordinationExtractor.java

License:Open Source License

/**
 * Splits a leaf whose sentence consists of coordinated {@code S} clauses.
 *
 * <p>The pattern is anchored at the root of the leaf's parse tree. For the first match whose
 * {@code S} node yields at least two {@code S} siblings, one constituent leaf is built per
 * sibling from the words preceding the first sibling, the sibling's own words and the words
 * following the last sibling. With exactly two constituents the words between the two
 * siblings are used as cue phrase and classified; otherwise the cue phrase is {@code null}
 * and the relation stays {@code UNKNOWN_COORDINATION}.
 *
 * @param leaf the leaf to split
 * @return the extraction, or an empty {@code Optional} if no suitable match exists
 * @throws ParseTreeException declared by the overridden method
 */
@Override
public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
    final Tree root = leaf.getParseTree();

    // Commented-out stricter alternative from the original code, requiring NP-VP clauses:
    //   ROOT <<: (S=s < (S < (NP $.. VP) $.. (S < (NP $.. VP))))
    // together with filtering the siblings through isNPVPClause.
    final TregexMatcher matcher = TregexPattern.compile("ROOT <<: (S=s < (S $.. S))").matcher(root);

    while (matcher.findAt(root)) {
        final List<Tree> clauses = getSiblings(matcher.getNode("s"), Arrays.asList("S")).stream()
                .collect(Collectors.toList());
        if (clauses.size() < 2) {
            continue; // not a coordination, try the next match
        }

        final Tree firstClause = clauses.get(0);
        final Tree lastClause = clauses.get(clauses.size() - 1);

        // words around the coordination are shared by all constituents
        final List<Word> prefix = ParseTreeExtractionUtils.getPrecedingWords(root, firstClause, false);
        final List<Word> suffix = ParseTreeExtractionUtils.getFollowingWords(root, lastClause, false);

        final List<Leaf> constituents = new ArrayList<>();
        for (Tree clause : clauses) {
            final List<Word> sentence = new ArrayList<Word>(prefix);
            sentence.addAll(ParseTreeExtractionUtils.getContainingWords(clause));
            sentence.addAll(suffix);

            constituents.add(new Leaf(getClass().getSimpleName(),
                    WordsUtils.wordsToProperSentenceString(sentence)));
        }

        // a cue phrase is only determined for binary coordinations
        List<Word> cuePhraseWords = null;
        Relation relation = Relation.UNKNOWN_COORDINATION;
        if (constituents.size() == 2) {
            cuePhraseWords = ParseTreeExtractionUtils.getWordsInBetween(root, firstClause, lastClause,
                    false, false);
            relation = classifer.classifyCoordinating(cuePhraseWords).orElse(Relation.UNKNOWN_COORDINATION);
        }

        return Optional.of(new Extraction(getClass().getSimpleName(), false, cuePhraseWords, relation,
                true, constituents));
    }

    return Optional.empty();
}

From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.ListNP.ListNPExtractor.java

License:Open Source License

/**
 * Splits a leaf at a list-like noun phrase.
 *
 * <p>The configured {@code pattern} is anchored at the root of the leaf's parse tree. For the
 * first match whose {@code np} node can be split by {@code ListNPSplitter}, one constituent
 * leaf is created for the introduction words (if present) and one per list element. Each
 * constituent consists of the words preceding the {@code np}, the respective words, and the
 * words following the {@code np}, and is marked with {@code dontAllowSplit()}.
 *
 * @param leaf the leaf to split
 * @return the extraction, or an empty {@code Optional} if no match could be split
 * @throws ParseTreeException declared by the overridden method
 */
@Override
public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException {
    final Tree root = leaf.getParseTree();
    final TregexMatcher matcher = TregexPattern.compile(pattern).matcher(root);

    while (matcher.findAt(root)) {
        final Tree listNp = matcher.getNode("np");

        final Optional<ListNPSplitter.Result> split = ListNPSplitter.splitList(root, listNp);
        if (!split.isPresent()) {
            continue; // this match is not a splittable list, try the next one
        }
        final ListNPSplitter.Result result = split.get();

        // words around the list are shared by all constituents
        final List<Word> prefix = ParseTreeExtractionUtils.getPrecedingWords(root, listNp, false);
        final List<Word> suffix = ParseTreeExtractionUtils.getFollowingWords(root, listNp, false);

        final List<Leaf> constituents = new ArrayList<>();

        // optional introduction comes first
        if (result.getIntroductionWords().isPresent()) {
            final List<Word> sentence = new ArrayList<Word>(prefix);
            sentence.addAll(result.getIntroductionWords().get());
            sentence.addAll(suffix);

            final Leaf introduction = new Leaf(getClass().getSimpleName(),
                    WordsUtils.wordsToProperSentenceString(sentence));
            introduction.dontAllowSplit();
            constituents.add(introduction);
        }

        // then one constituent per list element
        for (List<Word> element : result.getElementsWords()) {
            final List<Word> sentence = new ArrayList<Word>(prefix);
            sentence.addAll(element);
            sentence.addAll(suffix);

            final Leaf item = new Leaf(getClass().getSimpleName(),
                    WordsUtils.wordsToProperSentenceString(sentence));
            item.dontAllowSplit();
            constituents.add(item);
        }

        return Optional.of(new Extraction(getClass().getSimpleName(), false, null, result.getRelation(),
                true, constituents));
    }

    return Optional.empty();
}