List of usage examples for edu.stanford.nlp.trees.tregex TregexMatcher findAt
public boolean findAt(Tree node)
From source file:org.lambda3.graphene.core.relation_extraction.impl.HeadRelationExtractor.java
License:Open Source License
@Override public List<BinaryExtraction> doExtraction(Tree parseTree) { List<BinaryExtraction> extractions = new ArrayList<>(); TregexPattern pattern = TregexPattern .compile("S !>> S < (NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))"); // this will generate nicer predicates that conform to matrix predicates (e.g. "failed to increase"), but lead to a minor decrease in P and R in benchmark test // TregexPattern pattern = TregexPattern.compile("S !>> S < (NP=arg1 $.. (VP=vp [ <+(S|VP) (VP=lowestvp !< VP !< S) | ==(VP=lowestvp !< VP !< S) ]))"); TregexMatcher matcher = pattern.matcher(parseTree); while (matcher.find()) { Tree arg1 = matcher.getNode("arg1"); Tree vp = matcher.getNode("vp"); Tree lowestvp = matcher.getNode("lowestvp"); // has arg2 ? TregexPattern arg2Pattern = TregexPattern .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))"); TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp); if (arg2Matcher.findAt(lowestvp)) { Tree arg2 = arg2Matcher.getNode("arg2"); List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1); List<Word> relationWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, vp, arg2, true, false);/*w w w . j av a 2 s . co m*/ List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(vp, arg2, true); extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords), WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words))); } else { List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1); List<Word> relationWords = ParseTreeExtractionUtils.getContainingWords(vp); List<Word> arg2Words = new ArrayList<>(); extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords), WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words))); } } return extractions; }
From source file:org.lambda3.graphene.core.relation_extraction.impl.NestedRelationExtractor.java
License:Open Source License
@Override public List<BinaryExtraction> doExtraction(Tree parseTree) { List<BinaryExtraction> extractions = new ArrayList<>(); TregexPattern pattern = TregexPattern .compile("NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ])"); // this will generate nicer predicates that conform to matrix predicates (e.g. "failed to increase"), but lead to a minor decrease in P and R in benchmark test // TregexPattern pattern = TregexPattern.compile("NP=arg1 $.. (VP=vp [ <+(S|VP) (VP=lowestvp !< VP !< S) | ==(VP=lowestvp !< VP !< S) ])"); TregexMatcher matcher = pattern.matcher(parseTree); while (matcher.find()) { Tree arg1 = matcher.getNode("arg1"); Tree vp = matcher.getNode("vp"); Tree lowestvp = matcher.getNode("lowestvp"); // has arg2 ? TregexPattern arg2Pattern = TregexPattern .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))"); TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp); if (arg2Matcher.findAt(lowestvp)) { Tree arg2 = arg2Matcher.getNode("arg2"); List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1); List<Word> relationWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, vp, arg2, true, false);/*from w w w .j a v a 2 s .co m*/ List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(vp, arg2, true); extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords), WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words))); } else { List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1); List<Word> relationWords = ParseTreeExtractionUtils.getContainingWords(vp); List<Word> arg2Words = new ArrayList<>(); extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords), WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words))); } } return extractions; }
From source file:org.lambda3.graphene.core.relation_extraction.RelationExtractionRunner.java
License:Open Source License
private void processSimpleContext(Element element, org.lambda3.text.simplification.discourse.model.SimpleContext simpleContext, List<NewExtraction> newExtractions, List<SimpleContext> simpleContexts) { // yield additional extractions if (exploitContexts) { for (BinaryExtraction ex : extractor.extract(simpleContext.getParseTree())) { if (!ex.isCoreExtraction()) { newExtractions.add(createYieldedExtraction(element.getSentenceIdx(), ex)); }/*from w ww . j a v a 2s . c o m*/ } } // rephrase as separate extractions if (simpleContext.getRelation().equals(Relation.NOUN_BASED) && separateNounBased) { // NOUN BASED List<BinaryExtraction> extractions = extractor.extract(simpleContext.getParseTree()); extractions.stream().filter(ex -> ex.isCoreExtraction()) .forEach(ex -> newExtractions.add(new NewExtraction(true, simpleContext.getRelation(), new Extraction(ExtractionType.NOUN_BASED, ex.getConfidence().orElse(null), element.getSentenceIdx(), element.getContextLayer(), ex.getRelation(), ex.getArg1(), ex.getArg2())))); } else if (simpleContext.getRelation().equals(Relation.PURPOSE) && separatePurposes) { // PURPOSES TregexPattern pattern = TregexPattern.compile( "VP=vp !>> VP [ <+(VP) (VP !< VP < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))) | ==(VP !< VP < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))) ]"); TregexMatcher matcher = pattern.matcher(simpleContext.getPhrase()); while (matcher.find()) { Tree vp = matcher.getNode("vp"); Tree arg2 = matcher.getNode("arg2"); List<Word> relationWords = ParseTreeExtractionUtils.getWordsInBetween(simpleContext.getPhrase(), vp, arg2, true, false); List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(vp, arg2, true); newExtractions.add(new NewExtraction(true, simpleContext.getRelation(), new Extraction(ExtractionType.VERB_BASED, null, element.getSentenceIdx(), element.getContextLayer() + 1, WordsUtils.wordsToString(relationWords), element.getText(), WordsUtils.wordsToString(arg2Words)))); } } else if 
(simpleContext.getRelation().equals(Relation.ATTRIBUTION) && separateAttributions) { // ATTRIBUTION TregexPattern pattern = TregexPattern.compile( "S !>> S < (NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))"); TregexMatcher matcher = pattern.matcher(simpleContext.getPhrase()); while (matcher.find()) { Tree arg1 = matcher.getNode("arg1"); Tree vp = matcher.getNode("vp"); Tree lowestvp = matcher.getNode("lowestvp"); List<Word> arg1Words; List<Word> relationWords; // has arg2 ? TregexPattern arg2Pattern = TregexPattern .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))"); TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp); if (arg2Matcher.findAt(lowestvp)) { Tree arg2 = arg2Matcher.getNode("arg2"); arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1); relationWords = ParseTreeExtractionUtils.getWordsInBetween(simpleContext.getPhrase(), vp, arg2, true, false); } else { arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1); relationWords = ParseTreeExtractionUtils.getContainingWords(vp); } newExtractions.add(new NewExtraction(true, simpleContext.getRelation(), new Extraction(ExtractionType.VERB_BASED, null, element.getSentenceIdx(), element.getContextLayer() + 1, WordsUtils.wordsToString(relationWords), WordsUtils.wordsToString(arg1Words), element.getText()))); } } else { // add as simple context SimpleContext c = new SimpleContext( WordsUtils .wordsToString(ParseTreeExtractionUtils.getContainingWords(simpleContext.getPhrase())), simpleContext.getRelation()); simpleContext.getTimeInformation().ifPresent(t -> c.setTimeInformation(t)); simpleContexts.add(c); } }
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
/**
 * Finds the head verb of a verb phrase.
 *
 * <p>Starting at {@code vp}, the pattern follows a chain of nested VPs down to the VP that has
 * no VP child (or uses {@code vp} itself if it has none) and selects a child of that VP whose
 * label matches the regular expression {@code V..}. The first word under that child is
 * returned.
 *
 * @param vp the verb phrase node to inspect; the pattern is anchored at this node
 * @return the first word of the matched verb node, or {@link Optional#empty()} if the pattern
 *         does not match at {@code vp} or the matched node contains no words
 */
protected static Optional<Word> getHeadVerb(Tree vp) {
    TregexPattern pattern = TregexPattern.compile(
            vp.value() + " [ <+(VP) (VP=lowestvp !< VP < /V../=v) | ==(VP=lowestvp !< VP < /V../=v) ]");
    TregexMatcher matcher = pattern.matcher(vp);

    // Only the first match is of interest, so a plain conditional is used (the former
    // "while" loop returned unconditionally in its first iteration).
    if (!matcher.findAt(vp)) {
        return Optional.empty();
    }

    List<Word> verbWords = ParseTreeExtractionUtils.getContainingWords(matcher.getNode("v"));
    if (verbWords.isEmpty()) {
        // Report "no head verb" instead of failing with an IndexOutOfBoundsException.
        return Optional.empty();
    }
    return Optional.of(verbWords.get(0));
}
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
/**
 * Returns a new list consisting of {@code words} followed by all words yielded by {@code tree}.
 *
 * <p>The first word of {@code tree} is lowercased unless the pattern
 * {@code <label> <<, NNP|NNPS} matches at {@code tree}, i.e. unless the tree's leftmost
 * descendant chain contains a proper-noun tag. The input list is not modified.
 *
 * @param words the words to start with
 * @param tree  the subtree whose words are appended
 * @return a fresh list holding {@code words} plus the (possibly lowercased) words of {@code tree}
 */
private static List<Word> appendWordsFromTree(List<Word> words, Tree tree) {
    List<Word> combined = new ArrayList<>(words);

    TregexMatcher properNounStart = TregexPattern.compile(tree.value() + " <<, NNP|NNPS").matcher(tree);

    boolean atFirstWord = true;
    for (Word current : tree.yieldWords()) {
        if (atFirstWord) {
            atFirstWord = false;
            // The matcher is consulted for the first word only.
            combined.add(properNounStart.findAt(tree) ? current : WordsUtils.lowercaseWord(current));
        } else {
            combined.add(current);
        }
    }

    return combined;
}
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
protected static List<Word> rephraseIntraSententialAttribution(List<Word> words) { try {/* ww w . j av a 2s.com*/ List<Word> res = new ArrayList<>(); Tree parseTree = ParseTreeParser.parse(WordsUtils.wordsToProperSentenceString(words)); TregexPattern p = TregexPattern.compile("ROOT << (S !> S < (NP=np ?$,, PP=pp $.. VP=vp))"); TregexMatcher matcher = p.matcher(parseTree); if (matcher.findAt(parseTree)) { Tree pp = matcher.getNode("pp"); // optional Tree np = matcher.getNode("np"); Tree vp = matcher.getNode("vp"); Tense tense = getTense(vp); if (tense.equals(Tense.PRESENT)) { res.add(new Word("This")); res.add(new Word("is")); res.add(new Word("what")); } else { res.add(new Word("This")); res.add(new Word("was")); res.add(new Word("what")); } res = appendWordsFromTree(res, np); res = appendWordsFromTree(res, vp); if (pp != null) { res = appendWordsFromTree(res, pp); } } return res; } catch (ParseTreeException e) { return words; } }
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
protected static List<Word> getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tree vbgn) { Number number = getNumber(np); Tense tense = getTense(vp);/*from www .j a va 2 s.c om*/ TregexPattern p = TregexPattern.compile(vbgn.value() + " <<: (having . (been . VBN=vbn))"); TregexPattern p2 = TregexPattern.compile(vbgn.value() + " <<: (having . VBN=vbn)"); TregexPattern p3 = TregexPattern.compile(vbgn.value() + " <<: (being . VBN=vbn)"); TregexMatcher matcher = p.matcher(s); if (matcher.findAt(s)) { List<Word> res = new ArrayList<>(); res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have")); res.add(new Word("been")); List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; } matcher = p2.matcher(s); if (matcher.findAt(s)) { List<Word> res = new ArrayList<>(); res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have")); List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; } matcher = p3.matcher(s); if (matcher.findAt(s)) { List<Word> res = new ArrayList<>(); if (tense.equals(Tense.PRESENT)) { res.add(new Word((number.equals(Number.SINGULAR)) ? "is" : "are")); } else { res.add(new Word((number.equals(Number.SINGULAR)) ? "was" : "were")); } List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; } // default List<Word> res = new ArrayList<>(); if (tense.equals(Tense.PRESENT)) { res.add(new Word((number.equals(Number.SINGULAR)) ? "is" : "are")); } else { res.add(new Word((number.equals(Number.SINGULAR)) ? 
"was" : "were")); } List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, vbgn, true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; }
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.CoordinationExtractor.java
License:Open Source License
/**
 * Tells whether {@code s} directly contains an {@code NP} child that is followed by a sibling
 * {@code VP}.
 *
 * @param s the node to test; the pattern is built from its label and anchored at it
 * @return {@code true} if the pattern {@code <label> < (NP $.. VP)} matches at {@code s}
 */
private static boolean isNPVPClause(Tree s) {
    return TregexPattern.compile(s.value() + " < (NP $.. VP)").matcher(s).findAt(s);
}
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.CoordinationExtractor.java
License:Open Source License
@Override public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException { // TregexPattern p = TregexPattern.compile("ROOT <<: (S=s < (S < (NP $.. VP) $.. (S < (NP $.. VP))))"); TregexPattern p = TregexPattern.compile("ROOT <<: (S=s < (S $.. S))"); TregexMatcher matcher = p.matcher(leaf.getParseTree()); while (matcher.findAt(leaf.getParseTree())) { // List<Tree> siblings = getSiblings(matcher.getNode("s"), Arrays.asList("S")).stream().filter(t -> isNPVPClause(t)).collect(Collectors.toList()); List<Tree> siblings = getSiblings(matcher.getNode("s"), Arrays.asList("S")).stream() .collect(Collectors.toList()); if (siblings.size() >= 2) { // constituents List<Word> precedingWords = ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), siblings.get(0), false); List<Word> followingWords = ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), siblings.get(siblings.size() - 1), false); List<Leaf> constituents = new ArrayList<>(); for (Tree sibling : siblings) { List<Word> words = new ArrayList<Word>(); words.addAll(precedingWords); words.addAll(ParseTreeExtractionUtils.getContainingWords(sibling)); words.addAll(followingWords); Leaf constituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words)); constituents.add(constituent); }// ww w. j av a2 s. c om List<Word> cuePhraseWords = null; Relation relation = Relation.UNKNOWN_COORDINATION; if (constituents.size() == 2) { cuePhraseWords = ParseTreeExtractionUtils.getWordsInBetween(leaf.getParseTree(), siblings.get(0), siblings.get(siblings.size() - 1), false, false); relation = classifer.classifyCoordinating(cuePhraseWords).orElse(Relation.UNKNOWN_COORDINATION); } Extraction res = new Extraction(getClass().getSimpleName(), false, cuePhraseWords, relation, true, constituents); return Optional.of(res); } } return Optional.empty(); }
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.ListNP.ListNPExtractor.java
License:Open Source License
@Override public Optional<Extraction> extract(Leaf leaf) throws ParseTreeException { TregexPattern p = TregexPattern.compile(pattern); TregexMatcher matcher = p.matcher(leaf.getParseTree()); while (matcher.findAt(leaf.getParseTree())) { Optional<ListNPSplitter.Result> r = ListNPSplitter.splitList(leaf.getParseTree(), matcher.getNode("np")); if (r.isPresent()) { // constituents List<Word> precedingWords = ParseTreeExtractionUtils.getPrecedingWords(leaf.getParseTree(), matcher.getNode("np"), false); List<Word> followingWords = ParseTreeExtractionUtils.getFollowingWords(leaf.getParseTree(), matcher.getNode("np"), false); List<Leaf> constituents = new ArrayList<>(); if (r.get().getIntroductionWords().isPresent()) { List<Word> words = new ArrayList<Word>(); words.addAll(precedingWords); words.addAll(r.get().getIntroductionWords().get()); words.addAll(followingWords); Leaf constituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words)); constituent.dontAllowSplit(); constituents.add(constituent); }/*from w w w . ja va2s.com*/ for (List<Word> element : r.get().getElementsWords()) { List<Word> words = new ArrayList<Word>(); words.addAll(precedingWords); words.addAll(element); words.addAll(followingWords); Leaf constituent = new Leaf(getClass().getSimpleName(), WordsUtils.wordsToProperSentenceString(words)); constituent.dontAllowSplit(); constituents.add(constituent); } Extraction res = new Extraction(getClass().getSimpleName(), false, null, r.get().getRelation(), true, constituents); return Optional.of(res); } } return Optional.empty(); }