List of usage examples for edu.stanford.nlp.trees.Tree value()
@Override
public String value()
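Tree.value() returns the String label of a node: the syntactic category for an internal node (e.g. NP, VP) and the word itself for a leaf. A minimal standalone sketch of this behavior (the bracketed sentence is an illustrative example, not taken from the projects below):

import edu.stanford.nlp.trees.Tree;

public class TreeValueDemo {
    public static void main(String[] args) {
        // Parse a Penn Treebank bracketing directly; no parser model is needed.
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN dog)) (VP (VBZ barks))))");
        printValues(tree, 0);
    }

    private static void printValues(Tree t, int depth) {
        // Indent by depth so the constituent structure stays visible.
        StringBuilder indent = new StringBuilder();
        for (int i = 0; i < depth; i++) {
            indent.append("  ");
        }
        System.out.println(indent + (t.isLeaf() ? "leaf: " : "node: ") + t.value());
        for (Tree kid : t.children()) {
            printValues(kid, depth + 1);
        }
    }
}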
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree(StringBuilder sb, Tree t, Tree treeToInsert) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    } else {
        sb.append('(');
        if (t.label() != null) {
            if (t.value() != null) {
                sb.append(t.label().value());
            }
        }
        boolean bInsertNow = false;
        // we try to match trees to find out if we are at the insertion position
        if (treeToInsert != null) {
            List<ParseTreeNode> bigTreeNodes = parsePhrase(t.label().value());
            List<ParseTreeNode> smallTreeNodes = parsePhrase(
                    treeToInsert.getChild(0).getChild(0).getChild(0).label().value());
            System.out.println(t + " \n " + treeToInsert + "\n");
            if (smallTreeNodes.size() > 0 && bigTreeNodes.size() > 0)
                for (ParseTreeNode smallNode : smallTreeNodes) {
                    if (!bigTreeNodes.get(0).getWord().equals("")
                            && bigTreeNodes.get(0).getWord().equalsIgnoreCase(smallNode.getWord()))
                        bInsertNow = true;
                }
        }
        if (bInsertNow) {
            // serialize the current children, then splice in the subtree to insert
            Tree[] kids = t.children();
            if (kids != null) {
                for (Tree kid : kids) {
                    sb.append(' ');
                    toStringBuilderExtenderByAnotherLinkedTree(sb, kid, null);
                }
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree(sb, treeToInsert.getChild(0).getChild(1), null);
            }
        } else {
            Tree[] kids = t.children();
            if (kids != null) {
                for (Tree kid : kids) {
                    sb.append(' ');
                    toStringBuilderExtenderByAnotherLinkedTree(sb, kid, treeToInsert);
                }
            }
        }
        return sb.append(')');
    }
}
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
public StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    } else {
        sb.append('(');
        if (t.label() != null) {
            if (t.value() != null) {
                sb.append(t.label().value());
            }
        }
        Tree[] kids = t.children();
        if (kids != null) {
            for (Tree kid : kids) {
                sb.append(' ');
                toStringBuilder(sb, kid);
            }
        }
        return sb.append(')');
    }
}
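This helper reproduces the standard Penn-bracket serialization of a tree from each node's label().value(). A minimal sketch of how it might be driven, assuming an instance of TreeExtenderByAnotherLinkedTree named extender (the input bracketing is illustrative):

Tree t = Tree.valueOf("(S (NP (DT the) (NN dog)) (VP (VBZ barks)))");
StringBuilder sb = new StringBuilder();
extender.toStringBuilder(sb, t);
// sb now holds the same bracketing that was parsed in
System.out.println(sb);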
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR(Tree t, List<ParseTreeNode> sentence, List<List<ParseTreeNode>> phrases) {
    if (!t.isPreTerminal()) {
        if (t.label() != null) {
            if (t.value() != null) {
                // if ROOT or S, returns empty
                List<ParseTreeNode> nodes = parsePhrase(t.label().value(), t.toString());
                nodes = assignIndexToNodes(nodes, sentence);
                if (!nodes.isEmpty())
                    phrases.add(nodes);
                if (nodes.size() > 0 && nodes.get(0).getId() == null) {
                    if (nodes.size() > 1 && nodes.get(1) != null && nodes.get(1).getId() != null) {
                        try {
                            ParseTreeNode n = nodes.get(0);
                            n.setId(nodes.get(1).getId() - 1);
                            nodes.set(0, n);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    } else {
                        log.severe("Failed alignment:" + nodes);
                    }
                }
            }
        }
        Tree[] kids = t.children();
        if (kids != null) {
            for (Tree kid : kids) {
                navigateR(kid, sentence, phrases);
            }
        }
        return;
    }
}
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l, List<List<ParseTreeNode>> phrases) {
    if (t.isPreTerminal()) {
        if (t.label() != null) {
            List<ParseTreeNode> node = parsePhrase(t.toString());
            if (!node.isEmpty())
                phrases.add(node);
        }
        return;
    } else {
        if (t.label() != null) {
            if (t.value() != null) {
                List<ParseTreeNode> node = parsePhrase(t.label().value());
                if (!node.isEmpty())
                    phrases.add(node);
            }
        }
        Tree[] kids = t.children();
        if (kids != null) {
            for (Tree kid : kids) {
                navigateR1(kid, sentence, l, phrases);
            }
        }
        return;
    }
}
From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java
License:Apache License
private StringBuilder toStringBuilder(StringBuilder sb, Tree t) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    } else {
        sb.append('(');
        if (t.label() != null) {
            if (t.value() != null) {
                sb.append(t.label().value());
            }
        }
        Tree[] kids = t.children();
        if (kids != null) {
            for (Tree kid : kids) {
                sb.append(' ');
                toStringBuilder(sb, kid);
            }
        }
        return sb.append(')');
    }
}
From source file:org.aksw.simba.bengal.triple2nl.property.PropertyVerbalizer.java
License:Apache License
private PropertyVerbalization getTypeByLinguisticAnalysis(String propertyURI, String propertyText) {
    logger.debug("...using linguistical analysis...");
    Annotation document = new Annotation(propertyText);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    String pattern = "";
    PropertyVerbalizationType verbalizationType = PropertyVerbalizationType.UNSPECIFIED;
    boolean firstTokenAuxiliary = false;
    for (CoreMap sentence : sentences) {
        List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
        // get the first word and check if it's 'is' or 'has'
        CoreLabel token = tokens.get(0);
        String word = token.get(TextAnnotation.class);
        String pos = token.get(PartOfSpeechAnnotation.class);
        String lemma = token.getString(LemmaAnnotation.class);
        firstTokenAuxiliary = auxiliaryVerbs.contains(lemma);
        if (lemma.equals("be") || word.equals("have")) {
            pattern += lemma.toUpperCase();
        } else {
            if (pos.startsWith("N")) {
                pattern += "NP";
            } else if (pos.startsWith("V")) {
                pattern += "VP";
            } else {
                pattern += pos;
            }
        }
        if (tokens.size() > 1) {
            pattern += " ";
            for (int i = 1; i < tokens.size(); i++) {
                token = tokens.get(i);
                pos = token.get(PartOfSpeechAnnotation.class);
                if (pos.startsWith("N")) {
                    pattern += "NP";
                } else if (pos.startsWith("V")) {
                    pattern += "VP";
                } else {
                    pattern += pos;
                }
                pattern += " ";
            }
        }
        // get the parse tree
        Tree tree = sentence.get(TreeAnnotation.class);
        // skip ROOT tag
        tree = tree.skipRoot();
        logger.debug("Parse tree:" + tree.pennString());
        // tree.pennPrint();
        // check if VP is directly followed by NP
        // sometimes the parent node is S, SINV, etc.
        if (tree.value().matches(Joiner.on('|').join(Lists.newArrayList(S, SBAR, SBARQ, SINV, FRAGMENT)))) {
            tree = tree.getChild(0);
        }
        boolean useDeterminer = false;
        if (tree.value().equals(VERB_PHRASE.getTag())) {
            for (Tree child : tree.getChildrenAsList()) {
                // check if the first non-terminal is an NP that does not contain a determiner
                if (!child.isPreTerminal()) {
                    if (child.value().equals(NOUN_PHRASE.getTag())
                            && !child.getChild(0).value().equals(DETERMINER.getTag())) {
                        useDeterminer = true;
                    }
                    break;
                }
            }
        }
        // add determiner tag
        if (useDeterminer) {
            String[] split = pattern.split(" ");
            pattern = split[0] + " DET " + Joiner.on(" ").join(Arrays.copyOfRange(split, 1, split.length));
        }
    }
    pattern = pattern.trim();
    // if the first token is an auxiliary verb, we can return VERB
    if (firstTokenAuxiliary) {
        verbalizationType = PropertyVerbalizationType.VERB;
    }
    // check if the pattern matches
    if (pattern.matches(VERB_PATTERN)) {
        logger.debug("...successfully determined type.");
        verbalizationType = PropertyVerbalizationType.VERB;
    }
    return new PropertyVerbalization(propertyURI, propertyText, pattern, verbalizationType);
}
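The snippet reads tokens, POS tags, lemmas and a constituency parse from the annotated document, so the pipeline field is presumably configured with at least those annotators. A minimal sketch of such a setup (the annotator list is an assumption, not taken from the project):

import java.util.Properties;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

Properties props = new Properties();
// tokenize/ssplit/pos/lemma/parse cover the TokensAnnotation, PartOfSpeechAnnotation,
// LemmaAnnotation and TreeAnnotation values read in the method above (assumed configuration)
props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse");
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);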
From source file:org.lambda3.graphene.core.relation_extraction.impl.HeadRelationExtractor.java
License:Open Source License
@Override
public List<BinaryExtraction> doExtraction(Tree parseTree) {
    List<BinaryExtraction> extractions = new ArrayList<>();
    TregexPattern pattern = TregexPattern
            .compile("S !>> S < (NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))");
    // this would generate nicer predicates that conform to matrix predicates (e.g. "failed to increase"),
    // but leads to a minor decrease in P and R in the benchmark test:
    // TregexPattern pattern = TregexPattern.compile("S !>> S < (NP=arg1 $.. (VP=vp [ <+(S|VP) (VP=lowestvp !< VP !< S) | ==(VP=lowestvp !< VP !< S) ]))");
    TregexMatcher matcher = pattern.matcher(parseTree);
    while (matcher.find()) {
        Tree arg1 = matcher.getNode("arg1");
        Tree vp = matcher.getNode("vp");
        Tree lowestvp = matcher.getNode("lowestvp");
        // has arg2?
        TregexPattern arg2Pattern = TregexPattern
                .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))");
        TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp);
        if (arg2Matcher.findAt(lowestvp)) {
            Tree arg2 = arg2Matcher.getNode("arg2");
            List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1);
            List<Word> relationWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, vp, arg2, true, false);
            List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(vp, arg2, true);
            extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords),
                    WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words)));
        } else {
            List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1);
            List<Word> relationWords = ParseTreeExtractionUtils.getContainingWords(vp);
            List<Word> arg2Words = new ArrayList<>();
            extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords),
                    WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words)));
        }
    }
    return extractions;
}
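The notable use of value() here is that the category of the already-matched lowestvp node (e.g. "VP") becomes the root of a second Tregex pattern, which is then anchored at that same node with findAt. A minimal standalone sketch of that idiom (the example VP is illustrative):

import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexPattern;

Tree lowestvp = Tree.valueOf("(VP (VBD saw) (NP (DT the) (NN dog)))");
// lowestvp.value() is "VP", so the pattern is rooted at this node's own category
TregexPattern arg2Pattern = TregexPattern
        .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))");
TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp);
if (arg2Matcher.findAt(lowestvp)) {
    System.out.println("arg2: " + arg2Matcher.getNode("arg2"));   // (NP (DT the) (NN dog))
}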
From source file:org.lambda3.graphene.core.relation_extraction.impl.NestedRelationExtractor.java
License:Open Source License
@Override
public List<BinaryExtraction> doExtraction(Tree parseTree) {
    List<BinaryExtraction> extractions = new ArrayList<>();
    TregexPattern pattern = TregexPattern
            .compile("NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ])");
    // this would generate nicer predicates that conform to matrix predicates (e.g. "failed to increase"),
    // but leads to a minor decrease in P and R in the benchmark test:
    // TregexPattern pattern = TregexPattern.compile("NP=arg1 $.. (VP=vp [ <+(S|VP) (VP=lowestvp !< VP !< S) | ==(VP=lowestvp !< VP !< S) ])");
    TregexMatcher matcher = pattern.matcher(parseTree);
    while (matcher.find()) {
        Tree arg1 = matcher.getNode("arg1");
        Tree vp = matcher.getNode("vp");
        Tree lowestvp = matcher.getNode("lowestvp");
        // has arg2?
        TregexPattern arg2Pattern = TregexPattern
                .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))");
        TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp);
        if (arg2Matcher.findAt(lowestvp)) {
            Tree arg2 = arg2Matcher.getNode("arg2");
            List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1);
            List<Word> relationWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, vp, arg2, true, false);
            List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(vp, arg2, true);
            extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords),
                    WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words)));
        } else {
            List<Word> arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1);
            List<Word> relationWords = ParseTreeExtractionUtils.getContainingWords(vp);
            List<Word> arg2Words = new ArrayList<>();
            extractions.add(new BinaryExtraction(null, WordsUtils.wordsToString(relationWords),
                    WordsUtils.wordsToString(arg1Words), WordsUtils.wordsToString(arg2Words)));
        }
    }
    return extractions;
}
From source file:org.lambda3.graphene.core.relation_extraction.RelationExtractionRunner.java
License:Open Source License
private void processSimpleContext(Element element,
        org.lambda3.text.simplification.discourse.model.SimpleContext simpleContext,
        List<NewExtraction> newExtractions, List<SimpleContext> simpleContexts) {
    // yield additional extractions
    if (exploitContexts) {
        for (BinaryExtraction ex : extractor.extract(simpleContext.getParseTree())) {
            if (!ex.isCoreExtraction()) {
                newExtractions.add(createYieldedExtraction(element.getSentenceIdx(), ex));
            }
        }
    }
    // rephrase as separate extractions
    if (simpleContext.getRelation().equals(Relation.NOUN_BASED) && separateNounBased) {
        // NOUN BASED
        List<BinaryExtraction> extractions = extractor.extract(simpleContext.getParseTree());
        extractions.stream().filter(ex -> ex.isCoreExtraction())
                .forEach(ex -> newExtractions.add(new NewExtraction(true, simpleContext.getRelation(),
                        new Extraction(ExtractionType.NOUN_BASED, ex.getConfidence().orElse(null),
                                element.getSentenceIdx(), element.getContextLayer(), ex.getRelation(),
                                ex.getArg1(), ex.getArg2()))));
    } else if (simpleContext.getRelation().equals(Relation.PURPOSE) && separatePurposes) {
        // PURPOSES
        TregexPattern pattern = TregexPattern.compile(
                "VP=vp !>> VP [ <+(VP) (VP !< VP < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))) | ==(VP !< VP < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))) ]");
        TregexMatcher matcher = pattern.matcher(simpleContext.getPhrase());
        while (matcher.find()) {
            Tree vp = matcher.getNode("vp");
            Tree arg2 = matcher.getNode("arg2");
            List<Word> relationWords = ParseTreeExtractionUtils.getWordsInBetween(simpleContext.getPhrase(),
                    vp, arg2, true, false);
            List<Word> arg2Words = ParseTreeExtractionUtils.getFollowingWords(vp, arg2, true);
            newExtractions.add(new NewExtraction(true, simpleContext.getRelation(),
                    new Extraction(ExtractionType.VERB_BASED, null, element.getSentenceIdx(),
                            element.getContextLayer() + 1, WordsUtils.wordsToString(relationWords),
                            element.getText(), WordsUtils.wordsToString(arg2Words))));
        }
    } else if (simpleContext.getRelation().equals(Relation.ATTRIBUTION) && separateAttributions) {
        // ATTRIBUTION
        TregexPattern pattern = TregexPattern.compile(
                "S !>> S < (NP=arg1 $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==(VP=lowestvp !< VP) ]))");
        TregexMatcher matcher = pattern.matcher(simpleContext.getPhrase());
        while (matcher.find()) {
            Tree arg1 = matcher.getNode("arg1");
            Tree vp = matcher.getNode("vp");
            Tree lowestvp = matcher.getNode("lowestvp");
            List<Word> arg1Words;
            List<Word> relationWords;
            // has arg2?
            TregexPattern arg2Pattern = TregexPattern
                    .compile(lowestvp.value() + " < (PP|NP|S|SBAR=arg2 !$,, (PP|NP|S|SBAR))");
            TregexMatcher arg2Matcher = arg2Pattern.matcher(lowestvp);
            if (arg2Matcher.findAt(lowestvp)) {
                Tree arg2 = arg2Matcher.getNode("arg2");
                arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1);
                relationWords = ParseTreeExtractionUtils.getWordsInBetween(simpleContext.getPhrase(), vp, arg2,
                        true, false);
            } else {
                arg1Words = ParseTreeExtractionUtils.getContainingWords(arg1);
                relationWords = ParseTreeExtractionUtils.getContainingWords(vp);
            }
            newExtractions.add(new NewExtraction(true, simpleContext.getRelation(),
                    new Extraction(ExtractionType.VERB_BASED, null, element.getSentenceIdx(),
                            element.getContextLayer() + 1, WordsUtils.wordsToString(relationWords),
                            WordsUtils.wordsToString(arg1Words), element.getText())));
        }
    } else {
        // add as simple context
        SimpleContext c = new SimpleContext(
                WordsUtils.wordsToString(ParseTreeExtractionUtils.getContainingWords(simpleContext.getPhrase())),
                simpleContext.getRelation());
        simpleContext.getTimeInformation().ifPresent(t -> c.setTimeInformation(t));
        simpleContexts.add(c);
    }
}
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
protected static Optional<Word> getHeadVerb(Tree vp) {
    TregexPattern pattern = TregexPattern.compile(
            vp.value() + " [ <+(VP) (VP=lowestvp !< VP < /V../=v) | ==(VP=lowestvp !< VP < /V../=v) ]");
    TregexMatcher matcher = pattern.matcher(vp);
    while (matcher.findAt(vp)) {
        return Optional.of(ParseTreeExtractionUtils.getContainingWords(matcher.getNode("v")).get(0));
    }
    return Optional.empty();
}
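A minimal sketch of how this helper might behave, assuming it is called from a subclass of ExtractionRule (the example VP, whose head verb sits at the topmost VP level, is illustrative):

Tree vp = Tree.valueOf("(VP (VBD failed) (S (VP (TO to) (VP (VB increase)))))");
// vp.value() is "VP"; the pattern looks for the lowest VP in an unbroken VP chain
// that directly contains a verb tag (/V../), here the matrix VP with VBD "failed"
Optional<Word> head = getHeadVerb(vp);
head.ifPresent(w -> System.out.println(w.word()));   // failed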