List of usage examples for the value() method of edu.stanford.nlp.trees.Tree
@Override
public String value()
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
private static List<Word> appendWordsFromTree(List<Word> words, Tree tree) { List<Word> res = new ArrayList<Word>(); res.addAll(words);//from ww w. j a v a 2 s . c o m TregexPattern p = TregexPattern.compile(tree.value() + " <<, NNP|NNPS"); TregexMatcher matcher = p.matcher(tree); boolean isFirst = true; for (Word word : tree.yieldWords()) { if ((isFirst) && (!matcher.findAt(tree))) { res.add(WordsUtils.lowercaseWord(word)); } else { res.add(word); } isFirst = false; } return res; }
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.ExtractionRule.java
License:Open Source License
protected static List<Word> getRephrasedParticipalS(Tree np, Tree vp, Tree s, Tree vbgn) { Number number = getNumber(np); Tense tense = getTense(vp);/* w w w .ja va 2s. c om*/ TregexPattern p = TregexPattern.compile(vbgn.value() + " <<: (having . (been . VBN=vbn))"); TregexPattern p2 = TregexPattern.compile(vbgn.value() + " <<: (having . VBN=vbn)"); TregexPattern p3 = TregexPattern.compile(vbgn.value() + " <<: (being . VBN=vbn)"); TregexMatcher matcher = p.matcher(s); if (matcher.findAt(s)) { List<Word> res = new ArrayList<>(); res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have")); res.add(new Word("been")); List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; } matcher = p2.matcher(s); if (matcher.findAt(s)) { List<Word> res = new ArrayList<>(); res.add(new Word((number.equals(Number.SINGULAR)) ? "has" : "have")); List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; } matcher = p3.matcher(s); if (matcher.findAt(s)) { List<Word> res = new ArrayList<>(); if (tense.equals(Tense.PRESENT)) { res.add(new Word((number.equals(Number.SINGULAR)) ? "is" : "are")); } else { res.add(new Word((number.equals(Number.SINGULAR)) ? "was" : "were")); } List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, matcher.getNode("vbn"), true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; } // default List<Word> res = new ArrayList<>(); if (tense.equals(Tense.PRESENT)) { res.add(new Word((number.equals(Number.SINGULAR)) ? "is" : "are")); } else { res.add(new Word((number.equals(Number.SINGULAR)) ? 
"was" : "were")); } List<Word> next = ParseTreeExtractionUtils.getFollowingWords(s, vbgn, true); if (next.size() > 0) { next.set(0, WordsUtils.lowercaseWord(next.get(0))); } res.addAll(next); return res; }
From source file:org.lambda3.text.simplification.discourse.runner.discourse_tree.extraction.rules.CoordinationExtractor.java
License:Open Source License
/**
 * Tests whether the Tregex pattern {@code <label of s> < (NP $.. VP)} matches at {@code s}.
 *
 * @param s the tree to test
 * @return {@code true} if the pattern matches at {@code s}
 */
private static boolean isNPVPClause(Tree s) {
    String patternText = s.value() + " < (NP $.. VP)";
    return TregexPattern.compile(patternText).matcher(s).findAt(s);
}
From source file:org.lambda3.text.simplification.discourse.utils.SPOSplitter.java
License:Open Source License
public static Optional<Result> split(String sentence) { try {/*from ww w. j a v a 2 s . c o m*/ Tree parseTree = ParseTreeParser.parse(sentence); // LOGGER.info(ParseTreeVisualizer.prettyPrint(parseTree)); // pattern with object TregexPattern p = TregexPattern.compile( "ROOT <<: (S < (NP=np $.. (VP=vp [ <+(VP) (VP=lowestvp !< VP) | ==VP=lowestvp !< VP ])))"); TregexMatcher matcher = p.matcher(parseTree); while (matcher.findAt(parseTree)) { Tree np = matcher.getNode("np"); Tree vp = matcher.getNode("vp"); Tree lowestvp = matcher.getNode("lowestvp"); // has object? TregexPattern op = TregexPattern .compile(lowestvp.value() + " < (PP|NP|S|SBAR=ob !$,, (PP|NP|S|SBAR))"); TregexMatcher omatcher = op.matcher(lowestvp); if (omatcher.findAt(lowestvp)) { Tree ob = omatcher.getNode("ob"); List<Word> subjectWords = ParseTreeExtractionUtils.getContainingWords(np); List<Word> predicateWords = ParseTreeExtractionUtils.getWordsInBetween(parseTree, vp, ob, true, false); List<Word> objectWords = ParseTreeExtractionUtils.getFollowingWords(vp, ob, true); return Optional.of(new Result(WordsUtils.wordsToString(subjectWords), WordsUtils.wordsToString(predicateWords), WordsUtils.wordsToString(objectWords))); } else { List<Word> subjectWords = ParseTreeExtractionUtils.getContainingWords(np); List<Word> predicateWords = ParseTreeExtractionUtils.getContainingWords(vp); List<Word> objectWords = new ArrayList<>(); return Optional.of(new Result(WordsUtils.wordsToString(subjectWords), WordsUtils.wordsToString(predicateWords), WordsUtils.wordsToString(objectWords))); } } } catch (ParseTreeException e) { LOGGER.error("Could not parse sentence '{}'", sentence); } return Optional.empty(); }
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
private static List<String> findUpwardPath(Tree root, Tree connNode, Tree lca) { List<String> path = new ArrayList<>(); if (connNode == null || lca == null) {// || connNode.equals(lca)) { return path; }/*from w w w. ja va2 s . c om*/ Tree curr = connNode; while (curr != null && !curr.equals(lca)) { String val = curr.value(); if (val != null) { int t = val.indexOf("-"); if (t > 0) { val = val.substring(0, t); } path.add(val); } curr = curr.parent(root); } if (curr != null && curr.equals(lca)) { String val = curr.value(); if (val != null) { int t = val.indexOf("-"); if (t > 0) { val = val.substring(0, t); } path.add(val); } } if (curr == null && path.isEmpty()) { return new ArrayList<>(); } else { return path; } }
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
/**
 * Resolves the Arg1 and Arg2 tree nodes of a relation from the span columns 14 and 20.
 *
 * <p>When the connective category is "Coordinator" and the connective node is labelled "CC",
 * every sibling of the connective that passes {@code isPuncTag} and is not yet an Arg1 node is
 * added to the Arg1 nodes. Each argument is the single node found, or otherwise the result of
 * {@code getLCA} over its nodes.
 *
 * @param root      root of the sentence tree
 * @param cols      columns of the relation record
 * @param spanArray span list handed to {@code getTreeNodesFromSpan}
 * @param connCat   connective category
 * @param connNode  connective node
 * @return a two-element array: Arg1 node, then Arg2 node
 */
private Tree[] getBioArgNodes(Tree root, String[] cols, ArrayList<String> spanArray, String connCat,
        Tree connNode) {
    List<Tree> arg1Nodes = getTreeNodesFromSpan(cols[14], spanArray);

    boolean coordinatingConjunction = connCat.equals("Coordinator") && connNode.value().equals("CC");
    if (coordinatingConjunction) {
        for (Tree sibling : connNode.parent(root).children()) {
            boolean alreadyPresent = arg1Nodes.contains(sibling);
            if (!alreadyPresent && isPuncTag(sibling.value())) {
                arg1Nodes.add(sibling);
            }
        }
    }

    Tree arg1Node;
    if (arg1Nodes.size() == 1) {
        arg1Node = arg1Nodes.get(0);
    } else {
        arg1Node = getLCA(root, arg1Nodes);
    }

    List<Tree> arg2Nodes = getTreeNodesFromSpan(cols[20], spanArray);
    Tree arg2Node;
    if (arg2Nodes.size() == 1) {
        arg2Node = arg2Nodes.get(0);
    } else {
        arg2Node = getLCA(root, arg2Nodes);
    }

    return new Tree[] { arg1Node, arg2Node };
}
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
private Tree[] getArgNodes(Tree root, String[] cols, ArrayList<String> spanArray, String connCat, Tree connNode) { List<Tree> arg1Nodes = getTreeNodesFromSpan(cols[22], spanArray); arg1Nodes.addAll(getTreeNodesFromSpan(cols[29], spanArray)); if (connCat.equals("Coordinator") && connNode.value().equals("CC")) { Tree[] children = connNode.parent(root).children(); for (Tree child : children) { int ind = arg1Nodes.indexOf(child); if (ind == -1 && isPuncTag(child.value())) { arg1Nodes.add(child);//from w w w . j a va 2 s. c om } } } Tree arg1Node = (arg1Nodes.size() == 1) ? arg1Nodes.get(0) : getLCA(root, arg1Nodes); List<Tree> arg2Nodes = getTreeNodesFromSpan(cols[32], spanArray); arg2Nodes.addAll(getTreeNodesFromSpan(cols[39], spanArray)); Tree arg2Node = (arg2Nodes.size() == 1) ? arg2Nodes.get(0) : getLCA(root, arg2Nodes); return new Tree[] { arg1Node, arg2Node }; }
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
private static List<Tree> getInternalNodes(Tree root, Tree node) { // @child_nodes.size == 1 and @child_nodes.first.class != Node List<Tree> result = new ArrayList<>(); if (node != null && !(node.children().length == 1 && node.firstChild() != null && node.firstChild().isLeaf())) { Tree parent = node.parent(root); if (parent != null && !node.value().equals("-NONE-")) { result.add(node);//from ww w . ja v a 2 s .c om } Tree[] children = node.children(); for (Tree child : children) { result.addAll(getInternalNodes(root, child)); } } return result; }
From source file:sg.edu.nus.comp.pdtb.parser.ArgPosComp.java
License:Open Source License
public List<String[]> generateFeatures(Corpus.Type corpus, File article, FeatureType featureType) throws IOException { List<String[]> features = new ArrayList<>(); ArrayList<String> spanMap = null; Map<String, String> spanHashMap = null; List<Tree> trees = null; List<String> explicitSpans = null; if (corpus.equals(Type.PDTB)) { trees = Corpus.getTrees(article, featureType); spanHashMap = Corpus.getSpanMap(article, featureType); spanMap = Corpus.getSpanMapAsList(article, featureType); explicitSpans = Corpus.getExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterErrorProp(explicitSpans, article, featureType); }/*from w w w .j a v a 2s . com*/ } else if (corpus.equals(Type.BIO_DRB)) { trees = Corpus.getBioTrees(article, featureType); spanHashMap = Corpus.getBioSpanMap(article, featureType); spanMap = Corpus.getBioSpanMapAsList(article, featureType); explicitSpans = Corpus.getBioExplicitSpans(article, featureType); if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) { explicitSpans = Corpus.filterBioErrorProp(explicitSpans, article, featureType); } } else { log.error("Unimplemented corpus type: " + corpus); } int index = 0; int contIndex = 0; for (String rel : explicitSpans) { String[] cols = rel.split("\\|", -1); Set<Integer> done = new HashSet<>(); String label = null; if (featureType == FeatureType.AnyText) { label = "NA"; } else { if (corpus == Type.PDTB) { label = Corpus.getLabel(cols[23], cols[33]); } else { label = Corpus.getBioLabel(cols[14], cols[20], spanMap); } } label = label.endsWith("PS") ? "PS" : label; if (label.equals("FS")) { continue; } index = contIndex; List<Node> nodes = new ArrayList<>(); Tree root = null; String[] spans = corpus.equals(Type.PDTB) ? 
cols[3].split(";") : cols[1].split(";"); for (String spanTmp : spans) { String[] span = spanTmp.split("\\.\\."); for (; index < spanMap.size(); ++index) { // wsj_1371,0,6,9..21,Shareholders String line = spanMap.get(index); String[] spanCols = line.split(","); String[] canSpan = spanCols[3].split("\\.\\."); // Start matches if (span[0].equals(canSpan[0]) || (nodes.size() > 0 && spans.length == 1 && Integer.parseInt(canSpan[1]) <= Integer.parseInt(span[1]))) { if (nodes.size() == 0) { contIndex = index; } root = trees.get(Integer.parseInt(spanCols[1])); List<Tree> leaves = root.getLeaves(); int start = Integer.parseInt(spanCols[2]); Tree node = root.getNodeNumber(start); int nodeNum = 0; for (; nodeNum < leaves.size(); ++nodeNum) { Tree potNode = leaves.get(nodeNum); if (node.equals(potNode)) { int tmp = potNode.nodeNumber(root); String tmpSpan = spanHashMap.get(spanCols[1] + ":" + tmp); if (tmpSpan.equals(spanCols[3])) { break; } } } if (corpus == Type.PDTB) { if (!done.contains(nodeNum) && cols[5].contains(node.value().trim())) { done.add(nodeNum); nodes.add(new Node(node, nodeNum)); } } else { if (!done.contains(nodeNum)) { done.add(nodeNum); nodes.add(new Node(node, nodeNum)); } } if (span[1].equals(canSpan[1])) { ++index; break; } } } } if (!nodes.isEmpty()) { String feature = printFeature(root, nodes, label); features.add(new String[] { feature, rel }); } } return features; }
From source file:sg.edu.nus.comp.pdtb.parser.ArgPosComp.java
License:Open Source License
private String printFeature(Tree root, List<Node> nodes, String label) { StringBuilder tmp = new StringBuilder(); StringBuilder tmp2 = new StringBuilder(); for (Node node : nodes) { if (node.tree.parent(root) != null) { tmp.append(node.tree.parent(root).value() + " "); tmp2.append(node.tree.value() + " "); }//from w w w. j a va2 s .co m } String POS = tmp.toString().trim().replace(' ', '_'); String connStr = tmp2.toString().trim().replace(' ', '_'); if (connStr.equalsIgnoreCase("if_then") || connStr.equalsIgnoreCase("either_or") || connStr.equalsIgnoreCase("neither_nor")) { connStr = connStr.replaceAll("_", ".."); } List<Tree> leaves = root.getLeaves(); int firstNodeNum = nodes.get(0).index; Tree prevNode = firstNodeNum > 0 ? leaves.get(--firstNodeNum) : null; StringBuilder feature = new StringBuilder(); feature.append("conn:"); feature.append(connStr); feature.append(' '); feature.append("conn_POS:"); feature.append(POS); feature.append(' '); if (!connStr.contains("..")) { int pos = nodes.get(0).index; if (pos <= 2) { feature.append("sent_pos:"); feature.append(pos); feature.append(' '); } else { pos = nodes.get(nodes.size() - 1).index; if (pos >= leaves.size() - 3) { int pos2 = pos - leaves.size(); feature.append("sent_pos:"); feature.append(pos2); feature.append(' '); } } if (prevNode != null) { while (prevNode.parent(root).value().equals("-NONE-") && firstNodeNum > 0) { prevNode = leaves.get(--firstNodeNum); } if (prevNode != null) { String prevPOS = prevNode.parent(root).value().replace(' ', '_'); String prev = prevNode.value().replace(' ', '_'); feature.append("prev1:"); feature.append(prev); feature.append(' '); feature.append("prev1_POS:"); feature.append(prevPOS); feature.append(' '); feature.append("with_prev1_full:"); feature.append(prev); feature.append('_'); feature.append(connStr); feature.append(' '); feature.append("with_prev1_POS_full:"); feature.append(prevPOS); feature.append('_'); feature.append(POS); feature.append(' '); if (firstNodeNum > 0) { 
Tree prev2Node = leaves.get(--firstNodeNum); if (prev2Node != null) { while (prev2Node.parent(root).value().equals("-NONE-") && firstNodeNum > 0) { prev2Node = leaves.get(--firstNodeNum); } if (prev2Node != null) { String prev2POS = prev2Node.parent(root).value().replace(' ', '_'); String prev2 = prev2Node.value().replace(' ', '_'); feature.append("prev2:"); feature.append(prev2); feature.append(' '); feature.append("prev2_POS:"); feature.append(prev2POS); feature.append(' '); feature.append("with_prev2_full:"); feature.append(prev2); feature.append('_'); feature.append(connStr); feature.append(' '); feature.append("with_prev2_POS_full:"); feature.append(prev2POS); feature.append('_'); feature.append(POS); feature.append(' '); } } } } } } feature.append(label.replace(' ', '_')); return feature.toString().replaceAll("/", "\\\\/"); }