List of usage examples for edu.stanford.nlp.trees Tree getNodeNumber
public Tree getNodeNumber(int i)
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * //from w w w. j a v a2s.c o m */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.objectIndexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); 
i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.objectIndexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString()); if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { //if(GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: "+copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy);//from w w w. j a v a2 s . c o m Tree gparent = parent.parent(copy); int parentIdx = gparent.objectIndexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * //from ww w . j a v a2 s . co m */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.indexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { 
newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.indexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString()); if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
private void removeConjoinedSiblingsHelper(Tree copy, int childindex) { if (GlobalProperties.getDebug()) System.err.println("removeConjoinedSiblingsHelper: " + copy.toString()); Tree child = copy.getNodeNumber(childindex); Tree parent = child.parent(copy);/*from w w w .ja v a 2s . c o m*/ Tree gparent = parent.parent(copy); int parentIdx = gparent.indexOf(parent); //By an annoying PTB convention, some verb phrase conjunctions //can conjoin two verb preterminals under a VP, //rather than only allowing VP nodes to be conjoined. //e.g., John walked and played. //So, we add an extra VP node in between if necessary if (child.label().toString().startsWith("VB")) { gparent.removeChild(parentIdx); Tree newTree = factory.newTreeNode("VP", new ArrayList<Tree>()); newTree.addChild(child); gparent.addChild(parentIdx, newTree); } else { gparent.setChild(parentIdx, child); } }
From source file:info.mhaas.ma.Evaluation.CCMatchObject.java
private Tree getParentNode(Tree someTree) { // parent() method requires tree root to be passed in. // Calling parent() w/o this will return null. Tree leftParent = someTree.getNodeNumber(this.getLeftIdx()).parent(someTree); Tree rightParent = someTree.getNodeNumber(this.getRightIdx()).parent(someTree); assert leftParent == rightParent; assert leftParent != null; return leftParent; }
From source file:org.lambda3.text.simplification.discourse.utils.ner.NERStringParser.java
License:Open Source License
public static TNERString parse(Tree parseTree) throws NERStringParseException { List<TNERToken> tokens = new ArrayList<>(); List<Integer> parseTreeLeafNumbers = ParseTreeExtractionUtils.getLeafNumbers(parseTree, parseTree); String nerString = NER_CLASSIFIER.classifyToString(WordsUtils.wordsToString(parseTree.yieldWords())); String[] nerTokens = nerString.split(" "); if (parseTreeLeafNumbers.size() != nerTokens.length) { throw new NERStringParseException("Could not map NER string to parseTree"); }/*from w w w. j a v a2 s . c om*/ int idx = 0; for (String nerToken : nerTokens) { int sep_idx = nerToken.lastIndexOf("/"); // create token String text = nerToken.substring(0, sep_idx); String category = nerToken.substring(sep_idx + 1); TNERToken token = new TNERToken(idx, text, category, parseTree.getNodeNumber(parseTreeLeafNumbers.get(idx))); tokens.add(token); ++idx; } return new TNERString(tokens, parseTree); }
From source file:org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils.java
License:Open Source License
/**
 * Collects the leaves of {@code anchorTree} lying between two boundary nodes.
 * A null {@code leftNode}/{@code rightNode} defaults to the first/last leaf of
 * the anchor tree. Inclusion of each boundary's own leaves is controlled by
 * {@code includeLeft}/{@code includeRight}; exclusion is implemented by
 * shifting the node-number range inward by one. Returns an empty list when
 * either boundary does not resolve to a node number within the anchor tree
 * (nodeNumber then yields a negative sentinel).
 */
public static List<Tree> getLeavesInBetween(Tree anchorTree, Tree leftNode, Tree rightNode, boolean includeLeft,
        boolean includeRight) {
    List<Tree> res = new ArrayList<>();
    if (leftNode == null) {
        leftNode = getFirstLeaf(anchorTree);
    }
    if (rightNode == null) {
        rightNode = getLastLeaf(anchorTree);
    }
    // node numbers are positions in anchorTree's traversal order; +1/-1 steps
    // past the boundary's own leaf when it should be excluded
    int startLeafNumber = (includeLeft) ? getFirstLeaf(leftNode).nodeNumber(anchorTree)
            : getLastLeaf(leftNode).nodeNumber(anchorTree) + 1;
    int endLeafNumber = (includeRight) ? getLastLeaf(rightNode).nodeNumber(anchorTree)
            : getFirstLeaf(rightNode).nodeNumber(anchorTree) - 1;
    if ((startLeafNumber < 0) || (endLeafNumber < 0)) {
        return res;
    }
    // walk every node number in the range, keeping only the leaves;
    // addAll works because Tree is itself a Collection of its subtrees
    // (for a leaf that is just the leaf itself)
    for (int i = startLeafNumber; i <= endLeafNumber; ++i) {
        Tree node = anchorTree.getNodeNumber(i);
        if (node.isLeaf()) {
            res.addAll(node);
        }
    }
    return res;
}
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
/**
 * Builds argument-extraction features for every explicit relation in one article.
 * Loads the span map, parse trees and explicit spans for the requested corpus
 * (PDTB or BioDRB), locates the connective's leaf nodes in the trees, and emits
 * one feature row per internal tree node labelled arg1_node / arg2_node / none.
 *
 * NOTE(review): relies on mutable instance state (trees, sentMap, labels,
 * majorIndex, doneSoFar) shared with other phases of the pipeline — not
 * re-entrant; verify call order against the surrounding class.
 *
 * @param corpus      corpus flavour (Type.PDTB or Type.BIO_DRB)
 * @param article     article whose relations are featurised
 * @param featureType pipeline mode (Training / ErrorPropagation / Auto / AnyText)
 * @return one {feature, relation[, provenance]} String[] per emitted node
 * @throws IOException if any corpus resource cannot be read
 */
public List<String[]> generateFeatures(Type corpus, File article, FeatureType featureType) throws IOException {
    List<String[]> features = new ArrayList<>();
    ArrayList<String> spanArray = null;
    Map<String, String> spanHashMap = null;
    List<String> explicitSpans = null;
    // load corpus-specific resources; PDTB and BioDRB use parallel accessors
    if (corpus.equals(Type.PDTB)) {
        spanArray = Corpus.getSpanMapAsList(article, featureType);
        spanHashMap = Corpus.getSpanMap(article, featureType);
        trees = Corpus.getTrees(article, featureType);
        explicitSpans = Corpus.getExplicitSpans(article, featureType);
        if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) {
            explicitSpans = Corpus.filterErrorProp(explicitSpans, article, featureType);
        }
        if (featureType == FeatureType.Auto) {
            sentMap = Corpus.getSentMap(article);
        }
    } else {
        spanArray = Corpus.getBioSpanMapAsList(article, featureType);
        spanHashMap = Corpus.getBioSpanMap(article, featureType);
        trees = Corpus.getBioTrees(article, featureType);
        explicitSpans = Corpus.getBioExplicitSpans(article, featureType);
        if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) {
            explicitSpans = Corpus.filterBioErrorProp(explicitSpans, article, featureType);
        }
    }
    int index = 0;
    int contIndex = 0; // resume point in spanArray so relations are scanned in document order
    for (String rel : explicitSpans) {
        String[] cols = rel.split("\\|", -1);
        String argPos = null;
        // argument position label: gold in training, predicted otherwise
        if (corpus == Type.PDTB) {
            if (featureType == FeatureType.Training) {
                argPos = Corpus.getLabel(cols[23], cols[33]);
            } else {
                argPos = labels.get(majorIndex);
            }
        } else {
            argPos = Corpus.getBioLabel(cols[14], cols[20], spanArray);
        }
        if (argPos.equals("FS")) {
            continue; // "FS" relations are skipped entirely
        }
        if (featureType != FeatureType.Training) {
            argPos = labels.get(majorIndex);
        }
        ++majorIndex;
        // only same-sentence ("SS") relations get node-level features
        if (argPos.equals("SS")) {
            Set<Integer> done = new HashSet<>();
            index = contIndex;
            List<Node> nodes = new ArrayList<>();
            Tree root = null;
            // connective character offsets; PDTB keeps them in col 3, BioDRB in col 1
            String[] spans = corpus.equals(Type.PDTB) ? cols[3].split(";") : cols[1].split(";");
            for (String spanTmp : spans) {
                String[] span = spanTmp.split("\\.\\.");
                // scan the span map for lines covered by this connective span
                for (; index < spanArray.size(); ++index) {
                    // span-map line format: wsj_1371,0,6,9..21,Shareholders
                    String line = spanArray.get(index);
                    String[] spanCols = line.split(",");
                    String[] canSpan = spanCols[3].split("\\.\\.");
                    // Start matches
                    // if (span[0].equals(canSpan[0]) || nodes.size() > 0) {
                    boolean flag = span[0].equals(canSpan[0]) || (nodes.size() > 0 && spans.length == 1
                            && Integer.parseInt(canSpan[1]) <= Integer.parseInt(span[1]));
                    if (corpus.equals(Type.BIO_DRB)) {
                        // BioDRB: plain containment test instead of start-anchor matching
                        int start = Integer.parseInt(canSpan[0]);
                        int end = Integer.parseInt(canSpan[1]);
                        int outStart = Integer.parseInt(span[0]);
                        int outEnd = Integer.parseInt(span[1]);
                        flag = outStart <= start && end <= outEnd;
                    }
                    if (flag) {
                        if (nodes.size() == 0) {
                            contIndex = index; // remember where this relation's scan began
                        }
                        root = trees.get(Integer.parseInt(spanCols[1]));
                        List<Tree> leaves = root.getLeaves();
                        int start = Integer.parseInt(spanCols[2]);
                        Tree node = root.getNodeNumber(start);
                        // find the leaf position whose recorded span matches this line
                        // (equal leaves can repeat, so confirm via the span hash map)
                        int nodeNum = 0;
                        for (; nodeNum < leaves.size(); ++nodeNum) {
                            Tree potNode = leaves.get(nodeNum);
                            if (node.equals(potNode)) {
                                int tmp = potNode.nodeNumber(root);
                                String tmpSpan = spanHashMap.get(spanCols[1] + ":" + tmp);
                                if (tmpSpan.equals(spanCols[3])) {
                                    break;
                                }
                            }
                        }
                        if (!done.contains(nodeNum)) {
                            done.add(nodeNum);
                            nodes.add(new Node(node, nodeNum));
                        }
                        if (span[1].equals(canSpan[1])) {
                            ++index; // span fully consumed; move to the next one
                            break;
                        }
                    }
                }
            }
            if (!nodes.isEmpty()) {
                // normalised connective string and its syntactic category
                String connStr = null;
                String connCat = null;
                if (corpus.equals(Type.PDTB)) {
                    connStr = cols[8].trim().replace(' ', '_');
                    // keep the original casing of the raw-text first character
                    String c = cols[5].substring(0, 1);
                    if (c.toLowerCase().equals(connStr.substring(0, 1))) {
                        connStr = c + connStr.substring(1);
                    }
                    connCat = ConnComp.findCategory(cols[8]);
                } else {
                    StringBuilder tmp = new StringBuilder();
                    for (Node node : nodes) {
                        tmp.append(node.tree + " ");
                    }
                    connStr = tmp.toString();
                    connStr = connStr.trim().toLowerCase().replace(' ', '_');
                    connCat = ConnComp.findCategory(connStr.replace('_', ' '), corpus);
                }
                // connective's parent node anchors the arg1/arg2 node search
                Tree connNode = nodes.get(nodes.size() - 1).tree.parent(root);
                Tree[] argNodes = corpus.equals(Type.PDTB) ? getArgNodes(root, cols, spanArray, connCat, connNode)
                        : getBioArgNodes(root, cols, spanArray, connCat, connNode);
                List<Tree> internal = getInternalNodes(root, root);
                String treeNum;
                String line;
                int total = (doneSoFar + features.size());
                // provenance line recording article, feature range and argument spans
                if (corpus.equals(Type.PDTB)) {
                    treeNum = featureType == FeatureType.AnyText ? cols[7] : getNodeNum(cols[23], featureType);
                    line = article.getName() + ":" + total + "-" + (total + internal.size()) + ":Arg1(" + cols[22]
                            + "):Arg2(" + cols[32] + "):" + cols[3];
                } else {
                    treeNum = Corpus.spanToSenIds(cols[14], spanArray).getFirst().toString();
                    line = article.getName() + ":" + total + "-" + (total + internal.size()) + ":Arg1(" + cols[14]
                            + "):Arg2(" + cols[20] + "):" + cols[1];
                }
                // one feature row per internal node; training mode labels the
                // arg nodes, other modes record the node's address instead
                for (Tree node : internal) {
                    String label = "";
                    if (node.equals(argNodes[0])) {
                        label = "arg1_node";
                    } else if (node.equals(argNodes[1])) {
                        label = "arg2_node";
                    } else {
                        label = "none";
                    }
                    if (featureType != FeatureType.Training) {
                        label = treeNum + ":" + node.nodeNumber(root);
                    }
                    String feature = printFeature(root, node, connStr, connCat, connNode, label);
                    features.add(new String[] { feature, rel, line });
                }
            }
        }
    }
    return features;
}
From source file:sg.edu.nus.comp.pdtb.parser.ArgExtComp.java
License:Open Source License
/**
 * Computes the character-span string covered by a set of tree nodes, excluding
 * the connective's own span and (optionally) the other argument's node.
 * Each entry of {@code nodes} is "treeNum:nodeNumber"; the leaves reachable
 * from each node are mapped to character spans via {@code spanHashMap}, the
 * spans are sorted, and adjacent spans (gap of 2 characters or less) are merged
 * into "start..end" runs joined by ';'.
 *
 * @param nodes       node addresses ("treeNum:nodeNumber") forming the argument
 * @param spanHashMap maps "treeNum:nodeNumber" of a leaf to its "start..end" span
 * @param connSpan    connective span(s), ';'-separated, excluded from the result
 * @param otherArg    other argument's address in the same format, or null;
 *                    its subtree is skipped during traversal
 * @return merged span string after punctuation trimming
 */
private String calcNodesSpan(List<String> nodes, Map<String, String> spanHashMap, String connSpan,
        List<String> otherArg) {
    // connective spans to exclude from the argument
    Set<String> conn = new HashSet<>();
    String[] c = connSpan.split(";");
    for (String e : c) {
        conn.add(e);
    }
    List<Span> spans = new ArrayList<>();
    // parse the other argument's "treeNum:nodeNumber" address, if present
    String[] d = otherArg != null ? otherArg.get(0).split(":") : null;
    int skipTreeNum = -1;
    int skipNodeNumber = -1;
    if (d != null) {
        skipTreeNum = Integer.parseInt(d[0]);
        skipNodeNumber = Integer.parseInt(d[1]);
    }
    // for debugging purposes
    @SuppressWarnings("unused")
    Tree skipNode = d != null ? trees.get(skipTreeNum).getNodeNumber(skipNodeNumber) : null;
    for (String txt : nodes) {
        String[] tmp = txt.split(":");
        int treeNum = Integer.parseInt(tmp[0]);
        Tree root = trees.get(treeNum);
        Tree node = root.getNodeNumber(Integer.parseInt(tmp[1]));
        if (node == null) {
            continue; // stale node number; nothing to collect
        }
        // breadth-first walk of the subtree, collecting leaf spans
        Queue<Tree> children = new LinkedList<>();
        children.add(node);
        while (children.size() > 0) {
            Tree child = children.poll();
            // the same tree and the same node number as the other argument
            if (skipTreeNum == treeNum && child.nodeNumber(root) == skipNodeNumber) {
                continue;
            } else if (!child.isLeaf()) {
                children.addAll(child.getChildrenAsList());
            } else {
                int nodeNum = child.nodeNumber(root);
                String span = spanHashMap.get(treeNum + ":" + nodeNum);
                // keep leaf spans that do not overlap the connective
                if (span != null && !hasIntersection(span, conn)) {
                    spans.add(new Span(span));
                }
            }
        }
    }
    Collections.sort(spans);
    // merge sorted spans: gaps > 2 start a new ';'-separated run, otherwise
    // extend the current run's end offset in place
    StringBuilder sb = new StringBuilder();
    for (Span span : spans) {
        if (sb.length() > 0) {
            String end = sb.substring(sb.lastIndexOf(".") + 1);
            String start = Integer.toString(span.start);
            if (Integer.parseInt(start) - Integer.parseInt(end) > 2) {
                sb.append(";");
                sb.append(span);
            } else {
                sb.delete(sb.lastIndexOf(".") + 1, sb.length());
                // NOTE(review): Integer.toString cannot throw
                // StringIndexOutOfBoundsException — this try/catch looks like a
                // leftover from an earlier substring-based version; the logged
                // path appears unreachable. Verify before removing.
                try {
                    end = Integer.toString(span.end);
                } catch (StringIndexOutOfBoundsException e) {
                    log.error("Error: " + e.getMessage());
                    e.printStackTrace();
                }
                sb.append(end);
            }
        } else {
            sb.append(span);
        }
    }
    String result = sb.toString();
    String out = removePunctuation(result);
    return out;
}
From source file:sg.edu.nus.comp.pdtb.parser.ArgPosComp.java
License:Open Source License
/**
 * Builds argument-position features for every explicit relation in one article.
 * Loads span maps, trees and explicit spans for the requested corpus (PDTB or
 * BioDRB), maps each connective span to its leaf node(s) in the parse trees,
 * and emits one feature row per relation that resolved to at least one node.
 *
 * @param corpus      corpus flavour (Type.PDTB or Type.BIO_DRB)
 * @param article     article whose relations are featurised
 * @param featureType pipeline mode (Training / ErrorPropagation / Auto / AnyText)
 * @return one {feature, relation} String[] per resolvable relation
 * @throws IOException if any corpus resource cannot be read
 */
public List<String[]> generateFeatures(Corpus.Type corpus, File article, FeatureType featureType)
        throws IOException {
    List<String[]> features = new ArrayList<>();
    ArrayList<String> spanMap = null;
    Map<String, String> spanHashMap = null;
    List<Tree> trees = null;
    List<String> explicitSpans = null;
    // load corpus-specific resources; PDTB and BioDRB use parallel accessors
    if (corpus.equals(Type.PDTB)) {
        trees = Corpus.getTrees(article, featureType);
        spanHashMap = Corpus.getSpanMap(article, featureType);
        spanMap = Corpus.getSpanMapAsList(article, featureType);
        explicitSpans = Corpus.getExplicitSpans(article, featureType);
        if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) {
            explicitSpans = Corpus.filterErrorProp(explicitSpans, article, featureType);
        }
    } else if (corpus.equals(Type.BIO_DRB)) {
        trees = Corpus.getBioTrees(article, featureType);
        spanHashMap = Corpus.getBioSpanMap(article, featureType);
        spanMap = Corpus.getBioSpanMapAsList(article, featureType);
        explicitSpans = Corpus.getBioExplicitSpans(article, featureType);
        if (featureType == FeatureType.ErrorPropagation || featureType == FeatureType.Auto) {
            explicitSpans = Corpus.filterBioErrorProp(explicitSpans, article, featureType);
        }
    } else {
        // NOTE(review): explicitSpans stays null here, so the loop below would
        // NPE for an unknown corpus type — verify upstream guarantees the enum.
        log.error("Unimplemented corpus type: " + corpus);
    }
    int index = 0;
    int contIndex = 0; // resume point in spanMap so relations are scanned in document order
    for (String rel : explicitSpans) {
        String[] cols = rel.split("\\|", -1);
        Set<Integer> done = new HashSet<>();
        String label = null;
        // argument-position label; "*PS" variants collapse to "PS", "FS" is skipped
        if (featureType == FeatureType.AnyText) {
            label = "NA";
        } else {
            if (corpus == Type.PDTB) {
                label = Corpus.getLabel(cols[23], cols[33]);
            } else {
                label = Corpus.getBioLabel(cols[14], cols[20], spanMap);
            }
        }
        label = label.endsWith("PS") ? "PS" : label;
        if (label.equals("FS")) {
            continue;
        }
        index = contIndex;
        List<Node> nodes = new ArrayList<>();
        Tree root = null;
        // connective character offsets; PDTB keeps them in col 3, BioDRB in col 1
        String[] spans = corpus.equals(Type.PDTB) ? cols[3].split(";") : cols[1].split(";");
        for (String spanTmp : spans) {
            String[] span = spanTmp.split("\\.\\.");
            // scan the span map for lines covered by this connective span
            for (; index < spanMap.size(); ++index) {
                // wsj_1371,0,6,9..21,Shareholders
                String line = spanMap.get(index);
                String[] spanCols = line.split(",");
                String[] canSpan = spanCols[3].split("\\.\\.");
                // Start matches
                if (span[0].equals(canSpan[0]) || (nodes.size() > 0 && spans.length == 1
                        && Integer.parseInt(canSpan[1]) <= Integer.parseInt(span[1]))) {
                    if (nodes.size() == 0) {
                        contIndex = index; // remember where this relation's scan began
                    }
                    root = trees.get(Integer.parseInt(spanCols[1]));
                    List<Tree> leaves = root.getLeaves();
                    int start = Integer.parseInt(spanCols[2]);
                    Tree node = root.getNodeNumber(start);
                    // find the leaf position whose recorded span matches this line
                    // (equal leaves can repeat, so confirm via the span hash map)
                    int nodeNum = 0;
                    for (; nodeNum < leaves.size(); ++nodeNum) {
                        Tree potNode = leaves.get(nodeNum);
                        if (node.equals(potNode)) {
                            int tmp = potNode.nodeNumber(root);
                            String tmpSpan = spanHashMap.get(spanCols[1] + ":" + tmp);
                            if (tmpSpan.equals(spanCols[3])) {
                                break;
                            }
                        }
                    }
                    // PDTB additionally requires the leaf text to occur in the
                    // raw connective text (col 5); BioDRB accepts any new leaf
                    if (corpus == Type.PDTB) {
                        if (!done.contains(nodeNum) && cols[5].contains(node.value().trim())) {
                            done.add(nodeNum);
                            nodes.add(new Node(node, nodeNum));
                        }
                    } else {
                        if (!done.contains(nodeNum)) {
                            done.add(nodeNum);
                            nodes.add(new Node(node, nodeNum));
                        }
                    }
                    if (span[1].equals(canSpan[1])) {
                        ++index; // span fully consumed; move to the next one
                        break;
                    }
                }
            }
        }
        if (!nodes.isEmpty()) {
            String feature = printFeature(root, nodes, label);
            features.add(new String[] { feature, rel });
        }
    }
    return features;
}