List of usage examples for edu.stanford.nlp.trees Tree nodeNumber
public int nodeNumber(Tree root)
From source file:ConstituencyParse.java
License:Apache License
public int[] constTreeParents(Tree tree) { Tree binarized = binarizer.transformTree(tree); Tree collapsedUnary = transformer.transformTree(binarized); Trees.convertToCoreLabels(collapsedUnary); collapsedUnary.indexSpans();//w w w . j a v a 2 s . c o m List<Tree> leaves = collapsedUnary.getLeaves(); int size = collapsedUnary.size() - leaves.size(); int[] parents = new int[size]; HashMap<Integer, Integer> index = new HashMap<Integer, Integer>(); int idx = leaves.size(); int leafIdx = 0; for (Tree leaf : leaves) { Tree cur = leaf.parent(collapsedUnary); // go to preterminal int curIdx = leafIdx++; boolean done = false; while (!done) { Tree parent = cur.parent(collapsedUnary); if (parent == null) { parents[curIdx] = 0; break; } int parentIdx; int parentNumber = parent.nodeNumber(collapsedUnary); if (!index.containsKey(parentNumber)) { parentIdx = idx++; index.put(parentNumber, parentIdx); } else { parentIdx = index.get(parentNumber); done = true; } parents[curIdx] = parentIdx + 1; cur = parent; curIdx = parentIdx; } } return parents; }
From source file:cc.vidr.parseviz.ParseViz.java
License:Open Source License
/**
 * Appends a complete Graphviz {@code graph} description of {@code tree} to
 * {@code sb}: the header, every node and edge (via the three-argument
 * overload), and a {@code rank=same} group listing all leaves.
 *
 * @param tree the tree to render; it also serves as the root for node numbering
 * @param sb   the buffer that receives the DOT text
 */
public static void printTreeDot(Tree tree, StringBuilder sb) {
    sb.append("graph{\nnode[shape=none];\n");
    printTreeDot(tree, sb, tree);

    // Put all leaves into one rank=same group.
    sb.append("{rank=same;\n");
    for (Tree leaf : tree.getLeaves()) {
        int leafNumber = leaf.nodeNumber(tree);
        sb.append("n");
        sb.append(leafNumber);
        sb.append(";\n");
    }
    sb.append("};\n").append("}\n");
}
From source file:cc.vidr.parseviz.ParseViz.java
License:Open Source License
/**
 * Appends the DOT statements for {@code tree} and all of its descendants to
 * {@code sb}: one labelled node statement per tree node and one undirected
 * edge per parent/child pair. Nodes are named {@code n<k>}, where {@code k} is
 * the node's number relative to {@code root}.
 *
 * <p>The label text is escaped so that a label containing a double quote or a
 * backslash cannot terminate or corrupt the quoted DOT string.
 *
 * @param tree the subtree to emit
 * @param sb   the buffer that receives the DOT text
 * @param root the tree against which node numbers are computed
 */
public static void printTreeDot(Tree tree, StringBuilder sb, Tree root) {
    // Computed once: the value is reused for the node statement and every edge.
    int treeNumber = tree.nodeNumber(root);

    // String.valueOf keeps the "null" rendering that StringBuilder.append(Object) produced.
    String labelText = String.valueOf(tree.label())
            .replace("\\", "\\\\")
            .replace("\"", "\\\"");

    sb.append("n").append(treeNumber).append("[label=\"").append(labelText).append("\"];\n");
    for (Tree child : tree.children()) {
        sb.append("n").append(treeNumber).append("--n").append(child.nodeNumber(root)).append(";\n");
        printTreeDot(child, sb, root);
    }
}
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * /*from ww w . ja v a 2 s. c o m*/ */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.objectIndexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); 
i++) { newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.objectIndexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: "+newQuestion.getIntermediateTree().toString()); if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.nlp.sent.SentenceSimplifier.java
License:Open Source License
/** * e.g., John ran and Bill walked. -> John ran. Bill walked. * //from w w w .j ava 2 s. c o m */ private void extractConjoinedPhrases(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; TregexMatcher matcher; //Tree newTree = copy.getIntermediateTree(); Tree newTree; int nodeindex; tregexOpStr = "__ " + " [ < (VP < (/VB.*/=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + //get the first conjunction, to avoid spurious duplicate matches " | < (VP < (VP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + // verb phrases may be conjoined by commas and adverbs (e.g., "John ran, then walked.") " | < (S|SINV < (S|SINV=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + " | < (S|SINV < (S|SINV=child $ (/:/ < /;/ !$++ /:/))) " + //" | < (ADJP < (JJ|JJR|ADJP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + //" | < (ADVP < (RB|RBR|ADVP=child $ RB|RBR|ADVP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) "+ //" | < (PP < (PP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + " | < (SBAR < (SBAR=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) ] " + " !$ (CC|CONJP !< or|nor)" + //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) " !< (CC|CONJP !< or|nor) " + " !.. 
(CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { conjoinedNode = matcher.getNode("child"); nodeindex = conjoinedNode.nodeNumber(input.getIntermediateTree()); //make a copy of the input for this iteration newTree = input.getIntermediateTree().deepCopy(); removeConjoinedSiblingsHelper(newTree, nodeindex); //for conjoined main clauses, add punctuation if necessary QuestionUtil.addPeriodIfNeeded(newTree); //make a new Question object and add it addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); //if(GlobalProperties.getDebug()) System.err.println("extractConjoinedPhrases: "+newTree.toString()); if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (this.getComputeFeatures) newTreeWithFeatures.setFeatureValue("extractedFromConjoined", 1.0); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John and James like Susan. -> John likes Susan. * //from w w w. j av a 2 s . co m */ private void extractConjoinedNPs(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; Tree parent; TregexMatcher matcher; Question newQuestion; //only extract conjoined NPs that are arguments or adjuncts of the main verb // in the tree, this means the closest S will be the one under the root tregexOpStr = "NP=parent < (CONJP|CC !< or|nor [ " + " $+ /^(N.*|PRP|SBAR)$/=child $-- /^(N.*|PRP|SBAR)$/ | " //there must be a noun on each side of the conjunction + " $-- /^(N.*|PRP|SBAR)$/=child $+ /^(N.*|PRP|SBAR)$/ ] ) " //this avoids extracting from flat NPs such as "the smaller and darker form" + " !>> (/.*/ $ (CC|CONJP !< or|nor)) " //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) + " !$ (CC|CONJP !< or|nor)" + " !.. (CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR "; //+ " >> (ROOT !< (S <+(VP) (/^VB.*$/ < are|were|be|seem|appear))) " ; //don't break plural predicate nominatives (e.g., "John and Mary are two of my best friends.") matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); List<Integer> nodeIndexes = new ArrayList<Integer>(); List<Integer> parentIDs = new ArrayList<Integer>(); while (matcher.find()) { //store the parents' IDs (in the tree) parent = matcher.getNode("parent"); parentIDs.add(parent.nodeNumber(input.getIntermediateTree())); conjoinedNode = matcher.getNode("child"); //store the conjoined nodes' index into their parent's list of children int idx = parent.indexOf(conjoinedNode); if (!nodeIndexes.contains(idx)) nodeIndexes.add(idx); } //for each of the conjoined children, //create a new tree by removing all the nodes they are conjoined with Collections.sort(nodeIndexes);//sort, just to keep them in the original order for (int i = 0; i < nodeIndexes.size(); i++) { 
newQuestion = input.deeperCopy(); Tree t = newQuestion.getIntermediateTree(); parent = t.getNodeNumber(parentIDs.get(i)); Tree gparent = parent.parent(t); conjoinedNode = parent.getChild(nodeIndexes.get(i)); String siblingLabel; //Remove all the nodes that are conjoined //with the selected noun (or are conjunctions, commas). //These can have labels NP, NN, ..., PRP for pronouns, CC, "," for commas, ":" for semi-colons for (int j = 0; j < parent.numChildren(); j++) { if (parent.getChild(j) == conjoinedNode) continue; siblingLabel = parent.getChild(j).label().toString(); if (siblingLabel.matches("^[NCP,:S].*")) { parent.removeChild(j); j--; } } //if there is an trivial unary "NP -> NP", //remove the parent and put the child in its place if (parent.numChildren() == 1 && parent.getChild(0).label().equals("NP")) { int tmpIndex = gparent.indexOf(parent); gparent.removeChild(tmpIndex); gparent.addChild(tmpIndex, parent.getChild(0)); } correctTense(conjoinedNode, gparent); addQuotationMarksIfNeeded(newQuestion.getIntermediateTree()); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedNPs: " + newQuestion.getIntermediateTree().toString()); if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newQuestion.setFeatureValue("extractedFromConjoinedNPs", 1.0); extracted.add(newQuestion); } }
From source file:edu.cmu.ark.SentenceSimplifier.java
License:Open Source License
/** * e.g., John ran and Bill walked. -> John ran. Bill walked. * //from ww w .j av a 2 s . co m */ private void extractConjoinedPhrases(Collection<Question> extracted, Question input) { String tregexOpStr; TregexPattern matchPattern; Tree conjoinedNode; TregexMatcher matcher; //Tree newTree = copy.getIntermediateTree(); Tree newTree; int nodeindex; tregexOpStr = "__ " + " [ < (VP < (/VB.*/=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + //get the first conjunction, to avoid spurious duplicate matches " | < (VP < (VP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + // verb phrases may be conjoined by commas and adverbs (e.g., "John ran, then walked.") " | < (S|SINV < (S|SINV=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + " | < (S|SINV < (S|SINV=child $ (/:/ < /;/ !$++ /:/))) " + //" | < (ADJP < (JJ|JJR|ADJP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + //" | < (ADVP < (RB|RBR|ADVP=child $ RB|RBR|ADVP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) "+ //" | < (PP < (PP=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) " + " | < (SBAR < (SBAR=child $ (CC|CONJP !< or|nor !$++ CC|CONJP))) ] " + " !$ (CC|CONJP !< or|nor)" + //this cannot be nested within a larger conjunction or followed by a conjunction (we recur later to catch this) " !< (CC|CONJP !< or|nor) " + " !.. 
(CC|CONJP !< or|nor > NP|PP|S|SBAR|VP) !>> SBAR"; matchPattern = TregexPatternFactory.getPattern(tregexOpStr); matcher = matchPattern.matcher(input.getIntermediateTree()); while (matcher.find()) { conjoinedNode = matcher.getNode("child"); nodeindex = conjoinedNode.nodeNumber(input.getIntermediateTree()); //make a copy of the input for this iteration newTree = input.getIntermediateTree().deeperCopy(); removeConjoinedSiblingsHelper(newTree, nodeindex); //for conjoined main clauses, add punctuation if necessary AnalysisUtilities.addPeriodIfNeeded(newTree); //make a new Question object and add it addQuotationMarksIfNeeded(newTree); Question newTreeWithFeatures = input.deeperCopy(); newTreeWithFeatures.setIntermediateTree(newTree); if (GlobalProperties.getDebug()) System.err.println("extractConjoinedPhrases: " + newTree.toString()); if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromConjoinedPhrases", 1.0); //old feature name if (GlobalProperties.getComputeFeatures()) newTreeWithFeatures.setFeatureValue("extractedFromConjoined", 1.0); addIfNovel(extracted, newTreeWithFeatures); } }
From source file:info.mhaas.ma.Evaluation.ContrastiveConjunctions.java
public void matchPattern(TregexPattern pat) { System.err.println("matchPattern called: " + pat.pattern()); int treesFound = 0; for (int i = 0; i < this.parses.size(); i++) { //System.out.println("Tree number " + i); Tree currentParse = this.parses.get(i); TregexMatcher matcher = pat.matcher(currentParse); if (!matcher.find()) { // no match here. let's try the next tree! continue; }//w ww . j a v a 2 s . c om if (this.matchedTrees.contains(i)) { System.err.println("Already seen tree, skipping: " + i); continue; } else { this.matchedTrees.add(i); } treesFound++; // Tree subTree = matcher.getMatch(); // Cool, we got a match. // Let's get left and right trees. Tree left = matcher.getNode("left"); Tree right = matcher.getNode("right"); int leftIdx = left.nodeNumber(currentParse); int rightIdx = right.nodeNumber(currentParse); Tree currentPred = this.predicted.get(i); Tree currentGold = this.gold.get(i); CCMatchObject mo = new CCMatchObject(currentPred, currentGold, currentParse, i, leftIdx, rightIdx); this.matches.add(mo); } System.err.println("Matched " + treesFound + " trees."); }
From source file:org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils.java
License:Open Source License
public static List<Integer> getLeafNumbers(Tree anchorTree, Tree node) { List<Integer> res = new ArrayList<>(); for (Tree leaf : node.getLeaves()) { res.add(leaf.nodeNumber(anchorTree)); }//from www. j a v a 2s. c o m return res; }
From source file:org.lambda3.text.simplification.discourse.utils.parseTree.ParseTreeExtractionUtils.java
License:Open Source License
private static Optional<Tree> findSpanningTreeRec(Tree anchorTree, Tree currTree, Tree firstLeaf, Tree lastLeaf) {/*from w ww. ja va 2s . c o m*/ int firstNumber = firstLeaf.nodeNumber(anchorTree); int lastNumber = lastLeaf.nodeNumber(anchorTree); int currFirstNumber = getFirstLeaf(currTree).nodeNumber(anchorTree); int currLastNumber = getLastLeaf(currTree).nodeNumber(anchorTree); if (((currFirstNumber <= firstNumber) && (firstNumber <= currLastNumber)) && ((currFirstNumber <= lastNumber) && (lastNumber <= currLastNumber))) { if ((currFirstNumber == firstNumber) && (lastNumber == currLastNumber)) { return Optional.of(currTree); } else { // recursion for (Tree child : currTree.getChildrenAsList()) { Optional<Tree> cr = findSpanningTreeRec(anchorTree, child, firstLeaf, lastLeaf); if (cr.isPresent()) { return Optional.of(cr.get()); } } } } return Optional.empty(); }