List of usage examples for the label() method of edu.stanford.nlp.trees.Tree
@Override
public Label label()
From source file:Anaphora_Resolution.ParseAllXMLDocuments.java
public static ArrayList<Tree> findPronouns(Tree t) { ArrayList<Tree> pronouns = new ArrayList<Tree>(); if (t.label().value().equals("PRP") && !t.children()[0].label().value().equals("I") && !t.children()[0].label().value().equals("you") && !t.children()[0].label().value().equals("You")) { pronouns.add(t);// w w w. j a v a 2 s.c o m } else for (Tree child : t.children()) pronouns.addAll(findPronouns(child)); return pronouns; }
From source file:artinex.TypDep.java
public static void main(String[] args) { String str = "What is index in array"; TypDep parser = new TypDep(); Tree tree = parser.parse(str);//from w w w . j a v a 2s . co m List<Tree> leaves = tree.getLeaves(); // Print words and Pos Tags for (Tree leaf : leaves) { Tree parent = leaf.parent(tree); System.out.print(leaf.label().value() + "-" + parent.label().value() + " "); } System.out.println(); //Type dependencies // Tree tree1 = str.get(TreeAnnotation.class); // Get dependency tree TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); Collection<TypedDependency> td = gs.typedDependenciesCollapsed(); System.out.println(td); }
From source file:cc.vidr.parseviz.ParseViz.java
License:Open Source License
public static void printTreeDot(Tree tree, StringBuilder sb, Tree root) { sb.append("n").append(tree.nodeNumber(root)).append("[label=\"").append(tree.label()).append("\"];\n"); for (Tree child : tree.children()) { sb.append("n").append(tree.nodeNumber(root)).append("--n").append(child.nodeNumber(root)).append(";\n"); printTreeDot(child, sb, root);//from www . j av a 2 s .co m } }
From source file:com.tadbitstrange.tripletExtractionFromSentence.ExtractionService.java
License:Open Source License
/**
 * Parses a fixed sample sentence and logs three labels: that of the first
 * returned tree, that of its first child, and that of the first child's
 * first child.
 */
public void checkParserFeedback() {
    Tree node = parser.parse("My dog has fleas").get(0);
    for (int depth = 0; depth < 3; depth++) {
        if (depth > 0) {
            // Step down to the first child before logging the next level.
            node = node.children()[0];
        }
        log.info(node.label());
    }
}
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/**
 * Re-scores a binarized tree bottom-up and prints a report to standard output
 * for every node whose recomputed score exceeds the matching {@code iScore}
 * entry by more than a small tolerance.
 *
 * <p>Pre-terminals are scored with the lexicon, single-child nodes with the
 * unary grammar and all other nodes with the binary grammar using their first
 * two children. Grammar rule scores are clamped from below at {@code -10000.0}.
 *
 * @param tree  the (sub)tree to check
 * @param start index of the first word covered by {@code tree}
 * @return the recomputed score of {@code tree}; {@code 0.0} for a leaf
 */
public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    float epsilon = 0.0001f;
    Tree[] kids = tree.children();

    if (tree.isPreTerminal()) {
        String wordStr = kids[0].label().value();
        String tagStr = tree.label().value();
        int tagId = tagIndex.indexOf(tagStr);
        int wordId = wordIndex.indexOf(wordStr);
        IntTaggedWord taggedWord = new IntTaggedWord(wordId, tagId);
        float score = lex.score(taggedWord, start, wordStr, null);
        float bound = iScore[start][start + 1][stateIndex.indexOf(tagStr)];
        if (score > bound + epsilon) {
            System.out.println("Invalid tagging:");
            System.out.println(" Tag: " + tagStr);
            System.out.println(" Word: " + wordStr);
            System.out.println(" Score: " + score);
            System.out.println(" Bound: " + bound);
        }
        return score;
    }

    String parentStr = tree.label().value();
    int parentState = stateIndex.indexOf(parentStr);
    int leftState = stateIndex.indexOf(kids[0].label().value());

    if (tree.numChildren() == 1) {
        UnaryRule unary = new UnaryRule(parentState, leftState);
        double score = SloppyMath.max(ug.scoreRule(unary), -10000.0) + validateBinarizedTree(kids[0], start);
        int end = start + tree.yield().size();
        double bound = iScore[start][end][parentState];
        if (score > bound + epsilon) {
            System.out.println("Invalid unary:");
            System.out.println(" Parent: " + parentStr);
            System.out.println(" Child: " + kids[0].label().value());
            System.out.println(" Start: " + start);
            System.out.println(" End: " + end);
            System.out.println(" Score: " + score);
            System.out.println(" Bound: " + bound);
        }
        return score;
    }

    int rightState = stateIndex.indexOf(kids[1].label().value());
    BinaryRule binary = new BinaryRule(parentState, leftState, rightState);
    // The right child starts where the left child's yield ends.
    double score = SloppyMath.max(bg.scoreRule(binary), -10000.0) + validateBinarizedTree(kids[0], start)
            + validateBinarizedTree(kids[1], start + kids[0].yield().size());
    int end = start + tree.yield().size();
    double bound = iScore[start][end][parentState];
    if (score > bound + epsilon) {
        System.out.println("Invalid binary:");
        System.out.println(" Parent: " + parentStr);
        System.out.println(" LChild: " + kids[0].label().value());
        System.out.println(" RChild: " + kids[1].label().value());
        System.out.println(" Start: " + start);
        System.out.println(" End: " + end);
        System.out.println(" Score: " + score);
        System.out.println(" Bound: " + bound);
    }
    return score;
}
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) { if (tree.isLeaf()) { return 0.0; }//w w w .j a va 2s. c om if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); // if (lex.score(iTW,(leftmost ? 0 : 1)) == Double.NEGATIVE_INFINITY) { // System.out.println("NO SCORE FOR: "+iTW); // } float score = lex.score(iTW, start, wordStr, null); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); //+ DEBUG // if (ug.scoreRule(ur) < -10000) { // System.out.println("Grammar doesn't have rule: " + ur); // } // return SloppyMath.max(ug.scoreRule(ur), -10000.0) + scoreBinarizedTree(tree.children()[0], leftmost); double score = ug.scoreRule(ur) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + lex.score(ur, start, start + tree.children()[0].yield().size()); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); //+ DEBUG // if (bg.scoreRule(br) < -10000) { // System.out.println("Grammar doesn't have rule: " + br); // } // return SloppyMath.max(bg.scoreRule(br), -10000.0) + // scoreBinarizedTree(tree.children()[0], leftmost) + // scoreBinarizedTree(tree.children()[1], false); int sz0 = tree.children()[0].yield().size(); double score = bg.scoreRule(br) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + scoreBinarizedTree(tree.children()[1], start + sz0, debugLvl) + lex.score(br, start, start + sz0 + tree.children()[1].yield().size(), 
start + sz0); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan() + " " + (sz0 + start)); return score; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
private Tree extractBestParse(int goal, int start, int end) { // find source of inside score // no backtraces so we can speed up the parsing for its primary use double bestScore = iScore[start][end][goal]; double normBestScore = op.testOptions.lengthNormalization ? (bestScore / wordsInSpan[start][end][goal]) : bestScore;/*from www . j a va2 s . com*/ String goalStr = stateIndex.get(goal); // check tags if (end - start <= op.testOptions.maxSpanForTags && tagIndex.contains(goalStr)) { if (op.testOptions.maxSpanForTags > 1) { Tree wordNode = null; if (sentence != null) { StringBuilder word = new StringBuilder(); for (int i = start; i < end; i++) { if (sentence.get(i) instanceof HasWord) { HasWord cl = (HasWord) sentence.get(i); word.append(cl.word()); } else { word.append(sentence.get(i).toString()); } } wordNode = tf.newLeaf(word.toString()); } else if (lr != null) { List<LatticeEdge> latticeEdges = lr.getEdgesOverSpan(start, end); for (LatticeEdge edge : latticeEdges) { IntTaggedWord itw = new IntTaggedWord(edge.word, stateIndex.get(goal), wordIndex, tagIndex); float tagScore = (floodTags) ? 
-1000.0f : lex.score(itw, start, edge.word, null); if (matches(bestScore, tagScore + (float) edge.weight)) { wordNode = tf.newLeaf(edge.word); if (wordNode.label() instanceof CoreLabel) { CoreLabel cl = (CoreLabel) wordNode.label(); cl.setBeginPosition(start); cl.setEndPosition(end); } break; } } if (wordNode == null) { throw new RuntimeException( "could not find matching word from lattice in parse reconstruction"); } } else { throw new RuntimeException("attempt to get word when sentence and lattice are null!"); } Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode)); tagNode.setScore(bestScore); if (originalTags[start] != null) { tagNode.label().setValue(originalTags[start].tag()); } return tagNode; } else { // normal lexicon is single words case IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr)); String contextStr = getCoreLabel(start).originalText(); float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr); if (tagScore > Float.NEGATIVE_INFINITY || floodTags) { // return a pre-terminal tree CoreLabel terminalLabel = getCoreLabel(start); Tree wordNode = tf.newLeaf(terminalLabel); Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode)); tagNode.setScore(bestScore); if (terminalLabel.tag() != null) { tagNode.label().setValue(terminalLabel.tag()); } if (tagNode.label() instanceof HasTag) { ((HasTag) tagNode.label()).setTag(tagNode.label().value()); } return tagNode; } } } // check binaries first for (int split = start + 1; split < end; split++) { for (Iterator<BinaryRule> binaryI = bg.ruleIteratorByParent(goal); binaryI.hasNext();) { BinaryRule br = binaryI.next(); double score = br.score + iScore[start][split][br.leftChild] + iScore[split][end][br.rightChild] + lex.score(br, start, end, split); boolean matches; if (op.testOptions.lengthNormalization) { double normScore = score / (wordsInSpan[start][split][br.leftChild] + wordsInSpan[split][end][br.rightChild]); 
matches = matches(normScore, normBestScore); } else { matches = matches(score, bestScore); } if (matches) { // build binary split Tree leftChildTree = extractBestParse(br.leftChild, start, split); Tree rightChildTree = extractBestParse(br.rightChild, split, end); List<Tree> children = new ArrayList<Tree>(); children.add(leftChildTree); children.add(rightChildTree); Tree result = tf.newTreeNode(goalStr, children); result.setScore(score); // System.err.println(" Found Binary node: "+result); return result; } } } // check unaries // note that even though we parse with the unary-closed grammar, we can // extract the best parse with the non-unary-closed grammar, since all // the intermediate states in the chain must have been built, and hence // we can exploit the sparser space and reconstruct the full tree as we go. // for (Iterator<UnaryRule> unaryI = ug.closedRuleIteratorByParent(goal); unaryI.hasNext(); ) { for (Iterator<UnaryRule> unaryI = ug.ruleIteratorByParent(goal); unaryI.hasNext();) { UnaryRule ur = unaryI.next(); // System.err.println(" Trying " + ur + " dtr score: " + iScore[start][end][ur.child]); double score = ur.score + iScore[start][end][ur.child] + lex.score(ur, start, end); boolean matches; if (op.testOptions.lengthNormalization) { double normScore = score / wordsInSpan[start][end][ur.child]; matches = matches(normScore, normBestScore); } else { matches = matches(score, bestScore); } if (ur.child != ur.parent && matches) { // build unary Tree childTree = extractBestParse(ur.child, start, end); Tree result = tf.newTreeNode(goalStr, Collections.singletonList(childTree)); // System.err.println(" Matched! Unary node: "+result); result.setScore(score); return result; } } System.err.println("Warning: no parse found in ExhaustivePCFGParser.extractBestParse: failing on: [" + start + ", " + end + "] looking for " + goalStr); return null; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/** * Return all best parses (except no ties allowed on POS tags?). * Even though we parse with the unary-closed grammar, since all the * intermediate states in a chain must have been built, we can * reconstruct the unary chain as we go using the non-unary-closed grammar. *///from w w w . j a v a 2s .c om protected List<Tree> extractBestParses(int goal, int start, int end) { // find sources of inside score // no backtraces so we can speed up the parsing for its primary use double bestScore = iScore[start][end][goal]; String goalStr = stateIndex.get(goal); //System.out.println("Searching for "+goalStr+" from "+start+" to "+end+" scored "+bestScore); // check tags if (end - start == 1 && tagIndex.contains(goalStr)) { IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr)); String contextStr = getCoreLabel(start).originalText(); float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr); if (tagScore > Float.NEGATIVE_INFINITY || floodTags) { // return a pre-terminal tree String wordStr = wordIndex.get(words[start]); Tree wordNode = tf.newLeaf(wordStr); Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode)); if (originalTags[start] != null) { tagNode.label().setValue(originalTags[start].tag()); } //System.out.println("Tag node: "+tagNode); return Collections.singletonList(tagNode); } } // check binaries first List<Tree> bestTrees = new ArrayList<Tree>(); for (int split = start + 1; split < end; split++) { for (Iterator<BinaryRule> binaryI = bg.ruleIteratorByParent(goal); binaryI.hasNext();) { BinaryRule br = binaryI.next(); double score = br.score + iScore[start][split][br.leftChild] + iScore[split][end][br.rightChild] + lex.score(br, start, end, split); if (matches(score, bestScore)) { // build binary split List<Tree> leftChildTrees = extractBestParses(br.leftChild, start, split); List<Tree> rightChildTrees = extractBestParses(br.rightChild, split, end); // System.out.println("Found a best way to 
build " + goalStr + "(" + // start + "," + end + ") with " + // leftChildTrees.size() + "x" + // rightChildTrees.size() + " ways to build."); for (Tree leftChildTree : leftChildTrees) { for (Tree rightChildTree : rightChildTrees) { List<Tree> children = new ArrayList<Tree>(); children.add(leftChildTree); children.add(rightChildTree); Tree result = tf.newTreeNode(goalStr, children); //System.out.println("Binary node: "+result); bestTrees.add(result); } } } } } // check unaries for (Iterator<UnaryRule> unaryI = ug.ruleIteratorByParent(goal); unaryI.hasNext();) { UnaryRule ur = unaryI.next(); double score = ur.score + iScore[start][end][ur.child] + lex.score(ur, start, end); if (ur.child != ur.parent && matches(score, bestScore)) { // build unary List<Tree> childTrees = extractBestParses(ur.child, start, end); for (Tree childTree : childTrees) { Tree result = tf.newTreeNode(goalStr, Collections.singletonList(childTree)); //System.out.println("Unary node: "+result); bestTrees.add(result); } } } if (bestTrees.isEmpty()) { System.err.println("Warning: no parse found in ExhaustivePCFGParser.extractBestParse: failing on: [" + start + ", " + end + "] looking for " + goalStr); } return bestTrees; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/** Get the kth best, when calculating kPrime best (e.g. 2nd best of 5). */ private Tree getTree(Vertex v, int k, int kPrime) { lazyKthBest(v, k, kPrime);// w ww .j av a 2 s.c o m String goalStr = stateIndex.get(v.goal); int start = v.start; // int end = v.end; List<Derivation> dHatV = dHat.get(v); // sunita, 21 Dec 2013: fix by sunita, otherwise tag scores where incorrect. if (isTag[v.goal] && v.end - v.start == 1) { IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr)); String contextStr = getCoreLabel(start).originalText(); float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr); if (tagScore > Float.NEGATIVE_INFINITY || floodTags) { // return a pre-terminal tree CoreLabel terminalLabel = getCoreLabel(start); Tree wordNode = tf.newLeaf(terminalLabel); Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode)); if (originalTags[start] != null) { tagNode.label().setValue(originalTags[start].tag()); } if (tagNode.label() instanceof HasTag) { ((HasTag) tagNode.label()).setTag(tagNode.label().value()); } assert (v.end - v.start == 1); return tagNode; } else { assert false; } } if (k - 1 >= dHatV.size()) { return null; } Derivation d = dHatV.get(k - 1); List<Tree> children = new ArrayList<Tree>(); for (int i = 0; i < d.arc.size(); i++) { Vertex child = d.arc.tails.get(i); Tree t = getTree(child, d.j.get(i), kPrime); assert (t != null); children.add(t); } Tree t = tf.newTreeNode(goalStr, children); assert (t.getLeaves().size() == v.end - v.start); return t; }
From source file:coreferenceresolver.element.NounPhrase.java
/** * @param headNode the headNode to set/*w w w . j a v a2 s.c o m*/ */ public void setHeadNode(Tree headNode) { this.headNode = headNode; if (this.headNode != null) { CoreLabel label = (CoreLabel) headNode.label(); this.setHeadLabel(label.get(CoreAnnotations.PartOfSpeechAnnotation.class)); } }