List of usage examples for the edu.stanford.nlp.parser.lexparser.IntTaggedWord constructor IntTaggedWord
public IntTaggedWord(int word, int tag)
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/**
 * Recursively re-scores a binarized tree and compares every node against the
 * inside-score chart {@code iScore}.
 *
 * <p>Pre-terminals are scored with the lexicon; unary and binary nodes are
 * scored as the rule score (floored at -10000.0) plus the scores of their
 * children. Whenever a node's score exceeds the matching chart entry by more
 * than a small tolerance, a diagnostic is printed to standard output. The
 * method only reports; it does not throw or modify the tree.
 *
 * @param tree the binarized (sub)tree to check
 * @param start index of the first word covered by {@code tree}
 * @return the recomputed score of {@code tree}; 0.0 for a leaf
 */
public double validateBinarizedTree(Tree tree, int start) {
  if (tree.isLeaf()) {
    return 0.0;
  }

  final float tolerance = 0.0001f;
  final Tree[] kids = tree.children();
  final String nodeLabel = tree.label().value();

  // Pre-terminal: the score comes from the lexicon and the span has width 1.
  if (tree.isPreTerminal()) {
    String wordStr = kids[0].label().value();
    int tagId = tagIndex.indexOf(nodeLabel);
    int wordId = wordIndex.indexOf(wordStr);
    IntTaggedWord taggedWord = new IntTaggedWord(wordId, tagId);
    float tagScore = lex.score(taggedWord, start, wordStr, null);
    float chartBound = iScore[start][start + 1][stateIndex.indexOf(nodeLabel)];
    if (tagScore > chartBound + tolerance) {
      System.out.println("Invalid tagging:");
      System.out.println(" Tag: " + nodeLabel);
      System.out.println(" Word: " + wordStr);
      System.out.println(" Score: " + tagScore);
      System.out.println(" Bound: " + chartBound);
    }
    return tagScore;
  }

  final int parentState = stateIndex.indexOf(nodeLabel);
  final String leftLabel = kids[0].label().value();
  final int leftState = stateIndex.indexOf(leftLabel);

  // Unary node: rule score plus the single child's score.
  if (tree.numChildren() == 1) {
    UnaryRule rule = new UnaryRule(parentState, leftState);
    double ruleScore = SloppyMath.max(ug.scoreRule(rule), -10000.0);
    double total = ruleScore + validateBinarizedTree(kids[0], start);
    int spanEnd = start + tree.yield().size();
    double chartBound = iScore[start][spanEnd][parentState];
    if (total > chartBound + tolerance) {
      System.out.println("Invalid unary:");
      System.out.println(" Parent: " + nodeLabel);
      System.out.println(" Child: " + leftLabel);
      System.out.println(" Start: " + start);
      System.out.println(" End: " + spanEnd);
      System.out.println(" Score: " + total);
      System.out.println(" Bound: " + chartBound);
    }
    return total;
  }

  // Binary node: rule score plus both children; the right child starts where
  // the left child's yield ends.
  final String rightLabel = kids[1].label().value();
  final int rightState = stateIndex.indexOf(rightLabel);
  BinaryRule rule = new BinaryRule(parentState, leftState, rightState);
  double ruleScore = SloppyMath.max(bg.scoreRule(rule), -10000.0);
  double leftScore = validateBinarizedTree(kids[0], start);
  double rightScore = validateBinarizedTree(kids[1], start + kids[0].yield().size());
  double total = ruleScore + leftScore + rightScore;
  int spanEnd = start + tree.yield().size();
  double chartBound = iScore[start][spanEnd][parentState];
  if (total > chartBound + tolerance) {
    System.out.println("Invalid binary:");
    System.out.println(" Parent: " + nodeLabel);
    System.out.println(" LChild: " + leftLabel);
    System.out.println(" RChild: " + rightLabel);
    System.out.println(" Start: " + start);
    System.out.println(" End: " + spanEnd);
    System.out.println(" Score: " + total);
    System.out.println(" Bound: " + chartBound);
  }
  return total;
}
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) { if (tree.isLeaf()) { return 0.0; }//from ww w. ja v a 2 s . c o m if (tree.isPreTerminal()) { String wordStr = tree.children()[0].label().value(); int tag = tagIndex.indexOf(tree.label().value()); int word = wordIndex.indexOf(wordStr); IntTaggedWord iTW = new IntTaggedWord(word, tag); // if (lex.score(iTW,(leftmost ? 0 : 1)) == Double.NEGATIVE_INFINITY) { // System.out.println("NO SCORE FOR: "+iTW); // } float score = lex.score(iTW, start, wordStr, null); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int parent = stateIndex.indexOf(tree.label().value()); int firstChild = stateIndex.indexOf(tree.children()[0].label().value()); if (tree.numChildren() == 1) { UnaryRule ur = new UnaryRule(parent, firstChild); //+ DEBUG // if (ug.scoreRule(ur) < -10000) { // System.out.println("Grammar doesn't have rule: " + ur); // } // return SloppyMath.max(ug.scoreRule(ur), -10000.0) + scoreBinarizedTree(tree.children()[0], leftmost); double score = ug.scoreRule(ur) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + lex.score(ur, start, start + tree.children()[0].yield().size()); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan()); return score; } int secondChild = stateIndex.indexOf(tree.children()[1].label().value()); BinaryRule br = new BinaryRule(parent, firstChild, secondChild); //+ DEBUG // if (bg.scoreRule(br) < -10000) { // System.out.println("Grammar doesn't have rule: " + br); // } // return SloppyMath.max(bg.scoreRule(br), -10000.0) + // scoreBinarizedTree(tree.children()[0], leftmost) + // scoreBinarizedTree(tree.children()[1], false); int sz0 = tree.children()[0].yield().size(); double score = bg.scoreRule(br) + scoreBinarizedTree(tree.children()[0], start, debugLvl) + scoreBinarizedTree(tree.children()[1], start + sz0, debugLvl) + lex.score(br, start, start + sz0 + 
tree.children()[1].yield().size(), start + sz0); tree.setScore(score); if (debugLvl > 0) System.out.println(score + " " + tree.getSpan() + " " + (sz0 + start)); return score; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/** * Return all best parses (except no ties allowed on POS tags?). * Even though we parse with the unary-closed grammar, since all the * intermediate states in a chain must have been built, we can * reconstruct the unary chain as we go using the non-unary-closed grammar. *//* www .j a va2s . c o m*/ protected List<Tree> extractBestParses(int goal, int start, int end) { // find sources of inside score // no backtraces so we can speed up the parsing for its primary use double bestScore = iScore[start][end][goal]; String goalStr = stateIndex.get(goal); //System.out.println("Searching for "+goalStr+" from "+start+" to "+end+" scored "+bestScore); // check tags if (end - start == 1 && tagIndex.contains(goalStr)) { IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr)); String contextStr = getCoreLabel(start).originalText(); float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr); if (tagScore > Float.NEGATIVE_INFINITY || floodTags) { // return a pre-terminal tree String wordStr = wordIndex.get(words[start]); Tree wordNode = tf.newLeaf(wordStr); Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode)); if (originalTags[start] != null) { tagNode.label().setValue(originalTags[start].tag()); } //System.out.println("Tag node: "+tagNode); return Collections.singletonList(tagNode); } } // check binaries first List<Tree> bestTrees = new ArrayList<Tree>(); for (int split = start + 1; split < end; split++) { for (Iterator<BinaryRule> binaryI = bg.ruleIteratorByParent(goal); binaryI.hasNext();) { BinaryRule br = binaryI.next(); double score = br.score + iScore[start][split][br.leftChild] + iScore[split][end][br.rightChild] + lex.score(br, start, end, split); if (matches(score, bestScore)) { // build binary split List<Tree> leftChildTrees = extractBestParses(br.leftChild, start, split); List<Tree> rightChildTrees = extractBestParses(br.rightChild, split, end); // System.out.println("Found a best way to 
build " + goalStr + "(" + // start + "," + end + ") with " + // leftChildTrees.size() + "x" + // rightChildTrees.size() + " ways to build."); for (Tree leftChildTree : leftChildTrees) { for (Tree rightChildTree : rightChildTrees) { List<Tree> children = new ArrayList<Tree>(); children.add(leftChildTree); children.add(rightChildTree); Tree result = tf.newTreeNode(goalStr, children); //System.out.println("Binary node: "+result); bestTrees.add(result); } } } } } // check unaries for (Iterator<UnaryRule> unaryI = ug.ruleIteratorByParent(goal); unaryI.hasNext();) { UnaryRule ur = unaryI.next(); double score = ur.score + iScore[start][end][ur.child] + lex.score(ur, start, end); if (ur.child != ur.parent && matches(score, bestScore)) { // build unary List<Tree> childTrees = extractBestParses(ur.child, start, end); for (Tree childTree : childTrees) { Tree result = tf.newTreeNode(goalStr, Collections.singletonList(childTree)); //System.out.println("Unary node: "+result); bestTrees.add(result); } } } if (bestTrees.isEmpty()) { System.err.println("Warning: no parse found in ExhaustivePCFGParser.extractBestParse: failing on: [" + start + ", " + end + "] looking for " + goalStr); } return bestTrees; }
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/** Get the kth best, when calculating kPrime best (e.g. 2nd best of 5). */ private Tree getTree(Vertex v, int k, int kPrime) { lazyKthBest(v, k, kPrime);//w w w . j a v a 2s .c om String goalStr = stateIndex.get(v.goal); int start = v.start; // int end = v.end; List<Derivation> dHatV = dHat.get(v); // sunita, 21 Dec 2013: fix by sunita, otherwise tag scores where incorrect. if (isTag[v.goal] && v.end - v.start == 1) { IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr)); String contextStr = getCoreLabel(start).originalText(); float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr); if (tagScore > Float.NEGATIVE_INFINITY || floodTags) { // return a pre-terminal tree CoreLabel terminalLabel = getCoreLabel(start); Tree wordNode = tf.newLeaf(terminalLabel); Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode)); if (originalTags[start] != null) { tagNode.label().setValue(originalTags[start].tag()); } if (tagNode.label() instanceof HasTag) { ((HasTag) tagNode.label()).setTag(tagNode.label().value()); } assert (v.end - v.start == 1); return tagNode; } else { assert false; } } if (k - 1 >= dHatV.size()) { return null; } Derivation d = dHatV.get(k - 1); List<Tree> children = new ArrayList<Tree>(); for (int i = 0; i < d.arc.size(); i++) { Vertex child = d.arc.tails.get(i); Tree t = getTree(child, d.j.get(i), kPrime); assert (t != null); children.add(t); } Tree t = tf.newTreeNode(goalStr, children); assert (t.getLeaves().size() == v.end - v.start); return t; }