Example usage for the edu.stanford.nlp.parser.lexparser.IntTaggedWord constructor IntTaggedWord(int word, int tag)

List of usage examples for the edu.stanford.nlp.parser.lexparser.IntTaggedWord constructor IntTaggedWord(int word, int tag)

Introduction

On this page you can find example usages of the edu.stanford.nlp.parser.lexparser.IntTaggedWord constructor IntTaggedWord(int word, int tag).

Prototype

public IntTaggedWord(int word, int tag) 

Source Link

Usage

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Recursively scores a binarized tree and checks every node against the bound stored in
 * {@code iScore} for the node's span and state. A node whose score exceeds its bound by more
 * than a small tolerance is reported on standard output; the traversal continues regardless.
 *
 * @param tree  root of the (sub)tree to validate
 * @param start index of the first word covered by {@code tree}
 * @return the score computed for {@code tree}; {@code 0.0} when {@code tree} is a leaf
 */
public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    final float tolerance = 0.0001f;
    final Tree[] kids = tree.children();
    final String label = tree.label().value();
    final String firstKidLabel = kids[0].label().value();

    // Pre-terminal: the node is a tag over a single word, scored by the lexicon.
    if (tree.isPreTerminal()) {
        int tag = tagIndex.indexOf(label);
        int word = wordIndex.indexOf(firstKidLabel);
        float tagScore = lex.score(new IntTaggedWord(word, tag), start, firstKidLabel, null);
        float tagBound = iScore[start][start + 1][stateIndex.indexOf(label)];
        if (tagScore > tagBound + tolerance) {
            System.out.println("Invalid tagging:");
            System.out.println("  Tag: " + label);
            System.out.println("  Word: " + firstKidLabel);
            System.out.println("  Score: " + tagScore);
            System.out.println("  Bound: " + tagBound);
        }
        return tagScore;
    }

    final int parent = stateIndex.indexOf(label);
    final int firstChild = stateIndex.indexOf(firstKidLabel);
    final boolean unary = tree.numChildren() == 1;

    // Children are validated (and reported) before their parent, left to right.
    double score;
    if (unary) {
        UnaryRule rule = new UnaryRule(parent, firstChild);
        double ruleScore = SloppyMath.max(ug.scoreRule(rule), -10000.0);
        double childScore = validateBinarizedTree(kids[0], start);
        score = ruleScore + childScore;
    } else {
        int secondChild = stateIndex.indexOf(kids[1].label().value());
        BinaryRule rule = new BinaryRule(parent, firstChild, secondChild);
        double ruleScore = SloppyMath.max(bg.scoreRule(rule), -10000.0);
        double leftScore = validateBinarizedTree(kids[0], start);
        double rightScore = validateBinarizedTree(kids[1], start + kids[0].yield().size());
        score = ruleScore + leftScore + rightScore;
    }

    final int end = start + tree.yield().size();
    final double bound = iScore[start][end][parent];
    if (score > bound + tolerance) {
        if (unary) {
            System.out.println("Invalid unary:");
            System.out.println("  Parent: " + label);
            System.out.println("  Child: " + firstKidLabel);
        } else {
            System.out.println("Invalid binary:");
            System.out.println("  Parent: " + label);
            System.out.println("  LChild: " + firstKidLabel);
            System.out.println("  RChild: " + kids[1].label().value());
        }
        System.out.println("  Start: " + start);
        System.out.println("  End: " + end);
        System.out.println("  Score: " + score);
        System.out.println("  Bound: " + bound);
    }
    return score;
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Recursively scores a binarized tree, storing each node's score on the node via
 * {@code setScore}. A pre-terminal is scored by the lexicon; a unary or binary node is the sum
 * of its grammar rule score, its children's scores and the span-dependent score returned by
 * {@code lex.score} for that rule.
 *
 * @param tree     root of the (sub)tree to score
 * @param start    index of the first word covered by {@code tree}
 * @param debugLvl when greater than zero, each node's score and span are printed to standard
 *                 output
 * @return the score of {@code tree}; {@code 0.0} when {@code tree} is a leaf
 */
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    final Tree[] kids = tree.children();
    final String label = tree.label().value();
    final boolean verbose = debugLvl > 0;

    if (tree.isPreTerminal()) {
        String wordStr = kids[0].label().value();
        int tag = tagIndex.indexOf(label);
        int word = wordIndex.indexOf(wordStr);
        float tagScore = lex.score(new IntTaggedWord(word, tag), start, wordStr, null);
        tree.setScore(tagScore);
        if (verbose) {
            System.out.println(tagScore + " " + tree.getSpan());
        }
        return tagScore;
    }

    final int parent = stateIndex.indexOf(label);
    final int firstChild = stateIndex.indexOf(kids[0].label().value());
    // Number of words under the first child; start + leftWidth is the end of its span.
    final int leftWidth = kids[0].yield().size();

    if (tree.numChildren() == 1) {
        UnaryRule rule = new UnaryRule(parent, firstChild);
        // Terms are evaluated and summed strictly left to right: rule, child, span score.
        double ruleScore = ug.scoreRule(rule);
        double childScore = scoreBinarizedTree(kids[0], start, debugLvl);
        double spanScore = lex.score(rule, start, start + leftWidth);
        double unaryScore = ruleScore + childScore + spanScore;
        tree.setScore(unaryScore);
        if (verbose) {
            System.out.println(unaryScore + " " + tree.getSpan());
        }
        return unaryScore;
    }

    final int secondChild = stateIndex.indexOf(kids[1].label().value());
    final BinaryRule rule = new BinaryRule(parent, firstChild, secondChild);
    final int split = start + leftWidth;
    // Terms are evaluated and summed strictly left to right: rule, left, right, span score.
    double ruleScore = bg.scoreRule(rule);
    double leftScore = scoreBinarizedTree(kids[0], start, debugLvl);
    double rightScore = scoreBinarizedTree(kids[1], split, debugLvl);
    double spanScore = lex.score(rule, start, split + kids[1].yield().size(), split);
    double binaryScore = ruleScore + leftScore + rightScore + spanScore;
    tree.setScore(binaryScore);
    if (verbose) {
        System.out.println(binaryScore + " " + tree.getSpan() + " " + split);
    }
    return binaryScore;
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Return all best parses (except no ties allowed on POS tags?).
 * Even though we parse with the unary-closed grammar, since all the
 * intermediate states in a chain must have been built, we can
 * reconstruct the unary chain as we go using the non-unary-closed grammar.
 *
 * <p>A candidate expansion is kept when its score matches (per {@code matches}) the value stored
 * in {@code iScore[start][end][goal]}. For a one-word span whose goal is a tag, a single
 * pre-terminal tree is returned as soon as the lexicon gives a finite score or
 * {@code floodTags} is set.
 *
 * @param goal  index of the state to build, as used by {@code stateIndex}
 * @param start index of the first word of the span (inclusive)
 * @param end   index one past the last word of the span (exclusive)
 * @return every tree found for the span; empty (after a warning on standard error) when no
 *         expansion matches the stored score
 */
protected List<Tree> extractBestParses(int goal, int start, int end) {
    // find sources of inside score
    // no backtraces so we can speed up the parsing for its primary use
    double bestScore = iScore[start][end][goal];
    String goalStr = stateIndex.get(goal);
    // check tags
    if (end - start == 1 && tagIndex.contains(goalStr)) {
        // Looked up once and reused for both the lexicon query and the leaf node.
        String wordStr = wordIndex.get(words[start]);
        IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr));
        String contextStr = getCoreLabel(start).originalText();
        float tagScore = lex.score(tagging, start, wordStr, contextStr);
        if (tagScore > Float.NEGATIVE_INFINITY || floodTags) {
            // return a pre-terminal tree
            Tree wordNode = tf.newLeaf(wordStr);
            Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode));
            // An externally supplied tag, when present, overrides the goal label.
            if (originalTags[start] != null) {
                tagNode.label().setValue(originalTags[start].tag());
            }
            return Collections.singletonList(tagNode);
        }
    }
    // check binaries first
    List<Tree> bestTrees = new ArrayList<Tree>();
    for (int split = start + 1; split < end; split++) {
        for (Iterator<BinaryRule> binaryI = bg.ruleIteratorByParent(goal); binaryI.hasNext();) {
            BinaryRule br = binaryI.next();
            double score = br.score + iScore[start][split][br.leftChild] + iScore[split][end][br.rightChild]
                    + lex.score(br, start, end, split);
            if (matches(score, bestScore)) {
                // build binary split: every left alternative pairs with every right alternative
                List<Tree> leftChildTrees = extractBestParses(br.leftChild, start, split);
                List<Tree> rightChildTrees = extractBestParses(br.rightChild, split, end);
                for (Tree leftChildTree : leftChildTrees) {
                    for (Tree rightChildTree : rightChildTrees) {
                        List<Tree> children = new ArrayList<Tree>();
                        children.add(leftChildTree);
                        children.add(rightChildTree);
                        bestTrees.add(tf.newTreeNode(goalStr, children));
                    }
                }
            }
        }
    }
    // check unaries
    for (Iterator<UnaryRule> unaryI = ug.ruleIteratorByParent(goal); unaryI.hasNext();) {
        UnaryRule ur = unaryI.next();
        double score = ur.score + iScore[start][end][ur.child] + lex.score(ur, start, end);
        // Self-loops (child == parent) are skipped; following one would recurse on the same arguments.
        if (ur.child != ur.parent && matches(score, bestScore)) {
            // build unary
            List<Tree> childTrees = extractBestParses(ur.child, start, end);
            for (Tree childTree : childTrees) {
                bestTrees.add(tf.newTreeNode(goalStr, Collections.singletonList(childTree)));
            }
        }
    }
    if (bestTrees.isEmpty()) {
        // The diagnostic names this class and method so the log points at the right code.
        System.err.println("Warning: no parse found in ConditionalCFGParser.extractBestParses: failing on: ["
                + start + ", " + end + "] looking for " + goalStr);
    }
    return bestTrees;
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Get the kth best, when calculating kPrime best (e.g. 2nd best of 5).
 *
 * @param v      vertex (goal state plus span) whose tree is wanted
 * @param k      1-based rank of the derivation to materialise
 * @param kPrime total number of best derivations being computed
 * @return the tree for the k-th derivation of {@code v}, or {@code null} when fewer than
 *         {@code k} derivations are recorded for {@code v}
 */
private Tree getTree(Vertex v, int k, int kPrime) {
    lazyKthBest(v, k, kPrime);
    final String goalStr = stateIndex.get(v.goal);
    final int start = v.start;
    final List<Derivation> derivations = dHat.get(v);

    // sunita, 21 Dec 2013: fix by sunita, otherwise tag scores were incorrect.
    final boolean singleWordTag = isTag[v.goal] && v.end - v.start == 1;
    if (singleWordTag) {
        IntTaggedWord tagging = new IntTaggedWord(words[start], tagIndex.indexOf(goalStr));
        String contextStr = getCoreLabel(start).originalText();
        float tagScore = lex.score(tagging, start, wordIndex.get(words[start]), contextStr);
        boolean usable = tagScore > Float.NEGATIVE_INFINITY || floodTags;
        if (!usable) {
            // Not expected to happen; with assertions disabled we fall through to the
            // derivation lookup below.
            assert false;
        } else {
            // return a pre-terminal tree
            CoreLabel leafLabel = getCoreLabel(start);
            Tree wordNode = tf.newLeaf(leafLabel);
            Tree tagNode = tf.newTreeNode(goalStr, Collections.singletonList(wordNode));
            // An externally supplied tag, when present, overrides the goal label.
            if (originalTags[start] != null) {
                tagNode.label().setValue(originalTags[start].tag());
            }
            // Keep the label's tag field in step with its (possibly overridden) value.
            if (tagNode.label() instanceof HasTag) {
                ((HasTag) tagNode.label()).setTag(tagNode.label().value());
            }
            assert (v.end - v.start == 1);
            return tagNode;
        }
    }

    final int rank = k - 1;
    if (rank >= derivations.size()) {
        return null;
    }

    final Derivation chosen = derivations.get(rank);

    // Build each tail vertex at the rank that the chosen derivation records for it.
    final List<Tree> subtrees = new ArrayList<Tree>();
    for (int idx = 0; idx < chosen.arc.size(); idx++) {
        Vertex tail = chosen.arc.tails.get(idx);
        Tree subtree = getTree(tail, chosen.j.get(idx), kPrime);
        assert (subtree != null);
        subtrees.add(subtree);
    }
    final Tree built = tf.newTreeNode(goalStr, subtrees);
    assert (built.getLeaves().size() == v.end - v.start);
    return built;
}