Example usage for edu.stanford.nlp.trees Tree isPreTerminal

Introduction

On this page you can find example usages of the isPreTerminal() method of edu.stanford.nlp.trees.Tree.

Prototype

public boolean isPreTerminal()

Source Link

Document

Return whether this node is a preterminal or not.

Usage

From source file:CollapseUnaryTransformer.java

License:Apache License

/**
 * Builds a new tree in which unary chains below internal nodes are collapsed.
 * <p>
 * Leaves and preterminals are deep-copied unchanged. For any other node, a copy
 * of the node's own label is kept, and the node is given the (recursively
 * transformed) children found by descending through every single-child level
 * until a level has more than one child or its only child is a leaf. The labels
 * of the skipped intermediate nodes do not appear in the result.
 *
 * @param tree the tree to transform; it is not modified
 * @return a newly built tree created through {@code tree.treeFactory()}
 */
public Tree transformTree(Tree tree) {
    // Base case: nothing to collapse at or directly above the words.
    if (tree.isLeaf() || tree.isPreTerminal()) {
        return tree.deepCopy();
    }

    Label copiedLabel = tree.label().labelFactory().newLabel(tree.label());

    // Descend while the current level consists of one non-leaf node.
    Tree[] kids = tree.children();
    while (kids.length == 1) {
        Tree onlyKid = kids[0];
        if (onlyKid.isLeaf()) {
            break;
        }
        kids = onlyKid.children();
    }

    List<Tree> rewrittenKids = Generics.newArrayList();
    for (int i = 0; i < kids.length; i++) {
        rewrittenKids.add(transformTree(kids[i]));
    }
    return tree.treeFactory().newTreeNode(copiedLabel, rewrittenKids);
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Recursively scores a binarized tree and checks every node's score against
 * the matching entry of the {@code iScore} table.
 * <p>
 * Whenever a node's score exceeds its {@code iScore} entry by more than a
 * small tolerance, a diagnostic block is printed to standard output; the
 * computation itself is never aborted. Grammar rule scores are floored at
 * {@code -10000.0} before being added.
 *
 * @param tree  the (sub)tree to validate; nodes are expected to have at most
 *              two children
 * @param start index of the first word covered by {@code tree}
 * @return the score computed for this subtree; {@code 0.0} for a leaf
 */
public double validateBinarizedTree(Tree tree, int start) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    float epsilon = 0.0001f; // tolerance applied to every bound comparison
    Tree[] kids = tree.children();
    String parentLabel = tree.label().value();

    // Tag node: score the (word, tag) pair with the lexicon.
    if (tree.isPreTerminal()) {
        String wordStr = kids[0].label().value();
        int tag = tagIndex.indexOf(parentLabel);
        int word = wordIndex.indexOf(wordStr);
        IntTaggedWord taggedWord = new IntTaggedWord(word, tag);
        float tagScore = lex.score(taggedWord, start, wordStr, null);
        float tagBound = iScore[start][start + 1][stateIndex.indexOf(parentLabel)];
        if (tagScore > tagBound + epsilon) {
            System.out.println("Invalid tagging:");
            System.out.println("  Tag: " + parentLabel);
            System.out.println("  Word: " + wordStr);
            System.out.println("  Score: " + tagScore);
            System.out.println("  Bound: " + tagBound);
        }
        return tagScore;
    }

    int parent = stateIndex.indexOf(parentLabel);
    String leftLabel = kids[0].label().value();
    int firstChild = stateIndex.indexOf(leftLabel);

    // Unary node: rule score plus the score of the only child.
    if (tree.numChildren() == 1) {
        UnaryRule ur = new UnaryRule(parent, firstChild);
        double unaryScore = SloppyMath.max(ug.scoreRule(ur), -10000.0)
                + validateBinarizedTree(kids[0], start);
        int unaryEnd = start + tree.yield().size();
        double unaryBound = iScore[start][unaryEnd][parent];
        if (unaryScore > unaryBound + epsilon) {
            System.out.println("Invalid unary:");
            System.out.println("  Parent: " + parentLabel);
            System.out.println("  Child: " + leftLabel);
            System.out.println("  Start: " + start);
            System.out.println("  End: " + unaryEnd);
            System.out.println("  Score: " + unaryScore);
            System.out.println("  Bound: " + unaryBound);
        }
        return unaryScore;
    }

    // Binary node: rule score plus both children; the right child starts
    // where the left child's yield ends.
    String rightLabel = kids[1].label().value();
    int secondChild = stateIndex.indexOf(rightLabel);
    BinaryRule br = new BinaryRule(parent, firstChild, secondChild);
    double binaryScore = SloppyMath.max(bg.scoreRule(br), -10000.0)
            + validateBinarizedTree(kids[0], start)
            + validateBinarizedTree(kids[1], start + kids[0].yield().size());
    int binaryEnd = start + tree.yield().size();
    double binaryBound = iScore[start][binaryEnd][parent];
    if (binaryScore > binaryBound + epsilon) {
        System.out.println("Invalid binary:");
        System.out.println("  Parent: " + parentLabel);
        System.out.println("  LChild: " + leftLabel);
        System.out.println("  RChild: " + rightLabel);
        System.out.println("  Start: " + start);
        System.out.println("  End: " + binaryEnd);
        System.out.println("  Score: " + binaryScore);
        System.out.println("  Bound: " + binaryBound);
    }
    return binaryScore;
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Recursively scores a binarized tree and stores the result on every
 * non-leaf node via {@code setScore}.
 * <p>
 * A preterminal is scored by the lexicon alone. A unary or binary node adds
 * the grammar rule score (used as returned, without a lower floor), the
 * scores of its children and an additional {@code lex.score} term that
 * receives the rule together with the span boundaries.
 *
 * @param tree     the (sub)tree to score; nodes are expected to have at most
 *                 two children
 * @param start    index of the first word covered by {@code tree}
 * @param debugLvl when greater than zero, each node's score and span are
 *                 printed to standard output
 * @return the score of this subtree; {@code 0.0} for a leaf
 */
public double scoreBinarizedTree(Tree tree, int start, int debugLvl) {
    if (tree.isLeaf()) {
        return 0.0;
    }

    boolean verbose = debugLvl > 0;
    Tree[] kids = tree.children();
    String nodeLabel = tree.label().value();

    // Tag node: lexicon score of the (word, tag) pair.
    if (tree.isPreTerminal()) {
        String wordStr = kids[0].label().value();
        int tag = tagIndex.indexOf(nodeLabel);
        int word = wordIndex.indexOf(wordStr);
        IntTaggedWord taggedWord = new IntTaggedWord(word, tag);
        float tagScore = lex.score(taggedWord, start, wordStr, null);
        tree.setScore(tagScore);
        if (verbose) {
            System.out.println(tagScore + " " + tree.getSpan());
        }
        return tagScore;
    }

    int parent = stateIndex.indexOf(nodeLabel);
    int firstChild = stateIndex.indexOf(kids[0].label().value());

    // Unary node.
    if (tree.numChildren() == 1) {
        UnaryRule ur = new UnaryRule(parent, firstChild);
        double unaryScore = ug.scoreRule(ur) + scoreBinarizedTree(kids[0], start, debugLvl)
                + lex.score(ur, start, start + kids[0].yield().size());
        tree.setScore(unaryScore);
        if (verbose) {
            System.out.println(unaryScore + " " + tree.getSpan());
        }
        return unaryScore;
    }

    // Binary node: the split point is where the left child's yield ends.
    int secondChild = stateIndex.indexOf(kids[1].label().value());
    BinaryRule br = new BinaryRule(parent, firstChild, secondChild);
    int leftWidth = kids[0].yield().size();
    int split = start + leftWidth;
    double binaryScore = bg.scoreRule(br) + scoreBinarizedTree(kids[0], start, debugLvl)
            + scoreBinarizedTree(kids[1], split, debugLvl)
            + lex.score(br, start, split + kids[1].yield().size(), split);
    tree.setScore(binaryScore);
    if (verbose) {
        System.out.println(binaryScore + " " + tree.getSpan() + " " + split);
    }
    return binaryScore;
}

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java

License:Open Source License

/**
 * Converts a parse tree node and, recursively, its descendants into UIMA
 * annotations.
 * <p>
 * A phrasal node becomes a {@code Constituent} whose type is looked up through
 * {@code constituentMappingProvider}, whose offsets come from the character
 * offsets of the first and last token of the node's span, and whose children
 * are the annotations returned for the node's child trees. The constituent is
 * added to the indexes. A pre-terminal node creates no new annotation: the
 * {@code Token} stored under {@code TokenKey} on its terminal's label is
 * linked to the parent and returned.
 *
 * @param aJCas                      the JCas in which annotations are created
 * @param aTreebankLanguagePack      language pack supplying the grammatical
 *                                   function separator; must be an
 *                                   {@code AbstractTreebankLanguagePack}
 * @param aNode                      the tree node to convert
 * @param aParentFS                  annotation to set as parent of the result
 * @param internStrings              whether the constituent type and syntactic
 *                                   function strings are interned
 * @param constituentMappingProvider maps the node label to a UIMA type
 * @param tokens                     tokens of the sentence, indexed by the
 *                                   positions used in the node span
 * @return the constituent or token annotation that represents {@code aNode}
 * @throws IllegalArgumentException if {@code aNode} is neither phrasal nor
 *                                  pre-terminal
 */
private static org.apache.uima.jcas.tcas.Annotation convertConstituentTreeNode(JCas aJCas,
        TreebankLanguagePack aTreebankLanguagePack, Tree aNode, org.apache.uima.jcas.tcas.Annotation aParentFS,
        boolean internStrings, MappingProvider constituentMappingProvider, List<CoreLabel> tokens) {
    // Get node label
    String nodeLabelValue = aNode.value();

    // Extract syntactic function from node label. A separator at position 0
    // is ignored so that the label itself never becomes empty.
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {
        Type constType = constituentMappingProvider.getTagType(nodeLabelValue);

        // The span holds token positions; translate them to character offsets.
        IntPair span = aNode.getSpan();
        int begin = tokens.get(span.getSource()).get(CharacterOffsetBeginAnnotation.class);
        int end = tokens.get(span.getTarget()).get(CharacterOffsetEndAnnotation.class);

        Constituent constituent = (Constituent) aJCas.getCas().createAnnotation(constType, begin, end);
        constituent.setConstituentType(internStrings ? nodeLabelValue.intern() : nodeLabelValue);
        constituent.setSyntacticFunction(
                internStrings && syntacticFunction != null ? syntacticFunction.intern() : syntacticFunction);
        constituent.setParent(aParentFS);

        // Do we have any children?
        List<org.apache.uima.jcas.tcas.Annotation> childAnnotations = new ArrayList<>();
        for (Tree child : aNode.getChildrenAsList()) {
            org.apache.uima.jcas.tcas.Annotation childAnnotation = convertConstituentTreeNode(aJCas,
                    aTreebankLanguagePack, child, constituent, internStrings, constituentMappingProvider,
                    tokens);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        constituent.setChildren(FSCollectionFactory.createFSArray(aJCas, childAnnotations));

        constituent.addToIndexes();

        return constituent;
    }
    // Create parent link on token
    else if (aNode.isPreTerminal()) {
        // link token to its parent constituent
        List<Tree> children = aNode.getChildrenAsList();
        assert children.size() == 1;
        Tree terminal = children.get(0);
        CoreLabel label = (CoreLabel) terminal.label();
        Token token = label.get(TokenKey.class);
        token.setParent(aParentFS);
        return token;
    } else {
        // Reached for nodes that are neither phrasal nor pre-terminal
        // (for example a bare leaf passed in directly).
        throw new IllegalArgumentException(
                "Node must be either phrasal or pre-terminal: [" + aNode.value() + "]");
    }
}

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java

License:Open Source License

/**
 * Creates linked constituent annotations + POS annotations
 * /*from   w  ww.  ja  v a  2s . com*/
 * @param aTreebankLanguagePack
 *            the language pack.
 * @param aNode
 *            the source tree
 * @param aParentFS
 *            the parent annotation
 * @param aCreatePos
 *            sets whether to create or not to create POS tags
 * @return the child-structure (needed for recursive call only)
 */
private Annotation createConstituentAnnotationFromTree(TreebankLanguagePack aTreebankLanguagePack, Tree aNode,
        Annotation aParentFS, boolean aCreatePos) {
    String nodeLabelValue = aNode.value();
    String syntacticFunction = null;
    AbstractTreebankLanguagePack tlp = (AbstractTreebankLanguagePack) aTreebankLanguagePack;
    int gfIdx = nodeLabelValue.indexOf(tlp.getGfCharacter());
    if (gfIdx > 0) {
        syntacticFunction = nodeLabelValue.substring(gfIdx + 1);
        nodeLabelValue = nodeLabelValue.substring(0, gfIdx);
    }

    // calculate span for the current subtree
    IntPair span = tokenTree.getSpan(aNode);

    // Check if the node has been marked by a TSurgeon operation.
    // If so, add a tag-annotation on the constituent
    if (nodeLabelValue.contains(TAG_SEPARATOR) && !nodeLabelValue.equals(TAG_SEPARATOR)) {
        int separatorIndex = nodeLabelValue.indexOf(TAG_SEPARATOR);
        String tag = nodeLabelValue.substring(0, separatorIndex);
        nodeLabelValue = nodeLabelValue.substring(separatorIndex + 1, nodeLabelValue.length());
        createTagAnnotation(span.getSource(), span.getTarget(), tag);
    }

    // Check if node is a constituent node on sentence or phrase-level
    if (aNode.isPhrasal()) {

        // add annotation to annotation tree
        Constituent constituent = createConstituentAnnotation(span.getSource(), span.getTarget(),
                nodeLabelValue, syntacticFunction);
        // link to parent
        if (aParentFS != null) {
            constituent.setParent(aParentFS);
        }

        // Do we have any children?
        List<Annotation> childAnnotations = new ArrayList<Annotation>();
        for (Tree child : aNode.getChildrenAsList()) {
            Annotation childAnnotation = createConstituentAnnotationFromTree(aTreebankLanguagePack, child,
                    constituent, aCreatePos);
            if (childAnnotation != null) {
                childAnnotations.add(childAnnotation);
            }
        }

        // Now that we know how many children we have, link annotation of
        // current node with its children
        FSArray children = new FSArray(jCas, childAnnotations.size());
        int curChildNum = 0;
        for (FeatureStructure child : childAnnotations) {
            children.set(curChildNum, child);
            curChildNum++;
        }
        constituent.setChildren(children);

        // write annotation for current node to index
        jCas.addFsToIndexes(constituent);

        return constituent;
    }
    // If the node is a word-level constituent node (== POS):
    // create parent link on token and (if not turned off) create POS tag
    else if (aNode.isPreTerminal()) {
        // create POS-annotation (annotation over the token)
        POS pos = createPOSAnnotation(span.getSource(), span.getTarget(), nodeLabelValue);

        // in any case: get the token that is covered by the POS
        // TODO how about multi word prepositions etc. (e.g. "such as")
        List<Token> coveredTokens = JCasUtil.selectCovered(jCas, Token.class, pos);
        // the POS should only cover one token
        assert coveredTokens.size() == 1;
        Token token = coveredTokens.get(0);

        // only add POS to index if we want POS-tagging
        if (aCreatePos) {
            jCas.addFsToIndexes(pos);
            token.setPos(pos);
        }

        // link token to its parent constituent
        if (aParentFS != null) {
            token.setParent(aParentFS);
        }

        return token;
    } else {
        throw new IllegalArgumentException("Node must be either phrasal nor pre-terminal");
    }
}

From source file:elkfed.coref.discourse_entities.DiscourseEntity.java

License:Open Source License

/**
 * Turns the premodifiers of a mention into attribute properties.
 * <p>
 * Only pre-terminal premodifiers whose label category is {@code CN} or
 * {@code ADJ} contribute: the word below such a node becomes a
 * {@code Property.ATTRIBUTE} marked with {@code 'N'} or {@code 'A'}
 * respectively. A premodifier that is a bare leaf is reported on standard
 * output and skipped.
 * <p>
 * NOTE(review): earlier documentation stated that premodifiers belonging to
 * an embedded NE are excluded; no such filtering is done in this method, so
 * presumably {@code getPremodifiers()} handles it. Confirm.
 *
 * @param np The NP to be processed
 * @return Set A Set of Property Objects; one for every premodifier (attribute)
 */
private Set<Property> computeAttributes(Mention np) {
    LanguagePlugin languagePlugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> attributes = new LinkedHashSet<Property>();

    List<Tree> preModifiers = np.getPremodifiers(); // straight from Mention
    if (preModifiers == null) {
        return attributes;
    }

    // Expected structure: (NP (DT the) (JJ last) (NN supper))
    for (Tree mod : preModifiers) {
        if (mod.isLeaf()) {
            // this shouldn't happen
            System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
            continue;
        }

        NodeCategory category = languagePlugin.labelCat(mod.nodeString());
        if (!mod.isPreTerminal()) {
            continue;
        }

        boolean isNoun = category == NodeCategory.CN;
        boolean isAdjective = category == NodeCategory.ADJ;
        if (!isNoun && !isAdjective) {
            continue;
        }
        char pos = isAdjective ? 'A' : 'N';

        Tree wordNode = mod.firstChild();
        _logger.fine("Adding attribute " + wordNode.nodeString() + " to entity");
        attributes.add(new Property(Property.ATTRIBUTE, wordNode.nodeString(), pos));
    }
    return attributes;
}

From source file:elkfed.coref.discourse_entities.DiscourseEntity.java

License:Open Source License

/**
 * Derives relation properties from the postmodifiers of a mention.
 * <p>
 * Only postmodifiers whose label category is {@code PP} and that have exactly
 * two children contribute. For such a node a {@code Property} is created from
 * the preposition (the first child's own first child when it has one,
 * otherwise the first child itself) and the word below the head found by
 * {@code massimoHeadFindHack} in the second child. Leaf postmodifiers are
 * reported on standard output; pre-terminal postmodifiers are ignored.
 *
 * @param np the mention whose postmodifiers are inspected
 * @return the relation properties found, in encounter order; empty when the
 *         mention has no postmodifiers
 */
private Set<Property> computeInitialRelations(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> postModifiers = np.getPostmodifiers(); // straight from Mention
    if (postModifiers == null) {
        return result;
    }

    // Expected structure: (NP (NN software) (PP from (NP India)))
    for (Tree mod : postModifiers) {
        if (mod.isLeaf()) {
            // this shouldn't happen
            System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
            continue;
        }
        if (mod.isPreTerminal()) {
            // Not expected for a postmodifier either. The previous code only
            // computed a part-of-speech marker here that was never used, so
            // nothing is contributed for such nodes.
            continue;
        }

        NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
        if (ncat != NodeCategory.PP || mod.numChildren() != 2) {
            continue;
        }

        // (PP (in from) (NP (nnp India)))
        Tree prepNode = mod.getChild(0);
        Tree npNode = mod.getChild(1);
        Tree npHead = massimoHeadFindHack(npNode);
        if (npHead == null || prepNode == null) {
            continue;
        }

        // Use the word below the preposition tag when there is one; a
        // childless first node is used as it is (keeps this from crashing).
        if (prepNode.numChildren() > 0) {
            prepNode = prepNode.firstChild();
        }
        // NOTE(review): npHead.firstChild() is dereferenced without a check;
        // this presumes the head finder returns a node with a child. Confirm.
        result.add(new Property(prepNode.nodeString(), npHead.firstChild().nodeString()));
    }
    return result;
}

From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java

License:Apache License

/**
 * Walks the tree top-down and collects one phrase (a list of parse tree
 * nodes) for every non-pre-terminal node that has a label and a value.
 * <p>
 * Pre-terminal nodes end the descent and contribute nothing. For other nodes
 * the phrase is parsed from the node's label value and string form, aligned
 * with {@code sentence} by {@code assignIndexToNodes}, and appended to
 * {@code phrases} when it is not empty. If the first node of the phrase ends
 * up without an id, it receives the id of the second node minus one when the
 * second node has one; otherwise the failed alignment is logged.
 *
 * @param t        the current tree node
 * @param sentence the nodes of the sentence, used for index assignment
 * @param phrases  output list receiving the collected phrases
 */
private void navigateR(Tree t, List<ParseTreeNode> sentence, List<List<ParseTreeNode>> phrases) {
    if (t.isPreTerminal()) {
        return;
    }

    if (t.label() != null && t.value() != null) {
        // per the original author's note: if ROOT or S, returns empty
        List<ParseTreeNode> nodes = assignIndexToNodes(parsePhrase(t.label().value(), t.toString()), sentence);
        if (!nodes.isEmpty()) {
            phrases.add(nodes);
        }

        boolean firstLacksId = nodes.size() > 0 && nodes.get(0).getId() == null;
        if (firstLacksId) {
            boolean secondHasId = nodes.size() > 1 && nodes.get(1) != null && nodes.get(1).getId() != null;
            if (!secondHasId) {
                log.severe("Failed alignment:" + nodes);
            } else {
                try {
                    // Derive the missing id from the following node.
                    ParseTreeNode first = nodes.get(0);
                    first.setId(nodes.get(1).getId() - 1);
                    nodes.set(0, first);
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        }
    }

    Tree[] kids = t.children();
    if (kids == null) {
        return;
    }
    for (int i = 0; i < kids.length; i++) {
        navigateR(kids[i], sentence, phrases);
    }
}

From source file:opennlp.tools.parse_thicket.matching.PT2ThicketPhraseBuilder.java

License:Apache License

/**
 * Walks the tree top-down and collects a phrase for every labelled node.
 * <p>
 * A pre-terminal node is parsed from its full string form and ends the
 * descent. Any other node is parsed from its label value (only when both
 * label and value are present), after which its children are visited.
 * Parsed phrases are appended to {@code phrases} when they are not empty.
 *
 * @param t        the current tree node
 * @param sentence passed through to the recursive calls; not read here
 * @param l        passed through to the recursive calls; not read here
 * @param phrases  output list receiving the collected phrases
 */
private void navigateR1(Tree t, List<ParseTreeNode> sentence, int l, List<List<ParseTreeNode>> phrases) {
    if (t.isPreTerminal()) {
        if (t.label() != null) {
            List<ParseTreeNode> tagPhrase = parsePhrase(t.toString());
            if (!tagPhrase.isEmpty()) {
                phrases.add(tagPhrase);
            }
        }
        return;
    }

    if (t.label() != null && t.value() != null) {
        List<ParseTreeNode> labelPhrase = parsePhrase(t.label().value());
        if (!labelPhrase.isEmpty()) {
            phrases.add(labelPhrase);
        }
    }

    Tree[] kids = t.children();
    if (kids == null) {
        return;
    }
    for (int i = 0; i < kids.length; i++) {
        navigateR1(kids[i], sentence, l, phrases);
    }
}

From source file:opennlp.tools.parse_thicket.parse_thicket2graph.GraphFromPTreeBuilder.java

License:Apache License

/**
 * Adds the non-leaf children of {@code tree} to the graph as vertices linked
 * to {@code currParent}, then recurses into each of them.
 * <p>
 * The vertex name is the child's full string form for pre-terminals and the
 * child's label value for phrasal nodes, in both cases followed by
 * {@code " #"} and the current level. Exceptions raised while building a
 * vertex are printed and the traversal continues.
 *
 * @param tree       the node whose children are processed
 * @param g          the graph being built
 * @param l          the current level, used as suffix in vertex names
 * @param currParent the graph node that new vertices are attached to
 */
private void navigate(Tree tree, Graph<ParseGraphNode, DefaultEdge> g, int l, ParseGraphNode currParent) {
    int childCount = tree.getChildrenAsList().size();
    if (childCount == 0) {
        return;
    }
    if (childCount == 1) {
        // NOTE(review): the only child is traversed here with the current
        // parent and is then handled again by the loop below.
        navigate(tree.getChildrenAsList().get(0), g, l + 1, currParent);
    }

    for (Tree child : tree.getChildrenAsList()) {
        ParseGraphNode childNode = null;
        try {
            if (child.isLeaf()) {
                continue;
            }
            // Nodes whose label starts with "S" additionally have their
            // first child traversed directly under the current parent.
            if (child.label().value().startsWith("S")) {
                navigate(child.getChildrenAsList().get(0), g, l + 1, currParent);
            }

            String vertexName;
            if (child.isPhrasal() && !child.isPreTerminal()) {
                vertexName = child.label().value() + " #" + Integer.toString(l);
            } else {
                vertexName = child.toString() + " #" + Integer.toString(l);
            }
            childNode = new ParseGraphNode(child, vertexName);
            g.addVertex(childNode);
            g.addEdge(currParent, childNode);
        } catch (Exception e) {
            e.printStackTrace();
        }
        // childNode stays null when the vertex could not be created.
        navigate(child, g, l + 1, childNode);
    }
}