Example usage for edu.stanford.nlp.trees Tree indexLeaves

Introduction

In this page you can find the example usage for edu.stanford.nlp.trees Tree indexLeaves.

Prototype

public void indexLeaves()

Source Link

Document

Assign sequential integer indices to the leaves of the tree rooted at this Tree , starting with 1.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.TreeWithTokens.java

License:Open Source License

public void setTree(Tree tree) {
    if (!(tree.label() instanceof CoreLabel)) {
        tree = tree.deepCopy(tree.treeFactory(), CoreLabel.factory());
    }//from w w  w  .  j  av  a  2 s.c om

    tree.indexLeaves();

    this.tree = tree;
}

From source file:knu.univ.lingvo.coref.MentionExtractor.java

License:Open Source License

/**
 * Sets the label of the leaf nodes to be the CoreLabels in the given sentence
 * The original value() of the Tree nodes is preserved
 *//*from  w  w w  .java2 s  .co m*/
public static void mergeLabels(Tree tree, List<CoreLabel> sentence) {
    int idx = 0;
    for (Tree t : tree.getLeaves()) {
        CoreLabel cl = sentence.get(idx++);
        String value = t.value();
        cl.set(CoreAnnotations.ValueAnnotation.class, value);
        t.setLabel(cl);
    }
    tree.indexLeaves();
}

From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java

License:Open Source License

private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    //        System.out.println(inputTree);        
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol

    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals, but only the ones that have been just completed,
    // hence lie at the rightmost position of the tree. Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;/*from  w ww. j  ava2  s.  c o m*/
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features            
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }

        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);

    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }

    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}