List of usage examples for edu.stanford.nlp.trees Tree dominationPath
public List<Tree> dominationPath(Tree t)
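Before the source-file examples below, here is a minimal, self-contained sketch (not taken from either listed source) of what dominationPath returns: the list of nodes from the receiving node down to the argument node, with both endpoints included. The bracketed sentence and the class name DominationPathDemo are illustrative assumptions.

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class DominationPathDemo {
    public static void main(String[] args) {
        // Any Penn-Treebank-style bracketing works; this one is made up for illustration.
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT the) (NN cat)) (VP (VBD sat))))");

        // Take the last leaf ("sat") and ask the root for the path of nodes down to it.
        List<Tree> leaves = tree.getLeaves();
        Tree lastLeaf = leaves.get(leaves.size() - 1);
        List<Tree> path = tree.dominationPath(lastLeaf);

        // Should print the labels along the path: ROOT S VP VBD sat
        for (Tree node : path) {
            System.out.print(node.nodeString() + " ");
        }
        System.out.println();
    }
}

Note that dominationPath returns null when the argument is not dominated by the receiver, so callers that chain .size() onto the result, as the first example below does, are implicitly assuming the node is inside the tree.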
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    // System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    // Subtract the number of terminals, pre-terminals (leaves.size() * 2) and root symbol
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0);
    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else { // leftmost leaf is at the beginning of the sentence
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }
        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure: ((CC either) (A ...) (CC or) (B ...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure: ((A ...) (, ,) (CC but) (B ...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);
    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }
    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
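In this example, dominationPath drives the right-branch spine feature: the path from the root to the rightmost pre-terminal is measured and the two endpoints are discarded. A hedged sketch of just that step on a made-up tree (the bracketing and class name are illustrative, not from ExtractFeatures):

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class RightBranchSpineSketch {
    public static void main(String[] args) {
        // Illustrative parse; the rightmost leaf is "mat" and its pre-terminal is (NN mat).
        Tree tree = Tree.valueOf(
                "(ROOT (S (NP (DT the) (NN cat)) (VP (VBD sat) (PP (IN on) (NP (DT the) (NN mat))))))");
        List<Tree> leaves = tree.getLeaves();
        Tree rightmostLeaf = leaves.get(leaves.size() - 1);
        Tree preTerminal = rightmostLeaf.parent(tree);

        // Path is ROOT, S, VP, PP, NP, NN -> size 6; dropping root and pre-terminal leaves 4.
        int pathSize = tree.dominationPath(preTerminal).size();
        int rightBranchSpine = pathSize > 2 ? pathSize - 2 : 0;
        System.out.println(rightBranchSpine); // prints 4
    }
}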
From source file:tml.utils.StanfordUtils.java
License:Apache License
/**
 * Calculates the typed dependencies from a grammatical tree.
 * @param tree the grammatical tree
 * @return the typed dependencies as strings, or null if the grammatical structure could not be built
 */
public static List<String> calculateTypedDependencies(Tree tree) {
    double time = System.nanoTime();
    List<String> output = new ArrayList<String>();
    GrammaticalStructure gs = null;
    try {
        gs = getGrammaticalStructureFactory().newGrammaticalStructure(tree);
    } catch (Exception e) {
        logger.error(e);
        return null;
    }
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
    // Get the POS tag for each word: on the root-to-leaf domination path, the node
    // visited just before the leaf is its pre-terminal (the POS tag).
    Hashtable<String, String> posInfo = new Hashtable<String, String>();
    for (Tree t : tree.getLeaves()) {
        Tree pt = null;
        for (Tree tt : tree.dominationPath(t)) {
            if (tt.isLeaf()) {
                posInfo.put(tt.nodeString(), pt.nodeString());
            }
            pt = tt;
        }
    }
    for (Object obj : tdl.toArray()) {
        TypedDependency dep = (TypedDependency) obj;
        String wordGov = dep.gov().nodeString().split("-")[0];
        String wordDep = dep.dep().nodeString().split("-")[0];
        String posGov = posInfo.get(wordGov);
        String posDep = posInfo.get(wordDep);
        String dependencyString = dep.reln().toString() + "(" + dep.gov().pennString().trim() + "-" + posGov
                + ", " + dep.dep().pennString().trim() + "-" + posDep + ")";
        output.add(dependencyString);
    }
    time = System.nanoTime() - time;
    logger.debug("Typed dependencies obtained in " + time * 1e-6 + " milliseconds"); // nanoseconds to milliseconds
    return output;
}
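The POS lookup in this method relies on the fact that the element immediately before a leaf on its domination path is that leaf's pre-terminal. A minimal hedged sketch of the same pattern (the sentence and class name are illustrative assumptions, not from StanfordUtils):

import edu.stanford.nlp.trees.Tree;

public class LeafPosLookupSketch {
    public static void main(String[] args) {
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT the) (NN cat)) (VP (VBD sat))))");
        for (Tree leaf : tree.getLeaves()) {
            Tree previous = null;
            // Walk root -> ... -> pre-terminal -> leaf; "previous" holds the pre-terminal
            // by the time the leaf itself is reached (the leaf is never the first node,
            // since the path starts at the root).
            for (Tree node : tree.dominationPath(leaf)) {
                if (node.isLeaf()) {
                    System.out.println(node.nodeString() + " -> " + previous.nodeString());
                }
                previous = node;
            }
        }
        // Expected output, one pair per line: the -> DT, cat -> NN, sat -> VBD
    }
}

The same information can be obtained more directly with leaf.parent(tree).nodeString(); the domination-path walk here simply mirrors the structure of the original method.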