Example usage for edu.stanford.nlp.trees Tree dominationPath

Introduction

On this page you can find example usage for edu.stanford.nlp.trees Tree dominationPath.

Prototype

public List<Tree> dominationPath(Tree t) 

Document

Returns the path of nodes leading down to a dominated node, including this and the dominated node itself.
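
Before the longer examples under Usage, a minimal sketch of the call itself; the bracketed sentence and the class name DominationPathDemo are illustrative only and not taken from the cited projects:

import java.util.List;

import edu.stanford.nlp.trees.Tree;

public class DominationPathDemo {
    public static void main(String[] args) {
        // Parse a small Penn Treebank style bracketing (illustrative input).
        Tree root = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBZ sleeps))))");
        // Take the last leaf ("sleeps") and walk the path from the root down to it.
        List<Tree> leaves = root.getLeaves();
        Tree lastLeaf = leaves.get(leaves.size() - 1);
        List<Tree> path = root.dominationPath(lastLeaf);
        // The path includes the root and the dominated node itself: ROOT S VP VBZ sleeps
        for (Tree node : path) {
            System.out.print(node.nodeString() + " ");
        }
        System.out.println();
        // dominationPath returns null when the argument is not dominated by this node.
        System.out.println(root.dominationPath(Tree.valueOf("(NN dog)")) == null);
    }
}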

Usage

From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java

License:Open Source License

private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    //        System.out.println(inputTree);        
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute the domination path from the root down to the pre-terminal of the rightmost leaf; subtract 2 from its size to exclude the root and the pre-terminal themselves
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    // Subtract the number of terminals and pre-terminals (leaves.size() * 2) and the root symbol
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0);

    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals, but only the ones that have been just completed,
    // hence lie at the rightmost position of the tree. Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features            
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else { // leftmost leaf is at the beginning of the sentence
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }

        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);

    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }

    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
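
As a standalone illustration of the right-branch computation above, the following sketch (with an invented input tree, not taken from pltag) derives the same spine length: the size of the domination path from the root to the pre-terminal of the last word, minus the root and the pre-terminal themselves:

import java.util.List;

import edu.stanford.nlp.trees.Tree;

public class RightBranchSpineDemo {
    public static void main(String[] args) {
        // Illustrative input; any Penn Treebank bracketing works here.
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT The) (NN cat)) (VP (VBZ chased) (NP (DT a) (NN mouse)))))");
        List<Tree> leaves = tree.getLeaves();
        Tree currentWord = leaves.get(leaves.size() - 1); // "mouse"
        // Path from the root down to the pre-terminal (NN) of the last word: ROOT S VP NP NN
        int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
        // Exclude the root and the pre-terminal, as in extractStringTreeAnalysisFeatures
        int rightBranchSpine = pathSize > 2 ? pathSize - 2 : 0;
        System.out.println(rightBranchSpine); // 3 for this tree: S, VP, NP
    }
}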

From source file:tml.utils.StanfordUtils.java

License:Apache License

/**
 * Calculates the typed dependencies from a grammatical tree
 * @param tree the grammatical tree
 */
public static List<String> calculateTypedDependencies(Tree tree) {
    double time = System.nanoTime();
    List<String> output = new ArrayList<String>();
    GrammaticalStructure gs = null;
    try {
        gs = getGrammaticalStructureFactory().newGrammaticalStructure(tree);
    } catch (Exception e) {
        logger.error(e);
        return null;
    }

    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();

    // Get the POS tag from each word
    Hashtable<String, String> posInfo = new Hashtable<String, String>();
    for (Tree t : tree.getLeaves()) {
        Tree pt = null;
        for (Tree tt : tree.dominationPath(t)) {
            if (tt.isLeaf()) {
                posInfo.put(tt.nodeString(), pt.nodeString());
            }
            pt = tt;
        }
    }

    for (TypedDependency dep : tdl) {

        String wordGov = dep.gov().nodeString().split("-")[0];
        String wordDep = dep.dep().nodeString().split("-")[0];
        String posGov = posInfo.get(wordGov);
        String posDep = posInfo.get(wordDep);
        String dependencyString = dep.reln().toString() + "(" + dep.gov().pennString().trim() + "-" + posGov
                + ", " + dep.dep().pennString().trim() + "-" + posDep + ")";
        output.add(dependencyString);
    }

    time = System.nanoTime() - time;
    logger.debug("Typed dependencies obtained in " + time * 1E-6 + " milliseconds"); // nanoseconds to milliseconds
    return output;
}
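
A side note on the posInfo loop above: because the domination path ends with the pre-terminal immediately followed by the leaf, the same word-to-tag map can be built by reading the second-to-last node of each path. A minimal sketch, assuming the same Stanford Tree API; the class and method names are invented for illustration and are not part of StanfordUtils:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import edu.stanford.nlp.trees.Tree;

public class LeafPosTags {
    /**
     * Hypothetical helper: maps each word to its POS tag by taking the node
     * just above the leaf on its domination path.
     */
    public static Map<String, String> leafPosTags(Tree tree) {
        Map<String, String> posInfo = new HashMap<String, String>();
        for (Tree leaf : tree.getLeaves()) {
            List<Tree> path = tree.dominationPath(leaf); // root ... pre-terminal, leaf
            if (path != null && path.size() >= 2) {
                posInfo.put(leaf.nodeString(), path.get(path.size() - 2).nodeString());
            }
        }
        return posInfo;
    }
}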