Example usage for edu.stanford.nlp.trees Tree size

List of usage examples for edu.stanford.nlp.trees Tree size

Introduction

On this page you can find example usage for edu.stanford.nlp.trees Tree size.

Prototype

@Override
public int size() 

Source Link

Document

Returns the number of nodes the tree contains.

Usage

From source file:ConstituencyParse.java

License:Apache License

/**
 * Converts a constituency parse into a flat parent-pointer array over its nodes.
 *
 * The tree is first binarized and unary-collapsed, then each node is given an
 * integer id: leaves (and their preterminals, which share the leaf's id) get
 * ids 0..numLeaves-1 in left-to-right order; internal nodes above the
 * preterminals are assigned fresh ids starting at numLeaves, in the order they
 * are first reached while walking up from each leaf. The returned array stores,
 * for each node id i, the 1-based id of its parent (parents[i] == parentId + 1),
 * with 0 marking the root.
 *
 * @param tree the input constituency tree
 * @return parent pointers, indexed by node id as described above
 */
public int[] constTreeParents(Tree tree) {
    Tree binarized = binarizer.transformTree(tree);
    Tree collapsedUnary = transformer.transformTree(binarized);
    Trees.convertToCoreLabels(collapsedUnary);
    collapsedUnary.indexSpans(); // assign span indices so node lookups below are well-defined
    List<Tree> leaves = collapsedUnary.getLeaves();
    // One slot per non-leaf position: preterminals reuse their leaf's slot,
    // so total nodes minus leaves covers every id we hand out.
    int size = collapsedUnary.size() - leaves.size();
    int[] parents = new int[size];
    // Maps a node's unique nodeNumber to the compact id we assigned it,
    // so shared ancestors keep the same id across different leaves' walks.
    HashMap<Integer, Integer> index = new HashMap<Integer, Integer>();

    int idx = leaves.size(); // next fresh id for an internal node
    int leafIdx = 0;
    for (Tree leaf : leaves) {
        Tree cur = leaf.parent(collapsedUnary); // go to preterminal
        int curIdx = leafIdx++;
        boolean done = false;
        while (!done) {
            Tree parent = cur.parent(collapsedUnary);
            if (parent == null) {
                // cur is the root: mark it with 0 and stop this leaf's walk.
                parents[curIdx] = 0;
                break;
            }

            int parentIdx;
            int parentNumber = parent.nodeNumber(collapsedUnary);
            if (!index.containsKey(parentNumber)) {
                // First time we reach this ancestor: assign it a fresh id.
                parentIdx = idx++;
                index.put(parentNumber, parentIdx);
            } else {
                // Ancestor already visited from an earlier leaf; the rest of
                // the path to the root is already recorded, so stop after
                // writing this one link.
                parentIdx = index.get(parentNumber);
                done = true;
            }

            parents[curIdx] = parentIdx + 1; // 1-based so 0 can mean "root"
            cur = parent;
            curIdx = parentIdx;
        }
    }

    return parents;
}

From source file:KleinBilingualParser.java

/**
 * Bilingual alignment feature: counts word-alignment links whose source-side
 * word lies OUTSIDE the French node's span while the aligned target-side word
 * lies INSIDE the English node's span, scaled down by 10.
 *
 * @param nodeF    French (source) tree node
 * @param nodeE    English (target) tree node
 * @param alignMap maps a source word index to the list of target word indices
 *                 it is aligned to
 * @return the scaled count of such "inside target, outside source" links
 */
private static double insideTgtOutsideSrc(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {

    IntPair spanF = nodeF.getSpan();
    // BUG FIX: was nodeF.getSpan(), which made spanE a copy of the French span
    // and silently checked alignments against the wrong (source) boundaries.
    IntPair spanE = nodeE.getSpan();

    double sum = 0;
    // Source positions strictly before the French span's start.
    for (int f = 0; f < spanF.getSource(); f++) {
        if (alignMap.containsKey(f)) {
            for (Integer alignedIndex : alignMap.get(f)) {
                if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) {
                    sum++;
                }
            }
        }
    }

    // Source positions strictly after the French span's end, up to the size of
    // the French node's subtree (the original upper bound is kept as-is).
    for (int f = spanF.getTarget() + 1; f < nodeF.size(); f++) {
        if (alignMap.containsKey(f)) {
            for (Integer alignedIndex : alignMap.get(f)) {
                if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) {
                    sum++;
                }
            }
        }
    }

    return sum / 10;
}

From source file:KleinBilingualParser.java

/**
 * Computes a one-to-one alignment between the non-leaf nodes of a French parse
 * tree and the non-leaf nodes of an English parse tree by building a feature
 * cost matrix and solving the resulting assignment problem with the Hungarian
 * algorithm.
 *
 * @param eParseTree English parse tree
 * @param fParseTree French parse tree
 * @param weights    feature weights; indices 0-1 are monolingual features and
 *                   are deliberately not used here
 * @param alignMap   word-alignment map from source indices to target indices
 * @return map from each aligned French node to its English counterpart
 */
private static HashMap<Tree, Tree> getHungarianAlignment(Tree eParseTree, Tree fParseTree, double[] weights,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    // remember to ignore the top two weights because they are monolingual features
    int fNodeCount = fParseTree.size() - fParseTree.getLeaves().size();
    int eNodeCount = eParseTree.size() - eParseTree.getLeaves().size();
    // Word-alignment features are expensive; skip them for large sentences.
    boolean useAlignmentFeatures = fNodeCount < 50 && eNodeCount < 50;

    double[][] costMatrix = new double[fNodeCount][eNodeCount];

    int row = 0;
    for (Tree fNode : fParseTree) {
        if (fNode.isLeaf()) {
            continue;
        }
        int col = 0;
        for (Tree eNode : eParseTree) {
            if (eNode.isLeaf()) {
                continue;
            }
            double score = weights[2] * spanDiff(fNode, eNode)
                    + weights[3] * numChildren(fNode, eNode)
                    + weights[7] * bias(fNode, eNode);
            if (useAlignmentFeatures) {
                score += weights[4] * insideBoth(fNode, eNode, alignMap)
                        + weights[5] * insideSrcOutsideTgt(fNode, eNode, alignMap)
                        + weights[6] * insideTgtOutsideSrc(fNode, eNode, alignMap);
            }
            // Hungarian algorithm minimizes cost, so negate the score.
            costMatrix[row][col] = 0 - score;
            col++;
        }
        row++;
    }

    HungarianAlgorithm solver = new HungarianAlgorithm(costMatrix);
    int[] assignments = solver.execute();

    HashMap<Tree, Tree> alignment = new HashMap<>();

    // Re-walk both trees in the identical order used to build the matrix so
    // that (row, col) positions line up with the solver's assignments.
    row = 0;
    for (Tree fNode : fParseTree) {
        if (fNode.isLeaf()) {
            continue;
        }
        int col = 0;
        for (Tree eNode : eParseTree) {
            if (eNode.isLeaf()) {
                continue;
            }
            if (col == assignments[row]) {
                alignment.put(fNode, eNode);
            }
            col++;
        }
        row++;
    }

    return alignment;
}

From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java

License:Open Source License

/**
 * Extracts discriminative features for the current incremental analysis from
 * its string-serialized trees and stores them on {@code analysis}.
 *
 * Features computed here: right-branching spine/rest counts, "heavy" and
 * neighbour (L1/L2) features over the rightmost complete non-terminals,
 * coordination (coPar/coLenPar) features, word + ancestor (L2/L3) features,
 * and integration-point/elementary-tree combinations.
 *
 * @param inputTree          string form of the full prefix tree
 * @param stringAnalysisTree string form of the current analysis tree
 * @param endOfSent          whether the sentence is complete
 * @param analysis           receives the extracted features
 * @param featureIndexers    maps words/categories to integer ids
 * @param train              if true, unseen words/categories are added to the
 *                           indexers; if false they are looked up only
 */
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    //        System.out.println(inputTree);        
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol

    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals, but only the ones that have been just completed,
    // hence lie at the rightmost position of the tree. Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves(); // leaf indices are needed for the neighbour features below
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature: category + bucketed yield size + end-of-sentence flag
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features: categories of the pre-terminals of the
        // one/two words immediately to the left of this constituent's yield
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                // only one word to the left: L2 context is start-of-sentence
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }

        // coPar and coLenPar features: detect coordination patterns among the children
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);

    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            // FIX: pass the train flag, consistently with every other
            // getCategoryIndex call in this method; it was previously omitted
            // here, so L3 categories were indexed without respecting train mode.
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString(), train)), featureIndexers, train);
        }
    }

    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}

From source file:qmul.util.treekernel.TreeKernel.java

License:Open Source License

/**
 * Returns the number of non-terminal subtrees in {@code t}: the total node
 * count minus the leaf positions, discounting one node per leaf when words are
 * included and two (word + pre-terminal) when they are not.
 *
 * @param t the tree to count over
 * @return the subtree count used by the kernel
 */
public static int countSubTrees(Tree t) {
    int leafCount = t.getLeaves().size();
    int perLeafDiscount = includeWords ? 1 : 2;
    return t.size() - perLeafDiscount * leafCount;
}

From source file:qmul.util.treekernel.TreeKernel.java

License:Open Source License

/**
 * Returns the syntactic subtree count of {@code t}.
 *
 * The formula is identical to {@link #countSubTrees}; delegate to it so the
 * counting logic (including the {@code includeWords} discount) lives in one
 * place instead of being duplicated.
 *
 * @param t the tree to count over
 * @return the syntactic subtree count
 */
public static int countSynTrees(Tree t) {
    return countSubTrees(t);
}