List of usage examples for `edu.stanford.nlp.trees.Tree#size()`
@Override public int size()
From source file:ConstituencyParse.java
License: Apache License
public int[] constTreeParents(Tree tree) { Tree binarized = binarizer.transformTree(tree); Tree collapsedUnary = transformer.transformTree(binarized); Trees.convertToCoreLabels(collapsedUnary); collapsedUnary.indexSpans();//from w ww . j a v a2 s .c om List<Tree> leaves = collapsedUnary.getLeaves(); int size = collapsedUnary.size() - leaves.size(); int[] parents = new int[size]; HashMap<Integer, Integer> index = new HashMap<Integer, Integer>(); int idx = leaves.size(); int leafIdx = 0; for (Tree leaf : leaves) { Tree cur = leaf.parent(collapsedUnary); // go to preterminal int curIdx = leafIdx++; boolean done = false; while (!done) { Tree parent = cur.parent(collapsedUnary); if (parent == null) { parents[curIdx] = 0; break; } int parentIdx; int parentNumber = parent.nodeNumber(collapsedUnary); if (!index.containsKey(parentNumber)) { parentIdx = idx++; index.put(parentNumber, parentIdx); } else { parentIdx = index.get(parentNumber); done = true; } parents[curIdx] = parentIdx + 1; cur = parent; curIdx = parentIdx; } } return parents; }
From source file:KleinBilingualParser.java
/**
 * Word-alignment feature: counts alignment links whose source-side (French) word
 * lies OUTSIDE the span of {@code nodeF} while its target-side (English) word lies
 * INSIDE the span of {@code nodeE}.
 *
 * @param nodeF    source-language tree node
 * @param nodeE    target-language tree node
 * @param alignMap source word index -&gt; indices of target words it aligns to
 * @return the link count scaled down by a factor of 10
 */
private static double insideTgtOutsideSrc(Tree nodeF, Tree nodeE,
        HashMap<Integer, ArrayList<Integer>> alignMap) {
    IntPair spanF = nodeF.getSpan();
    // BUG FIX: originally this read nodeF.getSpan(), so the "inside target span"
    // test below compared against the French span instead of the English one.
    IntPair spanE = nodeE.getSpan();
    double sum = 0;
    // Source positions strictly before the French span...
    for (int f = 0; f < spanF.getSource(); f++) {
        if (alignMap.containsKey(f)) {
            for (Integer alignedIndex : alignMap.get(f)) {
                if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) {
                    sum++;
                }
            }
        }
    }
    // ...and source positions strictly after it.
    // NOTE(review): nodeF.size() is the node count of the subtree, not the sentence
    // length; it only serves as an upper bound here — confirm against the caller.
    for (int f = spanF.getTarget() + 1; f < nodeF.size(); f++) {
        if (alignMap.containsKey(f)) {
            for (Integer alignedIndex : alignMap.get(f)) {
                if (alignedIndex >= spanE.getSource() && alignedIndex <= spanE.getTarget()) {
                    sum++;
                }
            }
        }
    }
    return sum / 10;
}
From source file:KleinBilingualParser.java
private static HashMap<Tree, Tree> getHungarianAlignment(Tree eParseTree, Tree fParseTree, double[] weights, HashMap<Integer, ArrayList<Integer>> alignMap) { // remember to ignore the top two weights because they are monolingual features int numFrenchNodes = fParseTree.size() - fParseTree.getLeaves().size(); int numEnglishNodes = eParseTree.size() - eParseTree.getLeaves().size(); double[][] costMatrix = new double[numFrenchNodes][numEnglishNodes]; int i, j;/* ww w. j a v a 2s . co m*/ i = 0; for (Tree fSubTree : fParseTree) { if (!fSubTree.isLeaf()) { j = 0; for (Tree eSubTree : eParseTree) { if (!eSubTree.isLeaf()) { //IF IT GETS TOO SLOW DON'T COMPUTE WORD ALIGNMENT FEATURES FOR LARGE SENTENCES costMatrix[i][j] = weights[2] * spanDiff(fSubTree, eSubTree) + weights[3] * numChildren(fSubTree, eSubTree) + weights[7] * bias(fSubTree, eSubTree); if (numFrenchNodes < 50 && numEnglishNodes < 50) { costMatrix[i][j] += weights[4] * insideBoth(fSubTree, eSubTree, alignMap) + weights[5] * insideSrcOutsideTgt(fSubTree, eSubTree, alignMap) + weights[6] * insideTgtOutsideSrc(fSubTree, eSubTree, alignMap); } costMatrix[i][j] = 0 - costMatrix[i][j]; j++; } } i++; } } HungarianAlgorithm hungAlgSolver = new HungarianAlgorithm(costMatrix); int[] assignments = hungAlgSolver.execute(); HashMap<Tree, Tree> alignment = new HashMap<>(); i = 0; for (Tree fSubTree : fParseTree) { if (!fSubTree.isLeaf()) { j = 0; for (Tree eSubTree : eParseTree) { if (!eSubTree.isLeaf()) { if (j == assignments[i]) { alignment.put(fSubTree, eSubTree); } j++; } } i++; } } return alignment; }
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License: Open Source License
/**
 * Extracts discriminative features from an incremental parse analysis and stores
 * them into {@code analysis} via the feature indexers.
 * Computed here: right-branching spine/rest counts, "heavy" (constituent size),
 * left-neighbour category features (L1/L2), coordination parallelism (coPar /
 * coLenPar), word-with-ancestor features (L2/L3), and integration-point features.
 *
 * @param inputTree          bracketed string of the full tree for the prefix so far
 * @param stringAnalysisTree bracketed string of the current analysis tree
 * @param endOfSent          whether the current word ends the sentence
 * @param analysis           output record the features are written into
 * @param featureIndexers    string-to-id indexers for words and categories
 * @param train              if true, unseen strings are added to the indexers
 */
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    // The most recently read word is the rightmost leaf.
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // Right-branch features (2): 1. number of nodes from root to rightmost
    // non-terminal (spine), 2. count of the remaining nodes (rest).
    // Domination path runs root -> preterminal of the current word; subtract 2
    // to exclude the root and the preterminal themselves.
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    // Subtract the terminals and pre-terminals (leaves.size() * 2) and the root symbol.
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0);
    // Get the list of rightmost complete non-terminals. We don't compute the whole
    // list of complete non-terminals, only the ones just completed (rightmost in
    // the tree). Since features are computed piecemeal, by end-of-sentence all
    // complete non-terminals have been covered, depending on the training strategy.
    // Used for the heavy, neighbours, and edges features.
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // Heavy feature: category, bucketed yield size (capped at "5+"), end-of-sentence flag.
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // Neighbours l1, l2 features: categories of the preterminals one and two
        // positions to the left of this constituent's leftmost leaf (1-based index).
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                // NOTE(review): when leftmostLeafId == 2 only L2 is filled and
                // neighboursL1Str[i] stays null — looks unintended; confirm the
                // consumers of setNeighboursL1 tolerate nulls.
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }
        // coPar and coLenPar features: detect coordination patterns among children.
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);
    // Compute word + L=2 ancestor nodes, L=3 ancestor nodes.
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            // NOTE(review): this getCategoryIndex call omits the 'train' flag,
            // unlike every other call in this method — verify that the no-arg
            // overload behaves as intended during training.
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }
    // Integration point + elementary tree (parent-emulation feature).
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
From source file:qmul.util.treekernel.TreeKernel.java
License: Open Source License
/**
 * Number of subtrees of {@code t} that the kernel counts: the total node count
 * minus one node per leaf (the terminal) — and, when words are excluded, minus
 * a second node per leaf (its preterminal) as well.
 */
public static int countSubTrees(Tree t) {
    int perLeafDiscount = includeWords ? 1 : 2;
    return t.size() - perLeafDiscount * t.getLeaves().size();
}
From source file:qmul.util.treekernel.TreeKernel.java
License: Open Source License
/**
 * Counts the syntactic subtrees of {@code t}: every node except one per leaf
 * (the terminal itself), and additionally except each preterminal when the
 * kernel is configured to exclude words.
 */
public static int countSynTrees(Tree t) {
    int leafCount = t.getLeaves().size();
    return includeWords ? t.size() - leafCount : t.size() - 2 * leafCount;
}