List of usage examples for edu.stanford.nlp.trees Tree dominationPath
public List<Tree> dominationPath(Tree t)
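Before the source-file examples below, here is a minimal, self-contained sketch (not taken from either listed source) of what dominationPath returns: the list of nodes from the receiving node down to the argument node, with both endpoints included. The bracketed sentence and the class name DominationPathDemo are illustrative assumptions.

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class DominationPathDemo {
    public static void main(String[] args) {
        // Any Penn-Treebank-style bracketing works; this one is made up for illustration.
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT the) (NN cat)) (VP (VBD sat))))");

        // Take the last leaf ("sat") and ask the root for the path of nodes down to it.
        List<Tree> leaves = tree.getLeaves();
        Tree lastLeaf = leaves.get(leaves.size() - 1);
        List<Tree> path = tree.dominationPath(lastLeaf);

        // Should print the labels along the path: ROOT S VP VBD sat
        for (Tree node : path) {
            System.out.print(node.nodeString() + " ");
        }
        System.out.println();
    }
}

Note that dominationPath returns null when the argument is not dominated by the receiver, so callers that chain .size() onto the result, as the first example below does, are implicitly assuming the node is inside the tree.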
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    // System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    // Subtract the number of terminals, pre-terminals (leaves.size() * 2) and root symbol
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0);
    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else { // leftmost leaf is at the beginning of the sentence
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }
        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure: ((CC either) (A ...) (CC or) (B ...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure: ((A ...) (, ,) (CC but) (B ...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);
    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }
    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
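In this example, dominationPath drives the right-branch spine feature: the path from the root to the rightmost pre-terminal is measured and the two endpoints are discarded. A hedged sketch of just that step on a made-up tree (the bracketing and class name are illustrative, not from ExtractFeatures):

import edu.stanford.nlp.trees.Tree;
import java.util.List;

public class RightBranchSpineSketch {
    public static void main(String[] args) {
        // Illustrative parse; the rightmost leaf is "mat" and its pre-terminal is (NN mat).
        Tree tree = Tree.valueOf(
                "(ROOT (S (NP (DT the) (NN cat)) (VP (VBD sat) (PP (IN on) (NP (DT the) (NN mat))))))");
        List<Tree> leaves = tree.getLeaves();
        Tree rightmostLeaf = leaves.get(leaves.size() - 1);
        Tree preTerminal = rightmostLeaf.parent(tree);

        // Path is ROOT, S, VP, PP, NP, NN -> size 6; dropping root and pre-terminal leaves 4.
        int pathSize = tree.dominationPath(preTerminal).size();
        int rightBranchSpine = pathSize > 2 ? pathSize - 2 : 0;
        System.out.println(rightBranchSpine); // prints 4
    }
}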
From source file:tml.utils.StanfordUtils.java
License:Apache License
/**
 * Calculates the typed dependencies from a grammatical tree.
 * @param tree the grammatical tree
 * @return the typed dependencies as strings, or null if the grammatical structure could not be built
 */
public static List<String> calculateTypedDependencies(Tree tree) {
    double time = System.nanoTime();
    List<String> output = new ArrayList<String>();
    GrammaticalStructure gs = null;
    try {
        gs = getGrammaticalStructureFactory().newGrammaticalStructure(tree);
    } catch (Exception e) {
        logger.error(e);
        return null;
    }
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();
    // Get the POS tag for each word: on the root-to-leaf domination path, the node
    // visited just before the leaf is its pre-terminal (the POS tag).
    Hashtable<String, String> posInfo = new Hashtable<String, String>();
    for (Tree t : tree.getLeaves()) {
        Tree pt = null;
        for (Tree tt : tree.dominationPath(t)) {
            if (tt.isLeaf()) {
                posInfo.put(tt.nodeString(), pt.nodeString());
            }
            pt = tt;
        }
    }
    for (Object obj : tdl.toArray()) {
        TypedDependency dep = (TypedDependency) obj;
        String wordGov = dep.gov().nodeString().split("-")[0];
        String wordDep = dep.dep().nodeString().split("-")[0];
        String posGov = posInfo.get(wordGov);
        String posDep = posInfo.get(wordDep);
        String dependencyString = dep.reln().toString() + "(" + dep.gov().pennString().trim() + "-" + posGov
                + ", " + dep.dep().pennString().trim() + "-" + posDep + ")";
        output.add(dependencyString);
    }
    time = System.nanoTime() - time;
    logger.debug("Typed dependencies obtained in " + time * 1e-6 + " milliseconds"); // nanoseconds to milliseconds
    return output;
}
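The POS lookup in this method relies on the fact that the element immediately before a leaf on its domination path is that leaf's pre-terminal. A minimal hedged sketch of the same pattern (the sentence and class name are illustrative assumptions, not from StanfordUtils):

import edu.stanford.nlp.trees.Tree;

public class LeafPosLookupSketch {
    public static void main(String[] args) {
        Tree tree = Tree.valueOf("(ROOT (S (NP (DT the) (NN cat)) (VP (VBD sat))))");
        for (Tree leaf : tree.getLeaves()) {
            Tree previous = null;
            // Walk root -> ... -> pre-terminal -> leaf; "previous" holds the pre-terminal
            // by the time the leaf itself is reached (the leaf is never the first node,
            // since the path starts at the root).
            for (Tree node : tree.dominationPath(leaf)) {
                if (node.isLeaf()) {
                    System.out.println(node.nodeString() + " -> " + previous.nodeString());
                }
                previous = node;
            }
        }
        // Expected output, one pair per line: the -> DT, cat -> NN, sat -> VBD
    }
}

The same information can be obtained more directly with leaf.parent(tree).nodeString(); the domination-path walk here simply mirrors the structure of the original method.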