List of usage examples for edu.stanford.nlp.trees Tree nodeString
public String nodeString()
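Before the project-specific usages below, here is a minimal, self-contained sketch of what nodeString() returns: the label of a single node as a string, i.e. a category such as "S" or "NN" for internal nodes and the word itself for leaves. The class name NodeStringDemo and the example sentence are invented for illustration; the sketch assumes only that the Stanford Parser jar is on the classpath and builds the tree with Tree.valueOf instead of running a parser.

import edu.stanford.nlp.trees.Tree;

public class NodeStringDemo {
    public static void main(String[] args) {
        // Build a small constituency tree from its bracketed (Penn Treebank) form.
        Tree tree = Tree.valueOf("(S (NP (DT the) (NN dog)) (VP (VBZ barks)))");

        // nodeString() gives the label of this single node, not the whole subtree.
        System.out.println(tree.nodeString()); // expected: S

        // For leaves it is the word; the leaf's parent in the tree gives the POS tag.
        for (Tree leaf : tree.getLeaves()) {
            System.out.println(leaf.nodeString() + "/" + leaf.parent(tree).nodeString());
        }
    }
}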
From source file:Ceist.CeistView.java
License:Open Source License
/**
 * Displays the match results in a table with the matched parts formatted.
 *
 * @param m the matcher containing the match results
 * @param matchedTree the tree which was matched
 * @param showTagged whether to show POS tags or not
 * @return the HTML to be displayed in the table row
 */
private String[] getMatcherTableRow(TregexMatcher m, Tree matchedTree, boolean showTagged) {
    //List<Tree> allMatches = new ArrayList<Tree>();

    // Find matches for templates
    String strQuestion = QuestionTemplate.getQuestionString(m, txtQuestionTemplate.getText());
    String strAnswer = AnswerTemplate.getAnswerString(m, txtAnswerTemplate.getText());

    // Display the full tree in which the match was found
    String strMatchAll = "<html>";
    String lastRef = "";

    for (Tree t : matchedTree.getLeaves()) {
        String nodeValue = t.nodeString();

        if (nodeValue.startsWith("{Q")) {
            // This is a match for the question string
            String ref = nodeValue.substring(2, nodeValue.indexOf("}"));
            nodeValue = nodeValue.substring(nodeValue.indexOf("}") + 1);
            t.setValue(nodeValue);

            if (!ref.equals(lastRef))
                lastRef = ref;
            else
                ref = "";

            if (!showTagged)
                strMatchAll += "<sup>" + ref + "</sup><b><font color=green>" + nodeValue + "</font></b> ";
            else
                strMatchAll += "<sup>" + ref + "</sup><b><font color=green>" + nodeValue
                        + "</font><font color=gray>/" + t.parent(matchedTree).nodeString() + "</font></b> ";
        } else if (nodeValue.startsWith("{A")) {
            // This is a match for the answer string
            String ref = nodeValue.substring(2, nodeValue.indexOf("}"));
            nodeValue = nodeValue.substring(nodeValue.indexOf("}") + 1);
            t.setValue(nodeValue);

            if (!ref.equals(lastRef))
                lastRef = ref;
            else
                ref = "";

            if (!showTagged)
                strMatchAll += "<sup>" + ref + "</sup><b>" + nodeValue + "</b> ";
            else
                strMatchAll += "<sup>" + ref + "</sup><b>" + nodeValue + "<font color=gray>/"
                        + t.parent(matchedTree).nodeString() + "</font></b> ";
        } else {
            // Normal unmatched text
            if (!showTagged)
                strMatchAll += nodeValue + " ";
            else
                strMatchAll += nodeValue + "<font color=gray>/" + t.parent(matchedTree).nodeString() + "</font> ";
        }
    }

    strMatchAll += "</html>";

    return new String[] { strMatchAll, strQuestion, strAnswer };
}
From source file:edu.nus.comp.nlp.stanford.UtilParser.java
License:Open Source License
public static DefaultMutableTreeNode toDMTree(DefaultMutableTreeNode root, Tree tree) {
    if (root == null) {
        root = new DefaultMutableTreeNode();
    }
    String nodeContent = tree.nodeString();
    root.setUserObject(nodeContent);
    for (Tree c : tree.children()) {
        DefaultMutableTreeNode n = toDMTree(null, c);
        root.add(n);
    }
    return root;
}
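As a hedged usage sketch (the demo class, frame title, and example sentence are assumptions, not part of the project), the DefaultMutableTreeNode returned by toDMTree can be handed straight to a Swing JTree to browse the parse:

import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.JTree;
import javax.swing.tree.DefaultMutableTreeNode;
import javax.swing.tree.DefaultTreeModel;
import edu.nus.comp.nlp.stanford.UtilParser;
import edu.stanford.nlp.trees.Tree;

public class ParseTreeViewerDemo {
    public static void main(String[] args) {
        // Any bracketed parse will do for the demo; a real caller would use parser output.
        Tree parse = Tree.valueOf("(S (NP (PRP It)) (VP (VBZ works)))");

        // Convert the Stanford Tree into a Swing tree model and display it.
        DefaultMutableTreeNode root = UtilParser.toDMTree(null, parse);
        JFrame frame = new JFrame("Parse tree");
        frame.add(new JScrollPane(new JTree(new DefaultTreeModel(root))));
        frame.pack();
        frame.setVisible(true);
    }
}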
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
/**
 * Reads the premodifiers from the input Mention and creates Property objects as attributes
 * for every premodifier not part of an embedded NE. Embedded NEs are to be treated as
 * relations to other Discourse Entities.
 *
 * @param np the NP to be processed
 * @return a Set of Property objects; one for every premodifier (attribute)
 */
private Set<Property> computeAttributes(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> preModifiers = np.getPremodifiers(); // straight from Mention
    //DEBUG
    //System.out.println("Number of premodifiers of " + np.getMarkableString() + " :" + preModifiers.size());
    char pos = '\0';
    if ((preModifiers != null) && (preModifiers.size() > 0)) {
        for (int i = 0; i < preModifiers.size(); i++) {
            Tree mod = preModifiers.get(i);
            // Expected structure:
            // (NP (DT the) (JJ last) (NN supper))
            if (mod.isLeaf()) {
                // this shouldn't happen
                System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
                //result.add(new Property(Property.ATTRIBUTE, mod.nodeString()));
                //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString())));
            } else {
                NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                if (mod.isPreTerminal()) {
                    if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) {
                        if (ncat == NodeCategory.CN) {
                            pos = 'N';
                        }
                        if (ncat == NodeCategory.ADJ) {
                            pos = 'A';
                        }
                        //System.out.println("Pre terminal node " + mod.nodeString());
                        Tree wordNode = mod.firstChild();
                        _logger.fine("Adding attribute " + wordNode.nodeString() + " to entity");
                        result.add(new Property(Property.ATTRIBUTE, wordNode.nodeString(), pos));
                    }
                }
            }
        }
    }
    return result;
}
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Set<Property> computeInitialRelations(Mention np) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    Set<Property> result = new LinkedHashSet<Property>();
    List<Tree> postModifiers = np.getPostmodifiers(); // straight from Mention
    char pos = '\0';
    //DEBUG
    //System.out.println("Number of postmodifiers of " + np.getMarkableString() + " :" + postModifiers.size());
    if ((postModifiers != null) && (postModifiers.size() > 0)) {
        for (int i = 0; i < postModifiers.size(); i++) {
            Tree mod = postModifiers.get(i);
            // Expected structure:
            // (NP (NN software) (PP from (NP India)))
            if (mod.isLeaf()) {
                // this shouldn't happen
                System.out.println("WARNING: UNEXPECTED LEAF " + mod.nodeString());
                //result.add(new Property(Property.ATTRIBUTE, mod.nodeString()));
                //result.add(new Property(Property.ATTRIBUTE, getSense(mod.nodeString())));
            } else {
                if (mod.isPreTerminal()) {
                    // this shouldn't happen either, but we'll add it to the properties
                    NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                    if (ncat == NodeCategory.CN || ncat == NodeCategory.ADJ) {
                        if (ncat == NodeCategory.CN) {
                            pos = 'N';
                        }
                        if (ncat == NodeCategory.ADJ) {
                            pos = 'A';
                        }
                    }
                } else {
                    //System.out.println("Type of postmodifier: " + mod.nodeString());
                    NodeCategory ncat = lang_plugin.labelCat(mod.nodeString());
                    if (ncat == NodeCategory.PP) {
                        if (mod.numChildren() == 2) {
                            // (PP (in from) (NP (nnp India)))
                            Tree prepNode = mod.getChild(0);
                            Tree npNode = mod.getChild(1);
                            Tree npHead = massimoHeadFindHack(npNode);
                            if (npHead != null && prepNode != null) {
                                //DEBUG
                                //System.out.println("Adding relation " + prepNode.firstChild().nodeString()
                                //        + " " + npHead.firstChild().nodeString());
                                /* -- no clue what it means, just fixed so that it doesn't crash (Olga) -- */
                                if (prepNode.numChildren() > 0)
                                    prepNode = prepNode.firstChild();
                                result.add(new Property(prepNode.nodeString(), npHead.firstChild().nodeString()));
                            }
                        }
                    }
                }
            }
        } // end outer loop
    } // end if postmodified
    return result;
}
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Tree massimoHeadFindHack(Tree npNode) {
    LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin();
    /*
     * NOTE (yv):
     * We should really have a decent configurable head finder.
     * The "generic" head finder below probably works, but ...
     * this is ugly enough for English, but making it work for
     * English *and* Italian (and possibly other languages)
     * is only something for very enthusiastic people with
     * slight masochistic tendencies.
     */
    //CollinsHeadFinder hf = new CollinsHeadFinder();
    //ModCollinsHeadFinder hf = new ModCollinsHeadFinder();

    /* -- trivial -- */
    if (npNode.numChildren() == 0)
        return npNode;
    if (npNode.numChildren() == 1) {
        if (npNode.firstChild().numChildren() == 0)
            return npNode;
        return massimoHeadFindHack(npNode.firstChild());
    }

    /* -- coordination -- */
    if (npNode.numChildren() > 2) {
        for (Tree n : npNode.children()) {
            if (lang_plugin.labelCat(n.nodeString()) == NodeCategory.CC)
                return null;
        }
    }

    /* -- last child is a noun (common/proper) -- */
    /* NB: will it work for Italian though? */
    NodeCategory firstpos = lang_plugin.labelCat(npNode.firstChild().nodeString());
    NodeCategory nextpos = lang_plugin.labelCat(npNode.getChild(1).nodeString());
    NodeCategory lastpos = lang_plugin.labelCat(npNode.lastChild().nodeString());
    if (lastpos == NodeCategory.CN)
        return npNode.lastChild();
    if (lastpos == NodeCategory.PN)
        return npNode.lastChild();

    /* -- (NP (NP (DT the) (NN man)) (PP (in from) (NP (NNP UNCLE)))) -- */
    if (firstpos == NodeCategory.NP && nextpos != NodeCategory.CN)
        return massimoHeadFindHack(npNode.firstChild());

    /* -- misc -- */
    Tree found_head = null;
    int state = 0;
    for (Tree n : npNode.children()) {
        NodeCategory ncat = lang_plugin.labelCat(n.nodeString());
        if (ncat == NodeCategory.CN || ncat == NodeCategory.PN || ncat == NodeCategory.PRO) {
            state = 4;
            found_head = n;
        } else if (ncat == NodeCategory.NP && state < 3) {
            state = 3;
            found_head = n;
        } else if (ncat == NodeCategory.ADJ && state < 3) {
            state = 2;
            found_head = n;
        }
    }
    if (found_head != null) {
        if (state == 3) {
            return massimoHeadFindHack(found_head);
        }
        return found_head;
    }
    // if (ConfigProperties.getInstance().getDbgPrint())
    System.err.println("Couldn't find a head for NP:" + npNode.pennString());
    return null;
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    // System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);

    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    analysis.setRightBranchRest(rest > 0 ? rest : 0); // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol

    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");

        // neighbours l1, l2 features
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else { // leftmost leaf is at the beginning of the sentence
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }

        // coPar and coLenPar features
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers, train);
            }
            // found structure: ((CC either) (A ...) (CC or) (B ...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers, train);
            }
            // found structure: ((A ...) (, ,) (CC but) (B ...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers, train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);

    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }

    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
/**
 * Identify the list of rightmost non-terminals that span a complete subtree, i.e., one for which
 * a) the leaf of its rightmost child is a word, OR
 * b) the leaf of its rightmost child is a word AND its index is the last in the yield
 *    (AND this leaf is the last word - optional, as this condition breaches incrementality).
 *
 * @param tree the analysis tree
 * @return the list of rightmost complete non-terminals
 */
private List<Tree> getListOfRightMostCompleteNonTerminals(Tree tree) {
    List<Tree> list = new ArrayList<Tree>();
    List<Tree> leaves = tree.getLeaves();
    // check if the last leaf is a word.
    Tree currentWord = leaves.get(leaves.size() - 1);
    if (currentWord.nodeString().endsWith("<>")) {
        Tree parent = currentWord.parent(tree);
        while (parent != tree) {
            if (parent.isPhrasal()) {
                list.add(parent);
            }
            parent = parent.parent(tree);
        }
        list.add(tree);
    }
    return list;
}
From source file:pltag.parser.semantics.discriminative.ExtractFeatures.java
License:Open Source License
private boolean sameLabelAtLevel(Tree left, Tree right, int level) {
    if (left.isLeaf() || right.isLeaf())
        return false;
    if (level == 0)
        return left.nodeString().equals(right.nodeString());
    Tree[] leftChildren = left.children();
    Tree[] rightChildren = right.children();
    if (leftChildren.length != rightChildren.length)
        return false;
    for (int i = 0; i < leftChildren.length; i++) {
        if (!sameLabelAtLevel(leftChildren[i], rightChildren[i], level - 1))
            return false;
    }
    return true;
}
From source file:tml.utils.StanfordUtils.java
License:Apache License
/**
 * Calculates the typed dependencies from a grammatical tree.
 *
 * @param tree the grammatical tree
 * @return one string per typed dependency, or null if the grammatical structure could not be built
 */
public static List<String> calculateTypedDependencies(Tree tree) {
    double time = System.nanoTime();
    List<String> output = new ArrayList<String>();
    GrammaticalStructure gs = null;
    try {
        gs = getGrammaticalStructureFactory().newGrammaticalStructure(tree);
    } catch (Exception e) {
        logger.error(e);
        return null;
    }
    Collection<TypedDependency> tdl = gs.typedDependenciesCollapsed();

    // Get the POS tag from each word
    Hashtable<String, String> posInfo = new Hashtable<String, String>();
    for (Tree t : tree.getLeaves()) {
        Tree pt = null;
        for (Tree tt : tree.dominationPath(t)) {
            if (tt.isLeaf()) {
                posInfo.put(tt.nodeString(), pt.nodeString());
            }
            pt = tt;
        }
    }

    for (Object obj : tdl.toArray()) {
        TypedDependency dep = (TypedDependency) obj;
        String wordGov = dep.gov().nodeString().split("-")[0];
        String wordDep = dep.dep().nodeString().split("-")[0];
        String posGov = posInfo.get(wordGov);
        String posDep = posInfo.get(wordDep);
        String dependencyString = dep.reln().toString() + "(" + dep.gov().pennString().trim() + "-" + posGov
                + ", " + dep.dep().pennString().trim() + "-" + posDep + ")";
        output.add(dependencyString);
    }

    time = System.nanoTime() - time;
    logger.debug("Typed dependencies obtained in " + time * 1E-6 + " milliseconds"); // ns -> ms
    return output;
}
From source file:wseproject.nlp.StanfordManager.java
private static void printTree(Tree tree, int d) {
    // Indent according to the node's depth, then print its label.
    for (int i = 0; i < d; i++)
        System.out.print("--");
    System.out.println(tree.nodeString());

    // Recurse into the children one level deeper.
    Tree[] children = tree.children();
    for (Tree child : children) {
        //System.out.println("str = " + child.nodeString());
        printTree(child, d + 1);
    }
}