List of usage examples for edu.stanford.nlp.trees.Tree#parent(Tree root)
public Tree parent(Tree root)
From source file: elkfed.lang.ItalianLanguagePlugin.java
License: Apache License
/**
 * Computes extra parse-based information for a mention covering the word span
 * [startWord, endWord] of {@code sentenceTree}.
 *
 * @param sentenceTree parse tree of the whole sentence
 * @param startWord    index (into the sentence leaves) of the mention's first word
 * @param endWord      index of the mention's last word; may run past the sentence
 *                     end for markables that cross sentence boundaries
 * @param prsHead      leaf node previously determined to be the mention head
 * @param StHeadFinder head finder used to compare head terminals of candidate NPs
 * @return a three-element array:
 *         [0] the minimal NP subtree containing the head without extending past the
 *             mention boundaries (falls back to the head itself if no NP is found),
 *         [1] the lowest NP above it that keeps the same head terminal,
 *         [2] the highest such NP reached before a coordinated NP;
 *         all three elements are null when {@code prsHead} is null
 */
@Override
public Tree[] calcParseExtra(Tree sentenceTree, int startWord, int endWord, Tree prsHead,
        HeadFinder StHeadFinder) {
    List<Tree> Leaves = sentenceTree.getLeaves();
    Tree startNode = Leaves.get(startWord);
    Tree endNode = null;
    if (endWord >= Leaves.size()) {
        // for marks that do not respect sentence boundaries
        endNode = Leaves.get(Leaves.size() - 1);
    } else {
        endNode = Leaves.get(endWord);
    }
    // Leaves immediately outside the mention span (null at the sentence edges);
    // used below to keep the minimal subtree inside the mention boundaries.
    Tree prevNode = null;
    if (startWord > 0)
        prevNode = Leaves.get(startWord - 1);
    Tree nextNode = null;
    if (endWord < Leaves.size() - 1)
        nextNode = Leaves.get(endWord + 1);
    Tree[] result = new Tree[3];

    //---------- calculate minimal np-like subtree, containing the head and included in the mention
    Tree HeadNode = prsHead;
    if (prsHead == null) {
        // todo: this should be fixed somehow though
        // todo (ctd): use getHeadIndex from NPHeadFinder, but need to reconstruct the markable
        // todo (ctd): mind marks spanning over sentence boundaries
        result[0] = null;
        result[1] = null;
        result[2] = null;
        return result;
    }

    Tree mincand = prsHead;
    Tree t = mincand;
    Tree minnp = null;
    Tree maxnp = null;
    // Walk upward from the head while the current node stays inside the mention
    // (i.e. dominates neither the leaf before nor the leaf after the span);
    // stop at the first NP encountered (t = null terminates the loop).
    while (t != null && (prevNode == null || !t.dominates(prevNode))
            && (nextNode == null || !t.dominates(nextNode))) {
        if (t.value().equalsIgnoreCase("NP")) {
            mincand = t;
            t = null; // found the minimal NP — terminate the walk
        }
        if (t != null)
            t = t.parent(sentenceTree);
    }
    result[0] = mincand;

    // Continue upward from the minimal candidate, collecting NPs that keep the
    // same head terminal; stop at a coordinated NP or when the head changes.
    t = mincand;
    while (t != null && (t == mincand || !iscoordnp(t))) {
        if (t.value().equalsIgnoreCase("NP")) {
            if (t.headTerminal(StHeadFinder) == HeadNode) {
                maxnp = t; // highest same-head NP seen so far
                if (minnp == null)
                    minnp = t; // lowest same-head NP
            } else {
                t = null; // head changed — stop climbing
            }
        }
        if (t != null)
            t = t.parent(sentenceTree);
    }
    result[1] = minnp;
    result[2] = maxnp;
    return result;
}
From source file: elkfed.mmax.importer.DetermineMinSpan.java
License: Apache License
/** adds min_ids and min_span attributes so that * BART's chunk-based coref resolution works *///from w w w . j a va2 s .c o m public static void addMinSpan(int start, Tree tree, IMarkable tag, List<String> tokens) { List<Tree> leaves = tree.getLeaves(); Tree startNode; Tree endNode; try { startNode = leaves.get(tag.getLeftmostDiscoursePosition() - start); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start); if (".".equals(endNode.parent(tree).value())) { //System.err.println("Sentence-final dot in "+ // tokens.subList(tag.start, tag.end + 1)+ "removed."); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start - 1); } } catch (IndexOutOfBoundsException ex) { System.out.format("indices not found: %d,%d in %s [wanted: %s] [ctx: %s]", tag.getLeftmostDiscoursePosition() - start, tag.getRightmostDiscoursePosition() - start, leaves, tokens.subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1), tokens.subList(start, tag.getLeftmostDiscoursePosition())); throw ex; } Tree parentNode = startNode; while (parentNode != null && !parentNode.dominates(endNode)) { parentNode = parentNode.parent(tree); } if (parentNode == null) { System.err.println("Could not match tree (1)"); return; } if (startNode.leftCharEdge(tree) != parentNode.leftCharEdge(tree) || endNode.rightCharEdge(tree) != parentNode.rightCharEdge(tree)) { System.err.println("Could not match tree (2)"); return; } Tree oldParent = parentNode; ModCollinsHeadFinder hf = new ModCollinsHeadFinder(); // use the head finder to narrow down the span. 
// stop if (a) the head is no longer an NP or // (b) the NP is a conjunction go_up: while (true) { for (Tree t : parentNode.getChildrenAsList()) { if (t.value().equals("CC")) { break go_up; } } Tree headDtr = hf.determineHead(parentNode); if (headDtr == null || !headDtr.value().equals("NP")) { break; } parentNode = headDtr; } if (parentNode != oldParent) { List<Tree> newLeaves = parentNode.getLeaves(); int newStart = start + find_same(leaves, newLeaves.get(0)); int newEnd = newStart + newLeaves.size() - 1; if (newStart <= tag.getLeftmostDiscoursePosition()) { if (tag.getLeftmostDiscoursePosition() - newStart > 1) { System.err.println("NP node is too big:" + parentNode.toString() + " wanted:" + tokens .subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1) + " in: " + tree); return; } for (int i = newStart - start; i < tag.getLeftmostDiscoursePosition() - start; i++) { System.err.println("additional prefix in syntax:" + leaves.get(i)); } // switch NP boundary and tag boundary // (even [Connie Cheung]) => min_words="Connie Cheung" int tmp = tag.getLeftmostDiscoursePosition(); tag.adjustSpan(newStart, tag.getRightmostDiscoursePosition()); newStart = tmp; } assert newEnd <= tag.getRightmostDiscoursePosition(); // this relies on MiniDiscourse's default word numbering // which is ugly but should generally work... if (newStart == newEnd) { tag.setAttributeValue("min_ids", "word_" + (newStart + 1)); } else { tag.setAttributeValue("min_ids", String.format("word_%d..word_%d", newStart + 1, newEnd + 1)); } StringBuffer buf = new StringBuffer(); for (Tree t : newLeaves) { buf.append(t.toString().toLowerCase()); buf.append(' '); } buf.setLength(buf.length() - 1); tag.setAttributeValue("min_words", buf.toString()); } }
From source file: elkfed.mmax.pipeline.P2Chunker.java
License: Apache License
/** Add parser, part of speech, and chunk markables */ protected void addMarkables() { final StringBuffer markableBuffer = new StringBuffer(); List<Markable> sentences = null; for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) { int start = parseMarkable.getLeftmostDiscoursePosition(); int end = parseMarkable.getRightmostDiscoursePosition(); /** Retrieve chunk tags from the parse tree and add chunk markables */ /* traverse parse-tree (real tree, not string), extract basic NPs and poss */ Tree pTree = null;//from w w w . j a v a2 s .co m pTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)); normalizeTree(pTree); if (pTree == null) continue; //add all basic nps for (Iterator<Tree> treeIt = pTree.iterator(); treeIt.hasNext();) { Tree nod = treeIt.next(); if (nod.value().equals("NP" + NPSTATUS_SEPARATOR + "1") || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) { markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, false); } } List<Tree> Leaves = pTree.getLeaves(); // add NPs embedding possessives for (Tree l : Leaves) { if (l.value().toLowerCase().startsWith("'s")) { if (l.parent(pTree) != null && l.parent(pTree).value().equals("POS") && l.parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree).value().startsWith("NP") && l.parent(pTree).parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree) .parent(pTree).value().equals("NP" + NPSTATUS_SEPARATOR + "0")) { Tree nod = l.parent(pTree).parent(pTree).parent(pTree); markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, true); } } } } }
From source file: elkfed.mmax.pipeline.P2Chunker.java
License: Apache License
private void addChunkMarkable(Tree nod, Tree pTree, int start, Boolean checkup) { // register new chunk markable, setting maxspan if needed List<Tree> lv = nod.getLeaves(); int npstart = Integer.valueOf(lv.get(0).label().value().split(INDEX_SEPARATOR)[1]); int npend = Integer.valueOf(lv.get(lv.size() - 1).label().value().split(INDEX_SEPARATOR)[1]); npstart += start;/*from w w w .j ava 2 s . c om*/ npend += start; final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes); cAttributes.put(TAG_ATTRIBUTE, "np"); //store maxspan for embedded nps (either basic or explicitly marked for doing so) if (checkup || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) { Tree p = nod; Tree head = p.headTerminal(getHeadFinder()); Tree lastmax = null; while (p != null) { p = p.parent(pTree); if (p != null && p.value().startsWith("NP")) { if ((p.headTerminal(getHeadFinder()) == head) && (!iscoordnp(p))) lastmax = p; else p = null; } } if (lastmax != null) { List<Tree> lvm = lastmax.getLeaves(); int maxstart = Integer.valueOf(lvm.get(0).label().value().split(INDEX_SEPARATOR)[1]); int maxend = Integer.valueOf(lvm.get(lvm.size() - 1).label().value().split(INDEX_SEPARATOR)[1]); maxstart += start + 1; maxend += start + 1; cAttributes.put(MAXSPAN_ATTRIBUTE, "word_" + maxstart + "..word_" + maxend); } } chunkLevel.addMarkable(npstart, npend, cAttributes); }
From source file: elkfed.mmax.pipeline.P2Chunker.java
License: Apache License
/**
 * Normalizes a parse tree in place for chunking.
 * Leaf labels get their word position appended (token + INDEX_SEPARATOR + index),
 * NP labels are rewritten to "NP" + NPSTATUS_SEPARATOR + status, where
 * status "0" marks an NP discarded in favour of a same-head embedded NP,
 * status "2" marks that embedded NP, and status "1" marks an ordinary NP.
 * All other non-terminal labels are upper-cased.
 */
private void normalizeTree(Tree tree) {
    // for leaves -- add positions
    // for nps -- add whether they are basic or not
    int leaveIndex = 0;
    for (Iterator<Tree> treeIt = tree.iterator(); treeIt.hasNext();) {
        Tree currentTree = treeIt.next();
        Label nodeLabel = currentTree.label();
        if (currentTree.isLeaf()) {
            // Record the leaf's position so later code (addChunkMarkable)
            // can recover word offsets from the label alone.
            nodeLabel.setValue(nodeLabel.value() + INDEX_SEPARATOR + leaveIndex);
            leaveIndex++;
        } else {
            if (currentTree.value().toLowerCase().startsWith("np")) {
                Boolean found = false;
                // adjust this np for keeping (if not already discarded)
                if (!currentTree.value().endsWith("0") && !currentTree.value().endsWith("2"))
                    currentTree.label().setValue("NP" + NPSTATUS_SEPARATOR + "1");
                // adjust upper np for discarding: find the closest non-coordinated
                // NP ancestor with the same head terminal; mark it "0" (discarded)
                // and demote this node to "2" (embedded basic NP)
                Tree p = currentTree;
                Tree head = p.headTerminal(getHeadFinder());
                while (p != null && !found) {
                    p = p.parent(tree);
                    if (p != null && p.value().toLowerCase().startsWith("np")
                            && p.headTerminal(getHeadFinder()) == head && (!iscoordnp(p))) {
                        found = true;
                        p.label().setValue("NP" + NPSTATUS_SEPARATOR + "0");
                        currentTree.label().setValue("NP" + NPSTATUS_SEPARATOR + "2");
                    }
                }
            } else {
                nodeLabel.setValue(nodeLabel.value().toUpperCase());
            }
        }
    }
}
From source file: elkfed.mmax.pipeline.SemTagger.java
License: Apache License
/**
 * Finds the discourse-level index of the head word of a (non-basal) semantic
 * role phrase, or -1 when the markable lies inside no parsed sentence.
 */
private int findSemanticRoleHeadIndex(Markable semroleMarkable) {
    // 1. Get the syntactic tree semroleMarkable is contained into
    final int srStart = semroleMarkable.getLeftmostDiscoursePosition();
    final int srEnd = semroleMarkable.getRightmostDiscoursePosition();
    for (int i = 0; i < parseTrees.size(); i++) {
        final int sentStart = parseStart.get(i);
        final int sentEnd = parseEnd.get(i);
        if (srStart >= sentStart && srEnd <= sentEnd) { // GOTCHA!
            Tree tree = parseTrees.get(i);
            // 2. Find the lowest node containing the markable at its leaves
            final int srOnset = srStart - sentStart;
            final int srOffset = srEnd - sentStart;
            final List<Tree> leaves = tree.getLeaves();
            final Tree startNode = leaves.get(srOnset);
            final Tree endNode = leaves.get(srOffset);
            // Climb from the start leaf until the node also dominates the end leaf.
            Tree parentNode = startNode;
            while (parentNode != null && !parentNode.dominates(endNode)) {
                parentNode = parentNode.parent(tree);
            }
            // Fall back to the start leaf when no dominating ancestor was found.
            Tree lowestProjection = null;
            if (parentNode == null) {
                lowestProjection = startNode;
            } else {
                lowestProjection = parentNode;
            }
            // 3. Find the head and return its index.
            // Leaf labels carry "<token><INDEX_SEPARATOR><position>"; adding
            // sentStart converts the sentence-relative position back to a
            // discourse position.
            Tree headWord = lowestProjection.headTerminal(headFinder);
            return Integer.valueOf(headWord.label().value().split(INDEX_SEPARATOR)[1]) + sentStart;
        }
    }
    // The markable was not contained in any known sentence span.
    return -1;
}
From source file: knu.univ.lingvo.coref.MentionExtractor.java
License: Open Source License
/**
 * Post-processes the extracted mentions. Here we set the Mention fields required
 * for coref and order mentions by tree-traversal order.
 *
 * @param anno the source annotation (NOTE(review): not referenced in this body —
 *             possibly kept for interface compatibility; confirm before removing)
 * @param words List of words in each sentence, in textual order
 * @param trees List of trees, one per sentence
 * @param unorderedMentions List of unordered, unprocessed mentions.
 *        Each mention MUST have startIndex and endIndex set!
 *        Optionally, if scoring is desired, mentions must have mentionID and
 *        originalRef set. All the other Mention fields are set here.
 * @param doMergeLabels whether to merge each parse tree's labels with its sentence words
 * @return List of mentions ordered according to the tree traversal
 * @throws Exception
 */
public List<List<Mention>> arrange(Annotation anno, List<List<CoreLabel>> words, List<Tree> trees,
        List<List<Mention>> unorderedMentions, boolean doMergeLabels) throws Exception {
    List<List<Mention>> orderedMentionsBySentence = new ArrayList<List<Mention>>();
    //
    // traverse all sentences and process each individual one
    //
    int mentionNumber = 0;
    for (int sent = 0, sz = words.size(); sent < sz; sent++) {
        List<CoreLabel> sentence = words.get(sent);
        Tree tree = trees.get(sent);
        List<Mention> mentions = unorderedMentions.get(sent);
        // Maps a subtree key (treeToKey) to the mentions anchored at that subtree.
        Map<String, List<Mention>> mentionsToTrees = Generics.newHashMap();
        // merge the parse tree of the entire sentence with the sentence words
        if (doMergeLabels)
            mergeLabels(tree, sentence);
        //
        // set the surface information and the syntactic info in each mention
        // startIndex and endIndex MUST be set before!
        //
        for (Mention mention : mentions) {
            mention.sentenceNumber = sent;
            mention.mentionNumber = mentionNumber++;
            mention.contextParseTree = tree;
            mention.sentenceWords = sentence;
            mention.originalSpan = new ArrayList<CoreLabel>(
                    mention.sentenceWords.subList(mention.startIndex, mention.endIndex));
            // Lazily index the tree spans once per tree (checked via the root label).
            if (!((CoreLabel) tree.label()).has(CoreAnnotations.BeginIndexAnnotation.class))
                tree.indexSpans(0);
            if (mention.headWord == null) {
                // Head word missing: locate the syntactic head and derive a
                // 0-based head index from its 1-based IndexAnnotation.
                Tree headTree = ((RuleBasedCorefMentionFinder) mentionFinder).findSyntacticHead(mention,
                        tree, sentence);
                mention.headWord = (CoreLabel) headTree.label();
                mention.headIndex = mention.headWord.get(CoreAnnotations.IndexAnnotation.class) - 1;
            }
            if (mention.mentionSubTree == null) {
                // mentionSubTree = highest NP that has the same head
                Tree headTree = tree.getLeaves().get(mention.headIndex);
                if (headTree == null) {
                    throw new RuntimeException("Missing head tree for a mention!");
                }
                // Walk upward, keeping the last NP whose head terminal is still
                // the mention's head leaf; stop once a candidate has been found
                // and the chain is broken.
                Tree t = headTree;
                while ((t = t.parent(tree)) != null) {
                    if (t.headTerminal(headFinder) == headTree && t.value().equals("NP")) {
                        mention.mentionSubTree = t;
                    } else if (mention.mentionSubTree != null) {
                        break;
                    }
                }
                if (mention.mentionSubTree == null) {
                    mention.mentionSubTree = headTree;
                }
            }
            List<Mention> mentionsForTree = mentionsToTrees.get(treeToKey(mention.mentionSubTree));
            if (mentionsForTree == null) {
                mentionsForTree = new ArrayList<Mention>();
                mentionsToTrees.put(treeToKey(mention.mentionSubTree), mentionsForTree);
            }
            mentionsForTree.add(mention);
            // generates all fields required for coref, such as gender, number, etc.
            mention.process(dictionaries, semantics, this, singletonPredictor);
        }
        //
        // Order all mentions in tree-traversal order
        //
        List<Mention> orderedMentions = new ArrayList<Mention>();
        orderedMentionsBySentence.add(orderedMentions);
        // extract all mentions in tree traversal order (alternative: tree.postOrderNodeList())
        for (Tree t : tree.preOrderNodeList()) {
            List<Mention> lm = mentionsToTrees.get(treeToKey(t));
            if (lm != null) {
                for (Mention m : lm) {
                    orderedMentions.add(m);
                }
            }
        }
        //
        // find appositions, predicate nominatives, relative pronouns in this sentence
        //
        findSyntacticRelations(tree, orderedMentions);
        assert (mentions.size() == orderedMentions.size());
    }
    return orderedMentionsBySentence;
}
From source file: MedArkRef.AnalysisUtilities.java
License: Open Source License
public static void downcaseFirstToken(Tree inputTree) { if (inputTree == null) { return;/*from ww w . j ava 2s . c o m*/ } Tree firstWordTree = inputTree.getLeaves().get(0); if (firstWordTree == null) return; Tree preterm = firstWordTree.parent(inputTree); String firstWord = firstWordTree.yield().toString(); if (firstWord != null && preterm != null && preterm.label() != null && !preterm.label().toString().matches("^NNP.*") && !firstWord.equals("I")) { //if(firstWord.indexOf('-') == -1 && !firstWord.equals("I")){ firstWord = firstWord.substring(0, 1).toLowerCase() + firstWord.substring(1); firstWordTree.label().setValue(firstWord); } //if(QuestionTransducer.DEBUG) System.err.println("downcaseFirstToken: "+inputTree.toString()); }
From source file: pltag.parser.semantics.discriminative.ExtractFeatures.java
License: Open Source License
/**
 * Extracts discriminative features from the string-encoded input tree and the
 * current (partial) analysis tree, storing them on {@code analysis} via the
 * supplied feature indexers.
 *
 * @param inputTree          bracketed string of the full tree so far
 * @param stringAnalysisTree bracketed string of the current analysis tree
 * @param endOfSent          whether the current word ends the sentence
 * @param analysis           receives the computed feature values
 * @param featureIndexers    maps words/categories to integer feature ids
 * @param train              whether unseen feature values may be added to the indexers
 */
private void extractStringTreeAnalysisFeatures(String inputTree, String stringAnalysisTree, boolean endOfSent,
        IncrementalAnalysis analysis, DiscriminativeFeatureIndexers featureIndexers, boolean train) {
    // System.out.println(inputTree);
    Tree tree = Tree.valueOf(inputTree);
    List<Tree> leaves = tree.getLeaves();
    // The rightmost leaf is the word just processed.
    Tree currentWord = leaves.get(leaves.size() - 1);
    int currentWordIndex = featureIndexers.getWordIndex(currentWord.nodeString(), train);
    // right branch (2): 1. Count number of nodes from root to rightmost non-terminal, 2. Count rest nodes
    // compute domination path from root to rightmost leaf. Subtract 2 from size to exclude root and pre-terminal
    int pathSize = tree.dominationPath(currentWord.parent(tree)).size();
    analysis.setRightBranchSpine(pathSize > 2 ? pathSize - 2 : 0);
    int rest = tree.size() - analysis.getRightBranchSpine() - leaves.size() * 2 - 1;
    // Subtract the number of terminals, pre-terminals (leaves.size()*2) and root symbol
    analysis.setRightBranchRest(rest > 0 ? rest : 0);
    // get list of rightmost complete non-terminals. We don't compute the whole list of complete non-terminals,
    // but only the ones that have been just completed, hence lie at the rightmost position of the tree.
    // Since we compute the features piecemeal, by the end of the sentence we will have computed all complete
    // non-terminals, depending on the training strategy. Used for features: heavy, neighbours, and edges
    Tree analysisTree = Tree.valueOf(stringAnalysisTree);
    analysisTree.indexLeaves();
    List<Tree> complete = getListOfRightMostCompleteNonTerminals(analysisTree);
    String[] heavyStr = new String[complete.size()];
    String[] neighboursL1Str = new String[complete.size()];
    String[] neighboursL2Str = new String[complete.size()];
    int i = 0;
    for (Tree subTree : complete) {
        // heavy feature: category + (capped) yield size + end-of-sentence flag
        int categoryIndex = featureIndexers.getCategoryIndex(subTree.nodeString(), train);
        List<Label> yield = subTree.yield();
        String yieldSize = yield.size() > 5 ? "5+" : String.valueOf(yield.size());
        heavyStr[i] = String.format("%s %s %s", categoryIndex, yieldSize, endOfSent ? "y" : "n");
        // neighbours l1, l2 features: categories of the preterminals of the one
        // and two words preceding this subtree ("SoS" = start of sentence).
        // Leaf indices from indexLeaves() are 1-based, hence the -2/-3 offsets.
        int leftmostLeafId = ((CoreLabel) yield.get(0)).index();
        if (leftmostLeafId > 1) {
            int l1CategoryId = featureIndexers
                    .getCategoryIndex(leaves.get(leftmostLeafId - 2).parent(tree).nodeString(), train);
            if (leftmostLeafId > 2) {
                neighboursL1Str[i] = String.format("%s %s %s", categoryIndex, yieldSize, l1CategoryId);
                int l2CategoryId = featureIndexers
                        .getCategoryIndex(leaves.get(leftmostLeafId - 3).parent(tree).nodeString(), train);
                neighboursL2Str[i] = String.format("%s %s %s %s", categoryIndex, yieldSize, l2CategoryId,
                        l1CategoryId);
            } else {
                neighboursL2Str[i] = String.format("%s %s SoS %s", categoryIndex, yieldSize, l1CategoryId);
            }
        } else // leftmost leaf is at the beginning of the sentence
        {
            neighboursL1Str[i] = String.format("%s %s SoS", categoryIndex, yieldSize);
            neighboursL2Str[i] = String.format("%s %s SoS SoS", categoryIndex, yieldSize);
        }
        // coPar and coLenPar features: detect coordination patterns among children
        Tree[] children = subTree.children();
        if (children.length > 2) {
            // found structure: (X (A ...) (CC and/or) (B ...))
            if (children.length == 3 && children[1].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[2]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[2], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((CC either) (A ...) (CC or) (B...))
            else if (children.length == 4 && children[0].nodeString().startsWith("CC")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[1], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[1], children[3], endOfSent), featureIndexers,
                        train);
            }
            // found structure ((A ...) (, ,) (CC but) (B...))
            else if (children.length == 4 && children[1].nodeString().equals(",")
                    && children[2].nodeString().startsWith("CC")) {
                analysis.setCoPar(getCoParString(children[0], children[3]), featureIndexers, train);
                analysis.setCoLenPar(getCoLenParString(children[0], children[3], endOfSent), featureIndexers,
                        train);
            }
        }
        i++;
    }
    analysis.setHeavy(heavyStr, featureIndexers, train);
    analysis.setNeighboursL1(neighboursL1Str, featureIndexers, train);
    analysis.setNeighboursL2(neighboursL2Str, featureIndexers, train);
    // compute word + L=2 ancestor nodes, L=3 ancestor nodes
    Tree preTerminal = currentWord.parent(tree);
    Tree wordL2 = preTerminal.parent(tree);
    if (wordL2 != null) {
        int preTerminalIndex = featureIndexers.getCategoryIndex(preTerminal.nodeString(), train);
        int wordL2Index = featureIndexers.getCategoryIndex(wordL2.nodeString(), train);
        analysis.setWordL2(String.format("%s %s %s", currentWordIndex, preTerminalIndex, wordL2Index),
                featureIndexers, train);
        Tree wordL3 = wordL2.parent(tree);
        if (wordL3 != null) {
            // NOTE(review): this getCategoryIndex call omits the 'train' flag,
            // unlike every other indexer call in this method — confirm intentional.
            analysis.setWordL3(String.format("%s %s %s %s", currentWordIndex, preTerminalIndex, wordL2Index,
                    featureIndexers.getCategoryIndex(wordL3.nodeString())), featureIndexers, train);
        }
    }
    // get integration point + elem tree (Parent-emulation feature)
    analysis.setIpElemTree(String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTree()),
            featureIndexers, train);
    analysis.setIpElemTreeUnlex(
            String.format("%s,%s", analysis.getIntegrationPoint(), analysis.getElemTreeUnlex()),
            featureIndexers, train);
}
From source file: pltag.parser.semantics.discriminative.ExtractFeatures.java
License: Open Source License
/** * Identify the list of rightmost non-terminals that span a complete subtree, i.e., one that * a) the leaf of its' rightmost child is a word, OR * b) the index of the leaf of its' rightmost is a word AND is the last in the yield (AND this leaf is the last word - optional, as this condition breeches incrementality). * @param analysisTree// w ww . ja v a2 s. c o m * @return */ private List<Tree> getListOfRightMostCompleteNonTerminals(Tree tree) { List<Tree> list = new ArrayList(); List<Tree> leaves = tree.getLeaves(); // check if the last leaf is a word. Tree currentWord = leaves.get(leaves.size() - 1); if (currentWord.nodeString().endsWith("<>")) { Tree parent = currentWord.parent(tree); while (parent != tree) { if (parent.isPhrasal()) { list.add(parent); } parent = parent.parent(tree); } list.add(tree); } return list; }