List of usage examples for the method edu.stanford.nlp.trees.Tree.dominates
public boolean dominates(Tree t)
From source file:elkfed.mmax.importer.DetermineMinSpan.java
License:Apache License
/** adds min_ids and min_span attributes so that * BART's chunk-based coref resolution works *//*from w w w . j av a 2s. c o m*/ public static void addMinSpan(int start, Tree tree, IMarkable tag, List<String> tokens) { List<Tree> leaves = tree.getLeaves(); Tree startNode; Tree endNode; try { startNode = leaves.get(tag.getLeftmostDiscoursePosition() - start); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start); if (".".equals(endNode.parent(tree).value())) { //System.err.println("Sentence-final dot in "+ // tokens.subList(tag.start, tag.end + 1)+ "removed."); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start - 1); } } catch (IndexOutOfBoundsException ex) { System.out.format("indices not found: %d,%d in %s [wanted: %s] [ctx: %s]", tag.getLeftmostDiscoursePosition() - start, tag.getRightmostDiscoursePosition() - start, leaves, tokens.subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1), tokens.subList(start, tag.getLeftmostDiscoursePosition())); throw ex; } Tree parentNode = startNode; while (parentNode != null && !parentNode.dominates(endNode)) { parentNode = parentNode.parent(tree); } if (parentNode == null) { System.err.println("Could not match tree (1)"); return; } if (startNode.leftCharEdge(tree) != parentNode.leftCharEdge(tree) || endNode.rightCharEdge(tree) != parentNode.rightCharEdge(tree)) { System.err.println("Could not match tree (2)"); return; } Tree oldParent = parentNode; ModCollinsHeadFinder hf = new ModCollinsHeadFinder(); // use the head finder to narrow down the span. 
// stop if (a) the head is no longer an NP or // (b) the NP is a conjunction go_up: while (true) { for (Tree t : parentNode.getChildrenAsList()) { if (t.value().equals("CC")) { break go_up; } } Tree headDtr = hf.determineHead(parentNode); if (headDtr == null || !headDtr.value().equals("NP")) { break; } parentNode = headDtr; } if (parentNode != oldParent) { List<Tree> newLeaves = parentNode.getLeaves(); int newStart = start + find_same(leaves, newLeaves.get(0)); int newEnd = newStart + newLeaves.size() - 1; if (newStart <= tag.getLeftmostDiscoursePosition()) { if (tag.getLeftmostDiscoursePosition() - newStart > 1) { System.err.println("NP node is too big:" + parentNode.toString() + " wanted:" + tokens .subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1) + " in: " + tree); return; } for (int i = newStart - start; i < tag.getLeftmostDiscoursePosition() - start; i++) { System.err.println("additional prefix in syntax:" + leaves.get(i)); } // switch NP boundary and tag boundary // (even [Connie Cheung]) => min_words="Connie Cheung" int tmp = tag.getLeftmostDiscoursePosition(); tag.adjustSpan(newStart, tag.getRightmostDiscoursePosition()); newStart = tmp; } assert newEnd <= tag.getRightmostDiscoursePosition(); // this relies on MiniDiscourse's default word numbering // which is ugly but should generally work... if (newStart == newEnd) { tag.setAttributeValue("min_ids", "word_" + (newStart + 1)); } else { tag.setAttributeValue("min_ids", String.format("word_%d..word_%d", newStart + 1, newEnd + 1)); } StringBuffer buf = new StringBuffer(); for (Tree t : newLeaves) { buf.append(t.toString().toLowerCase()); buf.append(' '); } buf.setLength(buf.length() - 1); tag.setAttributeValue("min_words", buf.toString()); } }
From source file:elkfed.mmax.pipeline.SemTagger.java
License:Apache License
/** Finds the index of the head of a (non-basal) semantic role phrase */ private int findSemanticRoleHeadIndex(Markable semroleMarkable) { // 1. Get the syntactic tree semroleMarkable is contained into final int srStart = semroleMarkable.getLeftmostDiscoursePosition(); final int srEnd = semroleMarkable.getRightmostDiscoursePosition(); for (int i = 0; i < parseTrees.size(); i++) { final int sentStart = parseStart.get(i); final int sentEnd = parseEnd.get(i); if (srStart >= sentStart && srEnd <= sentEnd) { // GOTCHA! Tree tree = parseTrees.get(i); // 2. Find the lowest node containing the markable at its leaves final int srOnset = srStart - sentStart; final int srOffset = srEnd - sentStart; final List<Tree> leaves = tree.getLeaves(); final Tree startNode = leaves.get(srOnset); final Tree endNode = leaves.get(srOffset); Tree parentNode = startNode; while (parentNode != null && !parentNode.dominates(endNode)) { parentNode = parentNode.parent(tree); }//from w ww. j ava 2 s. c o m Tree lowestProjection = null; if (parentNode == null) { lowestProjection = startNode; } else { lowestProjection = parentNode; } // 3. Find the head and return its index Tree headWord = lowestProjection.headTerminal(headFinder); return Integer.valueOf(headWord.label().value().split(INDEX_SEPARATOR)[1]) + sentStart; } } return -1; }
From source file:knu.univ.lingvo.coref.sievepasses.DeterministicCorefSieve.java
License:Open Source License
/**
 * Divides a sentence into clauses and sorts the antecedents for pronoun matching.
 *
 * <p>Starting at the parent of {@code m1.mentionSubTree}, the method walks up
 * through {@code m1.contextParseTree}. At every ancestor whose label starts
 * with {@code "S"}, each mention of {@code l} that the ancestor dominates and
 * that has not been collected yet is appended to the result. The walk stops
 * after a node labelled {@code "ROOT"} or when there is no further ancestor.
 *
 * <p>If the walk did not collect every mention of {@code l}, the original list
 * {@code l} is returned instead. This fallback is applied regardless of the
 * logger level, so the result never depends on logging configuration; only the
 * diagnostic messages are guarded by {@code isLoggable(Level.FINEST)}.
 *
 * @param l            candidate antecedents in their original order
 * @param m1           the mention whose enclosing clauses drive the ordering
 * @param sameSentence whether sorting should be attempted; when {@code false}
 *                     an empty list is returned
 * @return the re-ordered mentions, {@code l} itself when sorting was
 *         incomplete, or an empty list when {@code sameSentence} is false
 */
private static List<Mention> sortMentionsForPronoun(List<Mention> l, Mention m1, boolean sameSentence) {
    List<Mention> sorted = new ArrayList<Mention>();
    if (!sameSentence) {
        return sorted;
    }

    Tree tree = m1.contextParseTree;
    // ancestor(...) may yield null (e.g. the mention subtree has no parent in
    // this tree); in that case nothing is collected and the fallback applies.
    Tree current = m1.mentionSubTree.ancestor(1, tree);
    while (current != null) {
        String label = current.label().value();
        if (label.startsWith("S")) {
            for (Mention m : l) {
                if (!sorted.contains(m) && current.dominates(m.mentionSubTree)) {
                    sorted.add(m);
                }
            }
        }
        if (label.equals("ROOT")) {
            break;
        }
        current = current.ancestor(1, tree);
    }

    boolean trace = SieveCoreferenceSystem.logger.isLoggable(Level.FINEST);

    if (l.size() != sorted.size()) {
        // Not every mention was reached: keep the caller's order.
        if (trace) {
            SieveCoreferenceSystem.logger.finest("sorting failed!!! -> parser error?? \tmentionID: "
                    + m1.mentionID + " " + m1.spanToString());
        }
        return l;
    }

    if (trace) {
        if (!l.equals(sorted)) {
            SieveCoreferenceSystem.logger.finest("sorting succeeded & changed !! \tmentionID: "
                    + m1.mentionID + " " + m1.spanToString());
            for (int i = 0; i < l.size(); i++) {
                Mention ml = l.get(i);
                Mention msorted = sorted.get(i);
                SieveCoreferenceSystem.logger
                        .finest("\t[" + ml.spanToString() + "]\t[" + msorted.spanToString() + "]");
            }
        } else {
            SieveCoreferenceSystem.logger
                    .finest("no changed !! \tmentionID: " + m1.mentionID + " " + m1.spanToString());
        }
    }
    return sorted;
}