List of usage examples for edu.stanford.nlp.trees.Tree.value()
@Override
public String value()
From source file:elkfed.mmax.importer.DetermineMinSpan.java
License:Apache License
/** adds min_ids and min_span attributes so that * BART's chunk-based coref resolution works *///from w w w . j a v a 2s .c o m public static void addMinSpan(int start, Tree tree, IMarkable tag, List<String> tokens) { List<Tree> leaves = tree.getLeaves(); Tree startNode; Tree endNode; try { startNode = leaves.get(tag.getLeftmostDiscoursePosition() - start); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start); if (".".equals(endNode.parent(tree).value())) { //System.err.println("Sentence-final dot in "+ // tokens.subList(tag.start, tag.end + 1)+ "removed."); endNode = leaves.get(tag.getRightmostDiscoursePosition() - start - 1); } } catch (IndexOutOfBoundsException ex) { System.out.format("indices not found: %d,%d in %s [wanted: %s] [ctx: %s]", tag.getLeftmostDiscoursePosition() - start, tag.getRightmostDiscoursePosition() - start, leaves, tokens.subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1), tokens.subList(start, tag.getLeftmostDiscoursePosition())); throw ex; } Tree parentNode = startNode; while (parentNode != null && !parentNode.dominates(endNode)) { parentNode = parentNode.parent(tree); } if (parentNode == null) { System.err.println("Could not match tree (1)"); return; } if (startNode.leftCharEdge(tree) != parentNode.leftCharEdge(tree) || endNode.rightCharEdge(tree) != parentNode.rightCharEdge(tree)) { System.err.println("Could not match tree (2)"); return; } Tree oldParent = parentNode; ModCollinsHeadFinder hf = new ModCollinsHeadFinder(); // use the head finder to narrow down the span. 
// stop if (a) the head is no longer an NP or // (b) the NP is a conjunction go_up: while (true) { for (Tree t : parentNode.getChildrenAsList()) { if (t.value().equals("CC")) { break go_up; } } Tree headDtr = hf.determineHead(parentNode); if (headDtr == null || !headDtr.value().equals("NP")) { break; } parentNode = headDtr; } if (parentNode != oldParent) { List<Tree> newLeaves = parentNode.getLeaves(); int newStart = start + find_same(leaves, newLeaves.get(0)); int newEnd = newStart + newLeaves.size() - 1; if (newStart <= tag.getLeftmostDiscoursePosition()) { if (tag.getLeftmostDiscoursePosition() - newStart > 1) { System.err.println("NP node is too big:" + parentNode.toString() + " wanted:" + tokens .subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1) + " in: " + tree); return; } for (int i = newStart - start; i < tag.getLeftmostDiscoursePosition() - start; i++) { System.err.println("additional prefix in syntax:" + leaves.get(i)); } // switch NP boundary and tag boundary // (even [Connie Cheung]) => min_words="Connie Cheung" int tmp = tag.getLeftmostDiscoursePosition(); tag.adjustSpan(newStart, tag.getRightmostDiscoursePosition()); newStart = tmp; } assert newEnd <= tag.getRightmostDiscoursePosition(); // this relies on MiniDiscourse's default word numbering // which is ugly but should generally work... if (newStart == newEnd) { tag.setAttributeValue("min_ids", "word_" + (newStart + 1)); } else { tag.setAttributeValue("min_ids", String.format("word_%d..word_%d", newStart + 1, newEnd + 1)); } StringBuffer buf = new StringBuffer(); for (Tree t : newLeaves) { buf.append(t.toString().toLowerCase()); buf.append(' '); } buf.setLength(buf.length() - 1); tag.setAttributeValue("min_words", buf.toString()); } }
From source file:elkfed.mmax.pipeline.P2Chunker.java
License:Apache License
/** Add parser, part of speech, and chunk markables */ protected void addMarkables() { final StringBuffer markableBuffer = new StringBuffer(); List<Markable> sentences = null; for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) { int start = parseMarkable.getLeftmostDiscoursePosition(); int end = parseMarkable.getRightmostDiscoursePosition(); /** Retrieve chunk tags from the parse tree and add chunk markables */ /* traverse parse-tree (real tree, not string), extract basic NPs and poss */ Tree pTree = null;/*from w ww . ja v a 2 s. c o m*/ pTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE)); normalizeTree(pTree); if (pTree == null) continue; //add all basic nps for (Iterator<Tree> treeIt = pTree.iterator(); treeIt.hasNext();) { Tree nod = treeIt.next(); if (nod.value().equals("NP" + NPSTATUS_SEPARATOR + "1") || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) { markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, false); } } List<Tree> Leaves = pTree.getLeaves(); // add NPs embedding possessives for (Tree l : Leaves) { if (l.value().toLowerCase().startsWith("'s")) { if (l.parent(pTree) != null && l.parent(pTree).value().equals("POS") && l.parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree).value().startsWith("NP") && l.parent(pTree).parent(pTree).parent(pTree) != null && l.parent(pTree).parent(pTree) .parent(pTree).value().equals("NP" + NPSTATUS_SEPARATOR + "0")) { Tree nod = l.parent(pTree).parent(pTree).parent(pTree); markableBuffer.setLength(0); addChunkMarkable(nod, pTree, start, true); } } } } }
From source file:elkfed.mmax.pipeline.P2Chunker.java
License:Apache License
private void addChunkMarkable(Tree nod, Tree pTree, int start, Boolean checkup) { // register new chunk markable, setting maxspan if needed List<Tree> lv = nod.getLeaves(); int npstart = Integer.valueOf(lv.get(0).label().value().split(INDEX_SEPARATOR)[1]); int npend = Integer.valueOf(lv.get(lv.size() - 1).label().value().split(INDEX_SEPARATOR)[1]); npstart += start;// w w w . j a v a2 s . c om npend += start; final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes); cAttributes.put(TAG_ATTRIBUTE, "np"); //store maxspan for embedded nps (either basic or explicitly marked for doing so) if (checkup || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) { Tree p = nod; Tree head = p.headTerminal(getHeadFinder()); Tree lastmax = null; while (p != null) { p = p.parent(pTree); if (p != null && p.value().startsWith("NP")) { if ((p.headTerminal(getHeadFinder()) == head) && (!iscoordnp(p))) lastmax = p; else p = null; } } if (lastmax != null) { List<Tree> lvm = lastmax.getLeaves(); int maxstart = Integer.valueOf(lvm.get(0).label().value().split(INDEX_SEPARATOR)[1]); int maxend = Integer.valueOf(lvm.get(lvm.size() - 1).label().value().split(INDEX_SEPARATOR)[1]); maxstart += start + 1; maxend += start + 1; cAttributes.put(MAXSPAN_ATTRIBUTE, "word_" + maxstart + "..word_" + maxend); } } chunkLevel.addMarkable(npstart, npend, cAttributes); }
From source file:elkfed.mmax.pipeline.P2Chunker.java
License:Apache License
private void normalizeTree(Tree tree) { // for leaves -- add positions // for nps -- add whether they are basic or not int leaveIndex = 0; for (Iterator<Tree> treeIt = tree.iterator(); treeIt.hasNext();) { Tree currentTree = treeIt.next(); Label nodeLabel = currentTree.label(); if (currentTree.isLeaf()) { nodeLabel.setValue(nodeLabel.value() + INDEX_SEPARATOR + leaveIndex); leaveIndex++;// w ww . java 2 s.co m } else { if (currentTree.value().toLowerCase().startsWith("np")) { Boolean found = false; //adjust this np for keeping (if not already discarded if (!currentTree.value().endsWith("0") && !currentTree.value().endsWith("2")) currentTree.label().setValue("NP" + NPSTATUS_SEPARATOR + "1"); //adjust upper np for discarding Tree p = currentTree; Tree head = p.headTerminal(getHeadFinder()); while (p != null && !found) { p = p.parent(tree); if (p != null && p.value().toLowerCase().startsWith("np") && p.headTerminal(getHeadFinder()) == head && (!iscoordnp(p))) { found = true; p.label().setValue("NP" + NPSTATUS_SEPARATOR + "0"); currentTree.label().setValue("NP" + NPSTATUS_SEPARATOR + "2"); } } } else { nodeLabel.setValue(nodeLabel.value().toUpperCase()); } } } }
From source file:elkfed.mmax.pipeline.P2Chunker.java
License:Apache License
private Boolean iscoordnp(Tree np) { // helper -- checks that a parse np-tree is in fact coordination (contains CC on the highest level) if (np == null) return false; if (!np.value().startsWith("NP")) return false; Tree[] chlds = np.children();// w w w. j a v a2s . c o m for (int i = 0; i < chlds.length; i++) { if (chlds[i].value().equalsIgnoreCase("CC")) return true; } return false; }
From source file:gate.stanford.Parser.java
License:Open Source License
/** * Generate a SyntaxTreeNode Annotation corresponding to this Tree. Work * recursively so that the annotations are actually generated from the * bottom up, in order to build the consists list of annotation IDs. * /* w w w. j av a2 s . c o m*/ * @param tree the current subtree * @param rootTree the whole sentence, used to find the span of the current subtree * @return a GATE Annotation of type "SyntaxTreeNode" */ protected Annotation annotatePhraseStructureRecursively(AnnotationSet annotationSet, StanfordSentence stanfordSentence, Tree tree, Tree rootTree) { Annotation annotation = null; Annotation child; String label = tree.value(); List<Tree> children = tree.getChildrenAsList(); if (children.size() == 0) { return null; } /* implied else */ /* following line generates ClassCastException * IntPair span = tree.getSpan(); * edu.stanford.nlp.ling.CategoryWordTag * at edu.stanford.nlp.trees.Tree.getSpan(Tree.java:393) * but I think it's a bug in the parser, so I'm hacking * around it as follows. */ int startPos = Trees.leftEdge(tree, rootTree); int endPos = Trees.rightEdge(tree, rootTree); Long startNode = stanfordSentence.startPos2offset(startPos); Long endNode = stanfordSentence.endPos2offset(endPos); List<Integer> consists = new ArrayList<Integer>(); Iterator<Tree> childIter = children.iterator(); while (childIter.hasNext()) { child = annotatePhraseStructureRecursively(annotationSet, stanfordSentence, childIter.next(), rootTree); if ((child != null) && (!child.getType().equals(inputTokenType))) { consists.add(child.getId()); } } annotation = annotatePhraseStructureConstituent(annotationSet, startNode, endNode, label, consists, tree.depth()); return annotation; }
From source file:knu.univ.lingvo.coref.Mention.java
License:Open Source License
private boolean isListLike() { // See if this mention looks to be a conjunction of things // Check for "or" and "and" and "," int commas = 0; // boolean firstLabelLike = false; // if (originalSpan.size() > 1) { // String w = originalSpan.get(1).word(); // firstLabelLike = (w.equals(":") || w.equals("-")); // }// ww w .j a v a 2s . c om String mentionSpanString = spanToString(); String subTreeSpanString = StringUtils.joinWords(mentionSubTree.yieldWords(), " "); if (subTreeSpanString.equals(mentionSpanString)) { // subtree represents this mention well.... List<Tree> children = mentionSubTree.getChildrenAsList(); for (Tree t : children) { String label = t.value(); String ner = null; if (t.isLeaf()) { ner = ((CoreLabel) t.getLeaves().get(0).label()).ner(); } if ("CC".equals(label)) { // Check NER type if (ner == null || "O".equals(ner)) { return true; } } else if (label.equals(",")) { if (ner == null || "O".equals(ner)) { commas++; } } } } if (commas <= 2) { // look at the string for and/or boolean first = true; for (CoreLabel t : originalSpan) { String tag = t.tag(); String ner = t.ner(); String w = t.word(); if (tag.equals("TO") || tag.equals("IN") || tag.startsWith("VB")) { // prepositions and verbs are too hard for us return false; } if (!first) { if (w.equalsIgnoreCase("and") || w.equalsIgnoreCase("or")) { // Check NER type if (ner == null || "O".equals(ner)) { return true; } } } first = false; } } return (commas > 2); }
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/** * Post-processes the extracted mentions. Here we set the Mention fields required for coref and order mentions by tree-traversal order. * @param words List of words in each sentence, in textual order * @param trees List of trees, one per sentence * @param unorderedMentions List of unordered, unprocessed mentions * Each mention MUST have startIndex and endIndex set! * Optionally, if scoring is desired, mentions must have mentionID and originalRef set. * All the other Mention fields are set here. * @return List of mentions ordered according to the tree traversal * @throws Exception/*from w ww. j a va2 s .c om*/ */ public List<List<Mention>> arrange(Annotation anno, List<List<CoreLabel>> words, List<Tree> trees, List<List<Mention>> unorderedMentions, boolean doMergeLabels) throws Exception { List<List<Mention>> orderedMentionsBySentence = new ArrayList<List<Mention>>(); // // traverse all sentences and process each individual one // int mentionNumber = 0; for (int sent = 0, sz = words.size(); sent < sz; sent++) { List<CoreLabel> sentence = words.get(sent); Tree tree = trees.get(sent); List<Mention> mentions = unorderedMentions.get(sent); Map<String, List<Mention>> mentionsToTrees = Generics.newHashMap(); // merge the parse tree of the entire sentence with the sentence words if (doMergeLabels) mergeLabels(tree, sentence); // // set the surface information and the syntactic info in each mention // startIndex and endIndex MUST be set before! 
// for (Mention mention : mentions) { mention.sentenceNumber = sent; mention.mentionNumber = mentionNumber++; mention.contextParseTree = tree; mention.sentenceWords = sentence; mention.originalSpan = new ArrayList<CoreLabel>( mention.sentenceWords.subList(mention.startIndex, mention.endIndex)); if (!((CoreLabel) tree.label()).has(CoreAnnotations.BeginIndexAnnotation.class)) tree.indexSpans(0); if (mention.headWord == null) { Tree headTree = ((RuleBasedCorefMentionFinder) mentionFinder).findSyntacticHead(mention, tree, sentence); mention.headWord = (CoreLabel) headTree.label(); mention.headIndex = mention.headWord.get(CoreAnnotations.IndexAnnotation.class) - 1; } if (mention.mentionSubTree == null) { // mentionSubTree = highest NP that has the same head Tree headTree = tree.getLeaves().get(mention.headIndex); if (headTree == null) { throw new RuntimeException("Missing head tree for a mention!"); } Tree t = headTree; while ((t = t.parent(tree)) != null) { if (t.headTerminal(headFinder) == headTree && t.value().equals("NP")) { mention.mentionSubTree = t; } else if (mention.mentionSubTree != null) { break; } } if (mention.mentionSubTree == null) { mention.mentionSubTree = headTree; } } List<Mention> mentionsForTree = mentionsToTrees.get(treeToKey(mention.mentionSubTree)); if (mentionsForTree == null) { mentionsForTree = new ArrayList<Mention>(); mentionsToTrees.put(treeToKey(mention.mentionSubTree), mentionsForTree); } mentionsForTree.add(mention); // generates all fields required for coref, such as gender, number, etc. 
mention.process(dictionaries, semantics, this, singletonPredictor); } // // Order all mentions in tree-traversal order // List<Mention> orderedMentions = new ArrayList<Mention>(); orderedMentionsBySentence.add(orderedMentions); // extract all mentions in tree traversal order (alternative: tree.postOrderNodeList()) for (Tree t : tree.preOrderNodeList()) { List<Mention> lm = mentionsToTrees.get(treeToKey(t)); if (lm != null) { for (Mention m : lm) { orderedMentions.add(m); } } } // // find appositions, predicate nominatives, relative pronouns in this sentence // findSyntacticRelations(tree, orderedMentions); assert (mentions.size() == orderedMentions.size()); } return orderedMentionsBySentence; }
From source file:knu.univ.lingvo.coref.MentionExtractor.java
License:Open Source License
/** * Sets the label of the leaf nodes to be the CoreLabels in the given sentence * The original value() of the Tree nodes is preserved *///from ww w .j a v a2 s . co m public static void mergeLabels(Tree tree, List<CoreLabel> sentence) { int idx = 0; for (Tree t : tree.getLeaves()) { CoreLabel cl = sentence.get(idx++); String value = t.value(); cl.set(CoreAnnotations.ValueAnnotation.class, value); t.setLabel(cl); } tree.indexLeaves(); }
From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java
License:Apache License
/**
 * Appends a bracketed rendering of {@code t} to {@code sb}, splicing
 * {@code treeToInsert} in as an extra last child of the first node (from the
 * root down) that has a child whose string form, with ")" removed, ends with
 * the last entry of {@code corefWords}. Below the insertion point the subtrees
 * are rendered without any further insertion.
 *
 * <p>No insertion is attempted when {@code treeToInsert} is null or when
 * {@code corefWords} is null, empty, or ends with a null entry.
 *
 * @param sb           builder to append to
 * @param t            tree to render
 * @param treeToInsert tree to splice in, or null for plain rendering
 * @param corefWords   words of the coreferent phrase; only the last one is used
 * @return {@code sb}, for chaining
 */
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree1(StringBuilder sb, Tree t,
        Tree treeToInsert, String[] corefWords) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    }

    sb.append('(');
    if (t.label() != null && t.value() != null) {
        sb.append(t.label().value());
    }

    Tree[] kids = t.children();
    if (kids != null) {
        // The anchor word does not change per child, so look it up once.
        String anchorWord = null;
        if (treeToInsert != null && corefWords != null && corefWords.length > 0) {
            anchorWord = corefWords[corefWords.length - 1];
        }

        boolean insertHere = false;
        if (anchorWord != null) {
            for (Tree kid : kids) {
                if (kid.toString().replace(")", "").endsWith(anchorWord)) {
                    insertHere = true;
                    break;
                }
            }
        }

        if (insertHere) {
            // Render the children plainly, then append the inserted tree once.
            for (Tree kid : kids) {
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, null, null);
            }
            sb.append(' ');
            toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
        } else {
            for (Tree kid : kids) {
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, treeToInsert, corefWords);
            }
        }
    }
    return sb.append(')');
}