Example usage for edu.stanford.nlp.trees.Tree.value()

List of usage examples for edu.stanford.nlp.trees.Tree.value()

Introduction

On this page you can find example usages of the value() method of edu.stanford.nlp.trees.Tree.

Prototype

@Override
    public String value() 

Source Link

Usage

From source file:elkfed.mmax.importer.DetermineMinSpan.java

License:Apache License

/**
 * Adds {@code min_ids} and {@code min_words} attributes to a markable so that
 * BART's chunk-based coref resolution works.
 *
 * <p>The method locates the smallest constituent of {@code tree} that spans the
 * markable, then follows NP head daughters downwards to narrow it. If a narrower
 * NP is found, its word range and lower-cased words are stored on the markable.
 * When no matching or narrower constituent exists, the markable is left untouched.
 *
 * @param start  offset subtracted from the markable's discourse positions to index
 *               into the leaves of {@code tree} (presumably the discourse position
 *               of the sentence's first token; confirm against callers)
 * @param tree   parse tree of the sentence that contains the markable
 * @param tag    markable to annotate; its span may be widened to the left by at
 *               most one token
 * @param tokens document tokens, used only for diagnostic output
 * @throws IndexOutOfBoundsException if the markable's positions do not map onto
 *         leaves of {@code tree}
 */
public static void addMinSpan(int start, Tree tree, IMarkable tag, List<String> tokens) {
    List<Tree> leaves = tree.getLeaves();
    // Snapshot the span before any adjustSpan() call below changes it.
    final int tagStart = tag.getLeftmostDiscoursePosition();
    final int tagEnd = tag.getRightmostDiscoursePosition();
    Tree startNode;
    Tree endNode;
    try {
        startNode = leaves.get(tagStart - start);
        endNode = leaves.get(tagEnd - start);
        if (".".equals(endNode.parent(tree).value())) {
            // a sentence-final dot is not part of the span: step back one leaf
            endNode = leaves.get(tagEnd - start - 1);
        }
    } catch (IndexOutOfBoundsException ex) {
        // terminate the diagnostic line so it does not run into later output
        System.out.format("indices not found: %d,%d in %s [wanted: %s] [ctx: %s]%n",
                tagStart - start, tagEnd - start, leaves,
                tokens.subList(tagStart, tagEnd + 1),
                tokens.subList(start, tagStart));
        throw ex;
    }

    // climb from the first leaf until the node also covers the last leaf
    Tree parentNode = startNode;
    while (parentNode != null && !parentNode.dominates(endNode)) {
        parentNode = parentNode.parent(tree);
    }

    if (parentNode == null) {
        System.err.println("Could not match tree (1)");
        return;
    }

    // the covering node must span exactly the markable, not more
    if (startNode.leftCharEdge(tree) != parentNode.leftCharEdge(tree)
            || endNode.rightCharEdge(tree) != parentNode.rightCharEdge(tree)) {
        System.err.println("Could not match tree (2)");
        return;
    }

    Tree oldParent = parentNode;
    ModCollinsHeadFinder hf = new ModCollinsHeadFinder();
    // use the head finder to narrow down the span.
    // stop if (a) the head is no longer an NP or
    // (b) the NP is a conjunction
    go_up: while (true) {
        for (Tree t : parentNode.getChildrenAsList()) {
            if (t.value().equals("CC")) {
                break go_up;
            }
        }
        Tree headDtr = hf.determineHead(parentNode);
        if (headDtr == null || !headDtr.value().equals("NP")) {
            break;
        }
        parentNode = headDtr;
    }
    if (parentNode == oldParent) {
        // nothing narrower than the markable itself was found
        return;
    }

    List<Tree> newLeaves = parentNode.getLeaves();
    int newStart = start + find_same(leaves, newLeaves.get(0));
    int newEnd = newStart + newLeaves.size() - 1;
    if (newStart <= tagStart) {
        if (tagStart - newStart > 1) {
            System.err.println("NP node is too big:" + parentNode.toString() + " wanted:"
                    + tokens.subList(tagStart, tagEnd + 1)
                    + " in: " + tree);
            return;
        }
        for (int i = newStart - start; i < tagStart - start; i++) {
            System.err.println("additional prefix in syntax:" + leaves.get(i));
        }
        // switch NP boundary and tag boundary
        // (even [Connie Cheung]) => min_words="Connie Cheung"
        tag.adjustSpan(newStart, tagEnd);
        newStart = tagStart;
    }
    assert newEnd <= tag.getRightmostDiscoursePosition();
    // this relies on MiniDiscourse's default word numbering
    // which is ugly but should generally work...
    if (newStart == newEnd) {
        tag.setAttributeValue("min_ids", "word_" + (newStart + 1));
    } else {
        tag.setAttributeValue("min_ids", String.format("word_%d..word_%d", newStart + 1, newEnd + 1));
    }
    // the buffer never leaves this method, so the unsynchronized builder suffices
    StringBuilder buf = new StringBuilder();
    for (Tree t : newLeaves) {
        buf.append(t.toString().toLowerCase()).append(' ');
    }
    buf.setLength(buf.length() - 1); // drop the trailing separator
    tag.setAttributeValue("min_words", buf.toString());
}

From source file:elkfed.mmax.pipeline.P2Chunker.java

License:Apache License

/**
 * Adds chunk markables derived from the parse tree of every parse markable of
 * the current document.
 *
 * <p>For each parse markable the tree stored in its tag attribute is read and
 * normalized, and a chunk markable is registered for
 * <ul>
 *   <li>every node labelled NP status "1" or "2", and</li>
 *   <li>every NP status "0" node that embeds a possessive, i.e. whose
 *       grandchild is a {@code POS} node over a leaf starting with {@code 's}.</li>
 * </ul>
 * Parse markables whose tree cannot be read are skipped.
 */
protected void addMarkables() {
    final String npStatus0 = "NP" + NPSTATUS_SEPARATOR + "0";
    final String npStatus1 = "NP" + NPSTATUS_SEPARATOR + "1";
    final String npStatus2 = "NP" + NPSTATUS_SEPARATOR + "2";

    for (Markable parseMarkable : DiscourseUtils.getMarkables(currentDocument, DEFAULT_PARSE_LEVEL)) {

        int start = parseMarkable.getLeftmostDiscoursePosition();

        // traverse parse-tree (real tree, not string), extract basic NPs and poss
        Tree pTree = Tree.valueOf(parseMarkable.getAttributeValue(PipelineComponent.TAG_ATTRIBUTE));

        // The null check has to precede normalization: normalizeTree iterates
        // the tree and would fail on a missing one.
        if (pTree == null) {
            continue;
        }
        normalizeTree(pTree);

        // add all basic nps
        for (Tree nod : pTree) {
            String label = nod.value();
            if (label.equals(npStatus1) || label.equals(npStatus2)) {
                addChunkMarkable(nod, pTree, start, false);
            }
        }

        // add NPs embedding possessives
        for (Tree leaf : pTree.getLeaves()) {
            if (!leaf.value().toLowerCase().startsWith("'s")) {
                continue;
            }
            // look each ancestor up once; parent(root) searches from the root
            Tree pos = leaf.parent(pTree);
            if (pos == null || !pos.value().equals("POS")) {
                continue;
            }
            Tree possessor = pos.parent(pTree);
            if (possessor == null || !possessor.value().startsWith("NP")) {
                continue;
            }
            Tree embedding = possessor.parent(pTree);
            if (embedding != null && embedding.value().equals(npStatus0)) {
                addChunkMarkable(embedding, pTree, start, true);
            }
        }
    }
}

From source file:elkfed.mmax.pipeline.P2Chunker.java

License:Apache License

/**
 * Registers a chunk markable of type "np" covering the leaves of {@code nod}.
 *
 * <p>Leaf positions are read from the index that follows {@code INDEX_SEPARATOR}
 * in each leaf label. When {@code checkup} is set, or the node carries NP status
 * "2", the widest non-coordinated NP ancestor sharing the same head terminal is
 * searched for and, if found, recorded as the markable's maxspan.
 *
 * @param nod     the NP node to turn into a chunk markable
 * @param pTree   root of the tree containing {@code nod}
 * @param start   offset added to the leaf indices to obtain markable positions
 * @param checkup forces the maxspan lookup regardless of the node's label
 */
private void addChunkMarkable(Tree nod, Tree pTree, int start, Boolean checkup) {

    // span of the chunk itself
    List<Tree> chunkLeaves = nod.getLeaves();
    String firstLabel = chunkLeaves.get(0).label().value();
    int npstart = start + Integer.parseInt(firstLabel.split(INDEX_SEPARATOR)[1]);
    String lastLabel = chunkLeaves.get(chunkLeaves.size() - 1).label().value();
    int npend = start + Integer.parseInt(lastLabel.split(INDEX_SEPARATOR)[1]);

    final Map<String, String> cAttributes = new HashMap<String, String>(chunkAttributes);
    cAttributes.put(TAG_ATTRIBUTE, "np");

    // store maxspan for embedded nps (either basic or explicitly marked for doing so)
    if (checkup || nod.value().equals("NP" + NPSTATUS_SEPARATOR + "2")) {
        Tree head = nod.headTerminal(getHeadFinder());
        Tree lastmax = null;

        // Walk upwards: non-NP ancestors are skipped, and the first NP ancestor
        // with another head (or a coordination) ends the search.
        for (Tree ancestor = nod.parent(pTree); ancestor != null; ancestor = ancestor.parent(pTree)) {
            if (!ancestor.value().startsWith("NP")) {
                continue;
            }
            if ((ancestor.headTerminal(getHeadFinder()) == head) && (!iscoordnp(ancestor))) {
                lastmax = ancestor;
            } else {
                break;
            }
        }

        if (lastmax != null) {
            List<Tree> maxLeaves = lastmax.getLeaves();
            String maxFirstLabel = maxLeaves.get(0).label().value();
            int maxstart = Integer.parseInt(maxFirstLabel.split(INDEX_SEPARATOR)[1]) + start + 1;
            String maxLastLabel = maxLeaves.get(maxLeaves.size() - 1).label().value();
            int maxend = Integer.parseInt(maxLastLabel.split(INDEX_SEPARATOR)[1]) + start + 1;
            cAttributes.put(MAXSPAN_ATTRIBUTE, "word_" + maxstart + "..word_" + maxend);
        }
    }

    chunkLevel.addMarkable(npstart, npend, cAttributes);
}

From source file:elkfed.mmax.pipeline.P2Chunker.java

License:Apache License

/**
 * Rewrites the labels of {@code tree} in place.
 *
 * <ul>
 *   <li>Leaves get {@code INDEX_SEPARATOR} plus their running leaf index appended.</li>
 *   <li>NP nodes get a status suffix: "1" unless they already end in "0" or "2".
 *       If the nearest qualifying ancestor (an NP with the same head terminal
 *       that is not a coordination) exists, that ancestor becomes "0" and the
 *       current node becomes "2".</li>
 *   <li>All other internal labels are upper-cased.</li>
 * </ul>
 *
 * @param tree root of the tree to normalize
 */
private void normalizeTree(Tree tree) {
    int nextLeafIndex = 0;
    for (Tree node : tree) {
        Label label = node.label();

        // leaves: append the position
        if (node.isLeaf()) {
            label.setValue(label.value() + INDEX_SEPARATOR + nextLeafIndex);
            nextLeafIndex++;
            continue;
        }

        // non-NP internal nodes: only normalize the case
        if (!node.value().toLowerCase().startsWith("np")) {
            label.setValue(label.value().toUpperCase());
            continue;
        }

        // mark this np for keeping, unless an earlier step already decided
        String status = node.value();
        if (!status.endsWith("0") && !status.endsWith("2")) {
            node.label().setValue("NP" + NPSTATUS_SEPARATOR + "1");
        }

        // mark the closest same-headed, non-coordinated np above for discarding
        Tree head = node.headTerminal(getHeadFinder());
        Tree ancestor = node.parent(tree);
        while (ancestor != null) {
            if (ancestor.value().toLowerCase().startsWith("np")
                    && ancestor.headTerminal(getHeadFinder()) == head && (!iscoordnp(ancestor))) {
                ancestor.label().setValue("NP" + NPSTATUS_SEPARATOR + "0");
                node.label().setValue("NP" + NPSTATUS_SEPARATOR + "2");
                break;
            }
            ancestor = ancestor.parent(tree);
        }
    }
}

From source file:elkfed.mmax.pipeline.P2Chunker.java

License:Apache License

/**
 * Tells whether a parse node is a coordinated NP, i.e. its label starts with
 * "NP" and one of its direct children is labelled "CC" (case-insensitive).
 *
 * @param np node to inspect; {@code null} yields {@code false}
 * @return {@code true} if a CC child is present directly under an NP node
 */
private Boolean iscoordnp(Tree np) {
    if (np == null || !np.value().startsWith("NP")) {
        return false;
    }
    for (Tree child : np.children()) {
        if (child.value().equalsIgnoreCase("CC")) {
            return true;
        }
    }
    return false;
}

From source file:gate.stanford.Parser.java

License:Open Source License

/**
 * Generate a SyntaxTreeNode Annotation corresponding to this Tree.  Work 
 * recursively so that the annotations are actually generated from the 
 * bottom up, in order to build the consists list of annotation IDs.
 * /*  w w w. j av  a2 s  .  c o  m*/
 * @param tree  the current subtree
 * @param rootTree  the whole sentence, used to find the span of the current subtree
 * @return a GATE Annotation of type "SyntaxTreeNode"
 */
protected Annotation annotatePhraseStructureRecursively(AnnotationSet annotationSet,
        StanfordSentence stanfordSentence, Tree tree, Tree rootTree) {
    Annotation annotation = null;
    Annotation child;
    String label = tree.value();

    List<Tree> children = tree.getChildrenAsList();

    if (children.size() == 0) {
        return null;
    }
    /* implied else */

    /* following line generates ClassCastException
     *       IntPair span = tree.getSpan();
     * edu.stanford.nlp.ling.CategoryWordTag
     * at edu.stanford.nlp.trees.Tree.getSpan(Tree.java:393)
     * but I think it's a bug in the parser, so I'm hacking 
     * around it as follows. */
    int startPos = Trees.leftEdge(tree, rootTree);
    int endPos = Trees.rightEdge(tree, rootTree);

    Long startNode = stanfordSentence.startPos2offset(startPos);
    Long endNode = stanfordSentence.endPos2offset(endPos);

    List<Integer> consists = new ArrayList<Integer>();

    Iterator<Tree> childIter = children.iterator();
    while (childIter.hasNext()) {
        child = annotatePhraseStructureRecursively(annotationSet, stanfordSentence, childIter.next(), rootTree);
        if ((child != null) && (!child.getType().equals(inputTokenType))) {
            consists.add(child.getId());
        }
    }
    annotation = annotatePhraseStructureConstituent(annotationSet, startNode, endNode, label, consists,
            tree.depth());

    return annotation;
}

From source file:knu.univ.lingvo.coref.Mention.java

License:Open Source License

/**
 * Heuristically decides whether this mention is a conjunction or enumeration
 * of things, by looking for "and", "or" and commas.
 *
 * <p>If the mention sub-tree yields exactly the mention's span string, its
 * direct children are inspected: a {@code CC} child outside a named entity
 * makes the mention list-like at once, and commas outside named entities are
 * counted. With at most two such commas the original span is scanned as well:
 * a preposition, "to" or verb tag rules the mention out, while a non-initial
 * "and"/"or" outside a named entity makes it list-like.
 *
 * @return {@code true} if the mention looks like a list
 */
private boolean isListLike() {
    int commas = 0;
    String mentionSpanString = spanToString();
    String subTreeSpanString = StringUtils.joinWords(mentionSubTree.yieldWords(), " ");
    if (subTreeSpanString.equals(mentionSpanString)) {
        // subtree represents this mention well....
        for (Tree t : mentionSubTree.getChildrenAsList()) {
            String label = t.value();
            String ner = null;
            if (t.isLeaf()) {
                ner = ((CoreLabel) t.getLeaves().get(0).label()).ner();
            }
            boolean outsideEntity = ner == null || "O".equals(ner);
            // constant-first comparisons: a node without a label value is simply skipped
            if ("CC".equals(label)) {
                if (outsideEntity) {
                    return true;
                }
            } else if (",".equals(label)) {
                if (outsideEntity) {
                    commas++;
                }
            }
        }
    }

    if (commas <= 2) {
        // look at the string for and/or
        boolean first = true;
        for (CoreLabel t : originalSpan) {
            String tag = t.tag();
            String ner = t.ner();
            String w = t.word();
            // an untagged token is treated as neither a preposition nor a verb
            if ("TO".equals(tag) || "IN".equals(tag) || (tag != null && tag.startsWith("VB"))) {
                // prepositions and verbs are too hard for us
                return false;
            }
            if (!first && ("and".equalsIgnoreCase(w) || "or".equalsIgnoreCase(w))) {
                // Check NER type
                if (ner == null || "O".equals(ner)) {
                    return true;
                }
            }
            first = false;
        }
    }

    return (commas > 2);
}

From source file:knu.univ.lingvo.coref.MentionExtractor.java

License:Open Source License

/**
 * Post-processes the extracted mentions. Here we set the Mention fields required for coref and order mentions by tree-traversal order.
 * @param words List of words in each sentence, in textual order
 * @param trees List of trees, one per sentence
 * @param unorderedMentions List of unordered, unprocessed mentions
 *                 Each mention MUST have startIndex and endIndex set!
 *                 Optionally, if scoring is desired, mentions must have mentionID and originalRef set.
 *                 All the other Mention fields are set here.
 * @return List of mentions ordered according to the tree traversal
 * @throws Exception/*from   w  ww. j  a va2 s  .c om*/
 */
public List<List<Mention>> arrange(Annotation anno, List<List<CoreLabel>> words, List<Tree> trees,
        List<List<Mention>> unorderedMentions, boolean doMergeLabels) throws Exception {

    List<List<Mention>> orderedMentionsBySentence = new ArrayList<List<Mention>>();

    //
    // traverse all sentences and process each individual one
    //
    int mentionNumber = 0;
    for (int sent = 0, sz = words.size(); sent < sz; sent++) {
        List<CoreLabel> sentence = words.get(sent);
        Tree tree = trees.get(sent);
        List<Mention> mentions = unorderedMentions.get(sent);
        Map<String, List<Mention>> mentionsToTrees = Generics.newHashMap();

        // merge the parse tree of the entire sentence with the sentence words
        if (doMergeLabels)
            mergeLabels(tree, sentence);

        //
        // set the surface information and the syntactic info in each mention
        // startIndex and endIndex MUST be set before!
        //
        for (Mention mention : mentions) {
            mention.sentenceNumber = sent;
            mention.mentionNumber = mentionNumber++;
            mention.contextParseTree = tree;
            mention.sentenceWords = sentence;
            mention.originalSpan = new ArrayList<CoreLabel>(
                    mention.sentenceWords.subList(mention.startIndex, mention.endIndex));
            if (!((CoreLabel) tree.label()).has(CoreAnnotations.BeginIndexAnnotation.class))
                tree.indexSpans(0);
            if (mention.headWord == null) {
                Tree headTree = ((RuleBasedCorefMentionFinder) mentionFinder).findSyntacticHead(mention, tree,
                        sentence);
                mention.headWord = (CoreLabel) headTree.label();
                mention.headIndex = mention.headWord.get(CoreAnnotations.IndexAnnotation.class) - 1;
            }
            if (mention.mentionSubTree == null) {
                // mentionSubTree = highest NP that has the same head
                Tree headTree = tree.getLeaves().get(mention.headIndex);
                if (headTree == null) {
                    throw new RuntimeException("Missing head tree for a mention!");
                }
                Tree t = headTree;
                while ((t = t.parent(tree)) != null) {
                    if (t.headTerminal(headFinder) == headTree && t.value().equals("NP")) {
                        mention.mentionSubTree = t;
                    } else if (mention.mentionSubTree != null) {
                        break;
                    }
                }
                if (mention.mentionSubTree == null) {
                    mention.mentionSubTree = headTree;
                }
            }

            List<Mention> mentionsForTree = mentionsToTrees.get(treeToKey(mention.mentionSubTree));
            if (mentionsForTree == null) {
                mentionsForTree = new ArrayList<Mention>();
                mentionsToTrees.put(treeToKey(mention.mentionSubTree), mentionsForTree);
            }
            mentionsForTree.add(mention);

            // generates all fields required for coref, such as gender, number, etc.
            mention.process(dictionaries, semantics, this, singletonPredictor);
        }

        //
        // Order all mentions in tree-traversal order
        //
        List<Mention> orderedMentions = new ArrayList<Mention>();
        orderedMentionsBySentence.add(orderedMentions);

        // extract all mentions in tree traversal order (alternative: tree.postOrderNodeList())
        for (Tree t : tree.preOrderNodeList()) {
            List<Mention> lm = mentionsToTrees.get(treeToKey(t));
            if (lm != null) {
                for (Mention m : lm) {
                    orderedMentions.add(m);
                }
            }
        }

        //
        // find appositions, predicate nominatives, relative pronouns in this sentence
        //
        findSyntacticRelations(tree, orderedMentions);
        assert (mentions.size() == orderedMentions.size());
    }
    return orderedMentionsBySentence;
}

From source file:knu.univ.lingvo.coref.MentionExtractor.java

License:Open Source License

/**
 * Replaces the label of every leaf of {@code tree} with the CoreLabel at the
 * same position in {@code sentence}, then re-indexes the leaves.
 * The leaf's previous value() is copied into the CoreLabel's ValueAnnotation,
 * so it is preserved.
 *
 * @param tree     tree whose leaves receive the new labels
 * @param sentence labels in leaf order; needs at least one entry per leaf
 */
public static void mergeLabels(Tree tree, List<CoreLabel> sentence) {
    List<Tree> leaves = tree.getLeaves();
    for (int position = 0; position < leaves.size(); position++) {
        Tree leaf = leaves.get(position);
        CoreLabel wordLabel = sentence.get(position);
        // carry the old leaf value over before the label is swapped
        wordLabel.set(CoreAnnotations.ValueAnnotation.class, leaf.value());
        leaf.setLabel(wordLabel);
    }
    tree.indexLeaves();
}

From source file:opennlp.tools.parse_thicket.kernel_interface.TreeExtenderByAnotherLinkedTree.java

License:Apache License

/**
 * Appends a bracketed rendering of {@code t} to {@code sb}, splicing
 * {@code treeToInsert} in as an extra last child of the first node (top-down)
 * that has a child whose text ends with the last coreference word.
 *
 * <p>A child matches when its {@code toString()} with all {@code ')'} removed
 * ends with the last entry of {@code corefWords}. Below the insertion point the
 * remaining subtrees and the inserted tree are rendered without further insertion.
 * If there is nothing to insert ({@code treeToInsert} is {@code null}) or nothing
 * to match ({@code corefWords} is {@code null} or empty, or its last entry is
 * {@code null}), the tree is rendered unchanged.
 *
 * @param sb           builder to append to
 * @param t            tree to render
 * @param treeToInsert tree to splice in, may be {@code null}
 * @param corefWords   words of the coreferent phrase; only the last one is used
 * @return {@code sb}, for chaining
 */
public StringBuilder toStringBuilderExtenderByAnotherLinkedTree1(StringBuilder sb, Tree t, Tree treeToInsert,
        String[] corefWords) {
    if (t.isLeaf()) {
        if (t.label() != null) {
            sb.append(t.label().value());
        }
        return sb;
    }

    sb.append('(');
    if (t.label() != null && t.value() != null) {
        sb.append(t.label().value());
    }

    Tree[] kids = t.children();
    if (kids != null) {
        // The word to match does not depend on the child, so look it up once;
        // the guards avoid indexing an empty array or matching against null.
        boolean insertHere = false;
        if (treeToInsert != null && corefWords != null && corefWords.length > 0) {
            String word = corefWords[corefWords.length - 1];
            if (word != null) {
                for (Tree kid : kids) {
                    if (kid.toString().replace(")", "").endsWith(word)) {
                        insertHere = true;
                        break;
                    }
                }
            }
        }

        if (insertHere) {
            // render the children as they are, then the inserted tree as last child
            for (Tree kid : kids) {
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, null, null);
            }
            sb.append(' ');
            toStringBuilderExtenderByAnotherLinkedTree1(sb, treeToInsert, null, null);
        } else {
            // keep looking for the insertion point further down
            for (Tree kid : kids) {
                sb.append(' ');
                toStringBuilderExtenderByAnotherLinkedTree1(sb, kid, treeToInsert, corefWords);
            }
        }
    }

    return sb.append(')');
}