Example usage for edu.stanford.nlp.trees ModCollinsHeadFinder determineHead

List of usage examples for edu.stanford.nlp.trees ModCollinsHeadFinder determineHead

Introduction

On this page you can find example usage for edu.stanford.nlp.trees ModCollinsHeadFinder determineHead.

Prototype

@Override
public Tree determineHead(Tree t) 

Source Link

Document

Determine which daughter of the current parse tree is the head.

Usage

From source file: elkfed.mmax.importer.DetermineMinSpan.java

License:Apache License

/**
 * Adds the {@code min_ids} and {@code min_words} attributes to a markable so that
 * BART's chunk-based coref resolution works.
 *
 * <p>The markable's token span is mapped onto the lowest node of {@code tree} whose
 * character edges coincide with the span (a trailing "." token is left out). Starting
 * from that node, the head finder is followed downwards for as long as the head
 * daughter is an {@code NP} constituent and the current node has no {@code CC}
 * daughter. If this narrows the node, the leaves of the narrowed node are written to
 * the markable as {@code min_ids} / {@code min_words}; otherwise the markable is left
 * untouched. When no matching node is found, a message is printed to
 * {@code System.err} and the method returns without changing the markable.
 *
 * @param start  offset subtracted from the markable's discourse positions to obtain
 *               indices into the leaves of {@code tree}
 * @param tree   parse tree containing the markable
 * @param tag    markable to annotate; its span may be changed via {@code adjustSpan}
 * @param tokens token list indexed by discourse position; used only in diagnostics
 * @throws IndexOutOfBoundsException if the markable's positions do not correspond to
 *         leaves of {@code tree}
 */
public static void addMinSpan(int start, Tree tree, IMarkable tag, List<String> tokens) {
    List<Tree> leaves = tree.getLeaves();
    Tree startNode;
    Tree endNode;
    try {
        startNode = leaves.get(tag.getLeftmostDiscoursePosition() - start);
        endNode = leaves.get(tag.getRightmostDiscoursePosition() - start);
        if (".".equals(endNode.parent(tree).value())) {
            // The markable ends in a "." token: exclude it from the span to be matched.
            endNode = leaves.get(tag.getRightmostDiscoursePosition() - start - 1);
        }
    } catch (IndexOutOfBoundsException ex) {
        // Report on stderr (like every other diagnostic here) and terminate the line.
        System.err.format("indices not found: %d,%d in %s [wanted: %s] [ctx: %s]%n",
                tag.getLeftmostDiscoursePosition() - start, tag.getRightmostDiscoursePosition() - start, leaves,
                tokens.subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1),
                tokens.subList(start, tag.getLeftmostDiscoursePosition()));
        throw ex;
    }

    // Climb from the first leaf until a node also covering the last leaf is reached.
    Tree parentNode = startNode;
    while (parentNode != null && !parentNode.dominates(endNode)) {
        parentNode = parentNode.parent(tree);
    }

    if (parentNode == null) {
        System.err.println("Could not match tree (1)");
        return;
    }

    // The covering node must span exactly the markable, not a larger stretch of text.
    if (startNode.leftCharEdge(tree) != parentNode.leftCharEdge(tree)
            || endNode.rightCharEdge(tree) != parentNode.rightCharEdge(tree)) {
        System.err.println("Could not match tree (2)");
        return;
    }

    Tree oldParent = parentNode;
    ModCollinsHeadFinder hf = new ModCollinsHeadFinder();
    // use the head finder to narrow down the span.
    // stop if (a) the head is no longer an NP or
    // (b) the NP is a conjunction
    narrowing:
    while (true) {
        for (Tree child : parentNode.getChildrenAsList()) {
            if ("CC".equals(child.value())) {
                break narrowing;
            }
        }
        Tree headDtr = hf.determineHead(parentNode);
        // Only descend into NP constituents. A leaf is a word, so a token that happens
        // to be spelled "NP" must not be taken for an NP node (asking the head finder
        // for the head of a leaf on the next iteration would not be meaningful).
        if (headDtr == null || headDtr.isLeaf() || !"NP".equals(headDtr.value())) {
            break;
        }
        parentNode = headDtr;
    }

    if (parentNode != oldParent) {
        List<Tree> newLeaves = parentNode.getLeaves();
        int newStart = start + find_same(leaves, newLeaves.get(0));
        int newEnd = newStart + newLeaves.size() - 1;
        if (newStart <= tag.getLeftmostDiscoursePosition()) {
            if (tag.getLeftmostDiscoursePosition() - newStart > 1) {
                System.err.println("NP node is too big:" + parentNode.toString() + " wanted:" + tokens
                        .subList(tag.getLeftmostDiscoursePosition(), tag.getRightmostDiscoursePosition() + 1)
                        + " in: " + tree);
                return;
            }
            for (int i = newStart - start; i < tag.getLeftmostDiscoursePosition() - start; i++) {
                System.err.println("additional prefix in syntax:" + leaves.get(i));
            }
            // switch NP boundary and tag boundary
            // (even [Connie Cheung]) => min_words="Connie Cheung"
            int tmp = tag.getLeftmostDiscoursePosition();
            tag.adjustSpan(newStart, tag.getRightmostDiscoursePosition());
            newStart = tmp;
        }
        assert newEnd <= tag.getRightmostDiscoursePosition();
        // this relies on MiniDiscourse's default word numbering
        // which is ugly but should generally work...
        // (the +1 presumably maps 0-based positions to 1-based word ids -- confirm)
        if (newStart == newEnd) {
            tag.setAttributeValue("min_ids", "word_" + (newStart + 1));
        } else {
            tag.setAttributeValue("min_ids", String.format("word_%d..word_%d", newStart + 1, newEnd + 1));
        }
        // Space-separated, lower-cased leaves. Locale.ROOT keeps the result independent
        // of the JVM's default locale.
        StringBuilder buf = new StringBuilder();
        for (Tree leaf : newLeaves) {
            buf.append(leaf.toString().toLowerCase(java.util.Locale.ROOT));
            buf.append(' ');
        }
        // newLeaves is non-empty (get(0) above succeeded), so a trailing space exists.
        buf.setLength(buf.length() - 1);
        tag.setAttributeValue("min_words", buf.toString());
    }
}