Example usage for edu.stanford.nlp.dcoref MentionExtractor mergeLabels

List of usage examples for edu.stanford.nlp.dcoref MentionExtractor mergeLabels

Introduction

On this page you can find example usage for edu.stanford.nlp.dcoref MentionExtractor mergeLabels.

Prototype


public static void mergeLabels(Tree tree, List<CoreLabel> sentence) 

Source Link

Document

Sets the label of the leaf nodes of a Tree to be the CoreLabels in the given sentence.

Usage

From source file: de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License: Open Source License

/**
 * Runs coreference resolution over the parse trees found in the CAS and writes the result back
 * as {@code CoreferenceChain} / {@code CoreferenceLink} annotations.
 *
 * <p>For every {@code ROOT} annotation the covered tokens are converted to {@code CoreLabel}s,
 * the constituent tree is converted to a Stanford {@code Tree}, and both are wrapped in a
 * sentence {@code CoreMap}. The collected sentences are handed to the mention finder and the
 * coref system obtained from {@code modelProvider}. Each resulting chain is stored as a singly
 * linked list of {@code CoreferenceLink}s in textual order, headed by a {@code CoreferenceChain}.
 *
 * @param aJCas the CAS to read {@code ROOT}/{@code Token} annotations from and to add the
 *            coreference annotations to
 * @throws AnalysisEngineProcessException wrapping any exception raised while arranging the
 *             document or running the coref system
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
    // it with PRN to avoid NPEs. The override adds no state of its own, so a single factory
    // is created once and reused for every sentence.
    TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
        @Override
        public Tree newTreeNode(String aParent, List<Tree> aChildren) {
            return super.newTreeNode("PRN0".equals(aParent) ? "PRN" : aParent, aChildren);
        }
    };

    // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
    // We currently do not copy over dependencies from the CAS. The grammatical structure
    // factory is used below to fill in the dependencies so we do not get NPEs. It is built
    // only from values that do not vary between sentences, hence it is created once here.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
            tlp.typedDependencyHeadFinder());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        // Remember the originating ROOT annotation on the sentence
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // Derive the dependency annotations for this sentence from the copied tree
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        // Copy each vertex's value into its word field
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        // Boundary of the analysis engine: report any failure through the UIMA exception type,
        // keeping the original exception as the cause.
        throw new AnalysisEngineProcessException(e);
    }

    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            // NOTE(review): the -1 / -2 offsets suggest sentNum and startIndex are 1-based and
            // endIndex is exclusive -- confirm against the CorefMention documentation.
            List<CoreLabel> mentionTokens = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class);
            CoreLabel beginLabel = mentionTokens.get(mention.startIndex - 1);
            CoreLabel endLabel = mentionTokens.get(mention.endIndex - 2);

            // The link spans from the first token's begin offset to the last token's end offset
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}