Example usage for edu.stanford.nlp.pipeline ParserAnnotatorUtils fillInParseAnnotations

List of usage examples for edu.stanford.nlp.pipeline ParserAnnotatorUtils fillInParseAnnotations

Introduction

In this page you can find the example usage for edu.stanford.nlp.pipeline ParserAnnotatorUtils fillInParseAnnotations.

Prototype

public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs, GrammaticalStructureFactory gsf,
        CoreMap sentence, List<Tree> trees, GrammaticalStructure.Extras extras) 

Source Link

Document

Put the tree in the CoreMap for the sentence, also add any dependency graphs to the sentence, and fill in missing tag annotations.

Usage

From source file: de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License: Open Source License

@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();

    // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
    // We currently do not copy over dependencies from the CAS; the factory below is
    // used to fill in dependency graphs so coref does not hit NPEs. It is
    // loop-invariant, so build it once instead of once per sentence.
    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
            tlp.typedDependencyHeadFinder());

    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // fillInParseAnnotations expects a List<Tree> (one tree per sentence of the
        // CoreMap), not a bare Tree — wrap the single tree accordingly.
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence,
                java.util.Collections.singletonList(treeCopy), GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Convert each coref chain into UIMA CoreferenceLink annotations. Mention indices
    // from CoreNLP are 1-based; endIndex points one past the last token, hence the -2.
    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file: edu.jhu.hlt.concrete.stanford.ConcreteStanfordPreCorefAnalytic.java

License: Open Source License

@Override
public TokenizedCommunication annotate(TokenizedCommunication arg0) throws AnalyticException {
    final Communication root = new Communication(arg0.getRoot());
    if (!root.isSetText())
        throw new AnalyticException("communication.text must be set to run this analytic.");
    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(root);
    AnalyticUUIDGenerator g = f.create();
    final List<Section> sectList = root.getSectionList();
    final String commText = root.getText();

    // Convert every Concrete section into CoreNLP CoreMaps over the raw text.
    List<CoreMap> allCoreMaps = new ArrayList<>();
    // String noMarkup = MarkupRewriter.removeMarkup(commText);
    String noMarkup = commText;
    sectList.forEach(sect -> {
        List<CoreMap> cmList = ConcreteToStanfordMapper.concreteSectionToCoreMapList(sect, commText);
        allCoreMaps.addAll(cmList);
    });

    allCoreMaps.forEach(cm -> LOGGER.trace("Got CoreMap pre-coref: {}", cm.toShorterString(new String[0])));
    Annotation anno = new Annotation(allCoreMaps);
    anno.set(TextAnnotation.class, noMarkup);

    // TODO: it's possible that fixNullDependencyGraphs needs to be called
    // before dcoref annotator is called. To be investigated further.
    for (String annotator : this.lang.getPostTokenizationAnnotators()) {
        LOGGER.debug("Running annotator: {}", annotator);
        (StanfordCoreNLP.getExistingAnnotator(annotator)).annotate(anno);
    }

    anno.get(SentencesAnnotation.class)
            .forEach(cm -> LOGGER.trace("Got CoreMaps post-coref: {}", cm.toShorterString(new String[0])));
    // TODO: not sure if this is necessary - found it in the old code.
    // fillInParseAnnotations expects the List<Tree> itself (one tree per sentence),
    // not a single Tree pulled out of it.
    anno.get(SentencesAnnotation.class).stream().filter(cm -> cm.containsKey(TreeAnnotation.class))
            .forEach(cm -> {
                Tree tree = cm.get(TreeAnnotation.class);
                List<Tree> treeList = new ArrayList<>();
                treeList.add(tree);
                this.lang.getGrammaticalFactory()
                        .ifPresent(k -> ParserAnnotatorUtils.fillInParseAnnotations(false, true, k, cm,
                                treeList, GrammaticalStructure.Extras.NONE));
            });

    anno.get(SentencesAnnotation.class)
            .forEach(cm -> LOGGER.trace("Got CoreMap post-fill-in: {}", cm.toShorterString(new String[0])));
    List<Sentence> postSentences = annotationToSentenceList(anno, hf, arg0.getSentences(), g);
    postSentences.forEach(st -> LOGGER.trace("Got pre-coref sentence: {}", st.toString()));

    // Re-attach the freshly produced tokenizations to the original sentences by
    // matching on their text spans.
    Map<TextSpan, Sentence> tsToSentenceMap = new HashMap<>();
    postSentences.forEach(st -> tsToSentenceMap.put(st.getTextSpan(), st));
    tsToSentenceMap.keySet().forEach(k -> LOGGER.trace("Got TextSpan key: {}", k.toString()));

    sectList.forEach(sect -> {
        List<Sentence> sentList = sect.getSentenceList();
        sentList.forEach(st -> {
            TextSpan ts = st.getTextSpan();
            LOGGER.debug("Trying to find span: {}", ts.toString());
            if (tsToSentenceMap.containsKey(ts)) {
                Sentence newSent = tsToSentenceMap.get(ts);
                st.setTokenization(newSent.getTokenization());
            } else {
                throw new RuntimeException("Didn't find sentence in the new sentences. Old sentence UUID: "
                        + st.getUuid().getUuidString());
            }
        });
    });

    try {
        // Coref.
        CorefManager coref = new CorefManager(new CachedTokenizationCommunication(root), anno);
        TokenizedCommunication tcWithCoref = coref.addCoreference();
        return tcWithCoref;
    } catch (MiscommunicationException e) {
        throw new AnalyticException(e);
    }
}