List of usage examples for the method edu.stanford.nlp.pipeline.ParserAnnotatorUtils#fillInParseAnnotations.
public static void fillInParseAnnotations(boolean verbose, boolean buildGraphs, GrammaticalStructureFactory gsf, CoreMap sentence, List<Tree> trees, GrammaticalStructure.Extras extras)
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java
License:Open Source License
@Override public void process(JCas aJCas) throws AnalysisEngineProcessException { modelProvider.configure(aJCas.getCas()); List<Tree> trees = new ArrayList<Tree>(); List<CoreMap> sentences = new ArrayList<CoreMap>(); List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>(); for (ROOT root : select(aJCas, ROOT.class)) { // Copy all relevant information from the tokens List<CoreLabel> tokens = new ArrayList<CoreLabel>(); for (Token token : selectCovered(Token.class, root)) { tokens.add(tokenToWord(token)); }/*w ww.j a v a 2s. c o m*/ sentenceTokens.add(tokens); // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace // it with PRN to avoid NPEs. TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) { @Override public Tree newTreeNode(String aParent, List<Tree> aChildren) { String parent = aParent; if ("PRN0".equals(parent)) { parent = "PRN"; } Tree node = super.newTreeNode(parent, aChildren); return node; } }; // deep copy of the tree. These are modified inside coref! Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy(); treeCopy.indexSpans(); trees.add(treeCopy); // Build the sentence CoreMap sentence = new CoreLabel(); sentence.set(TreeAnnotation.class, treeCopy); sentence.set(TokensAnnotation.class, tokens); sentence.set(RootKey.class, root); sentences.add(sentence); // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590 // We currently do not copy over dependencies from the CAS. This is supposed to fill // in the dependencies so we do not get NPEs. 
TreebankLanguagePack tlp = new PennTreebankLanguagePack(); GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(), tlp.typedDependencyHeadFinder()); ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy, GrammaticalStructure.Extras.NONE); // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582 SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); for (IndexedWord vertex : deps.vertexSet()) { vertex.setWord(vertex.value()); } // merge the new CoreLabels with the tree leaves MentionExtractor.mergeLabels(treeCopy, tokens); MentionExtractor.initializeUtterance(tokens); } Annotation document = new Annotation(aJCas.getDocumentText()); document.set(SentencesAnnotation.class, sentences); Coreferencer coref = modelProvider.getResource(); // extract all possible mentions // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here // disables reparsing. 
RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false); List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0, coref.corefSystem.dictionaries()); // add the relevant info to mentions and order them for coref Map<Integer, CorefChain> result; try { Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions); result = coref.corefSystem.coref(doc); } catch (Exception e) { throw new AnalysisEngineProcessException(e); } for (CorefChain chain : result.values()) { CoreferenceLink last = null; for (CorefMention mention : chain.getMentionsInTextualOrder()) { CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class) .get(mention.startIndex - 1); CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class) .get(mention.endIndex - 2); CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(), endLabel.get(TokenKey.class).getEnd()); if (mention.mentionType != null) { link.setReferenceType(mention.mentionType.toString()); } if (last == null) { // This is the first mention. Here we'll initialize the chain CoreferenceChain corefChain = new CoreferenceChain(aJCas); corefChain.setFirst(link); corefChain.addToIndexes(); } else { // For the other mentions, we'll add them to the chain. last.setNext(link); } last = link; link.addToIndexes(); } } }
From source file:edu.jhu.hlt.concrete.stanford.ConcreteStanfordPreCorefAnalytic.java
License:Open Source License
@Override public TokenizedCommunication annotate(TokenizedCommunication arg0) throws AnalyticException { final Communication root = new Communication(arg0.getRoot()); if (!root.isSetText()) throw new AnalyticException("communication.text must be set to run this analytic."); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(root); AnalyticUUIDGenerator g = f.create(); final List<Section> sectList = root.getSectionList(); final String commText = root.getText(); List<CoreMap> allCoreMaps = new ArrayList<>(); // String noMarkup = MarkupRewriter.removeMarkup(commText); String noMarkup = commText;// w w w.j a v a 2s. c om sectList.forEach(sect -> { List<CoreMap> cmList = ConcreteToStanfordMapper.concreteSectionToCoreMapList(sect, commText); allCoreMaps.addAll(cmList); }); allCoreMaps.forEach(cm -> LOGGER.trace("Got CoreMap pre-coref: {}", cm.toShorterString(new String[0]))); Annotation anno = new Annotation(allCoreMaps); anno.set(TextAnnotation.class, noMarkup); // TODO: it's possible that fixNullDependencyGraphs needs to be called // before dcoref annotator is called. TB investigated further. for (String annotator : this.lang.getPostTokenizationAnnotators()) { LOGGER.debug("Running annotator: {}", annotator); (StanfordCoreNLP.getExistingAnnotator(annotator)).annotate(anno); } anno.get(SentencesAnnotation.class) .forEach(cm -> LOGGER.trace("Got CoreMaps post-coref: {}", cm.toShorterString(new String[0]))); // TODO: not sure if this is necessary - found it in the old code. 
anno.get(SentencesAnnotation.class).stream().filter(cm -> cm.containsKey(TreeAnnotation.class)) .forEach(cm -> { Tree tree = cm.get(TreeAnnotation.class); List<Tree> treeList = new ArrayList<>(); treeList.add(tree); this.lang.getGrammaticalFactory() .ifPresent(k -> ParserAnnotatorUtils.fillInParseAnnotations(false, true, k, cm, treeList.get(0), GrammaticalStructure.Extras.NONE)); }); anno.get(SentencesAnnotation.class) .forEach(cm -> LOGGER.trace("Got CoreMap post-fill-in: {}", cm.toShorterString(new String[0]))); List<Sentence> postSentences = annotationToSentenceList(anno, hf, arg0.getSentences(), g); postSentences.forEach(st -> LOGGER.trace("Got pre-coref sentence: {}", st.toString())); Map<TextSpan, Sentence> tsToSentenceMap = new HashMap<>(); postSentences.forEach(st -> tsToSentenceMap.put(st.getTextSpan(), st)); tsToSentenceMap.keySet().forEach(k -> LOGGER.trace("Got TextSpan key: {}", k.toString())); sectList.forEach(sect -> { List<Sentence> sentList = sect.getSentenceList(); sentList.forEach(st -> { TextSpan ts = st.getTextSpan(); LOGGER.debug("Trying to find span: {}", ts.toString()); if (tsToSentenceMap.containsKey(ts)) { Sentence newSent = tsToSentenceMap.get(ts); st.setTokenization(newSent.getTokenization()); } else { throw new RuntimeException("Didn't find sentence in the new sentences. Old sentence UUID: " + st.getUuid().getUuidString()); } }); }); try { // Coref. CorefManager coref = new CorefManager(new CachedTokenizationCommunication(root), anno); TokenizedCommunication tcWithCoref = coref.addCoreference(); return tcWithCoref; } catch (MiscommunicationException e) { throw new AnalyticException(e); } }