List of usage examples for edu.stanford.nlp.pipeline StanfordCoreNLP getExistingAnnotator
public static synchronized Annotator getExistingAnnotator(String name)
From source file:edu.jhu.hlt.concrete.stanford.ConcreteStanfordPreCorefAnalytic.java
License:Open Source License
@Override public TokenizedCommunication annotate(TokenizedCommunication arg0) throws AnalyticException { final Communication root = new Communication(arg0.getRoot()); if (!root.isSetText()) throw new AnalyticException("communication.text must be set to run this analytic."); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(root); AnalyticUUIDGenerator g = f.create(); final List<Section> sectList = root.getSectionList(); final String commText = root.getText(); List<CoreMap> allCoreMaps = new ArrayList<>(); // String noMarkup = MarkupRewriter.removeMarkup(commText); String noMarkup = commText;//w w w. j a v a 2 s . c om sectList.forEach(sect -> { List<CoreMap> cmList = ConcreteToStanfordMapper.concreteSectionToCoreMapList(sect, commText); allCoreMaps.addAll(cmList); }); allCoreMaps.forEach(cm -> LOGGER.trace("Got CoreMap pre-coref: {}", cm.toShorterString(new String[0]))); Annotation anno = new Annotation(allCoreMaps); anno.set(TextAnnotation.class, noMarkup); // TODO: it's possible that fixNullDependencyGraphs needs to be called // before dcoref annotator is called. TB investigated further. for (String annotator : this.lang.getPostTokenizationAnnotators()) { LOGGER.debug("Running annotator: {}", annotator); (StanfordCoreNLP.getExistingAnnotator(annotator)).annotate(anno); } anno.get(SentencesAnnotation.class) .forEach(cm -> LOGGER.trace("Got CoreMaps post-coref: {}", cm.toShorterString(new String[0]))); // TODO: not sure if this is necessary - found it in the old code. anno.get(SentencesAnnotation.class).stream().filter(cm -> cm.containsKey(TreeAnnotation.class)) .forEach(cm -> { Tree tree = cm.get(TreeAnnotation.class); List<Tree> treeList = new ArrayList<>(); treeList.add(tree); this.lang.getGrammaticalFactory() .ifPresent(k -> ParserAnnotatorUtils.fillInParseAnnotations(false, true, k, cm, treeList.get(0), GrammaticalStructure.Extras.NONE)); }); anno.get(SentencesAnnotation.class) .forEach(cm -> LOGGER.trace("Got CoreMap post-fill-in: {}", cm.toShorterString(new String[0]))); List<Sentence> postSentences = annotationToSentenceList(anno, hf, arg0.getSentences(), g); postSentences.forEach(st -> LOGGER.trace("Got pre-coref sentence: {}", st.toString())); Map<TextSpan, Sentence> tsToSentenceMap = new HashMap<>(); postSentences.forEach(st -> tsToSentenceMap.put(st.getTextSpan(), st)); tsToSentenceMap.keySet().forEach(k -> LOGGER.trace("Got TextSpan key: {}", k.toString())); sectList.forEach(sect -> { List<Sentence> sentList = sect.getSentenceList(); sentList.forEach(st -> { TextSpan ts = st.getTextSpan(); LOGGER.debug("Trying to find span: {}", ts.toString()); if (tsToSentenceMap.containsKey(ts)) { Sentence newSent = tsToSentenceMap.get(ts); st.setTokenization(newSent.getTokenization()); } else { throw new RuntimeException("Didn't find sentence in the new sentences. Old sentence UUID: " + st.getUuid().getUuidString()); } }); }); try { // Coref. CorefManager coref = new CorefManager(new CachedTokenizationCommunication(root), anno); TokenizedCommunication tcWithCoref = coref.addCoreference(); return tcWithCoref; } catch (MiscommunicationException e) { throw new AnalyticException(e); } }
From source file:edu.sabanciuniv.sentilab.utils.text.nlp.stanford.english.StanfordCoreNLPWrapper.java
License:Open Source License
private StanfordCoreNLPWrapper addAnnotators(String annotators) { String[] annotatorsArr = annotators.split(","); for (String annotator : annotatorsArr) { if (!this.currentAnnotators.contains(annotator)) { this.getPipeline().addAnnotator(StanfordCoreNLP.getExistingAnnotator(annotator.trim())); this.currentAnnotators.add(annotator); }// w ww .j a v a2 s. c o m } return this; }