List of usage examples for edu.stanford.nlp.pipeline Annotation keySet
@Override
public Set<Class<?>> keySet()
From source file: edu.jhu.hlt.concrete.stanford.ConcreteStanfordTokensSentenceAnalytic.java
License: Open Source License
@Override public TokenizedCommunication annotate(SectionedCommunication arg0) throws AnalyticException { final Communication cp = new Communication(arg0.getRoot()); if (!cp.isSetText()) throw new AnalyticException("communication.text must be set to run this analytic."); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(cp); AnalyticUUIDGenerator g = f.create(); List<Section> sList = arg0.getSections().stream() // temporary hack - filter out // any zero-length TextSpans. .filter(s -> {/*from w ww . j a v a 2s. c om*/ final TextSpan ts = s.getTextSpan(); return ts.getStart() != ts.getEnding(); }) // temporary hack - filter out any // TextSpans that contain only whitespace. .filter(s -> { final TextSpan ts = s.getTextSpan(); final int b = ts.getStart(); final int e = ts.getEnding(); String txt = cp.getText().substring(b, e); // that isn't enough, could get HTML encoded blank spaces. if (txt.contains(" ")) txt = StringEscapeUtils.unescapeHtml4(txt); String slim = txt.trim().replaceAll("\\p{Zs}", ""); return !slim.isEmpty(); }).collect(Collectors.toList()); final int newSize = sList.size(); final int oSize = arg0.getSections().size(); if (newSize < oSize) LOGGER.warn("Dropped {} section(s) because they were zero-length or contained only whitespace.", oSize - newSize); // for each section, run stanford tokenization and sentence splitting for (Section s : sList) { LOGGER.debug("Annotating section: {}", s.getUuid().getUuidString()); final TextSpan sts = s.getTextSpan(); final String sectTxt = cp.getText().substring(sts.getStart(), sts.getEnding()); // final String sectTxt = new SuperTextSpan(sts, cp).getText(); LOGGER.debug("Section text: {}", sectTxt); final Annotation sectAnnotation = new Annotation(sectTxt); LOGGER.debug("Got annotation keys:"); sectAnnotation.keySet().forEach(k -> LOGGER.debug("{}", k)); this.pipeline.annotate(sectAnnotation); LOGGER.trace("Post annotation annotation keys:"); sectAnnotation.keySet().forEach(k -> LOGGER.trace("{}", k)); 
List<CoreLabel> tokensOnly = sectAnnotation.get(TokensAnnotation.class); tokensOnly.forEach( cl -> LOGGER.trace("Got non-sent Stanford token: {}", cl.toShorterString(new String[0]))); // LOGGER.debug("Got first sentence text annotation: {}", sectAnnotation.get(SentencesAnnotation.class).get(0).get(TextAnnotation.class)); List<Sentence> stList = annotationToSentenceList(sectAnnotation, sts.getStart(), g); s.setSentenceList(stList); } cp.setSectionList(sList); try { return new CachedTokenizationCommunication(cp); } catch (MiscommunicationException e) { throw new AnalyticException(e); } }
From source file: nlp.prototype.NewJFrame.java
private void jButton1MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jButton1MouseClicked DefaultTreeModel model2 = (DefaultTreeModel) jTree2.getModel(); DefaultMutableTreeNode rootNode2 = new DefaultMutableTreeNode("top"); model2.setRoot(rootNode2);/* www .j a va 2 s . c o m*/ /*TextCorpus textCorpus = processor.parseCorpus(jTextArea1.getText()); for (SentenceToken token : textCorpus.getSentences()) { DefaultMutableTreeNode sentenceTokenNode = new DefaultMutableTreeNode(); sentenceTokenNode.setUserObject(token); rootNode2.add(sentenceTokenNode); addNodes(token, sentenceTokenNode); } DefaultTokenSerializer serializer = new DefaultTokenSerializer(); Document xmlDocument = serializer.serialize(textCorpus); jTextArea4.setText(serializer.transform(xmlDocument)); jTextArea7.setText(serializer.transform(xmlDocument, this.jTextArea6.getText()));*/ Annotation document = new Annotation(jTextArea1.getText()); pipeline.annotate(document); List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class); Map<Integer, CorefChain> corefMap = document.get(CorefChainAnnotation.class); List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class); DefaultListModel listModel = new DefaultListModel(); for (Class key : document.keySet()) { Object value = document.get(key); if (value != null && value.toString() != null && !value.toString().isEmpty()) { listModel.addElement(key.toString() + " - [" + value.toString() + "]"); } } DefaultTreeModel model = (DefaultTreeModel) jTree1.getModel(); DefaultMutableTreeNode rootNode = new DefaultMutableTreeNode("top"); model.setRoot(rootNode); List<POSToken> tokenList = new ArrayList<>(); jList1.setModel(listModel); for (CoreMap sentence : sentences) { Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class); String root = graph.getFirstRoot().originalText(); MultiValuedMap<String, 
GrammarToken> map = new HashSetValuedHashMap<>(); for (SemanticGraphEdge edge : graph.edgeIterable()) { GrammarToken grammarToken = new GrammarToken(edge); map.put(grammarToken.getTarget(), grammarToken); } DefaultMutableTreeNode node = new DefaultMutableTreeNode(); POSToken token = new POSToken((CoreLabel) tree.label()); token.setGrammar(graph.toString()); node.setUserObject(token); rootNode.add(node); addNodes(tree, false, node, node, map, root, corefMap, tokens); tokenList.add(token); } setAdjacentNodes(tokenList); }