Example usage for edu.stanford.nlp.pipeline Annotation keySet

List of usage examples for edu.stanford.nlp.pipeline Annotation keySet

Introduction

On this page you can find example usages of edu.stanford.nlp.pipeline.Annotation.keySet().

Prototype

@Override
public Set<Class<?>> keySet() 

Source Link

Usage

From source file: edu.jhu.hlt.concrete.stanford.ConcreteStanfordTokensSentenceAnalytic.java

License: Open Source License

/**
 * Runs the Stanford pipeline over each usable section of a communication and
 * attaches the resulting sentence list to that section.
 *
 * <p>Sections are skipped (and a warning is logged with the number dropped) when
 * their text span is zero-length or when the text they cover consists only of
 * whitespace, including Unicode space separators and HTML-encoded
 * non-breaking spaces.
 *
 * @param arg0 the sectioned communication to process; the text of its root
 *             communication must be set
 * @return a {@code CachedTokenizationCommunication} wrapping a communication built
 *         from {@code arg0}'s root, whose section list holds only the retained sections
 * @throws AnalyticException if the communication text is not set, or if wrapping
 *                           the result raises a {@code MiscommunicationException}
 */
@Override
public TokenizedCommunication annotate(SectionedCommunication arg0) throws AnalyticException {
    final Communication cp = new Communication(arg0.getRoot());
    if (!cp.isSetText()) {
        throw new AnalyticException("communication.text must be set to run this analytic.");
    }
    AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(cp);
    AnalyticUUIDGenerator g = f.create();
    List<Section> sList = arg0.getSections().stream()
            // temporary hack - filter out
            // any zero-length TextSpans.
            .filter(s -> {
                final TextSpan ts = s.getTextSpan();
                return ts.getStart() != ts.getEnding();
            })
            // temporary hack - filter out any
            // TextSpans that contain only whitespace.
            .filter(s -> {
                final TextSpan ts = s.getTextSpan();
                final int b = ts.getStart();
                final int e = ts.getEnding();
                String txt = cp.getText().substring(b, e);
                // that isn't enough, could get HTML encoded blank spaces.
                if (txt.contains("&nbsp")) {
                    txt = StringEscapeUtils.unescapeHtml4(txt);
                }

                // Remove Unicode space separators BEFORE trimming: String.trim() only
                // strips characters <= U+0020, so trimming first would leave text such
                // as "\u00A0\n\u00A0" reduced to "\n" and wrongly treated as content.
                String slim = txt.replaceAll("\\p{Zs}", "").trim();
                return !slim.isEmpty();
            }).collect(Collectors.toList());
    final int newSize = sList.size();
    final int oSize = arg0.getSections().size();
    if (newSize < oSize) {
        LOGGER.warn("Dropped {} section(s) because they were zero-length or contained only whitespace.",
                oSize - newSize);
    }
    // for each section, run stanford tokenization and sentence splitting
    for (Section s : sList) {
        LOGGER.debug("Annotating section: {}", s.getUuid().getUuidString());
        final TextSpan sts = s.getTextSpan();
        final String sectTxt = cp.getText().substring(sts.getStart(), sts.getEnding());
        LOGGER.debug("Section text: {}", sectTxt);
        final Annotation sectAnnotation = new Annotation(sectTxt);
        // Log which annotation keys exist before and after the pipeline runs.
        LOGGER.debug("Got annotation keys:");
        sectAnnotation.keySet().forEach(k -> LOGGER.debug("{}", k));
        this.pipeline.annotate(sectAnnotation);
        LOGGER.trace("Post annotation annotation keys:");
        sectAnnotation.keySet().forEach(k -> LOGGER.trace("{}", k));

        List<CoreLabel> tokensOnly = sectAnnotation.get(TokensAnnotation.class);
        tokensOnly.forEach(
                cl -> LOGGER.trace("Got non-sent Stanford token: {}", cl.toShorterString(new String[0])));
        // The section's start offset is passed along with the annotation when
        // building the sentence list.
        List<Sentence> stList = annotationToSentenceList(sectAnnotation, sts.getStart(), g);
        s.setSentenceList(stList);
    }

    cp.setSectionList(sList);
    try {
        return new CachedTokenizationCommunication(cp);
    } catch (MiscommunicationException e) {
        throw new AnalyticException(e);
    }
}

From source file: nlp.prototype.NewJFrame.java

/**
 * Click handler for {@code jButton1}.
 *
 * <p>Resets both trees to a fresh "top" root, annotates the text of
 * {@code jTextArea1} with the pipeline, lists every non-empty annotation
 * (key and value) in {@code jList1}, and adds one node per sentence to the
 * first tree before linking the collected tokens via {@code setAdjacentNodes}.
 *
 * @param evt the mouse event that triggered the handler (not used)
 */
private void jButton1MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jButton1MouseClicked

    // Reset the second tree to an empty "top" root.
    DefaultTreeModel secondTreeModel = (DefaultTreeModel) jTree2.getModel();
    secondTreeModel.setRoot(new DefaultMutableTreeNode("top"));

    // Annotate the text currently in the input area.
    Annotation document = new Annotation(jTextArea1.getText());
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    Map<Integer, CorefChain> corefMap = document.get(CorefChainAnnotation.class);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);

    // Collect "key - [value]" entries for every annotation with a non-empty rendering.
    DefaultListModel annotationEntries = new DefaultListModel();
    for (Class annotationKey : document.keySet()) {
        Object annotationValue = document.get(annotationKey);
        if (annotationValue == null) {
            continue;
        }
        String rendered = annotationValue.toString();
        if (rendered == null || rendered.isEmpty()) {
            continue;
        }
        annotationEntries.addElement(annotationKey.toString() + " - [" + rendered + "]");
    }

    // Reset the first tree; sentence nodes are attached under this root below.
    DefaultMutableTreeNode sentenceRoot = new DefaultMutableTreeNode("top");
    ((DefaultTreeModel) jTree1.getModel()).setRoot(sentenceRoot);

    jList1.setModel(annotationEntries);

    List<POSToken> sentenceTokens = new ArrayList<>();
    for (CoreMap sentence : sentences) {
        Tree parseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        SemanticGraph dependencies = sentence
                .get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        String rootWord = dependencies.getFirstRoot().originalText();

        // Group the dependency edges by the target reported by each GrammarToken.
        MultiValuedMap<String, GrammarToken> grammarByTarget = new HashSetValuedHashMap<>();
        dependencies.edgeIterable().forEach(edge -> {
            GrammarToken grammar = new GrammarToken(edge);
            grammarByTarget.put(grammar.getTarget(), grammar);
        });

        POSToken sentenceToken = new POSToken((CoreLabel) parseTree.label());
        sentenceToken.setGrammar(dependencies.toString());

        DefaultMutableTreeNode sentenceNode = new DefaultMutableTreeNode();
        sentenceNode.setUserObject(sentenceToken);
        sentenceRoot.add(sentenceNode);

        addNodes(parseTree, false, sentenceNode, sentenceNode, grammarByTarget, rootWord, corefMap, tokens);
        sentenceTokens.add(sentenceToken);
    }

    setAdjacentNodes(sentenceTokens);
}