Example usage for edu.stanford.nlp.semgraph SemanticGraph edgeIterable

Introduction

This page collects example usages of the edgeIterable() method of edu.stanford.nlp.semgraph.SemanticGraph.

Prototype

public Iterable<SemanticGraphEdge> edgeIterable()

Source Link

Usage

From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License:Open Source License

/**
 * Runs the Stanford pipeline held in {@code this.processor} over the CAS document text and
 * copies the results into UIMA annotations.
 *
 * <p>In order, the method:
 * <ol>
 *   <li>creates one {@code Token} per Stanford token (with POS tag and lemma) and merges runs of
 *       consecutive tokens sharing the same non-"O" named-entity tag into
 *       {@code NamedEntityMention}s;</li>
 *   <li>creates one {@code Sentence} per Stanford sentence, hands its parse tree to
 *       {@code addTreebankNodeToIndexes}, and converts its collapsed, CC-processed dependency
 *       graph into {@code DependencyNode}/{@code DependencyRelation} annotations;</li>
 *   <li>groups mentions into {@code NamedEntity} objects, first from the coreference chains and
 *       then as singletons for every mention that no chain claimed;</li>
 *   <li>adds the entities to the indexes, ordered by the begin offset of their earliest
 *       mention.</li>
 * </ol>
 *
 * @param jCas the CAS whose document text is analysed and to which all annotations are added
 * @throws AnalysisEngineProcessException declared by the signature; nothing in this body throws
 *         it directly
 * @throws RuntimeException if a sentence's parse tree does not have exactly one child under its
 *         root, or if a dependency edge refers to a node that has no UIMA counterpart
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // State of the named-entity run currently being accumulated. "O" means "not in an entity".
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        // NOTE(review): neTag is dereferenced without a null check; presumably the pipeline
        // always includes the NER annotator - confirm.
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // An entity run just ended on the previous token: emit it.
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // Either a new run starts here, or both tags are "O"; in the latter case the
                // offsets are overwritten again before they are ever used.
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // The tag changed from one entity type to another: close the previous run and
                // start a new one at this token.
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            // Extend the current run to the end of this token.
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // Flush an entity run that reaches the last token of the document.
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // Drop the wrapper node and work with its only child from here on.
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        // addTreebankNodeToIndexes is defined elsewhere in the class; presumably it fills in
        // root and its descendants from the tree and indexes them - confirm.
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            // The begin/end index annotations are used as positions into this sentence's token
            // list, with the end index treated as exclusive (hence the "- 1").
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            // Nodes are only added to the indexes later, once their relation arrays are set.
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        // headRelations: node -> relations in which the node is the child (i.e. its heads).
        // childRelations: node -> relations in which the node is the head (i.e. its children).
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        // NOTE(review): Guava documents Multimap.get as returning an empty collection rather
        // than null, so the null branches below look unreachable - confirm before simplifying.
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    // Tokens of each sentence, in the order JCasUtil.select returns the sentences; the coref
    // sentence numbers below index into this list.
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    // NOTE(review): corefChains is iterated without a null check; presumably the pipeline
    // always includes the coreference annotator - confirm.
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // sentNum and startIndex are treated as 1-based and endIndex as 1-based exclusive
            // (hence "- 1" and "- 2") - TODO confirm against the CorefMention documentation.
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        // Mentions are ordered by begin offset before being stored on the entity.
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            // NOTE(review): the result of this getter is discarded; looks like a leftover.
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // Smallest begin offset among the entity's mentions; Integer.MAX_VALUE if it has none.
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java

License:Open Source License

/**
 * Runs the Stanford pipeline held in {@code this.processor} over the CAS document text and
 * copies the results into UIMA annotations.
 *
 * <p>The work happens in four phases: (1) one {@code Token} per Stanford token, with runs of
 * identically tagged non-"O" tokens merged into {@code NamedEntityMention}s; (2) one
 * {@code Sentence} per Stanford sentence, plus its parse tree (via
 * {@code addTreebankNodeToIndexes}) and its collapsed, CC-processed dependency graph as
 * {@code DependencyNode}/{@code DependencyRelation} annotations; (3) {@code NamedEntity}
 * objects built from the coreference chains, followed by singleton entities for mentions that
 * no chain claimed; (4) indexing of the entities in order of their earliest mention.
 *
 * @param jCas the CAS whose document text is analysed and to which all annotations are added
 * @throws AnalysisEngineProcessException declared by the signature; nothing in this body throws
 *         it directly
 * @throws RuntimeException if a sentence's parse tree does not have exactly one child under its
 *         root, or if a dependency edge refers to a node that has no UIMA counterpart
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // Tag and character span of the entity run being accumulated ("O" = outside any entity).
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        // NOTE(review): neTag is used without a null check; presumably NER always runs in
        // this pipeline - confirm.
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // The previous token closed an entity run: emit the accumulated mention.
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // Start of a run (or two "O" tokens in a row, where the value is never used).
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // Entity type switched without an "O" in between: emit the old run and start
                // a new one at this token.
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            // The run now extends to the end of this token.
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // Emit a run that is still open after the final token.
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // Continue with the single child instead of the wrapper node.
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        // Helper defined elsewhere in the class; presumably converts the tree into treebank
        // node annotations under root and indexes them - confirm.
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            // Begin/end indexes are used as positions in this sentence's token list; the end
            // index is treated as exclusive, hence the "- 1".
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            // Indexing of the node is deferred until its relation arrays have been filled in.
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        // headRelations is keyed by the child of each relation, childRelations by its head.
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        // NOTE(review): Guava documents Multimap.get as never returning null (it returns an
        // empty collection), so the null handling below is likely redundant - confirm.
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    // Per-sentence token lists, in the order JCasUtil.select yields the sentences; coref
    // sentence numbers are resolved against this list.
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    // NOTE(review): no null check on corefChains; presumably the coreference annotator is
    // always part of the pipeline - confirm.
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // The "- 1" / "- 2" adjustments treat sentNum and startIndex as 1-based and
            // endIndex as 1-based exclusive - TODO confirm against the CorefMention docs.
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        // Order the chain's mentions by begin offset first.
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            // NOTE(review): return value is ignored; this call looks like a leftover.
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // Earliest begin offset over the entity's mentions; Integer.MAX_VALUE when it has none.
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.nus.comp.nlp.stanford.UtilParser.java

License:Open Source License

/**
 * Converts the part of a dependency graph reachable from {@code root} into a Swing tree.
 *
 * <p>Each node's user object is the word's value. When some edge of the graph has the word as
 * its dependent, the value is prefixed with {@code "<-RELATION- "} using the first such edge in
 * {@code edgeIterable()} order.
 *
 * <p>NOTE(review): children are visited recursively without tracking visited words, so a graph
 * containing a cycle would recurse without bound - confirm callers only pass tree-shaped graphs.
 *
 * @param root         the word to start from, or {@code null} to start at the graph's first root
 * @param dependencies the dependency graph to walk
 * @return the tree node for {@code root}, with one child subtree per entry of
 *         {@code dependencies.getChildList(root)}
 */
public static DefaultMutableTreeNode toDMTree(IndexedWord root, SemanticGraph dependencies) {
    // Only consult the graph for a root when the caller did not supply one.
    final IndexedWord word = (root != null) ? root : dependencies.getFirstRoot();

    String label = word.value();
    for (SemanticGraphEdge incoming : dependencies.edgeIterable()) {
        if (!incoming.getDependent().equals(word)) {
            continue;
        }
        // Only the first incoming edge contributes to the label.
        label = "<-" + incoming.getRelation() + "- " + label;
        break;
    }

    final DefaultMutableTreeNode treeNode = new DefaultMutableTreeNode(label);
    for (IndexedWord child : dependencies.getChildList(word)) {
        treeNode.add(toDMTree(child, dependencies));
    }
    return treeNode;
}

From source file:nlp.prototype.NewJFrame.java

private void jButton1MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jButton1MouseClicked
    // Click handler: annotates the text in jTextArea1 with the CoreNLP pipeline, lists every
    // non-empty document-level annotation in jList1 and rebuilds jTree1 with one subtree per
    // sentence. jTree2 is reset to a bare "top" root.

    DefaultTreeModel secondTreeModel = (DefaultTreeModel) jTree2.getModel();
    secondTreeModel.setRoot(new DefaultMutableTreeNode("top"));

    Annotation annotated = new Annotation(jTextArea1.getText());
    pipeline.annotate(annotated);
    List<CoreMap> sentenceList = annotated.get(CoreAnnotations.SentencesAnnotation.class);
    Map<Integer, CorefChain> corefChains = annotated.get(CorefChainAnnotation.class);
    List<CoreLabel> allTokens = annotated.get(CoreAnnotations.TokensAnnotation.class);

    // One list entry per annotation key whose value renders to a non-empty string.
    // The raw Class type is deliberate: it is what lets the key be passed straight to get().
    DefaultListModel annotationEntries = new DefaultListModel();
    for (Class annotationKey : annotated.keySet()) {
        Object annotationValue = annotated.get(annotationKey);
        if (annotationValue == null) {
            continue;
        }
        String rendered = annotationValue.toString();
        if (rendered == null || rendered.isEmpty()) {
            continue;
        }
        annotationEntries.addElement(annotationKey.toString() + " - [" + rendered + "]");
    }

    DefaultTreeModel firstTreeModel = (DefaultTreeModel) jTree1.getModel();
    DefaultMutableTreeNode parseRoot = new DefaultMutableTreeNode("top");
    firstTreeModel.setRoot(parseRoot);

    List<POSToken> posTokens = new ArrayList<>();

    jList1.setModel(annotationEntries);

    for (CoreMap sentence : sentenceList) {
        Tree parse = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        SemanticGraph basicDeps = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        String rootText = basicDeps.getFirstRoot().originalText();

        // Group the grammar tokens built from the dependency edges by their target.
        MultiValuedMap<String, GrammarToken> grammarByTarget = new HashSetValuedHashMap<>();
        for (SemanticGraphEdge edge : basicDeps.edgeIterable()) {
            GrammarToken grammarToken = new GrammarToken(edge);
            grammarByTarget.put(grammarToken.getTarget(), grammarToken);
        }

        // The sentence-level node carries the parse root's label plus the printed graph.
        POSToken sentenceToken = new POSToken((CoreLabel) parse.label());
        sentenceToken.setGrammar(basicDeps.toString());
        DefaultMutableTreeNode sentenceNode = new DefaultMutableTreeNode(sentenceToken);
        parseRoot.add(sentenceNode);
        addNodes(parse, false, sentenceNode, sentenceNode, grammarByTarget, rootText, corefChains, allTokens);
        posTokens.add(sentenceToken);
    }

    setAdjacentNodes(posTokens);
}

From source file:nlp.service.implementation.DefaultGrammarService.java

public DefaultGrammarService(SemanticGraph graph) {
    targetMap = new HashSetValuedHashMap<>();

    rootIndex = graph.getFirstRoot().index();

    for (SemanticGraphEdge edge : graph.edgeIterable()) {

        GrammaticalDependency dependency;

        try {//from ww  w  .  ja v a  2 s .co  m
            String relation = edge.getRelation().toString();
            if (relation.contains(":")) {
                relation = relation.substring(relation.indexOf(':') + 1, relation.length());
            }

            if (relation.equals("case")) {
                dependency = GrammaticalDependency.casemarker;
            } else {
                dependency = GrammaticalDependency.valueOf(relation);
            }
        } catch (IllegalArgumentException e) {
            dependency = GrammaticalDependency.unknown;
        }

        GrammaticalRelation<Integer> relation = new GrammaticalRelation<>(dependency, edge.getTarget().index(),
                edge.getSource().index());
        targetMap.put(relation.getTarget(), relation);
    }
}

From source file:org.nlp2rdf.implementation.stanfordcorenlp.StanfordWrapper.java

License:Apache License

/**
 * Annotates the text of a NIF context with Stanford CoreNLP and writes the results into
 * {@code outputModel}.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>read the context string from {@code inputModel} and run the annotator built by
 *       {@code buildAnnotator};</li>
 *   <li>collect sentence and word spans and let {@code Text2RDF.generateNIFModel} create the
 *       basic NIF structure;</li>
 *   <li>for every token whose individual exists in the model, add its lemma and the OLiA links
 *       and categories for its POS tag;</li>
 *   <li>for every edge of each sentence's collapsed, CC-processed dependency graph, add an
 *       object property from the governor individual to the dependent individual.</li>
 * </ol>
 * Problems that do not abort processing are recorded in {@code outputModel} as RLOG entries
 * (and, for a missing word individual, through {@code log}).
 *
 * @param context       the NIF context individual carrying the text
 * @param inputModel    model from which the context string is read
 * @param outputModel   model that receives all generated statements and log entries
 * @param nifParameters supplies the URI prefix, URI scheme, log prefix and annotator settings
 */
public void process(Individual context, OntModel inputModel, OntModel outputModel,
        NIFParameters nifParameters) {
    String contextString = context
            .getPropertyValue(NIFDatatypeProperties.isString.getDatatypeProperty(inputModel)).asLiteral()
            .getString();
    String prefix = nifParameters.getPrefix();
    URIScheme urischeme = nifParameters.getUriScheme();

    Annotator pipeline = buildAnnotator(nifParameters);

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(contextString);
    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    //get all the sentences and words and read it in an intermediate structure
    //NOTE: this can be greatly optimized of course
    // for now it is just simple and cheap to implement it like this
    int wordCount = 0;
    TreeMap<Span, List<Span>> tokenizedText = new TreeMap<Span, List<Span>>();
    for (CoreMap sentence : sentences) {
        Span sentenceSpan = new Span(sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        List<Span> wordSpans = new ArrayList<Span>();
        for (CoreLabel coreLabel : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            wordSpans.add(new Span(coreLabel.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    coreLabel.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)));
            wordCount++;
        }
        tokenizedText.put(sentenceSpan, wordSpans);
    }

    // Basic model setup: create the sentence/word structure and log what was created.
    Text2RDF text2RDF = new Text2RDF();
    text2RDF.generateNIFModel(prefix, context, urischeme, outputModel, tokenizedText);
    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(),
            "Finished creating " + tokenizedText.size() + " sentence(s) with " + wordCount + " word(s) ",
            RLOGIndividuals.DEBUG, this.getClass().getCanonicalName(), null, null));

    // traversing the words in the current sentence
    // a CoreLabel is a CoreMap with additional token-specific methods
    for (CoreMap sentence : sentences) {

        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Span wordSpan = new Span(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            //the word should exist already
            Individual wordIndividual = outputModel
                    .getIndividual(urischeme.generate(prefix, contextString, wordSpan));

            if (wordIndividual == null) {
                log.error("SKIPPING: word was not found in the model: "
                        + urischeme.generate(prefix, contextString, wordSpan));
                continue;
            }

            // Lemma
            if (token.get(CoreAnnotations.LemmaAnnotation.class) != null) {
                wordIndividual.addProperty(NIFDatatypeProperties.lemma.getDatatypeProperty(outputModel),
                        token.get(CoreAnnotations.LemmaAnnotation.class), XSDDatatype.XSDstring);
            }

            // POS tag
            outputModel.setNsPrefix("olia", "http://purl.org/olia/olia.owl#");
            // this is the POS tag of the token
            String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);

            List<String> oliaIndividual = (List<String>) Penn.hasTag.get(posTag);
            if (oliaIndividual != null) {

                for (String s : oliaIndividual) {
                    wordIndividual.addProperty(NIFObjectProperties.oliaLink.getObjectProperty(outputModel),
                            outputModel.createIndividual(s, OWL.Thing));
                    List<String> pennlinks = (List<String>) Penn.links.get(s);
                    if (pennlinks != null) {
                        for (String oc : pennlinks) {
                            wordIndividual.addProperty(
                                    NIFAnnotationProperties.oliaCategory.getAnnotationProperty(outputModel),
                                    outputModel.createClass(oc));
                        }
                    } else {
                        outputModel.add(
                                RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "missing oliaLinks for " + s,
                                        RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    }
                }
            } else {
                outputModel.add(
                        RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "missing oliaLinks for " + posTag,
                                RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));

            }
        }

        SemanticGraph dependencies = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            //time to add the prefix
            StanfordSimple.addStanfordSimplePrefix(outputModel);

            // create relation annotations for each Stanford dependency
            for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {

                Span govSpan = new Span(
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                Span depSpan = new Span(
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));

                // One URI yields a plain property; two URIs yield a property plus a sub-property.
                String[] edgeURIs = StanfordSimple.getURIforEdgeLabel(stanfordEdge.getRelation().toString());
                ObjectProperty relation = null;
                switch (edgeURIs.length) {
                case 1:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);

                    break;
                case 2:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    relation.addSubProperty(outputModel.createObjectProperty(edgeURIs[1]));
                    break;
                default:
                    // Render the array contents; concatenating the array itself would only
                    // print its type and identity hash.
                    String message = "Empty edge label, no URI written: "
                            + java.util.Arrays.toString(edgeURIs);
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;

                }

                Individual gov = text2RDF.createCStringIndividual(prefix, context, govSpan, urischeme,
                        outputModel);
                Individual dep = text2RDF.createCStringIndividual(prefix, context, depSpan, urischeme,
                        outputModel);

                // Validate both ends before touching them: the check has to precede
                // gov.addProperty, otherwise a null governor fails before it is ever reported.
                if (gov == null || dep == null) {
                    String message = "SKIPPING Either gov or dep was null for the dependencies\n" + "gov: "
                            + gov + "\ndep: " + dep;
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;
                }

                gov.addProperty(relation, dep);
                relation.addSuperProperty(NIFObjectProperties.inter.getObjectProperty(outputModel));
                relation.addSuperProperty(NIFObjectProperties.dependency.getObjectProperty(outputModel));

            }
        } // end of dependency handling for this sentence

    }

}

From source file:org.sam_agent.csparser.ContinuousParser.java

License:Open Source License

/**
 * Serializes the edges of a dependency graph as a JSON member of the form
 * {@code "dependencies":[{"rel":"...","arg0":"word-index","arg1":"word-index"},...]}.
 *
 * <p>{@code arg0} is built from the governor of each edge and {@code arg1} from the
 * dependent. The result is a bare {@code "name":value} pair, not a complete JSON
 * object; it carries no enclosing braces.
 *
 * @param dependencies the graph whose edges are serialized
 * @return the {@code "dependencies"} member, with one JSON object per edge
 */
public String stringify(SemanticGraph dependencies) {
    List<String> depsList = new ArrayList<String>();

    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        IndexedWord gov = edge.getGovernor();
        IndexedWord dep = edge.getDependent();

        // Every textual piece is escaped before it is placed between quotes; otherwise a
        // token such as a double quote or a backslash would produce invalid JSON.
        String rel = escapeJsonString(edge.getRelation().toString());
        String arg0 = escapeJsonString(gov.word()) + "-" + gov.index();
        String arg1 = escapeJsonString(dep.word()) + "-" + dep.index();
        depsList.add(String.format("{\"rel\":\"%s\",\"arg0\":\"%s\",\"arg1\":\"%s\"}", rel, arg0, arg1));
    }

    return String.format("\"dependencies\":[%s]", String.join(",", depsList));
}

/** Escapes quotes, backslashes and control characters so the text is safe inside a JSON string literal. */
private static String escapeJsonString(String text) {
    StringBuilder escaped = new StringBuilder(text.length() + 8);
    for (int i = 0; i < text.length(); i++) {
        char c = text.charAt(i);
        switch (c) {
        case '"':
            escaped.append("\\\"");
            break;
        case '\\':
            escaped.append("\\\\");
            break;
        case '\n':
            escaped.append("\\n");
            break;
        case '\r':
            escaped.append("\\r");
            break;
        case '\t':
            escaped.append("\\t");
            break;
        default:
            if (c < 0x20) {
                // remaining control characters must use the \\uXXXX form
                escaped.append(String.format("\\u%04x", (int) c));
            } else {
                escaped.append(c);
            }
            break;
        }
    }
    return escaped.toString();
}

From source file:org.textmining.annotator.StanfordCoreNlpAnnotator.java

License:Open Source License

/**
 * Runs the Stanford CoreNLP processor over the document text of the given JCas and
 * copies its output into UIMA annotations, in this order:
 * <ol>
 * <li>tokens, each with its part-of-speech tag and lemma;</li>
 * <li>named entity mentions, built by merging runs of tokens that share a non-"O" tag;</li>
 * <li>sentences, their treebank parse, and dependency nodes and relations;</li>
 * <li>named entities, one per coreference chain plus one singleton entity for every
 * mention that no chain picked up.</li>
 * </ol>
 *
 * @param jCas the CAS whose document text is analyzed and which receives the annotations
 * @throws AnalysisEngineProcessException as declared by the overridden signature
 * @throws RuntimeException if a sentence's parse tree does not have exactly one child
 *         under its root, or if a dependency edge refers to a node that was not mapped
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // State of the named entity run currently being accumulated; "O" means "not in a run".
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // the run ended on the previous token: emit it
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // a run (possibly) starts at this token
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // the tag changed without an intervening "O": close the old run, open a new one
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a run that extends to the last token of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // drop the wrapper node and index the leaves starting from 0
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            // the begin/end indexes are used as positions in this sentence's token list,
            // with the end index treated as exclusive
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            // a node's "head relations" are the edges in which it is the child, and vice versa
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the mention; sentNum and startIndex are
            // shifted by one to index the 0-based lists, and endIndex by two to reach the
            // last token of the mention
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // order the mentions of this chain by their begin offset
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                // Integer.compare instead of subtraction: same ordering, no overflow risk
                return Integer.compare(m1.getBegin(), m2.getBegin());
            }
        });

        // create an entity for the mentions and link both directions
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return Integer.compare(getFirstBegin(o1), getFirstBegin(o2));
        }

        // smallest begin offset among the entity's mentions; Integer.MAX_VALUE if it has none
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

    //end of process-method
}