Example usage for edu.stanford.nlp.semgraph SemanticGraph edgeIterable

List of usage examples for edu.stanford.nlp.semgraph SemanticGraph edgeIterable

Introduction

On this page you can find example usages of edu.stanford.nlp.semgraph SemanticGraph edgeIterable.

Prototype

public Iterable<SemanticGraphEdge> edgeIterable() 

Source Link

Usage

From source file: edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License: Open Source License

/**
 * Runs the Stanford CoreNLP pipeline over the document text and mirrors its
 * output (tokens, named entities, sentences, parse trees, dependency graphs,
 * and coreference chains) as UIMA annotations in the CAS.
 *
 * @param jCas the CAS whose document text is analyzed and which receives all annotations
 * @throws AnalysisEngineProcessException declared by the UIMA annotator contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // Convert token-level NE tags into phrase-level mentions: track the current
    // run of identical tags and emit a NamedEntityMention whenever the run ends.
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation from the CoreNLP character offsets
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // a run of NE tokens just ended: emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // a new NE run starts at this token
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed mid-run: close the previous mention, start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that extends to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            // token indices are sentence-relative; the end index is used as
            // exclusive here (indexEnd - 1 selects the last covered token)
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency.
        // headRelations is keyed by the CHILD node (relations where it is the
        // dependent); childRelations is keyed by the HEAD node (relations where
        // it is the governor) -- intentional, despite the inverted-looking puts.
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // (CorefMention token indices are 1-based; endIndex points one past
            // the last token, hence the -2 below)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions, sorted by begin offset
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order (position of each entity's earliest mention)
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // smallest begin offset among the entity's mentions
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file: edu.cmu.deiis.annotators.StanfordAnnotator.java

License: Open Source License

/**
 * Runs the Stanford CoreNLP pipeline over the document text and mirrors its
 * output (tokens, named entities, sentences, parse trees, dependency graphs,
 * and coreference chains) as UIMA annotations in the CAS.
 *
 * @param jCas the CAS whose document text is analyzed and which receives all annotations
 * @throws AnalysisEngineProcessException declared by the UIMA annotator contract
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // Convert token-level NE tags into phrase-level mentions: track the current
    // run of identical tags and emit a NamedEntityMention whenever the run ends.
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation from the CoreNLP character offsets
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // a run of NE tokens just ended: emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // a new NE run starts at this token
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed mid-run: close the previous mention, start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that extends to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            // token indices are sentence-relative; the end index is used as
            // exclusive here (indexEnd - 1 selects the last covered token)
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency.
        // headRelations is keyed by the CHILD node (relations where it is the
        // dependent); childRelations is keyed by the HEAD node (relations where
        // it is the governor) -- intentional, despite the inverted-looking puts.
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // (CorefMention token indices are 1-based; endIndex points one past
            // the last token, hence the -2 below)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions, sorted by begin offset
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order (position of each entity's earliest mention)
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // smallest begin offset among the entity's mentions
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file: edu.nus.comp.nlp.stanford.UtilParser.java

License: Open Source License

/**
 * Recursively converts the dependency graph reachable from {@code root} into a
 * Swing {@link DefaultMutableTreeNode} tree. Each node's user object is the
 * word, prefixed with {@code "<-relation- "} when the word has an incoming
 * dependency edge.
 *
 * @param root         the word to start from; if {@code null}, the graph's first root is used
 * @param dependencies the sentence's dependency graph
 * @return the tree node for {@code root} with all its dependents as children
 */
public static DefaultMutableTreeNode toDMTree(IndexedWord root, SemanticGraph dependencies) {

    if (root == null) {
        root = dependencies.getFirstRoot();
    }

    DefaultMutableTreeNode node = new DefaultMutableTreeNode();

    String nodeContent = root.value();

    // Only this node's incoming edges matter; the original scanned
    // dependencies.edgeIterable() (ALL edges) for every node, making the whole
    // conversion O(V*E). incomingEdgeIterable(root) yields the same edge(s).
    for (SemanticGraphEdge edge : dependencies.incomingEdgeIterable(root)) {
        nodeContent = "<-" + edge.getRelation() + "- " + nodeContent;
        break;
    }

    node.setUserObject(nodeContent);
    for (IndexedWord c : dependencies.getChildList(root)) {
        DefaultMutableTreeNode n = toDMTree(c, dependencies);
        node.add(n);
    }
    return node;
}

From source file: nlp.prototype.NewJFrame.java

// Click handler: runs the CoreNLP pipeline over the text in jTextArea1 and
// refreshes the UI -- jList1 with the document's annotation keys, jTree1 with
// one POS-token subtree per sentence, and jTree2 which is reset to an empty root.
private void jButton1MouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_jButton1MouseClicked

    // reset the second tree to a bare "top" root
    DefaultTreeModel model2 = (DefaultTreeModel) jTree2.getModel();
    DefaultMutableTreeNode rootNode2 = new DefaultMutableTreeNode("top");
    model2.setRoot(rootNode2);

    /*TextCorpus textCorpus = processor.parseCorpus(jTextArea1.getText());
            
    for (SentenceToken token : textCorpus.getSentences()) {
    DefaultMutableTreeNode sentenceTokenNode = new DefaultMutableTreeNode();
    sentenceTokenNode.setUserObject(token);
    rootNode2.add(sentenceTokenNode);
    addNodes(token, sentenceTokenNode);
    }
            
    DefaultTokenSerializer serializer = new DefaultTokenSerializer();
    Document xmlDocument = serializer.serialize(textCorpus);
    jTextArea4.setText(serializer.transform(xmlDocument));
    jTextArea7.setText(serializer.transform(xmlDocument, this.jTextArea6.getText()));*/

    // annotate the raw text with the CoreNLP pipeline
    Annotation document = new Annotation(jTextArea1.getText());
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    Map<Integer, CorefChain> corefMap = document.get(CorefChainAnnotation.class);
    List<CoreLabel> tokens = document.get(CoreAnnotations.TokensAnnotation.class);

    // show every non-empty document-level annotation as "key - [value]"
    DefaultListModel listModel = new DefaultListModel();

    for (Class key : document.keySet()) {
        Object value = document.get(key);

        if (value != null && value.toString() != null && !value.toString().isEmpty()) {
            listModel.addElement(key.toString() + " - [" + value.toString() + "]");
        }
    }

    DefaultTreeModel model = (DefaultTreeModel) jTree1.getModel();
    DefaultMutableTreeNode rootNode = new DefaultMutableTreeNode("top");
    model.setRoot(rootNode);

    List<POSToken> tokenList = new ArrayList<>();

    jList1.setModel(listModel);

    for (CoreMap sentence : sentences) {
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
        String root = graph.getFirstRoot().originalText();

        // index each sentence's grammar edges by their target word
        MultiValuedMap<String, GrammarToken> map = new HashSetValuedHashMap<>();

        for (SemanticGraphEdge edge : graph.edgeIterable()) {
            GrammarToken grammarToken = new GrammarToken(edge);
            map.put(grammarToken.getTarget(), grammarToken);
        }

        // one subtree per sentence, rooted at the parse tree's label token
        DefaultMutableTreeNode node = new DefaultMutableTreeNode();
        POSToken token = new POSToken((CoreLabel) tree.label());
        token.setGrammar(graph.toString());
        node.setUserObject(token);
        rootNode.add(node);
        addNodes(tree, false, node, node, map, root, corefMap, tokens);
        tokenList.add(token);
    }

    setAdjacentNodes(tokenList);
}

From source file: nlp.service.implementation.DefaultGrammarService.java

/**
 * Indexes every dependency edge of the sentence graph by its target (dependent)
 * token index, mapping Stanford relation names onto {@code GrammaticalDependency}
 * enum constants.
 *
 * @param graph the sentence's dependency graph
 */
public DefaultGrammarService(SemanticGraph graph) {
    targetMap = new HashSetValuedHashMap<>();

    rootIndex = graph.getFirstRoot().index();

    for (SemanticGraphEdge edge : graph.edgeIterable()) {

        // Strip any subtype qualifier, e.g. "nmod:poss" -> "poss".
        // (Original called substring(begin, relation.length()) -- the second
        // argument was redundant.)
        String relationName = edge.getRelation().toString();
        if (relationName.contains(":")) {
            relationName = relationName.substring(relationName.indexOf(':') + 1);
        }

        GrammaticalDependency dependency;
        if (relationName.equals("case")) {
            // "case" is a Java keyword, so the enum cannot have a constant with
            // that name; it is mapped explicitly.
            dependency = GrammaticalDependency.casemarker;
        } else {
            // The try block is narrowed to the only call that can throw
            // IllegalArgumentException: valueOf on an unknown constant name.
            try {
                dependency = GrammaticalDependency.valueOf(relationName);
            } catch (IllegalArgumentException e) {
                dependency = GrammaticalDependency.unknown;
            }
        }

        GrammaticalRelation<Integer> relation = new GrammaticalRelation<>(dependency, edge.getTarget().index(),
                edge.getSource().index());
        targetMap.put(relation.getTarget(), relation);
    }
}

From source file: org.nlp2rdf.implementation.stanfordcorenlp.StanfordWrapper.java

License: Apache License

/**
 * Runs a Stanford CoreNLP pipeline over the context string and writes the
 * results into the NIF output model: the sentence/word structure, per-word
 * lemmas, POS tags linked to OLiA individuals, and one object property per
 * dependency edge between governor and dependent words.
 *
 * @param context       the NIF context individual carrying the text to annotate
 * @param inputModel    model from which the context string is read
 * @param outputModel   model that receives all generated individuals and properties
 * @param nifParameters supplies the prefix, URI scheme, annotator config, and log prefix
 */
public void process(Individual context, OntModel inputModel, OntModel outputModel,
        NIFParameters nifParameters) {
    String contextString = context
            .getPropertyValue(NIFDatatypeProperties.isString.getDatatypeProperty(inputModel)).asLiteral()
            .getString();
    String prefix = nifParameters.getPrefix();
    URIScheme urischeme = nifParameters.getUriScheme();

    Annotator pipeline = buildAnnotator(nifParameters);

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(contextString);
    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    // read all sentence and word character spans into an intermediate structure
    // NOTE: this can be greatly optimized; for now it is simple and cheap
    int wordCount = 0;
    TreeMap<Span, List<Span>> tokenizedText = new TreeMap<Span, List<Span>>();
    for (CoreMap sentence : sentences) {
        Span sentenceSpan = new Span(sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        List<Span> wordSpans = new ArrayList<Span>();
        for (CoreLabel coreLabel : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            wordSpans.add(new Span(coreLabel.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    coreLabel.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)));
            wordCount++;
        }
        tokenizedText.put(sentenceSpan, wordSpans);
    }

    /*
     * Basic model setup: generate the NIF individuals for sentences and words.
     */
    Text2RDF text2RDF = new Text2RDF();
    text2RDF.generateNIFModel(prefix, context, urischeme, outputModel, tokenizedText);
    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(),
            "Finished creating " + tokenizedText.size() + " sentence(s) with " + wordCount + " word(s) ",
            RLOGIndividuals.DEBUG, this.getClass().getCanonicalName(), null, null));
    // text2RDF.addNextAndPreviousProperties(prefix,urischeme,model);

    // traversing the words in the current sentence
    // a CoreLabel is a CoreMap with additional token-specific methods
    for (CoreMap sentence : sentences) {

        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Span wordSpan = new Span(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            // the word should exist already (created by generateNIFModel above)
            Individual wordIndividual = outputModel
                    .getIndividual(urischeme.generate(prefix, contextString, wordSpan));

            if (wordIndividual == null) {
                log.error("SKIPPING: word was not found in the model: "
                        + urischeme.generate(prefix, contextString, wordSpan));
                continue;
            }

            /********************************
             * Lemma
             ******/
            if (token.get(CoreAnnotations.LemmaAnnotation.class) != null) {
                wordIndividual.addProperty(NIFDatatypeProperties.lemma.getDatatypeProperty(outputModel),
                        token.get(CoreAnnotations.LemmaAnnotation.class), XSDDatatype.XSDstring);
            }

            /********************************
             * POS tag
             ******/
            outputModel.setNsPrefix("olia", "http://purl.org/olia/olia.owl#");
            // this is the POS tag of the token
            String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);

            // link the word to the OLiA individual(s) and classes for its tag
            List<String> oliaIndividual = (List<String>) Penn.hasTag.get(posTag);
            if (oliaIndividual != null) {

                for (String s : oliaIndividual) {
                    wordIndividual.addProperty(NIFObjectProperties.oliaLink.getObjectProperty(outputModel),
                            outputModel.createIndividual(s, OWL.Thing));
                    List<String> pennlinks = (List<String>) Penn.links.get(s);
                    if (pennlinks != null) {
                        for (String oc : pennlinks) {
                            wordIndividual.addProperty(
                                    NIFAnnotationProperties.oliaCategory.getAnnotationProperty(outputModel),
                                    outputModel.createClass(oc));
                        }
                    } else {
                        outputModel.add(
                                RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "missing oliaLinks for " + s,
                                        RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    }
                }
            } else {
                outputModel.add(
                        RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "missing oliaLinks for " + posTag,
                                RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));

            }
        }

        SemanticGraph dependencies = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            //time to add the prefix
            StanfordSimple.addStanfordSimplePrefix(outputModel);

            // create relation annotations for each Stanford dependency
            for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {

                Span govSpan = new Span(
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                Span depSpan = new Span(
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));

                String[] edgeURIs = StanfordSimple.getURIforEdgeLabel(stanfordEdge.getRelation().toString());
                ObjectProperty relation = null;
                switch (edgeURIs.length) {
                case 1:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    break;
                case 2:
                    // second URI is the more specific sub-property of the first
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    relation.addSubProperty(outputModel.createObjectProperty(edgeURIs[1]));
                    break;
                default:
                    // BUGFIX: concatenating the array itself printed its identity
                    // hash, not its contents; join the elements instead.
                    String message = "Empty edge label, no URI written: " + String.join(", ", edgeURIs);
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;

                }

                Individual gov = text2RDF.createCStringIndividual(prefix, context, govSpan, urischeme,
                        outputModel);
                Individual dep = text2RDF.createCStringIndividual(prefix, context, depSpan, urischeme,
                        outputModel);

                // BUGFIX: this null check previously ran AFTER gov.addProperty(...),
                // so a null gov caused an NPE and the check was dead code. It must
                // guard the dereferences below.
                if (gov == null || dep == null) {
                    String message = "SKIPPING Either gov or dep was null for the dependencies\n" + "gov: "
                            + gov + "\ndep: " + dep;
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;
                }

                gov.addProperty(relation, dep);
                relation.addSuperProperty(NIFObjectProperties.inter.getObjectProperty(outputModel));
                relation.addSuperProperty(NIFObjectProperties.dependency.getObjectProperty(outputModel));

            }
        } //end dependency handling for this sentence

        /**************
         * Syntax Tree
         * */
        //Tree tree = sentence.get(TreeAnnotation.class);
        //if (tree != null) {
        //removed for 2.0
        //processTree(tree, urigenerator, prefix, text, model);
        //}

    }

}

From source file: org.sam_agent.csparser.ContinuousParser.java

License: Open Source License

/**
 * Serialises every edge of a sentence's dependency graph into a JSON fragment of the
 * form {@code "dependencies":[{"rel":...,"arg0":...,"arg1":...},...]}.
 * <p>
 * Each edge is rendered as relation name plus governor/dependent encoded as
 * {@code word-index}. Words and relation names are escaped so that tokens containing
 * double quotes or backslashes cannot break the generated JSON.
 *
 * @param dependencies the Stanford dependency graph of one sentence
 * @return a JSON key/value fragment (not a complete JSON object) listing all edges
 */
public String stringify(SemanticGraph dependencies) {
    List<String> depsList = new ArrayList<String>();

    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        String rel = edge.getRelation().toString();
        IndexedWord gov = edge.getGovernor(), dep = edge.getDependent();
        // IndexedWord.word() already returns a String; no toString() needed.
        String arg0 = gov.word() + "-" + gov.index();
        String arg1 = dep.word() + "-" + dep.index();
        depsList.add(String.format("{\"rel\":\"%s\",\"arg0\":\"%s\",\"arg1\":\"%s\"}",
                escapeJson(rel), escapeJson(arg0), escapeJson(arg1)));
    }

    return String.format("\"dependencies\":[%s]", String.join(",", depsList));
}

/**
 * Escapes backslashes and double quotes so a token can be embedded inside a JSON
 * string literal without producing malformed output.
 */
private static String escapeJson(String s) {
    return s.replace("\\", "\\\\").replace("\"", "\\\"");
}

From source file:org.textmining.annotator.StanfordCoreNlpAnnotator.java

License:Open Source License

/**
 * Runs the wrapped Stanford CoreNLP pipeline over the document text and converts the
 * resulting annotations (tokens, phrase-level named entities, sentences, parse trees,
 * dependency graphs, and coreference chains) into UIMA feature structures indexed on
 * {@code jCas}.
 *
 * @param jCas the CAS whose document text is analysed and into which annotations are added
 * @throws AnalysisEngineProcessException per the UIMA annotator contract
 *         (none is thrown directly by this body; failures surface as RuntimeExceptions)
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    // State for collapsing per-token BIO-less NE tags into phrase-level mentions:
    // lastNETag is the tag of the previous token ("O" = outside any entity),
    // lastNEBegin/lastNEEnd track the character span of the entity being built.
    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // entity run just ended: emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // entity run just started at this token
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed directly from one entity type to another:
                // close the old mention and start a new one here
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that runs to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        // skip the artificial ROOT wrapper and compute token-index spans for each subtree
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            // BeginIndex/EndIndex are token offsets within the sentence;
            // map them to character offsets via the UIMA Token list
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        // headRelations: relations in which the key node is the CHILD (i.e. its links to heads)
        // childRelations: relations in which the key node is the HEAD (i.e. its links to children)
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions (for reuse by the coreference pass below)
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the token
            // (CorefMention indices are 1-based; endIndex is exclusive, hence -1 and -2)
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions, ordered by document position
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order (position of each entity's earliest mention)
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // smallest begin offset over all of the entity's mentions
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document (registration deferred so sorting happens first)
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

    //end of process-method
}