Example usage for edu.stanford.nlp.dcoref CorefChain getMentionsInTextualOrder

Introduction

On this page you can find example usage for edu.stanford.nlp.dcoref CorefChain getMentionsInTextualOrder, collected from open-source projects.

Prototype

public List<CorefMention> getMentionsInTextualOrder() 

Document

Returns the chain's mentions as a List of CorefMention objects, in the order they appear in the text.
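
Before the project examples below, here is a minimal, self-contained sketch of the call. The class name CorefChainDemo and the sample sentence are illustrative; the annotator list and annotation keys follow the examples on this page.

import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefChain.CorefMention;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefChainDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        // dcoref requires the full preprocessing chain: pos and lemma before ner and parse
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("Alice lost her keys. She found them later.");
        pipeline.annotate(document);

        Map<Integer, CorefChain> chains = document.get(CorefChainAnnotation.class);
        for (CorefChain chain : chains.values()) {
            // mentions come back in the order they occur in the text
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(mention.sentNum + ": " + mention.mentionSpan);
            }
        }
    }
}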

Usage

From source file:Anaphora_Resolution.AnaphoraDetection.java

public void anaphora() {
    String text = "Tom is a smart boy. He knows a lot of things.";

    Annotation document = new Annotation(text);
    Properties props = new Properties();
    // dcoref needs the full chain: pos before lemma, plus ner and parse
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    for (Integer i : graph.keySet()) {
        System.out.println("GROUP " + i);
        CorefChain x = graph.get(i);
        for (CorefMention m : x.getMentionsInTextualOrder()) {
            System.out.println(m.mentionSpan);
        }
    }

}

From source file:be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java

License:Open Source License

public PipelineSentenceList process(String text) {
    if (pipeline == null) {
        initPipeline();
    }

    PipelineSentenceList outList = new PipelineSentenceList();
    Annotation document = new Annotation(text);

    pipeline.annotate(document);

    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        String str = sentence.get(TextAnnotation.class);
        PipelineTokenList ptl = new PipelineTokenList();
        PipelineDependencyList pel = new PipelineDependencyList();

        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            String ner = token.get(NamedEntityTagAnnotation.class);
            String lemma = token.get(LemmaAnnotation.class);

            ptl.add(new PipelineToken(word, pos, ner, lemma));
        }

        SemanticGraph dependencies = sentence.get(depMode.equals(DEP_BASIC) ? BasicDependenciesAnnotation.class
                : depMode.equals(DEP_COLLAPSED) ? CollapsedDependenciesAnnotation.class
                        : CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            for (SemanticGraphEdge edge : dependencies.edgeListSorted()) {
                GrammaticalRelation rel = edge.getRelation();

                int govTokenIndex = edge.getGovernor().index() - 1;
                int depTokenIndex = edge.getDependent().index() - 1;

                if (govTokenIndex >= 0 && depTokenIndex >= 0 && govTokenIndex < ptl.size()
                        && depTokenIndex < ptl.size()) {
                    pel.add(new PipelineDependency(ptl.get(govTokenIndex), ptl.get(depTokenIndex),
                            govTokenIndex, depTokenIndex, rel));
                } else {
                    System.err.println("Index of " + edge.toString() + " out of range!");
                }
            }
        }
        outList.add(new PipelineSentence(str, ptl, pel));
    } //for -- SentenceAnnotation
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);

    if (graph != null) {
        for (CorefChain crc : graph.values()) {
            List<CorefMention> crms = crc.getMentionsInTextualOrder();
            CorefMention rm = crc.getRepresentativeMention();

            if (rm != null) {
                PipelineCorefChain crChain = new PipelineCorefChain();
                PipelineCorefMention repRef = PipelineCorefMention.fromMention(rm);
                repRef.setTokens(outList.get(repRef.getSentNum()).getTokens().slice(repRef.getStartIndex(),
                        repRef.getEndIndex()));
                repRef.setHeadToken(outList.get(repRef.getSentNum()).getTokens().get(repRef.getHeadIndex()));
                crChain.setRepresentativeMention(repRef);
                if (crms.size() > 0) {
                    for (CorefMention cm : crms) {
                        PipelineCorefMention cr = PipelineCorefMention.fromMention(cm);
                        cr.setTokens(outList.get(cr.getSentNum()).getTokens().slice(cr.getStartIndex(),
                                cr.getEndIndex()));
                        crChain.addMention(cr);
                    }
                }
                outList.get(repRef.getSentNum()).addCorefChain(crChain);
            } //if(rm
        } //for
    } //if(graph

    return outList;
}

From source file:candidateGeneration.remove_missingContext.java

public static void main(String[] args) throws FileNotFoundException, IOException {
    InputStream is = new FileInputStream(sentence_detect_model);
    SentenceModel model = new SentenceModel(is);
    SentenceDetectorME sdetector = new SentenceDetectorME(model);

    Properties props = new Properties();
    props.put("annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref");
    StanfordCoreNLP pi = new StanfordCoreNLP(props);

    File writeFile = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\good_sentences_new.txt");
    writeFile.createNewFile();
    FileWriter writer = new FileWriter(writeFile);

    File writeFile2 = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\bad_sentences_new.txt");
    writeFile2.createNewFile();
    FileWriter writer2 = new FileWriter(writeFile2);

    String folderPath = "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\movieTest\\indivFiles\\";
    File[] files = new File(folderPath).listFiles();
    for (File file : files) {
        if (file.isFile()) {
            String name = file.getName();
            name = name.replace("_", " ");
            name = name.replace("%28", "(");
            name = name.replace("%29", ")");
            name = name.replace(".txt", "");
            System.out.println("File: " + name);

            FileReader inputFile = new FileReader(folderPath + file.getName());
            BufferedReader bufferReader = new BufferedReader(inputFile);
            String input;

            while ((input = bufferReader.readLine()) != null) {
                //System.out.println("Line: " + input);
                String sentences[] = sdetector.sentDetect(input);
                HashMap<Integer, Integer> toRemove = new HashMap<>();
                Annotation doc = new Annotation(input);
                pi.annotate(doc);
                Map<Integer, CorefChain> graph = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);

                for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
                    CorefChain c = entry.getValue();

                    if (c.getMentionsInTextualOrder().size() <= 1) {
                        continue;
                    }

                    //System.out.println("Mentions: " + c.toString());
                    String[] sentenceOccurence = c.toString().split(" ");
                    int firstOccurence = -1;
                    for (int i = 0; i < sentenceOccurence.length; i++) {
                        if (firstOccurence == -1 && sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("first occurence : " + sentenceOccurence[i+1]);
                            firstOccurence = Integer
                                    .parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", ""));
                            continue;
                        }

                        if (sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("further occurence : "+sentenceOccurence[i+1]);
                            if (Integer.parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]",
                                    "")) != firstOccurence) {
                                //System.out.println("Added " + sentenceOccurence[i+1].replace(",", "").replace("]", "") + " for removal");
                                toRemove.put(Integer.parseInt(
                                        sentenceOccurence[i + 1].replace(",", "").replace("]", "")), 1);
                            }
                        }
                    }
                    //System.out.println(c.toString());
                }

                int cand_i = 1;
                for (String candidate_sentence : sentences) {
                    if (toRemove.containsKey(cand_i)) {
                        writer2.write(name + "\t" + candidate_sentence + "\n");
                        cand_i++; // advance the index even for dropped sentences
                        continue;
                    }
                    writer.write(name + "\t" + candidate_sentence + "\n");
                    cand_i++;
                }
                //System.in.read();
            }
            //System.out.println("Line done");
            bufferReader.close();
            //System.in.read();
        }
        writer.flush();
        writer2.flush();
    }
    writer.close();
    writer2.close();
}
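
The loop above recovers sentence numbers by string-parsing c.toString(), which is fragile. A more direct route reads the sentNum field that other examples on this page use; the sketch below is a hypothetical helper with the same semantics as the toRemove map (java.util.Set and HashSet assumed imported).

static Set<Integer> sentencesToRemove(Map<Integer, CorefChain> graph) {
    Set<Integer> toRemove = new HashSet<>();
    for (CorefChain c : graph.values()) {
        List<CorefChain.CorefMention> mentions = c.getMentionsInTextualOrder();
        if (mentions.size() <= 1) {
            continue;
        }
        // mentions are in textual order, so the first mention fixes the anchor sentence
        int firstOccurrence = mentions.get(0).sentNum;
        for (CorefChain.CorefMention m : mentions) {
            if (m.sentNum != firstOccurrence) {
                toRemove.add(m.sentNum);
            }
        }
    }
    return toRemove;
}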

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License:Open Source License

@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder());
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
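            // CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below.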
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the mention
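            // (CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below)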
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the mention
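            // (CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below)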
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.jhu.hlt.concrete.stanford.CorefManager.java

License:Open Source License

private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations)
        throws AnalyticException {
    Entity concEntity = new Entity().setUuid(this.gen.next());
    CorefChain.CorefMention coreHeadMention = chain.getRepresentativeMention();
    // CoreNLP uses 1-based indexing for the sentences
    // just subtract 1.
    Tokenization tkz = tokenizations.get(coreHeadMention.sentNum - 1);
    UUID tkzUuid = tkz.getUuid();
    LOGGER.debug("Creating EntityMention based on tokenization: {}", tkzUuid.getUuidString());
    EntityMention concHeadMention = makeEntityMention(coreHeadMention, tkzUuid, true);
    TokenRefSequence trs = concHeadMention.getTokens();

    // TODO: below throws if they're invalid. maybe this can be removed in the future.
    this.validateTokenRefSeqValidity(trs, tkz);

    concEntity.setCanonicalName(coreHeadMention.mentionSpan);
    concEntity.addToMentionIdList(concHeadMention.getUuid());
    ems.addToMentionList(concHeadMention);
    for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
        if (mention == coreHeadMention)
            continue;
        // CoreNLP uses 1-based indexing for the sentences
        // we'll just subtract one.
        Tokenization localTkz = tokenizations.get(mention.sentNum - 1);
        EntityMention concMention = this.makeEntityMention(mention, localTkz.getUuid(), false);
        TokenRefSequence localTrs = concMention.getTokens();
        this.validateTokenRefSeqValidity(localTrs, localTkz);

        ems.addToMentionList(concMention);
        concEntity.addToMentionIdList(concMention.getUuid());
    }
    return concEntity;
}

From source file:edu.tuberlin.dima.textmining.jedi.core.features.detector.StanfordUIMAAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {

    CAS cas = jCas.getCas();
    posMappingProvider.configure(cas);
    nerMappingProvider.configure(cas);

    modelProvider.configure(cas);

    Annotation document = modelProvider.getResource().process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);

        Token token = new Token(jCas, begin, end);

        Type posTag = posMappingProvider.getTagType(pos);
        POS posAnno = (POS) cas.createAnnotation(posTag, begin, end);
        posAnno.setStringValue(posTag.getFeatureByBaseName("PosValue"), pos.intern());
        posAnno.addToIndexes();
        token.setPos(posAnno);

        Lemma dkproLemma = new Lemma(jCas, begin, end);
        dkproLemma.setValue(lemma);
        dkproLemma.addToIndexes();

        token.setLemma(dkproLemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag == null)
            continue;
        if (neTag.equals("O") && !lastNETag.equals("O")) {

            Type type = nerMappingProvider.getTagType(lastNETag);
            NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
            neAnno.setValue(lastNETag);
            neAnno.addToIndexes();

        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {

                Type type = nerMappingProvider.getTagType(lastNETag);
                NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
                neAnno.setValue(lastNETag);
                neAnno.addToIndexes();

                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {

        Type type = nerMappingProvider.getTagType(lastNETag);
        NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
        neAnno.setValue(lastNETag);
        neAnno.addToIndexes();

    }

    // add sentences and trees
    List<CoreMap> sentenceAnnotations = document.get(SentencesAnnotation.class);
    for (CoreMap sentenceAnn : sentenceAnnotations) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

    }

    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            CoreferenceLink last = null;
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
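                // CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below.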

                CoreLabel beginLabel = sentenceAnnotations.get(mention.sentNum - 1).get(TokensAnnotation.class)
                        .get(mention.startIndex - 1);
                CoreLabel endLabel = sentenceAnnotations.get(mention.sentNum - 1).get(TokensAnnotation.class)
                        .get(mention.endIndex - 2);
                CoreferenceLink link = new CoreferenceLink(jCas,
                        beginLabel.get(CharacterOffsetBeginAnnotation.class),
                        endLabel.get(CharacterOffsetEndAnnotation.class));

                if (mention.mentionType != null) {
                    link.setReferenceType(mention.mentionType.toString());
                }

                if (last == null) {
                    // This is the first mention. Here we'll initialize the chain
                    CoreferenceChain corefChain = new CoreferenceChain(jCas);
                    corefChain.setFirst(link);
                    corefChain.addToIndexes();
                } else {
                    // For the other mentions, we'll add them to the chain.
                    last.setNext(link);
                }
                last = link;

                link.addToIndexes();
            }
        }
    }

}

From source file:nlp.pipeline.SentenceUtil.java

License:Open Source License

/** ***************************************************************
 *  Print the coreference link graph.
 *  Each chain stores a set of mentions that link to each other,
 *  along with a method for getting the most representative mention
 *  Both sentence and token offsets start at 1!
 */
public static void printCorefChain(Annotation document) {

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    if (graph == null)
        return;
    for (CorefChain cc : graph.values()) {
        List<CorefChain.CorefMention> mentions = cc.getMentionsInTextualOrder();
        if (mentions.size() > 1) {
            for (CorefChain.CorefMention ment : mentions) {
                System.out.println(ment.sentNum + " : " + ment.headIndex + " : " + ment.mentionSpan);
            }
            System.out.println();
        }
    }
}

From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java

License:Open Source License

@SuppressWarnings("unused")
private void newStyleCoreferenceGraphOutput(Annotation annotation) {
    // display the new-style coreference graph
    //List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            CorefChain.CorefMention representative = chain.getRepresentativeMention();
            for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(mention);
                if (mention == representative)
                    continue;
                // all offsets start at 1!
                System.out.println("\t" + mention.mentionID + ": (Mention from sentence " + mention.sentNum
                        + ", " + "Head word = " + mention.headIndex + ", (" + mention.startIndex + ","
                        + mention.endIndex + ")" + ")" + " -> " + "(Representative from sentence "
                        + representative.sentNum + ", " + "Head word = " + representative.headIndex + ", ("
                        + representative.startIndex + "," + representative.endIndex + ")" + "), that is: \""
                        + mention.mentionSpan + "\" -> \"" + representative.mentionSpan + "\"");
            }
        }
    }
}