Example usage for edu.stanford.nlp.dcoref CorefChain getMentionsInTextualOrder

Introduction

On this page you can find example usage for edu.stanford.nlp.dcoref CorefChain getMentionsInTextualOrder, collected from open-source projects.

Prototype

public List<CorefMention> getMentionsInTextualOrder() 

Document

Returns the chain's mentions as a List of CorefMention objects, in the order they appear in the text.
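
Before the project examples below, here is a minimal, self-contained sketch of the call. The class name CorefChainDemo and the sample sentence are illustrative; the annotator list and annotation keys follow the examples on this page.

import java.util.Map;
import java.util.Properties;

import edu.stanford.nlp.dcoref.CorefChain;
import edu.stanford.nlp.dcoref.CorefChain.CorefMention;
import edu.stanford.nlp.dcoref.CorefCoreAnnotations.CorefChainAnnotation;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;

public class CorefChainDemo {
    public static void main(String[] args) {
        Properties props = new Properties();
        // dcoref requires the full preprocessing chain: pos and lemma before ner and parse
        props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("Alice lost her keys. She found them later.");
        pipeline.annotate(document);

        Map<Integer, CorefChain> chains = document.get(CorefChainAnnotation.class);
        for (CorefChain chain : chains.values()) {
            // mentions come back in the order they occur in the text
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(mention.sentNum + ": " + mention.mentionSpan);
            }
        }
    }
}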

Usage

From source file:Anaphora_Resolution.AnaphoraDetection.java

public void anaphora() {
    String text = "Tom is a smart boy. He knows a lot of things.";

    Annotation document = new Annotation(text);
    Properties props = new Properties();
    // dcoref needs the full chain: pos before lemma, plus ner and parse
    props.put("annotators", "tokenize, ssplit, pos, lemma, ner, parse, dcoref");

    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    pipeline.annotate(document);

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    for (Integer i : graph.keySet()) {
        System.out.println("GROUP " + i);
        CorefChain x = graph.get(i);
        for (CorefMention m : x.getMentionsInTextualOrder()) {
            System.out.println(m.mentionSpan);
        }
    }

}

From source file:be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java

License:Open Source License

public PipelineSentenceList process(String text) {
    if (pipeline == null) {
        initPipeline();
    }

    PipelineSentenceList outList = new PipelineSentenceList();
    Annotation document = new Annotation(text);

    pipeline.annotate(document);

    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        String str = sentence.get(TextAnnotation.class);
        PipelineTokenList ptl = new PipelineTokenList();
        PipelineDependencyList pel = new PipelineDependencyList();

        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            String ner = token.get(NamedEntityTagAnnotation.class);
            String lemma = token.get(LemmaAnnotation.class);

            ptl.add(new PipelineToken(word, pos, ner, lemma));
        }

        SemanticGraph dependencies = sentence.get(depMode.equals(DEP_BASIC) ? BasicDependenciesAnnotation.class
                : depMode.equals(DEP_COLLAPSED) ? CollapsedDependenciesAnnotation.class
                        : CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            for (SemanticGraphEdge edge : dependencies.edgeListSorted()) {
                GrammaticalRelation rel = edge.getRelation();

                int govTokenIndex = edge.getGovernor().index() - 1;
                int depTokenIndex = edge.getDependent().index() - 1;

                if (govTokenIndex >= 0 && depTokenIndex >= 0 && govTokenIndex < ptl.size()
                        && depTokenIndex < ptl.size()) {
                    pel.add(new PipelineDependency(ptl.get(govTokenIndex), ptl.get(depTokenIndex),
                            govTokenIndex, depTokenIndex, rel));
                } else {
                    System.err.println("Index of " + edge.toString() + " out of range!");
                }
            }
        }
        outList.add(new PipelineSentence(str, ptl, pel));
    } //for -- SentenceAnnotation
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);

    if (graph != null) {
        for (CorefChain crc : graph.values()) {
            List<CorefMention> crms = crc.getMentionsInTextualOrder();
            CorefMention rm = crc.getRepresentativeMention();

            if (rm != null) {
                PipelineCorefChain crChain = new PipelineCorefChain();
                PipelineCorefMention repRef = PipelineCorefMention.fromMention(rm);
                repRef.setTokens(outList.get(repRef.getSentNum()).getTokens().slice(repRef.getStartIndex(),
                        repRef.getEndIndex()));
                repRef.setHeadToken(outList.get(repRef.getSentNum()).getTokens().get(repRef.getHeadIndex()));
                crChain.setRepresentativeMention(repRef);
                if (crms.size() > 0) {
                    for (CorefMention cm : crms) {
                        PipelineCorefMention cr = PipelineCorefMention.fromMention(cm);
                        cr.setTokens(outList.get(cr.getSentNum()).getTokens().slice(cr.getStartIndex(),
                                cr.getEndIndex()));
                        crChain.addMention(cr);
                    }
                }
                outList.get(repRef.getSentNum()).addCorefChain(crChain);
            } //if(rm
        } //for
    } //if(graph

    return outList;
}

From source file:candidateGeneration.remove_missingContext.java

public static void main(String[] args) throws FileNotFoundException, IOException {
    InputStream is = new FileInputStream(sentence_detect_model);
    SentenceModel model = new SentenceModel(is);
    SentenceDetectorME sdetector = new SentenceDetectorME(model);

    Properties props = new Properties();
    props.put("annotators", "tokenize,ssplit,pos,lemma,ner,parse,dcoref");
    StanfordCoreNLP pi = new StanfordCoreNLP(props);

    File writeFile = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\good_sentences_new.txt");
    writeFile.createNewFile();
    FileWriter writer = new FileWriter(writeFile);

    File writeFile2 = new File(
            "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\Candidate_Generation\\bad_sentences_new.txt");
    writeFile2.createNewFile();
    FileWriter writer2 = new FileWriter(writeFile2);

    String folderPath = "C:\\Users\\Abhay Prakash\\Workspace\\trivia\\Data\\movieTest\\indivFiles\\";
    File[] files = new File(folderPath).listFiles();
    for (File file : files) {
        if (file.isFile()) {
            String name = file.getName();
            name = name.replace("_", " ");
            name = name.replace("%28", "(");
            name = name.replace("%29", ")");
            name = name.replace(".txt", "");
            System.out.println("File: " + name);

            FileReader inputFile = new FileReader(folderPath + file.getName());
            BufferedReader bufferReader = new BufferedReader(inputFile);
            String input;

            while ((input = bufferReader.readLine()) != null) {
                //System.out.println("Line: " + input);
                String sentences[] = sdetector.sentDetect(input);
                HashMap<Integer, Integer> toRemove = new HashMap<>();
                Annotation doc = new Annotation(input);
                pi.annotate(doc);
                Map<Integer, CorefChain> graph = doc.get(CorefCoreAnnotations.CorefChainAnnotation.class);

                for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
                    CorefChain c = entry.getValue();

                    if (c.getMentionsInTextualOrder().size() <= 1) {
                        continue;
                    }

                    //System.out.println("Mentions: " + c.toString());
                    String[] sentenceOccurence = c.toString().split(" ");
                    int firstOccurence = -1;
                    for (int i = 0; i < sentenceOccurence.length; i++) {
                        if (firstOccurence == -1 && sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("first occurence : " + sentenceOccurence[i+1]);
                            firstOccurence = Integer
                                    .parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]", ""));
                            continue;
                        }

                        if (sentenceOccurence[i].equals("sentence")) {
                            //System.out.println("further occurence : "+sentenceOccurence[i+1]);
                            if (Integer.parseInt(sentenceOccurence[i + 1].replace(",", "").replace("]",
                                    "")) != firstOccurence) {
                                //System.out.println("Added " + sentenceOccurence[i+1].replace(",", "").replace("]", "") + " for removal");
                                toRemove.put(Integer.parseInt(
                                        sentenceOccurence[i + 1].replace(",", "").replace("]", "")), 1);
                            }
                        }
                    }
                    //System.out.println(c.toString());
                }

                int cand_i = 1;
                for (String candidate_sentence : sentences) {
                    if (toRemove.containsKey(cand_i)) {
                        writer2.write(name + "\t" + candidate_sentence + "\n");
                        cand_i++; // advance the index even for dropped sentences
                        continue;
                    }
                    writer.write(name + "\t" + candidate_sentence + "\n");
                    cand_i++;
                }
                //System.in.read();
            }
            //System.out.println("Line done");
            bufferReader.close();
            //System.in.read();
        }
        writer.flush();
        writer2.flush();
    }
    writer.close();
    writer2.close();
}
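
The loop above recovers sentence numbers by string-parsing c.toString(), which is fragile. A more direct route reads the sentNum field that other examples on this page use; the sketch below is a hypothetical helper with the same semantics as the toRemove map (java.util.Set and HashSet assumed imported).

static Set<Integer> sentencesToRemove(Map<Integer, CorefChain> graph) {
    Set<Integer> toRemove = new HashSet<>();
    for (CorefChain c : graph.values()) {
        List<CorefChain.CorefMention> mentions = c.getMentionsInTextualOrder();
        if (mentions.size() <= 1) {
            continue;
        }
        // mentions are in textual order, so the first mention fixes the anchor sentence
        int firstOccurrence = mentions.get(0).sentNum;
        for (CorefChain.CorefMention m : mentions) {
            if (m.sentNum != firstOccurrence) {
                toRemove.add(m.sentNum);
            }
        }
    }
    return toRemove;
}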

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License:Open Source License

@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder());
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
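            // CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below.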
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the mention
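            // (CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below)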
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {

            // figure out the character span of the mention
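            // (CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below)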
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // create an entity for the mentions
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            entities.add(entity);
        }
    }

    // sort entities by document order
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }

}

From source file:edu.jhu.hlt.concrete.stanford.CorefManager.java

License:Open Source License

private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations)
        throws AnalyticException {
    Entity concEntity = new Entity().setUuid(this.gen.next());
    CorefChain.CorefMention coreHeadMention = chain.getRepresentativeMention();
    // CoreNLP uses 1-based indexing for the sentences
    // just subtract 1.
    Tokenization tkz = tokenizations.get(coreHeadMention.sentNum - 1);
    UUID tkzUuid = tkz.getUuid();
    LOGGER.debug("Creating EntityMention based on tokenization: {}", tkzUuid.getUuidString());
    EntityMention concHeadMention = makeEntityMention(coreHeadMention, tkzUuid, true);
    TokenRefSequence trs = concHeadMention.getTokens();

    // TODO: below throws if they're invalid. maybe this can be removed in the future.
    this.validateTokenRefSeqValidity(trs, tkz);

    concEntity.setCanonicalName(coreHeadMention.mentionSpan);
    concEntity.addToMentionIdList(concHeadMention.getUuid());
    ems.addToMentionList(concHeadMention);
    for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
        if (mention == coreHeadMention)
            continue;
        // CoreNLP uses 1-based indexing for the sentences
        // we'll just subtract one.
        Tokenization localTkz = tokenizations.get(mention.sentNum - 1);
        EntityMention concMention = this.makeEntityMention(mention, localTkz.getUuid(), false);
        TokenRefSequence localTrs = concMention.getTokens();
        this.validateTokenRefSeqValidity(localTrs, localTkz);

        ems.addToMentionList(concMention);
        concEntity.addToMentionIdList(concMention.getUuid());
    }
    return concEntity;
}

From source file:edu.tuberlin.dima.textmining.jedi.core.features.detector.StanfordUIMAAnnotator.java

License:Open Source License

@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {

    CAS cas = jCas.getCas();
    posMappingProvider.configure(cas);
    nerMappingProvider.configure(cas);

    modelProvider.configure(cas);

    Annotation document = modelProvider.getResource().process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);

        Token token = new Token(jCas, begin, end);

        Type posTag = posMappingProvider.getTagType(pos);
        POS posAnno = (POS) cas.createAnnotation(posTag, begin, end);
        posAnno.setStringValue(posTag.getFeatureByBaseName("PosValue"), pos.intern());
        posAnno.addToIndexes();
        token.setPos(posAnno);

        Lemma dkproLemma = new Lemma(jCas, begin, end);
        dkproLemma.setValue(lemma);
        dkproLemma.addToIndexes();

        token.setLemma(dkproLemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag == null)
            continue;
        if (neTag.equals("O") && !lastNETag.equals("O")) {

            Type type = nerMappingProvider.getTagType(lastNETag);
            NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
            neAnno.setValue(lastNETag);
            neAnno.addToIndexes();

        } else {
            if (lastNETag.equals("O")) {
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {

                Type type = nerMappingProvider.getTagType(lastNETag);
                NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
                neAnno.setValue(lastNETag);
                neAnno.addToIndexes();

                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    if (!lastNETag.equals("O")) {

        Type type = nerMappingProvider.getTagType(lastNETag);
        NamedEntity neAnno = (NamedEntity) cas.createAnnotation(type, lastNEBegin, lastNEEnd);
        neAnno.setValue(lastNETag);
        neAnno.addToIndexes();

    }

    // add sentences and trees
    List<CoreMap> sentenceAnnotations = document.get(SentencesAnnotation.class);
    for (CoreMap sentenceAnn : sentenceAnnotations) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

    }

    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            CoreferenceLink last = null;
            for (CorefMention mention : chain.getMentionsInTextualOrder()) {
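                // CorefMention offsets are 1-based and endIndex is exclusive, hence the -1 / -2 below.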

                CoreLabel beginLabel = sentenceAnnotations.get(mention.sentNum - 1).get(TokensAnnotation.class)
                        .get(mention.startIndex - 1);
                CoreLabel endLabel = sentenceAnnotations.get(mention.sentNum - 1).get(TokensAnnotation.class)
                        .get(mention.endIndex - 2);
                CoreferenceLink link = new CoreferenceLink(jCas,
                        beginLabel.get(CharacterOffsetBeginAnnotation.class),
                        endLabel.get(CharacterOffsetEndAnnotation.class));

                if (mention.mentionType != null) {
                    link.setReferenceType(mention.mentionType.toString());
                }

                if (last == null) {
                    // This is the first mention. Here we'll initialize the chain
                    CoreferenceChain corefChain = new CoreferenceChain(jCas);
                    corefChain.setFirst(link);
                    corefChain.addToIndexes();
                } else {
                    // For the other mentions, we'll add them to the chain.
                    last.setNext(link);
                }
                last = link;

                link.addToIndexes();
            }
        }
    }

}

From source file:nlp.pipeline.SentenceUtil.java

License:Open Source License

/** ***************************************************************
 *  Print the coreference link graph.
 *  Each chain stores a set of mentions that link to each other,
 *  along with a method for getting the most representative mention
 *  Both sentence and token offsets start at 1!
 */
public static void printCorefChain(Annotation document) {

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);
    if (graph == null)
        return;
    for (CorefChain cc : graph.values()) {
        List<CorefChain.CorefMention> mentions = cc.getMentionsInTextualOrder();
        if (mentions.size() > 1) {
            for (CorefChain.CorefMention ment : mentions) {
                System.out.println(ment.sentNum + " : " + ment.headIndex + " : " + ment.mentionSpan);
            }
            System.out.println();
        }
    }
}

From source file:org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java

License:Open Source License

@SuppressWarnings("unused")
private void newStyleCoreferenceGraphOutput(Annotation annotation) {
    // display the new-style coreference graph
    //List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains != null) {
        for (CorefChain chain : corefChains.values()) {
            CorefChain.CorefMention representative = chain.getRepresentativeMention();
            for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
                System.out.println(mention);
                if (mention == representative)
                    continue;
                // all offsets start at 1!
                System.out.println("\t" + mention.mentionID + ": (Mention from sentence " + mention.sentNum
                        + ", " + "Head word = " + mention.headIndex + ", (" + mention.startIndex + ","
                        + mention.endIndex + ")" + ")" + " -> " + "(Representative from sentence "
                        + representative.sentNum + ", " + "Head word = " + representative.headIndex + ", ("
                        + representative.startIndex + "," + representative.endIndex + ")" + "), that is: \""
                        + mention.mentionSpan + "\" -> \"" + representative.mentionSpan + "\"");
            }
        }
    }
}