Example usage for edu.stanford.nlp.dcoref CorefChain getRepresentativeMention

List of usage examples for edu.stanford.nlp.dcoref CorefChain getRepresentativeMention

Introduction

In this page you can find the example usage for edu.stanford.nlp.dcoref CorefChain getRepresentativeMention.

Prototype

public CorefMention getRepresentativeMention() 

Source Link

Document

Return the most representative mention in the chain.

Usage

From source file: be.fivebyfive.lingua.stanfordcorenlp.Pipeline.java

License: Open Source License

/**
 * Runs the CoreNLP pipeline over {@code text} and converts the result into
 * the project's own sentence/token/dependency/coreference representation.
 *
 * @param text raw input text to annotate
 * @return a PipelineSentenceList with one PipelineSentence per CoreNLP
 *         sentence, each carrying its tokens, dependency edges and any
 *         coreference chains (attached to the representative mention's
 *         sentence)
 */
public PipelineSentenceList process(String text) {
    if (pipeline == null) {
        initPipeline();
    }

    PipelineSentenceList outList = new PipelineSentenceList();
    // new Annotation(text) can never produce null, so the original
    // "if (document == null) return null" dead branch has been removed.
    Annotation document = new Annotation(text);

    pipeline.annotate(document);

    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        String str = sentence.get(TextAnnotation.class);
        PipelineTokenList ptl = new PipelineTokenList();
        PipelineDependencyList pel = new PipelineDependencyList();

        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            String word = token.get(TextAnnotation.class);
            String pos = token.get(PartOfSpeechAnnotation.class);
            String ner = token.get(NamedEntityTagAnnotation.class);
            String lemma = token.get(LemmaAnnotation.class);

            ptl.add(new PipelineToken(word, pos, ner, lemma));
        }

        // Pick the dependency representation configured on this instance.
        SemanticGraph dependencies = sentence.get(depMode.equals(DEP_BASIC) ? BasicDependenciesAnnotation.class
                : depMode.equals(DEP_COLLAPSED) ? CollapsedDependenciesAnnotation.class
                        : CollapsedCCProcessedDependenciesAnnotation.class);

        if (dependencies != null) {
            for (SemanticGraphEdge edge : dependencies.edgeListSorted()) {
                GrammaticalRelation rel = edge.getRelation();

                // CoreNLP token indices are 1-based; our token list is 0-based.
                int govTokenIndex = edge.getGovernor().index() - 1;
                int depTokenIndex = edge.getDependent().index() - 1;

                if (govTokenIndex >= 0 && depTokenIndex >= 0 && govTokenIndex < ptl.size()
                        && depTokenIndex < ptl.size()) {
                    pel.add(new PipelineDependency(ptl.get(govTokenIndex), ptl.get(depTokenIndex),
                            govTokenIndex, depTokenIndex, rel));
                } else {
                    System.err.println("Index of " + edge.toString() + " out of range!");
                }
            }
        }
        outList.add(new PipelineSentence(str, ptl, pel));
    } //for -- SentenceAnnotation

    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);

    if (graph != null) {
        for (CorefChain crc : graph.values()) {
            CorefMention rm = crc.getRepresentativeMention();

            if (rm != null) {
                PipelineCorefChain crChain = new PipelineCorefChain();
                PipelineCorefMention repRef = PipelineCorefMention.fromMention(rm);
                repRef.setTokens(outList.get(repRef.getSentNum()).getTokens().slice(repRef.getStartIndex(),
                        repRef.getEndIndex()));
                repRef.setHeadToken(outList.get(repRef.getSentNum()).getTokens().get(repRef.getHeadIndex()));
                crChain.setRepresentativeMention(repRef);
                // An empty mention list simply skips the loop, so the original
                // size() > 0 guard was redundant and has been dropped.
                for (CorefMention cm : crc.getMentionsInTextualOrder()) {
                    PipelineCorefMention cr = PipelineCorefMention.fromMention(cm);
                    cr.setTokens(outList.get(cr.getSentNum()).getTokens().slice(cr.getStartIndex(),
                            cr.getEndIndex()));
                    crChain.addMention(cr);
                }
                outList.get(repRef.getSentNum()).addCorefChain(crChain);
            }
        }
    }

    return outList;
}

From source file: com.project.NLP.Requirement.AnaphoraAnalyzer.java

/**
 * Rewrites animate pronouns in {@code wordsFromDoc} with the representative
 * mention of their coreference chain, then returns the reassembled document.
 *
 * Reads the instance fields {@code graph} (CoreNLP coreference chains) and
 * {@code wordsFromDoc} (per-sentence word arrays), mutating the latter.
 *
 * @return the document text after pronoun resolution
 */
public String doPronounResolving() {
    // Iterate the chains directly: chain ids are NOT guaranteed to be the
    // contiguous range 1..graph.size(), so the original indexed loop
    // (graph.get(1)..graph.get(size)) could silently skip chains.
    for (CorefChain cc : graph.values()) {
        if (cc == null) {
            continue;
        }
        Map<IntPair, Set<CorefChain.CorefMention>> mentionMap = cc.getMentionMap();
        int mentionSize = mentionMap.size();

        String mentionSpan = cc.getRepresentativeMention().mentionSpan;
        String animacy = cc.getRepresentativeMention().animacy.toString();

        // Only chains with an animate representative and more than one
        // mention position can contain pronouns worth replacing.
        if (animacy.equalsIgnoreCase("ANIMATE") && mentionSize > 1) {
            for (Map.Entry<IntPair, Set<CorefChain.CorefMention>> entry : mentionMap.entrySet()) {
                IntPair ip = entry.getKey();
                // The IntPair key is (sentence number, head index), 1-based;
                // wordsFromDoc is 0-based in both dimensions.
                int sentenceIndex = ip.getSource() - 1;
                int wordIndex = ip.getTarget() - 1;
                for (CorefChain.CorefMention cm : entry.getValue()) {
                    String mentionPronoun = cm.mentionSpan;
                    try {
                        String docWord = wordsFromDoc.get(sentenceIndex)[wordIndex];
                        if (mentionPronoun.equalsIgnoreCase(docWord)) {
                            wordsFromDoc.get(sentenceIndex)[wordIndex] = mentionSpan;
                        }
                    } catch (ArrayIndexOutOfBoundsException ignored) {
                        // Best effort: mentions pointing outside the word
                        // table are skipped, matching the original behavior.
                    }
                }
            }
        }
    }

    return getPronounResolvedDocument();
}

From source file: edu.jhu.hlt.concrete.stanford.CorefManager.java

License: Open Source License

/**
 * Converts a CoreNLP coreference chain into a Concrete {@link Entity},
 * registering one EntityMention per chain mention in {@code ems}. The chain's
 * representative mention supplies the entity's canonical name.
 *
 * @param chain         the CoreNLP coreference chain to convert
 * @param ems           the mention set that collects all created mentions
 * @param tokenizations per-sentence tokenizations, 0-based
 * @return the populated Entity
 * @throws AnalyticException if a mention's token references are invalid
 */
private Entity makeEntity(CorefChain chain, EntityMentionSet ems, List<Tokenization> tokenizations)
        throws AnalyticException {
    Entity entity = new Entity().setUuid(this.gen.next());
    CorefChain.CorefMention representative = chain.getRepresentativeMention();

    // CoreNLP sentence numbers are 1-based; the tokenization list is 0-based.
    Tokenization headTkz = tokenizations.get(representative.sentNum - 1);
    UUID headTkzUuid = headTkz.getUuid();
    LOGGER.debug("Creating EntityMention based on tokenization: {}", headTkzUuid.getUuidString());

    EntityMention headMention = makeEntityMention(representative, headTkzUuid, true);
    // Throws if the token references do not fit this tokenization.
    this.validateTokenRefSeqValidity(headMention.getTokens(), headTkz);

    entity.setCanonicalName(representative.mentionSpan);
    entity.addToMentionIdList(headMention.getUuid());
    ems.addToMentionList(headMention);

    for (CorefChain.CorefMention m : chain.getMentionsInTextualOrder()) {
        // The representative appears in textual order too; it is already added.
        if (m == representative) {
            continue;
        }
        Tokenization tkz = tokenizations.get(m.sentNum - 1);
        EntityMention em = this.makeEntityMention(m, tkz.getUuid(), false);
        this.validateTokenRefSeqValidity(em.getTokens(), tkz);

        ems.addToMentionList(em);
        entity.addToMentionIdList(em.getUuid());
    }
    return entity;
}

From source file: it.uniud.ailab.dcore.wrappers.external.StanfordBootstrapperAnnotator.java

License: Open Source License

/**
 * Annotate the document by splitting the document, tokenizing it,
 * performing PoS tagging and Named Entity Recognition using the Stanford
 * Core NLP tools./* www .  j  a  v  a 2  s  .  c  o  m*/
 *
 * @param component the component to annotate.
 */
@Override
public void annotate(Blackboard blackboard, DocumentComponent component) {

    if (pipeline == null) {
        // creates a StanfordCoreNLP object, with POS tagging, lemmatization, 
        //NER, parsing, and coreference resolution 
        Properties props = new Properties();
        props.put("annotators", "tokenize, ssplit, pos, parse, lemma, ner, dcoref");
        pipeline = new StanfordCoreNLP(props);

    }

    // read some text in the text variable
    String text = component.getText();

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(text);

    // run all Annotators on this text
    pipeline.annotate(document);

    //get the graph for coreference resolution
    Map<Integer, CorefChain> graph = document.get(CorefCoreAnnotations.CorefChainAnnotation.class);

    //prepare the map for coreference graph of document
    Map<String, Collection<Set<CorefChain.CorefMention>>> coreferenceGraph = new HashMap<>();

    for (CorefChain corefChain : graph.values()) {

        //get the representative mention, that is the word recall in other sentences
        CorefChain.CorefMention cm = corefChain.getRepresentativeMention();

        //eliminate auto-references
        if (corefChain.getMentionMap().size() <= 1) {
            continue;
        }

        //get the stemmed form of the references, so the comparison with 
        //grams will be easier
        List<CoreLabel> tks = document.get(SentencesAnnotation.class).get(cm.sentNum - 1)
                .get(TokensAnnotation.class);
        //list of tokens which compose the anaphor

        List<Token> anaphorsTokens = new ArrayList<>();
        for (int i = cm.startIndex - 1; i < cm.endIndex - 1; i++) {
            CoreLabel current = tks.get(i);
            Token t = new Token(current.word());
            t.setPoS(current.tag());
            t.setLemma(current.lemma());
            anaphorsTokens.add(t);
        }

        //the mention n-gram which is formed by the anaphor and a 
        //list of references
        Mention mention = new Mention(cm.mentionSpan, anaphorsTokens, cm.mentionSpan);

        //get map of the references to the corefchain obj
        Collection<Set<CorefChain.CorefMention>> mentionMap = corefChain.getMentionMap().values();
        for (Set<CorefChain.CorefMention> mentions : mentionMap) {

            for (CorefChain.CorefMention reference : mentions) {
                //eliminate self-references
                if (reference.mentionSpan.equalsIgnoreCase(cm.mentionSpan)) {
                    continue;
                }
                List<CoreLabel> tokens = document.get(SentencesAnnotation.class).get(reference.sentNum - 1)
                        .get(TokensAnnotation.class);

                //list of tokens which compose the mention
                List<Token> mentionTokens = new ArrayList<>();
                for (int i = reference.startIndex - 1; i < reference.endIndex - 1; i++) {
                    CoreLabel current = tokens.get(i);
                    //set token features 
                    Token t = new Token(current.word());
                    t.setPoS(current.tag());
                    t.setLemma(current.lemma());
                    mentionTokens.add(t);
                }
                //add to mention a new reference
                mention.addReference(reference.mentionSpan, mentionTokens, reference.mentionType.toString());
            }
        }

        //assign to the document a new corenference obj
        //containing the anaphor and its mentions 
        blackboard.addGram(mention);
    }

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and 
    //has values with custom types
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);

    //A counter that keeps track of the number of phrases in a sentences
    int phraseCounter = 0;

    for (CoreMap stanfordSentence : sentences) {

        Sentence distilledSentence = new Sentence(stanfordSentence.toString(), "" + sentenceCounter++);

        distilledSentence.setLanguage(Locale.ENGLISH);

        //getting the dependency graph of the document so to count the number of phrases 
        //ROOT sentences are the first level children in the parse tree; every ROOT sentence
        //is constitute by a group of clauses which can be the principal (main clauses) or not
        //(coordinate and subordinate). We use ROOT sentences as a starting point to find out all
        //the phrases present in the sentences themselves, checking out for the tag "S".
        Tree sentenceTree = stanfordSentence.get(TreeCoreAnnotations.TreeAnnotation.class);

        for (Tree sub : sentenceTree.subTreeList()) {
            if (sub.label().value().equals("S")) {
                phraseCounter++;
            }
        }

        //annotate the sentence with a new feature counting all the phrases
        //cointained in the sentence    
        distilledSentence.addAnnotation(new FeatureAnnotation(DefaultAnnotations.PHRASES_COUNT, phraseCounter));

        // traversing the words in the current sentence
        // for each token in the text, we create a new token annotate it 
        // with the word representing it, its pos tag and its lemma
        for (CoreLabel token : stanfordSentence.get(TokensAnnotation.class)) {

            // this is the text of the token
            Token t = new Token(token.originalText());

            // this is the POS tag of the token                
            t.setPoS(token.tag());

            // this is the lemma of the ttoken
            t.setLemma(token.lemma());

            String ner = token.get(NamedEntityTagAnnotation.class);
            if (!ner.equalsIgnoreCase("O")) {
                t.addAnnotation(new NERAnnotation(DefaultAnnotations.IS_NER, ner));
            }
            //add the token to the sentence
            distilledSentence.addToken(t);
        }

        //add the sentence to document
        ((DocumentComposite) component).addComponent(distilledSentence);
    }
}

From source file: org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java

License: Open Source License

/**
 * Prints each coreference chain in the new (CorefChain-based) representation:
 * every mention is printed, and every non-representative mention is followed
 * by a line linking it back to the chain's representative mention.
 * All CoreNLP mention offsets printed here are 1-based.
 */
@SuppressWarnings("unused")
private void newStyleCoreferenceGraphOutput(Annotation annotation) {
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    if (corefChains == null) {
        return;
    }
    for (CorefChain chain : corefChains.values()) {
        CorefChain.CorefMention representative = chain.getRepresentativeMention();
        for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
            System.out.println(mention);
            if (mention == representative) {
                continue;
            }
            StringBuilder line = new StringBuilder("\t");
            line.append(mention.mentionID)
                    .append(": (Mention from sentence ").append(mention.sentNum)
                    .append(", Head word = ").append(mention.headIndex)
                    .append(", (").append(mention.startIndex).append(",").append(mention.endIndex)
                    .append("))")
                    .append(" -> (Representative from sentence ").append(representative.sentNum)
                    .append(", Head word = ").append(representative.headIndex)
                    .append(", (").append(representative.startIndex).append(",").append(representative.endIndex)
                    .append("))")
                    .append(", that is: \"").append(mention.mentionSpan)
                    .append("\" -> \"").append(representative.mentionSpan).append("\"");
            System.out.println(line);
        }
    }
}

From source file: org.ets.research.nlp.stanford_thrift.coref.StanfordCorefThrift.java

License: Open Source License

/**
 * Renders coreference output in MUC style: every sentence that contains at
 * least one mention is emitted with {@code <COREF ID=...>} tags (plus a
 * {@code REF=...} attribute for non-representative mentions) wrapped around
 * the mention spans.
 *
 * @param annotation a fully annotated document (requires dcoref output)
 * @return one MUC-tagged string per sentence containing mentions
 */
private List<String> MUCStyleOutput(Annotation annotation) {
    Map<Integer, CorefChain> corefChains = annotation.get(CorefCoreAnnotations.CorefChainAnnotation.class);
    // sentence number -> (mention start index -> (mention, its representative))
    Map<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>> mentionMap = new HashMap<>();

    List<String> mucOutput = new ArrayList<>();

    for (CorefChain chain : corefChains.values()) {
        CorefChain.CorefMention ref = chain.getRepresentativeMention();

        for (CorefChain.CorefMention mention : chain.getMentionsInTextualOrder()) {
            if (mention != ref) {
                // Register the mention, then make sure the representative is
                // registered too (paired with itself, so it carries no REF
                // attribute below). The duplicated map-insertion logic from
                // the original TODO now lives in addMentionToMap.
                addMentionToMap(mentionMap, mention, ref);
                addMentionToMap(mentionMap, ref, ref);
            }
        }
    }

    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    for (Integer sentenceNum : mentionMap.keySet()) {
        // CoreNLP sentence numbers are 1-based.
        CoreMap currentSentence = sentences.get(sentenceNum - 1);
        Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>> currentSetOfMentions = mentionMap
                .get(sentenceNum);
        CorefChain.CorefMention lastMention = null;
        StringBuilder outputString = new StringBuilder();
        for (CoreLabel token : currentSentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Pair<CorefChain.CorefMention, CorefChain.CorefMention> pair = currentSetOfMentions
                    .get(token.index());
            if (pair != null) {
                lastMention = pair.first();
                CorefChain.CorefMention ref = pair.second();
                outputString.append("<COREF ID=\"").append(lastMention.mentionID).append("\"");
                if (lastMention.mentionID != ref.mentionID) {
                    outputString.append(" REF=\"").append(ref.mentionID).append("\"");
                }
                outputString.append(">");
            }
            // Close the most recently opened mention at its end index.
            if (lastMention != null && token.index() == lastMention.endIndex) {
                outputString.append("</COREF> ");
            }
            outputString.append(token.word()).append(" ");
        }
        mucOutput.add(CoreNLPThriftUtil.closeHTMLTags(outputString.toString().replaceAll(" </", "</")));
    }

    return mucOutput;
}

/**
 * Records {@code mention} (paired with its representative {@code ref}) in the
 * per-sentence, per-start-index lookup table used by {@link #MUCStyleOutput}.
 */
private static void addMentionToMap(
        Map<Integer, Map<Integer, Pair<CorefChain.CorefMention, CorefChain.CorefMention>>> mentionMap,
        CorefChain.CorefMention mention, CorefChain.CorefMention ref) {
    mentionMap.computeIfAbsent(mention.sentNum, k -> new HashMap<>())
            .put(mention.startIndex, new Pair<>(mention, ref));
}

From source file: parsers.CoreNLPSuite.java

/**
 * Produces a human-readable summary of the coreference chains found in
 * {@code input}: for every chain with more than one mention, lists the
 * representative mention and each distinct co-referring mention.
 *
 * @param input raw text to annotate
 * @return the formatted summary (empty when no multi-mention chains exist)
 */
public static String resolveCoreferences(String input) {
    StringBuilder ans = new StringBuilder();
    Annotation document = new Annotation(input);
    Parsers.pipeline.annotate(document);
    Map<Integer, CorefChain> graph = document.get(CorefChainAnnotation.class);

    for (Map.Entry<Integer, CorefChain> entry : graph.entrySet()) {
        CorefChain chain = entry.getValue();
        // Single-mention chains only reference themselves; skip them.
        if (chain.getMentionsInTextualOrder().size() <= 1) {
            continue;
        }
        CorefMention representative = chain.getRepresentativeMention();

        // Rebuild the representative mention's surface text from its tokens
        // (CoreNLP indices are 1-based and end-exclusive).
        List<CoreLabel> tokens = document.get(SentencesAnnotation.class)
                .get(representative.sentNum - 1).get(TokensAnnotation.class);
        StringBuilder repText = new StringBuilder();
        for (int i = representative.startIndex - 1; i < representative.endIndex - 1; i++) {
            repText.append(tokens.get(i).get(TextAnnotation.class)).append(" ");
        }
        String repSpan = repText.toString().trim();

        ans.append("Representative mention: \"").append(repSpan).append("\" is mentioned by:");
        for (CorefMention m : chain.getMentionsInTextualOrder()) {
            tokens = document.get(SentencesAnnotation.class).get(m.sentNum - 1).get(TokensAnnotation.class);
            StringBuilder mentionText = new StringBuilder();
            for (int i = m.startIndex - 1; i < m.endIndex - 1; i++) {
                mentionText.append(tokens.get(i).get(TextAnnotation.class)).append(" ");
            }
            String mentionSpan = mentionText.toString().trim();
            // The representative itself appears in textual order; skip it.
            if (repSpan.equals(mentionSpan)) {
                continue;
            }
            ans.append("\t").append(mentionSpan).append("\n");
        }
    }
    return ans.toString();
}