edu.stanford.nlp.dcoref.CorefChain.java Source code

Java tutorial

Introduction

Here is the source code for edu.stanford.nlp.dcoref.CorefChain.java

Source

//
// StanfordCoreNLP -- a suite of NLP tools
// Copyright (c) 2009-2010 The Board of Trustees of
// The Leland Stanford Junior University. All Rights Reserved.
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
//
// For more information, bug reports, fixes, contact:
//    Christopher Manning
//    Dept of Computer Science, Gates 1A
//    Stanford CA 94305-9010
//    USA
//

package edu.stanford.nlp.dcoref;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.dcoref.Dictionaries.Animacy;
import edu.stanford.nlp.dcoref.Dictionaries.Gender;
import edu.stanford.nlp.dcoref.Dictionaries.MentionType;
import edu.stanford.nlp.dcoref.Dictionaries.Number;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.IntTuple;

/**
 * Output of (deterministic) coref system.  Each CorefChain represents a set
 * of mentions in the text which should all correspond to the same actual
 * entity.  There is a representative mention, which stores the best
 * mention of an entity, and then there is a List of all mentions
 * that are coreferent with that mention. The mentionMap maps from pairs of
 * a sentence number and a head word index to the set of CorefMentions that
 * share that head word. The chainID is an arbitrary integer for the chain
 * number.
 * <p>
 * The list and the map are two views of the same mentions: every mention in
 * the list is also stored in the map under the key
 * {@code (sentNum, headIndex)}.
 *
 * @author Heeyoung Lee
 */
public class CorefChain implements Serializable {

    private final int chainID;
    /** All mentions of this chain, sorted with {@link CorefMentionComparator}. */
    private final List<CorefMention> mentions;
    /** Mentions grouped by (sentence number, head word index). */
    private final Map<IntPair, Set<CorefMention>> mentionMap;

    /** The most representative mention in this cluster */
    private final CorefMention representative;

    /**
     * Two chains are equal when they have the same chain ID, equal mention
     * lists and equal (or both absent) representative mentions.
     */
    @Override
    public boolean equals(Object aThat) {
        if (this == aThat) {
            return true;
        }
        if (!(aThat instanceof CorefChain)) {
            return false;
        }
        CorefChain that = (CorefChain) aThat;
        if (chainID != that.chainID) {
            return false;
        }
        if (!mentions.equals(that.mentions)) {
            return false;
        }
        // mentionMap is another view of mentions, so no need to compare
        // that once we've compared mentions
        if (representative == null) {
            return that.representative == null;
        }
        return representative.equals(that.representative);
    }

    /** Hash code derived from the mention list only; consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return mentions.hashCode();
    }

    /**
     * Get the list of CorefMentions, in the order produced by
     * {@link CorefMentionComparator}.  The returned list is the internal
     * list, not a copy.
     */
    public List<CorefMention> getMentionsInTextualOrder() {
        return mentions;
    }

    /**
     * Get CorefMentions by position (sentence number, headIndex).
     * There can be multiple mentions sharing a head word.
     *
     * @param position pair of (sentence number, head word index)
     * @return the set stored in the mention map for that position, or
     *         whatever the map returns for a missing key (null for a
     *         standard {@code Map})
     */
    public Set<CorefMention> getMentionsWithSameHead(IntPair position) {
        return mentionMap.get(position);
    }

    /**
     * Get CorefMentions by position; convenience overload of
     * {@link #getMentionsWithSameHead(IntPair)}.
     *
     * @param sentenceNumber sentence number of the head word
     * @param headIndex index of the head word within the sentence
     */
    public Set<CorefMention> getMentionsWithSameHead(int sentenceNumber, int headIndex) {
        return getMentionsWithSameHead(new IntPair(sentenceNumber, headIndex));
    }

    /** Return the internal map from (sentence number, head index) to mentions; not a copy. */
    public Map<IntPair, Set<CorefMention>> getMentionMap() {
        return mentionMap;
    }

    /** Return the most representative mention in the chain.
     *  Proper mention and a mention with more pre-modifiers are preferred.
     */
    public CorefMention getRepresentativeMention() {
        return representative;
    }

    /** Return the identifier of this chain. */
    public int getChainID() {
        return chainID;
    }

    /** Mention for coref output.  This is one instance of the entity
     * referred to by a given CorefChain.
     */
    public static class CorefMention implements Serializable {
        public final MentionType mentionType;
        public final Number number;
        public final Gender gender;
        public final Animacy animacy;

        /**
         * Starting word number, indexed from 1
         */
        public final int startIndex;
        /**
         * One past the end word number, indexed from 1
         */
        public final int endIndex;
        /**
         * Head word of the mention
         */
        public final int headIndex;
        public final int corefClusterID;
        public final int mentionID;
        /**
         * Sentence number in the document containing this mention,
         * indexed from 1.
         */
        public final int sentNum;
        /**
         * Position is a binary tuple of (sentence number, mention number
         * in that sentence).  This is used for indexing by mention.
         */
        public final IntTuple position;
        public final String mentionSpan;

        /**
         * This constructor is used to recreate a CorefMention following serialization.
         * All values are stored exactly as given; no index shifting or
         * validation is performed.
         */
        public CorefMention(MentionType mentionType, Number number, Gender gender, Animacy animacy, int startIndex,
                int endIndex, int headIndex, int corefClusterID, int mentionID, int sentNum, IntTuple position,
                String mentionSpan) {
            this.mentionType = mentionType;
            this.number = number;
            this.gender = gender;
            this.animacy = animacy;
            this.startIndex = startIndex;
            this.endIndex = endIndex;
            this.headIndex = headIndex;
            this.corefClusterID = corefClusterID;
            this.mentionID = mentionID;
            this.sentNum = sentNum;
            this.position = position;
            this.mentionSpan = mentionSpan;
        }

        /**
         * This constructor builds the external CorefMention class from the internal Mention.
         * Word, sentence and position indices are shifted by one so that the
         * external representation is indexed from 1.
         * <p>
         * Side effect: the head word of {@code m} is annotated with the
         * mention's coref cluster ID.
         *
         * @param m the internal mention to convert
         * @param pos position tuple of the mention; both components are incremented by one
         */
        public CorefMention(Mention m, IntTuple pos) {
            mentionType = m.mentionType;
            number = m.number;
            gender = m.gender;
            animacy = m.animacy;
            startIndex = m.startIndex + 1;
            endIndex = m.endIndex + 1;
            headIndex = m.headIndex + 1;
            corefClusterID = m.corefClusterID;
            sentNum = m.sentNum + 1;
            mentionID = m.mentionID;
            mentionSpan = m.spanToString();

            // index starts from 1
            position = new IntTuple(2);
            position.set(0, pos.get(0) + 1);
            position.set(1, pos.get(1) + 1);

            m.headWord.set(CorefCoreAnnotations.CorefClusterIdAnnotation.class, corefClusterID);
        }

        /**
         * Field-by-field equality on everything except {@link #mentionSpan}.
         */
        @Override
        public boolean equals(Object aThat) {
            if (this == aThat) {
                return true;
            }
            if (!(aThat instanceof CorefMention)) {
                return false;
            }
            CorefMention that = (CorefMention) aThat;
            if (mentionType != that.mentionType || number != that.number
                    || gender != that.gender || animacy != that.animacy) {
                return false;
            }
            if (startIndex != that.startIndex || endIndex != that.endIndex || headIndex != that.headIndex) {
                return false;
            }
            if (corefClusterID != that.corefClusterID || mentionID != that.mentionID || sentNum != that.sentNum) {
                return false;
            }
            // we ignore MentionSpan as it is constructed from the tokens
            // the mention is a span of, so if we know those spans are the
            // same, we should be able to ignore the actual text

            // The deserialization constructor does not validate position,
            // so tolerate null here rather than throwing.
            if (position == null) {
                return that.position == null;
            }
            return position.equals(that.position);
        }

        /** Hash code derived from the position only (0 when the position is absent). */
        @Override
        public int hashCode() {
            return position == null ? 0 : position.hashCode();
        }

        @Override
        public String toString() {
            return '"' + mentionSpan + "\" in sentence " + sentNum;
        }

        /**
         * Decide whether this mention is a better representative than {@code m}.
         * <p>
         * With different mention types, PROPER wins over anything else and
         * NOMINAL wins over PRONOMINAL.  With the same type, the checks are,
         * in order: more words before the head, longer span, earlier
         * sentence, smaller head index, smaller start index.  Full ties
         * return false.
         *
         * @param m the mention to compare against; null means "no candidate yet"
         * @return true if this mention should replace {@code m} as representative
         */
        private boolean moreRepresentativeThan(CorefMention m) {
            if (m == null) {
                return true;
            }
            if (mentionType != m.mentionType) {
                return (mentionType == MentionType.PROPER)
                        || (mentionType == MentionType.NOMINAL && m.mentionType == MentionType.PRONOMINAL);
            }

            // First, check length: words preceding the head, then the whole span
            int preHead = headIndex - startIndex;
            int otherPreHead = m.headIndex - m.startIndex;
            if (preHead != otherPreHead) {
                return preHead > otherPreHead;
            }
            int span = endIndex - startIndex;
            int otherSpan = m.endIndex - m.startIndex;
            if (span != otherSpan) {
                return span > otherSpan;
            }

            // Now check relative position: earlier in the text is preferred
            if (sentNum != m.sentNum) {
                return sentNum < m.sentNum;
            }
            if (headIndex != m.headIndex) {
                return headIndex < m.headIndex;
            }
            // At this point only the start index can differ; equal mentions yield false
            return startIndex < m.startIndex;
        }

        private static final long serialVersionUID = 3657691243504173L;

    } // end static class CorefMention

    /**
     * Orders CorefMentions by sentence number, then start index (both
     * ascending), then end index descending, so that of two mentions
     * starting at the same word the longer one comes first.
     */
    protected static class CorefMentionComparator implements Comparator<CorefMention> {
        @Override
        public int compare(CorefMention m1, CorefMention m2) {
            int bySentence = Integer.compare(m1.sentNum, m2.sentNum);
            if (bySentence != 0) {
                return bySentence;
            }
            int byStart = Integer.compare(m1.startIndex, m2.startIndex);
            if (byStart != 0) {
                return byStart;
            }
            // Arguments swapped on purpose: larger end index sorts first
            return Integer.compare(m2.endIndex, m1.endIndex);
        }
    }

    /**
     * Orders internal Mentions with the same criteria as
     * {@link CorefMentionComparator}: sentence number, start index
     * (ascending), then end index descending.
     */
    protected static class MentionComparator implements Comparator<Mention> {
        @Override
        public int compare(Mention m1, Mention m2) {
            int bySentence = Integer.compare(m1.sentNum, m2.sentNum);
            if (bySentence != 0) {
                return bySentence;
            }
            int byStart = Integer.compare(m1.startIndex, m2.startIndex);
            if (byStart != 0) {
                return byStart;
            }
            // Arguments swapped on purpose: larger end index sorts first
            return Integer.compare(m2.endIndex, m1.endIndex);
        }
    }

    /**
     * Delete a mention from this coreference chain.
     * <p>
     * The mention is removed from the mention list and from the set stored
     * under its (sentence number, head index) key.  Other mentions sharing
     * the same head word are kept; the key itself is dropped only once its
     * set becomes empty.  The representative mention is not recomputed.
     *
     * @param m The mention to delete.
     */
    public void deleteMention(CorefMention m) {
        this.mentions.remove(m);
        IntPair position = new IntPair(m.sentNum, m.headIndex);
        Set<CorefMention> sameHead = this.mentionMap.get(position);
        if (sameHead == null) {
            return;
        }
        sameHead.remove(m);
        if (sameHead.isEmpty()) {
            this.mentionMap.remove(position);
        }
    }

    /**
     * Build a chain from an internal coref cluster.
     *
     * @param c the cluster whose mentions form this chain; its cluster ID becomes the chain ID
     * @param positions position tuple for each mention of the cluster
     */
    public CorefChain(CorefCluster c, Map<Mention, IntTuple> positions) {
        chainID = c.clusterID;
        // Collect mentions
        mentions = new ArrayList<>();
        mentionMap = Generics.newHashMap();
        for (Mention m : c.getCorefMentions()) {
            mentions.add(new CorefMention(m, positions.get(m)));
        }
        Collections.sort(mentions, new CorefMentionComparator());

        // Index mentions by head position and find the representative mention
        CorefMention best = null;
        for (CorefMention men : mentions) {
            IntPair position = new IntPair(men.sentNum, men.headIndex);
            Set<CorefMention> sameHead = mentionMap.get(position);
            if (sameHead == null) {
                sameHead = Generics.<CorefMention>newHashSet();
                mentionMap.put(position, sameHead);
            }
            sameHead.add(men);
            if (men.moreRepresentativeThan(best)) {
                best = men;
            }
        }
        representative = best;
    }

    /**
     * Constructor required by CustomAnnotationSerializer.
     * The given map is stored as-is (not copied); the mention list is
     * rebuilt from its values and sorted.
     *
     * @param cid the chain ID
     * @param mentionMap mentions grouped by (sentence number, head index)
     * @param representative the representative mention of the chain
     */
    public CorefChain(int cid, Map<IntPair, Set<CorefMention>> mentionMap, CorefMention representative) {
        this.chainID = cid;
        this.representative = representative;
        this.mentionMap = mentionMap;
        this.mentions = new ArrayList<>();
        for (Set<CorefMention> ms : mentionMap.values()) {
            this.mentions.addAll(ms);
        }
        Collections.sort(mentions, new CorefMentionComparator());
    }

    @Override
    public String toString() {
        return "CHAIN" + this.chainID + '-' + mentions;
    }

    private static final long serialVersionUID = 3657691243506528L;

}