Example usage for edu.stanford.nlp.util CollectionValuedMap get

List of usage examples for edu.stanford.nlp.util CollectionValuedMap get

Introduction

On this page you can find example usages of the get method of edu.stanford.nlp.util.CollectionValuedMap.

Prototype

@Override
public Collection<V> get(Object key) 

Source Link

Usage

From source file:knu.univ.lingvo.coref.CoNLLMentionExtractor.java

License:Open Source License

/**
 * Builds the gold mentions of a CoNLL document, grouped by sentence.
 *
 * <p>Every entry of the document's coref chain map becomes one {@link Mention}. The first
 * mention seen in a cluster takes the cluster id as its mention id (with {@code originalRef}
 * set to -1); each further mention of that cluster receives a fresh id, allocated upwards
 * from one past the largest cluster id, and points back to the cluster id through
 * {@code originalRef}. The {@code maxID} variable of the enclosing scope is raised whenever
 * a larger mention id is handed out.
 *
 * @param conllDoc the CoNLL document supplying the sentences and the coref chain map
 * @return one list of gold mentions per sentence, in sentence order; sentences without
 *         mentions get an empty list
 * @throws NumberFormatException if a coref chain key is not a parsable integer
 */
public List<List<Mention>> extractGoldMentions(CoNLL2011DocumentReader.Document conllDoc) {
    List<CoreMap> sentences = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class);
    CollectionValuedMap<String, CoreMap> corefChainMap = conllDoc.getCorefChainMap();

    // One (initially empty) bucket per sentence, addressed by sentence index below.
    List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
    for (int remaining = sentences.size(); remaining > 0; remaining--) {
        allGoldMentions.add(new ArrayList<Mention>());
    }

    // Fresh mention ids must not collide with any cluster id, so start above the largest one.
    int largestClusterId = -1;
    for (String clusterKey : corefChainMap.keySet()) {
        largestClusterId = Math.max(largestClusterId, Integer.parseInt(clusterKey));
    }
    int nextFreshId = largestClusterId + 1;

    for (String clusterKey : corefChainMap.keySet()) {
        final int clusterId = Integer.parseInt(clusterKey);
        boolean firstInCluster = true;

        for (CoreMap span : corefChainMap.get(clusterKey)) {
            Mention mention = new Mention();
            mention.goldCorefClusterID = clusterId;

            if (firstInCluster) {
                // The first mention of a cluster reuses the cluster id as its own id.
                firstInCluster = false;
                mention.mentionID = clusterId;
                mention.originalRef = -1;
            } else {
                mention.mentionID = nextFreshId++;
                mention.originalRef = clusterId;
            }
            if (mention.mentionID > maxID) {
                maxID = mention.mentionID;
            }

            int sentIndex = span.get(CoreAnnotations.SentenceIndexAnnotation.class);
            CoreMap sentence = sentences.get(sentIndex);

            // Token offsets are stored relative to the first token of the containing sentence.
            int sentenceTokenBegin = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
            mention.startIndex = span.get(CoreAnnotations.TokenBeginAnnotation.class) - sentenceTokenBegin;
            mention.endIndex = span.get(CoreAnnotations.TokenEndAnnotation.class) - sentenceTokenBegin;

            // will be set by arrange
            mention.originalSpan = span.get(CoreAnnotations.TokensAnnotation.class);

            // Mention dependency is collapsed dependency for sentence
            mention.dependency = sentence
                    .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

            allGoldMentions.get(sentIndex).add(mention);
        }
    }
    return allGoldMentions;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Mark twin mentions: All mention boundaries should be matched.
 *
 * <p>For every sentence, gold mentions are indexed by their (startIndex, endIndex) span.
 * Each predicted mention whose span exactly equals that of a not-yet-consumed gold mention
 * is paired with it: the predicted mention takes over the gold mention id and both are
 * flagged as not twinless. A gold mention is consumed by at most one predicted mention.
 * Predicted mentions that are still twinless afterwards get 10000 added to their mention id
 * so that they are easy to spot.
 */
private void findTwinMentionsStrict() {
    for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) {
        List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum);
        List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum);

        // For CoNLL training there are some documents with gold mentions with the same position offsets
        // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll
        //  (Packwood - Roth)
        CollectionValuedMap<IntPair, Mention> goldMentionPositions = new CollectionValuedMap<IntPair, Mention>();
        for (Mention g : golds) {
            IntPair ip = new IntPair(g.startIndex, g.endIndex);
            if (goldMentionPositions.containsKey(ip)) {
                StringBuilder existingMentions = new StringBuilder();
                for (Mention eg : goldMentionPositions.get(ip)) {
                    if (existingMentions.length() > 0) {
                        existingMentions.append(",");
                    }
                    existingMentions.append(eg.mentionID);
                }
                SieveCoreferenceSystem.logger.warning("WARNING: gold mentions with the same offsets: " + ip
                        + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.spanToString());
            }
            // Duplicate spans are tolerated (only warned about above), so all of them are kept.
            goldMentionPositions.add(ip, g);
        }
        for (Mention p : predicts) {
            IntPair pos = new IntPair(p.startIndex, p.endIndex);
            if (!goldMentionPositions.containsKey(pos)) {
                continue;
            }
            Collection<Mention> cm = goldMentionPositions.get(pos);
            // Consuming a gold mention below can leave the key mapped to an empty collection.
            // Without this guard a later predicted mention with the same span would call
            // next() on an exhausted iterator and fail with NoSuchElementException.
            if (cm.isEmpty()) {
                continue;
            }
            Mention g = cm.iterator().next();
            cm.remove(g);
            p.mentionID = g.mentionID;
            p.twinless = false;
            g.twinless = false;
        }
        // temp: for making easy to recognize twinless mention
        for (Mention p : predicts) {
            if (p.twinless) {
                p.mentionID += 10000;
            }
        }
    }
}