List of usage examples for the get method of edu.stanford.nlp.util.CollectionValuedMap
@Override
public Collection<V> get(Object key)
From source file: knu.univ.lingvo.coref.CoNLLMentionExtractor.java
License: Open Source License
public List<List<Mention>> extractGoldMentions(CoNLL2011DocumentReader.Document conllDoc) { List<CoreMap> sentences = conllDoc.getAnnotation().get(CoreAnnotations.SentencesAnnotation.class); List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>(); CollectionValuedMap<String, CoreMap> corefChainMap = conllDoc.getCorefChainMap(); for (int i = 0; i < sentences.size(); i++) { allGoldMentions.add(new ArrayList<Mention>()); }//from www. j a v a2s . c o m int maxCorefClusterId = -1; for (String corefIdStr : corefChainMap.keySet()) { int id = Integer.parseInt(corefIdStr); if (id > maxCorefClusterId) { maxCorefClusterId = id; } } int newMentionID = maxCorefClusterId + 1; for (String corefIdStr : corefChainMap.keySet()) { int id = Integer.parseInt(corefIdStr); int clusterMentionCnt = 0; for (CoreMap m : corefChainMap.get(corefIdStr)) { clusterMentionCnt++; Mention mention = new Mention(); mention.goldCorefClusterID = id; if (clusterMentionCnt == 1) { // First mention in cluster mention.mentionID = id; mention.originalRef = -1; } else { mention.mentionID = newMentionID; mention.originalRef = id; newMentionID++; } if (maxID < mention.mentionID) maxID = mention.mentionID; int sentIndex = m.get(CoreAnnotations.SentenceIndexAnnotation.class); CoreMap sent = sentences.get(sentIndex); mention.startIndex = m.get(CoreAnnotations.TokenBeginAnnotation.class) - sent.get(CoreAnnotations.TokenBeginAnnotation.class); mention.endIndex = m.get(CoreAnnotations.TokenEndAnnotation.class) - sent.get(CoreAnnotations.TokenBeginAnnotation.class); // will be set by arrange mention.originalSpan = m.get(CoreAnnotations.TokensAnnotation.class); // Mention dependency is collapsed dependency for sentence mention.dependency = sentences.get(sentIndex) .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); allGoldMentions.get(sentIndex).add(mention); } } return allGoldMentions; }
From source file: knu.univ.lingvo.coref.Document.java
License: Open Source License
/** Mark twin mentions: All mention boundaries should be matched */ private void findTwinMentionsStrict() { for (int sentNum = 0; sentNum < goldOrderedMentionsBySentence.size(); sentNum++) { List<Mention> golds = goldOrderedMentionsBySentence.get(sentNum); List<Mention> predicts = predictedOrderedMentionsBySentence.get(sentNum); // For CoNLL training there are some documents with gold mentions with the same position offsets // See /scr/nlp/data/conll-2011/v2/data/train/data/english/annotations/nw/wsj/09/wsj_0990.v2_auto_conll // (Packwood - Roth) CollectionValuedMap<IntPair, Mention> goldMentionPositions = new CollectionValuedMap<IntPair, Mention>(); for (Mention g : golds) { IntPair ip = new IntPair(g.startIndex, g.endIndex); if (goldMentionPositions.containsKey(ip)) { StringBuilder existingMentions = new StringBuilder(); for (Mention eg : goldMentionPositions.get(ip)) { if (existingMentions.length() > 0) { existingMentions.append(","); }/* www. j a v a 2 s.c o m*/ existingMentions.append(eg.mentionID); } SieveCoreferenceSystem.logger.warning("WARNING: gold mentions with the same offsets: " + ip + " mentions=" + g.mentionID + "," + existingMentions + ", " + g.spanToString()); } //assert(!goldMentionPositions.containsKey(ip)); goldMentionPositions.add(new IntPair(g.startIndex, g.endIndex), g); } for (Mention p : predicts) { IntPair pos = new IntPair(p.startIndex, p.endIndex); if (goldMentionPositions.containsKey(pos)) { Collection<Mention> cm = goldMentionPositions.get(pos); Mention g = cm.iterator().next(); cm.remove(g); p.mentionID = g.mentionID; p.twinless = false; g.twinless = false; } } // temp: for making easy to recognize twinless mention for (Mention p : predicts) { if (p.twinless) p.mentionID += 10000; } } }