List of usage examples for edu.stanford.nlp.stats Counter getCount
double getCount(Object key);
From source file: knu.univ.lingvo.coref.ACEMentionExtractor.java
License: Open Source License
private static void printRawDoc(List<CoreMap> sentences, List<List<Mention>> allMentions, String filename, boolean gold) throws FileNotFoundException { StringBuilder doc = new StringBuilder(); int previousOffset = 0; Counter<Integer> mentionCount = new ClassicCounter<Integer>(); for (List<Mention> l : allMentions) { for (Mention m : l) { mentionCount.incrementCount(m.goldCorefClusterID); }/*from www. j a v a 2 s . co m*/ } for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); List<Mention> mentions = allMentions.get(i); String[] tokens = sentence.get(CoreAnnotations.TextAnnotation.class).split(" "); String sent = ""; List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class); if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) sent += "\n"; previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); Counter<Integer> startCounts = new ClassicCounter<Integer>(); Counter<Integer> endCounts = new ClassicCounter<Integer>(); Map<Integer, Set<Integer>> endID = Generics.newHashMap(); for (Mention m : mentions) { startCounts.incrementCount(m.startIndex); endCounts.incrementCount(m.endIndex); if (!endID.containsKey(m.endIndex)) endID.put(m.endIndex, Generics.<Integer>newHashSet()); endID.get(m.endIndex).add(m.goldCorefClusterID); } for (int j = 0; j < tokens.length; j++) { if (endID.containsKey(j)) { for (Integer id : endID.get(j)) { if (mentionCount.getCount(id) != 1 && gold) sent += "]_" + id; else sent += "]"; } } for (int k = 0; k < startCounts.getCount(j); k++) { if (!sent.endsWith("[")) sent += " "; sent += "["; } sent += " "; sent = sent + tokens[j]; } for (int k = 0; k < endCounts.getCount(tokens.length); k++) { sent += "]"; } sent += "\n"; doc.append(sent); } if (gold) logger.fine("New DOC: (GOLD MENTIONS) =================================================="); else logger.fine("New DOC: (Predicted Mentions) 
=================================================="); logger.fine(doc.toString()); }
From source file: knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License: Open Source License
/** * print a coref link information including context and parse tree *//*w w w . ja v a 2 s . com*/ private static void printLinkWithContext(Logger logger, String header, IntTuple src, IntTuple dst, Document document, Semantics semantics) { List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions(); List<List<Mention>> goldOrderedMentionsBySentence = document.goldOrderedMentionsBySentence; Mention srcMention = orderedMentionsBySentence.get(src.get(0)).get(src.get(1)); Mention dstMention = orderedMentionsBySentence.get(dst.get(0)).get(dst.get(1)); List<CoreLabel> srcSentence = srcMention.sentenceWords; List<CoreLabel> dstSentence = dstMention.sentenceWords; printLink(logger, header, src, dst, orderedMentionsBySentence); printList(logger, "Mention:" + srcMention.spanToString(), "Gender:" + srcMention.gender.toString(), "Number:" + srcMention.number.toString(), "Animacy:" + srcMention.animacy.toString(), "Person:" + srcMention.person.toString(), "NER:" + srcMention.nerString, "Head:" + srcMention.headString, "Type:" + srcMention.mentionType.toString(), "utter: " + srcMention.headWord.get(CoreAnnotations.UtteranceAnnotation.class), "speakerID: " + srcMention.headWord.get(CoreAnnotations.SpeakerAnnotation.class), "twinless:" + srcMention.twinless); logger.fine("Context:"); String p = ""; for (int i = 0; i < srcSentence.size(); i++) { if (i == srcMention.startIndex) { p += "["; } if (i == srcMention.endIndex) { p += "]"; } p += srcSentence.get(i).word() + " "; } logger.fine(p); StringBuilder golds = new StringBuilder(); golds.append("Gold mentions in the sentence:\n"); Counter<Integer> mBegin = new ClassicCounter<Integer>(); Counter<Integer> mEnd = new ClassicCounter<Integer>(); for (Mention m : goldOrderedMentionsBySentence.get(src.get(0))) { mBegin.incrementCount(m.startIndex); mEnd.incrementCount(m.endIndex); } List<CoreLabel> l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(src.get(0)) 
.get(CoreAnnotations.TokensAnnotation.class); for (int i = 0; i < l.size(); i++) { for (int j = 0; j < mEnd.getCount(i); j++) { golds.append("]"); } for (int j = 0; j < mBegin.getCount(i); j++) { golds.append("["); } golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class)); golds.append(" "); } logger.fine(golds.toString()); printList(logger, "\nAntecedent:" + dstMention.spanToString(), "Gender:" + dstMention.gender.toString(), "Number:" + dstMention.number.toString(), "Animacy:" + dstMention.animacy.toString(), "Person:" + dstMention.person.toString(), "NER:" + dstMention.nerString, "Head:" + dstMention.headString, "Type:" + dstMention.mentionType.toString(), "utter: " + dstMention.headWord.get(CoreAnnotations.UtteranceAnnotation.class), "speakerID: " + dstMention.headWord.get(CoreAnnotations.SpeakerAnnotation.class), "twinless:" + dstMention.twinless); logger.fine("Context:"); p = ""; for (int i = 0; i < dstSentence.size(); i++) { if (i == dstMention.startIndex) { p += "["; } if (i == dstMention.endIndex) { p += "]"; } p += dstSentence.get(i).word() + " "; } logger.fine(p); golds = new StringBuilder(); golds.append("Gold mentions in the sentence:\n"); mBegin = new ClassicCounter<Integer>(); mEnd = new ClassicCounter<Integer>(); for (Mention m : goldOrderedMentionsBySentence.get(dst.get(0))) { mBegin.incrementCount(m.startIndex); mEnd.incrementCount(m.endIndex); } l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(dst.get(0)) .get(CoreAnnotations.TokensAnnotation.class); for (int i = 0; i < l.size(); i++) { for (int j = 0; j < mEnd.getCount(i); j++) { golds.append("]"); } for (int j = 0; j < mBegin.getCount(i); j++) { golds.append("["); } golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class)); golds.append(" "); } logger.fine(golds.toString()); logger.finer("\nMention:: --------------------------------------------------------"); try { logger.finer(srcMention.dependency.toString()); } catch (Exception e) { } //throw new 
RuntimeException(e);} logger.finer("Parse:"); logger.finer(formatPennTree(srcMention.contextParseTree)); logger.finer("\nAntecedent:: -----------------------------------------------------"); try { logger.finer(dstMention.dependency.toString()); } catch (Exception e) { } //throw new RuntimeException(e);} logger.finer("Parse:"); logger.finer(formatPennTree(dstMention.contextParseTree)); }
From source file: knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License: Open Source License
/** * Print raw document for analysis// ww w. j a va 2s . c o m */ public static void printRawDoc(Document document, boolean gold) throws FileNotFoundException { List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class); List<List<Mention>> allMentions; if (gold) { allMentions = document.goldOrderedMentionsBySentence; } else { allMentions = document.predictedOrderedMentionsBySentence; } // String filename = document.annotation.get() StringBuilder doc = new StringBuilder(); int previousOffset = 0; for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); List<Mention> mentions = allMentions.get(i); List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class); String[] tokens = new String[t.size()]; for (CoreLabel c : t) { tokens[c.index() - 1] = c.word(); } if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) { doc.append("\n"); } previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); Counter<Integer> startCounts = new ClassicCounter<Integer>(); Counter<Integer> endCounts = new ClassicCounter<Integer>(); Map<Integer, Set<Mention>> endMentions = Generics.newHashMap(); for (Mention m : mentions) { startCounts.incrementCount(m.startIndex); endCounts.incrementCount(m.endIndex); if (!endMentions.containsKey(m.endIndex)) { endMentions.put(m.endIndex, Generics.<Mention>newHashSet()); } endMentions.get(m.endIndex).add(m); } for (int j = 0; j < tokens.length; j++) { if (endMentions.containsKey(j)) { for (Mention m : endMentions.get(j)) { int corefChainId = (gold) ? 
m.goldCorefClusterID : m.corefClusterID; doc.append("]_").append(corefChainId); } } for (int k = 0; k < startCounts.getCount(j); k++) { if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') { doc.append(" "); } doc.append("["); } if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') { doc.append(" "); } doc.append(tokens[j]); } if (endMentions.containsKey(tokens.length)) { for (Mention m : endMentions.get(tokens.length)) { int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID; doc.append("]_").append(corefChainId); //append("_").append(m.mentionID); } } doc.append("\n"); } logger.fine(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)); if (gold) { logger.fine("New DOC: (GOLD MENTIONS) =================================================="); } else { logger.fine("New DOC: (Predicted Mentions) =================================================="); } logger.fine(doc.toString()); }