Usage examples for the edu.stanford.nlp.util.IntTuple constructor
public IntTuple(int num)
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
/** initialize positions and corefClusters (put each mention in each CorefCluster) */ private void initializeCorefCluster() { for (int i = 0; i < predictedOrderedMentionsBySentence.size(); i++) { for (int j = 0; j < predictedOrderedMentionsBySentence.get(i).size(); j++) { Mention m = predictedOrderedMentionsBySentence.get(i).get(j); if (allPredictedMentions.containsKey(m.mentionID)) { SieveCoreferenceSystem.logger.warning("WARNING: Already contain mention " + m.mentionID); Mention m1 = allPredictedMentions.get(m.mentionID); SieveCoreferenceSystem.logger.warning( "OLD mention: " + m1.spanToString() + "[" + m1.startIndex + "," + m1.endIndex + "]"); SieveCoreferenceSystem.logger.warning( "NEW mention: " + m.spanToString() + "[" + m.startIndex + "," + m.endIndex + "]"); // SieveCoreferenceSystem.debugPrintMentions(System.err, "PREDICTED ORDERED", predictedOrderedMentionsBySentence); // SieveCoreferenceSystem.debugPrintMentions(System.err, "GOLD ORDERED", goldOrderedMentionsBySentence); }/*w ww . ja va 2 s. co m*/ assert (!allPredictedMentions.containsKey(m.mentionID)); allPredictedMentions.put(m.mentionID, m); IntTuple pos = new IntTuple(2); pos.set(0, i); pos.set(1, j); positions.put(m, pos); m.sentNum = i; assert (!corefClusters.containsKey(m.mentionID)); corefClusters.put(m.mentionID, new CorefCluster(m.mentionID, Generics.newHashSet(Arrays.asList(m)))); m.corefClusterID = m.mentionID; IntTuple headPosition = new IntTuple(2); headPosition.set(0, i); headPosition.set(1, m.headIndex); mentionheadPositions.put(headPosition, m); } } }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
/** Extract gold coref link information */ protected void extractGoldLinks() { // List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions(); List<Pair<IntTuple, IntTuple>> links = new ArrayList<Pair<IntTuple, IntTuple>>(); // position of each mention in the input matrix, by id Map<Integer, IntTuple> positions = Generics.newHashMap(); // positions of antecedents Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap(); for (int i = 0; i < goldOrderedMentionsBySentence.size(); i++) { for (int j = 0; j < goldOrderedMentionsBySentence.get(i).size(); j++) { Mention m = goldOrderedMentionsBySentence.get(i).get(j); int id = m.mentionID; IntTuple pos = new IntTuple(2); pos.set(0, i);//from w w w .j av a 2s . c o m pos.set(1, j); positions.put(id, pos); antecedents.put(id, new ArrayList<IntTuple>()); } } // SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence); for (List<Mention> mentions : goldOrderedMentionsBySentence) { for (Mention m : mentions) { int id = m.mentionID; IntTuple src = positions.get(id); assert (src != null); if (m.originalRef >= 0) { IntTuple dst = positions.get(m.originalRef); if (dst == null) { throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef); } // to deal with cataphoric annotation while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) { Mention dstMention = goldOrderedMentionsBySentence.get(dst.get(0)).get(dst.get(1)); m.originalRef = dstMention.originalRef; dstMention.originalRef = id; if (m.originalRef < 0) break; dst = positions.get(m.originalRef); } if (m.originalRef < 0) continue; // A B C: if A<-B, A<-C => make a link B<-C for (int k = dst.get(0); k <= src.get(0); k++) { for (int l = 0; l < goldOrderedMentionsBySentence.get(k).size(); l++) { if (k == dst.get(0) && l < dst.get(1)) continue; if (k == src.get(0) && l > src.get(1)) break; IntTuple missed = new IntTuple(2); missed.set(0, k); missed.set(1, l); if 
(links.contains(new Pair<IntTuple, IntTuple>(missed, dst))) { antecedents.get(id).add(missed); links.add(new Pair<IntTuple, IntTuple>(src, missed)); } } } links.add(new Pair<IntTuple, IntTuple>(src, dst)); assert (antecedents.get(id) != null); antecedents.get(id).add(dst); List<IntTuple> ants = antecedents.get(m.originalRef); assert (ants != null); for (IntTuple ant : ants) { antecedents.get(id).add(ant); links.add(new Pair<IntTuple, IntTuple>(src, ant)); } } } } goldLinks = links; }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex, Dictionaries dict) {//from w ww . j ava 2 s .c o m List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class); for (int i = startIndex; i < endIndex; i++) { if (sent.get(i).get(CoreAnnotations.UtteranceAnnotation.class) != 0) continue; String lemma = sent.get(i).get(CoreAnnotations.LemmaAnnotation.class); String word = sent.get(i).get(CoreAnnotations.TextAnnotation.class); if (dict.reportVerb.contains(lemma)) { // find subject SemanticGraph dependency = sentences.get(sentNum) .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); IndexedWord w = dependency.getNodeByWordPattern(word); if (w != null) { for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(w)) { if (child.first().getShortName().equals("nsubj")) { String subjectString = child.second().word(); int subjectIndex = child.second().index(); // start from 1 IntTuple headPosition = new IntTuple(2); headPosition.set(0, sentNum); headPosition.set(1, subjectIndex - 1); String speaker; if (mentionheadPositions.containsKey(headPosition)) { speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID); } else { speaker = subjectString; } speakers.put(utterNum, speaker); return true; } } } else { SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word); } } } return false; }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
private String findParagraphSpeaker(List<CoreMap> paragraph, int paragraphUtterIndex, String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) { if (!speakers.containsKey(paragraphUtterIndex)) { if (!nextParagraphSpeaker.equals("")) { speakers.put(paragraphUtterIndex, nextParagraphSpeaker); } else { // find the speaker of this paragraph (John, nbc news) CoreMap lastSent = paragraph.get(paragraph.size() - 1); String speaker = ""; boolean hasVerb = false; for (int i = 0; i < lastSent.get(CoreAnnotations.TokensAnnotation.class).size(); i++) { CoreLabel w = lastSent.get(CoreAnnotations.TokensAnnotation.class).get(i); String pos = w.get(CoreAnnotations.PartOfSpeechAnnotation.class); String ner = w.get(CoreAnnotations.NamedEntityTagAnnotation.class); if (pos.startsWith("V")) { hasVerb = true;//from w w w .j av a2 s .c o m break; } if (ner.startsWith("PER")) { IntTuple headPosition = new IntTuple(2); headPosition.set(0, paragraph.size() - 1 + paragraphOffset); headPosition.set(1, i); if (mentionheadPositions.containsKey(headPosition)) { speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID); } } } if (!hasVerb && !speaker.equals("")) { speakers.put(paragraphUtterIndex, speaker); } } } return findNextParagraphSpeaker(paragraph, paragraphOffset, dict); }
From source file:knu.univ.lingvo.coref.Document.java
License:Open Source License
private String findNextParagraphSpeaker(List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) { CoreMap lastSent = paragraph.get(paragraph.size() - 1); String speaker = ""; for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) { if (w.get(CoreAnnotations.LemmaAnnotation.class).equals("report") || w.get(CoreAnnotations.LemmaAnnotation.class).equals("say")) { String word = w.get(CoreAnnotations.TextAnnotation.class); SemanticGraph dependency = lastSent .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); IndexedWord t = dependency.getNodeByWordPattern(word); for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) { if (child.first().getShortName().equals("nsubj")) { int subjectIndex = child.second().index(); // start from 1 IntTuple headPosition = new IntTuple(2); headPosition.set(0, paragraph.size() - 1 + paragraphOffset); headPosition.set(1, subjectIndex - 1); if (mentionheadPositions.containsKey(headPosition) && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) { speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID); }//from ww w.j av a 2 s.com } } } } return speaker; }
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/** * Print logs for error analysis// w w w . jav a 2 s . c o m */ public void printTopK(Logger logger, Document document, Semantics semantics) { List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions(); Map<Integer, CorefCluster> corefClusters = document.corefClusters; Map<Mention, IntTuple> positions = document.allPositions; Map<Integer, Mention> golds = document.allGoldMentions; logger.fine("=======ERROR ANALYSIS========================================================="); // Temporary sieve for getting ordered antecedents DeterministicCorefSieve tmpSieve = new ExactStringMatch(); for (int i = 0; i < orderedMentionsBySentence.size(); i++) { List<Mention> orderedMentions = orderedMentionsBySentence.get(i); for (int j = 0; j < orderedMentions.size(); j++) { Mention m = orderedMentions.get(j); logger.fine("=========Line: " + i + "\tmention: " + j + "======================================================="); logger.fine(m.spanToString() + "\tmentionID: " + m.mentionID + "\tcorefClusterID: " + m.corefClusterID + "\tgoldCorefClusterID: " + m.goldCorefClusterID); CorefCluster corefCluster = corefClusters.get(m.corefClusterID); if (corefCluster != null) { corefCluster.printCorefCluster(logger); } else { logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID); } logger.fine("-------------------------------------------------------"); boolean oneRecallErrorPrinted = false; boolean onePrecisionErrorPrinted = false; boolean alreadyChoose = false; for (int sentJ = i; sentJ >= 0; sentJ--) { List<Mention> l = tmpSieve.getOrderedAntecedents(sentJ, i, orderedMentions, orderedMentionsBySentence, m, j, corefClusters, dictionaries); // Sort mentions by length whenever we have two mentions beginning at the same position and having the same head for (int ii = 0; ii < l.size(); ii++) { for (int jj = 0; jj < l.size(); jj++) { if (l.get(ii).headString.equals(l.get(jj).headString) && l.get(ii).startIndex == l.get(jj).startIndex && 
l.get(ii).sameSentence(l.get(jj)) && jj > ii && l.get(ii).spanToString().length() > l.get(jj).spanToString().length()) { logger.finest("FLIPPED: " + l.get(ii).spanToString() + "(" + ii + "), " + l.get(jj).spanToString() + "(" + jj + ")"); l.set(jj, l.set(ii, l.get(jj))); } } } logger.finest("Candidates in sentence #" + sentJ + " for mention: " + m.spanToString()); for (int ii = 0; ii < l.size(); ii++) { logger.finest("\tCandidate #" + ii + ": " + l.get(ii).spanToString()); } for (Mention antecedent : l) { boolean chosen = (m.corefClusterID == antecedent.corefClusterID); IntTuple src = new IntTuple(2); src.set(0, i); src.set(1, j); IntTuple ant = positions.get(antecedent); if (ant == null) { continue; } //correct=(chosen==goldLinks.contains(new Pair<IntTuple, IntTuple>(src,ant))); boolean coreferent = golds.containsKey(m.mentionID) && golds.containsKey(antecedent.mentionID) && (golds.get(m.mentionID).goldCorefClusterID == golds .get(antecedent.mentionID).goldCorefClusterID); boolean correct = (chosen == coreferent); String chosenness = chosen ? "Chosen" : "Not Chosen"; String correctness = correct ? 
"Correct" : "Incorrect"; logger.fine("\t" + correctness + "\t\t" + chosenness + "\t" + antecedent.spanToString()); CorefCluster mC = corefClusters.get(m.corefClusterID); CorefCluster aC = corefClusters.get(antecedent.corefClusterID); if (chosen && !correct && !onePrecisionErrorPrinted && !alreadyChoose) { onePrecisionErrorPrinted = true; printLinkWithContext(logger, "\nPRECISION ERROR ", src, ant, document, semantics); logger.fine("END of PRECISION ERROR LOG"); } if (!chosen && !correct && !oneRecallErrorPrinted && (!alreadyChoose || (alreadyChoose && onePrecisionErrorPrinted))) { oneRecallErrorPrinted = true; printLinkWithContext(logger, "\nRECALL ERROR ", src, ant, document, semantics); logger.finer("cluster info: "); if (mC != null) { mC.printCorefCluster(logger); } else { logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID); } logger.finer("----------------------------------------------------------"); if (aC != null) { aC.printCorefCluster(logger); } else { logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID); } logger.finer(""); logger.fine("END of RECALL ERROR LOG"); } if (chosen) { alreadyChoose = true; } } } logger.fine("\n"); } } logger.fine("==============================================================================="); }