Example usage for the edu.stanford.nlp.util.IntTuple constructor IntTuple(int)

Introduction

On this page you can find example usages of the IntTuple(int num) constructor of the class edu.stanford.nlp.util.IntTuple.

Prototype

public IntTuple(int num)

Source Link

Usage

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Registers every predicted mention in the document-level lookup tables.
 *
 * <p>Walking {@code predictedOrderedMentionsBySentence} in order, each mention is:
 * <ul>
 *   <li>stored in {@code allPredictedMentions} under its mention ID,</li>
 *   <li>given a (sentence index, index within sentence) entry in {@code positions},</li>
 *   <li>placed in its own singleton {@code CorefCluster} whose ID equals the mention ID,</li>
 *   <li>indexed in {@code mentionheadPositions} by (sentence index, head index).</li>
 * </ul>
 * A mention ID that is already registered is reported through the logger; when assertions
 * are enabled the assertion that follows then fails.
 */
private void initializeCorefCluster() {
    for (int sentIdx = 0; sentIdx < predictedOrderedMentionsBySentence.size(); sentIdx++) {
        for (int mentionIdx = 0;
                mentionIdx < predictedOrderedMentionsBySentence.get(sentIdx).size();
                mentionIdx++) {
            Mention mention = predictedOrderedMentionsBySentence.get(sentIdx).get(mentionIdx);
            int id = mention.mentionID;

            // Duplicate IDs are only reported here; the assert below is what actually rejects them.
            if (allPredictedMentions.containsKey(id)) {
                SieveCoreferenceSystem.logger.warning("WARNING: Already contain mention " + id);
                Mention previous = allPredictedMentions.get(id);
                String previousSpan = "OLD mention: " + previous.spanToString()
                        + "[" + previous.startIndex + "," + previous.endIndex + "]";
                SieveCoreferenceSystem.logger.warning(previousSpan);
                String currentSpan = "NEW mention: " + mention.spanToString()
                        + "[" + mention.startIndex + "," + mention.endIndex + "]";
                SieveCoreferenceSystem.logger.warning(currentSpan);
            }
            assert !allPredictedMentions.containsKey(id);
            allPredictedMentions.put(id, mention);

            // (sentence, index within sentence) of this mention
            IntTuple mentionPosition = new IntTuple(2);
            mentionPosition.set(0, sentIdx);
            mentionPosition.set(1, mentionIdx);
            positions.put(mention, mentionPosition);
            mention.sentNum = sentIdx;

            // every mention starts out alone in a cluster named after itself
            assert !corefClusters.containsKey(id);
            CorefCluster singleton = new CorefCluster(id, Generics.newHashSet(Arrays.asList(mention)));
            corefClusters.put(id, singleton);
            mention.corefClusterID = id;

            // (sentence, head index) -> mention, used to look mentions up by their head
            IntTuple headCoordinates = new IntTuple(2);
            headCoordinates.set(0, sentIdx);
            headCoordinates.set(1, mention.headIndex);
            mentionheadPositions.put(headCoordinates, mention);
        }
    }
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Extract gold coref link information.
 *
 * <p>Builds the list of gold (mention, antecedent) links from
 * {@code goldOrderedMentionsBySentence} and stores it in {@code goldLinks}. Both ends of a
 * link are positions: an {@code IntTuple} holding (sentence index, mention index within that
 * sentence).
 *
 * <p>Side effect: when a mention's {@code originalRef} points at a mention that appears later
 * in the document (cataphora), the {@code originalRef} fields of the mentions involved are
 * rewritten so that the reference points backwards.
 *
 * @throws RuntimeException if an {@code originalRef} (either the initial one or one reached
 *         while resolving cataphora) names a mention ID that is not among the gold mentions
 */
protected void extractGoldLinks() {
    List<Pair<IntTuple, IntTuple>> links = new ArrayList<Pair<IntTuple, IntTuple>>();

    // position of each mention in the input matrix, by id
    Map<Integer, IntTuple> positions = Generics.newHashMap();
    // positions of antecedents
    Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
    for (int i = 0; i < goldOrderedMentionsBySentence.size(); i++) {
        for (int j = 0; j < goldOrderedMentionsBySentence.get(i).size(); j++) {
            Mention m = goldOrderedMentionsBySentence.get(i).get(j);
            int id = m.mentionID;
            IntTuple pos = new IntTuple(2);
            pos.set(0, i);
            pos.set(1, j);
            positions.put(id, pos);
            antecedents.put(id, new ArrayList<IntTuple>());
        }
    }

    for (List<Mention> mentions : goldOrderedMentionsBySentence) {
        for (Mention m : mentions) {
            int id = m.mentionID;
            IntTuple src = positions.get(id);

            assert (src != null);
            if (m.originalRef >= 0) {
                IntTuple dst = positions.get(m.originalRef);
                if (dst == null) {
                    throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
                }

                // to deal with cataphoric annotation: while the referenced mention comes after
                // this one, take over its reference and make it point back at this mention
                while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
                    Mention dstMention = goldOrderedMentionsBySentence.get(dst.get(0)).get(dst.get(1));
                    m.originalRef = dstMention.originalRef;
                    dstMention.originalRef = id;

                    if (m.originalRef < 0) {
                        break;
                    }
                    dst = positions.get(m.originalRef);
                    // Same failure mode as the first lookup above: report the unknown ID
                    // instead of failing with a bare NullPointerException in the loop condition.
                    if (dst == null) {
                        throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
                    }
                }
                if (m.originalRef < 0) {
                    continue;
                }

                // A B C: if A<-B, A<-C => make a link B<-C
                // Scan every position from dst up to and including src; any mention already
                // linked to dst also becomes an antecedent of src.
                for (int k = dst.get(0); k <= src.get(0); k++) {
                    for (int l = 0; l < goldOrderedMentionsBySentence.get(k).size(); l++) {
                        if (k == dst.get(0) && l < dst.get(1)) {
                            continue; // before dst in dst's sentence
                        }
                        if (k == src.get(0) && l > src.get(1)) {
                            break; // after src in src's sentence
                        }
                        IntTuple missed = new IntTuple(2);
                        missed.set(0, k);
                        missed.set(1, l);
                        if (links.contains(new Pair<IntTuple, IntTuple>(missed, dst))) {
                            antecedents.get(id).add(missed);
                            links.add(new Pair<IntTuple, IntTuple>(src, missed));
                        }
                    }
                }

                links.add(new Pair<IntTuple, IntTuple>(src, dst));

                assert (antecedents.get(id) != null);
                antecedents.get(id).add(dst);

                // src also inherits every antecedent already recorded for its antecedent
                List<IntTuple> ants = antecedents.get(m.originalRef);
                assert (ants != null);
                for (IntTuple ant : ants) {
                    antecedents.get(id).add(ant);
                    links.add(new Pair<IntTuple, IntTuple>(src, ant));
                }
            }
        }
    }
    goldLinks = links;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Tries to identify the speaker of an utterance from a reporting verb in a sentence.
 *
 * <p>Tokens {@code startIndex} (inclusive) to {@code endIndex} (exclusive) of sentence
 * {@code sentNum} are scanned, considering only tokens whose utterance annotation is 0. For a
 * token whose lemma is in {@code dict.reportVerb}, the node matching the token's text is looked
 * up in the sentence's collapsed dependency graph and its first {@code nsubj} child is taken as
 * the speaker. The speaker is stored in {@code speakers} under {@code utterNum}: the mention ID
 * (as a string) when a mention's head sits at the subject's position, otherwise the subject's
 * word.
 *
 * @param utterNum   utterance number used as the key in {@code speakers}
 * @param sentNum    index into {@code sentences} of the sentence to scan
 * @param sentences  sentences of the document
 * @param startIndex first token index to examine (inclusive)
 * @param endIndex   token index at which to stop (exclusive)
 * @param dict       dictionaries providing the set of reporting-verb lemmas
 * @return {@code true} if a speaker was recorded, {@code false} otherwise
 */
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex,
        Dictionaries dict) {
    List<CoreLabel> sent = sentences.get(sentNum).get(CoreAnnotations.TokensAnnotation.class);
    for (int tokenIdx = startIndex; tokenIdx < endIndex; tokenIdx++) {
        CoreLabel token = sent.get(tokenIdx);
        if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
            continue;
        }
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        if (!dict.reportVerb.contains(lemma)) {
            continue;
        }

        // find subject
        SemanticGraph dependency = sentences.get(sentNum)
                .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        IndexedWord verbNode = dependency.getNodeByWordPattern(word);
        if (verbNode == null) {
            SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
            continue;
        }

        for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(verbNode)) {
            if (!child.first().getShortName().equals("nsubj")) {
                continue;
            }
            String subjectString = child.second().word();
            int subjectIndex = child.second().index(); // start from 1, hence the -1 below

            IntTuple subjectPosition = new IntTuple(2);
            subjectPosition.set(0, sentNum);
            subjectPosition.set(1, subjectIndex - 1);

            // prefer the ID of a known mention headed at the subject over the raw word
            String speaker = mentionheadPositions.containsKey(subjectPosition)
                    ? Integer.toString(mentionheadPositions.get(subjectPosition).mentionID)
                    : subjectString;
            speakers.put(utterNum, speaker);
            return true;
        }
    }
    return false;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Assigns a speaker to a paragraph's utterance if none is recorded yet, then returns the
 * speaker announced for the following paragraph.
 *
 * <p>When {@code speakers} has no entry for {@code paragraphUtterIndex}:
 * <ul>
 *   <li>a non-empty {@code nextParagraphSpeaker} is stored as the speaker;</li>
 *   <li>otherwise the last sentence of the paragraph is scanned token by token, stopping at
 *       the first token whose POS tag starts with "V". For tokens whose NER tag starts with
 *       "PER" and that are the head of a known mention, that mention's ID is remembered (a
 *       later match replaces an earlier one). The remembered ID is stored as the speaker only
 *       if no verb was encountered (e.g. a sign-off such as "John, nbc news").</li>
 * </ul>
 *
 * @param paragraph            sentences of the paragraph
 * @param paragraphUtterIndex  utterance number used as the key in {@code speakers}
 * @param nextParagraphSpeaker speaker carried over from the previous paragraph, or ""
 * @param paragraphOffset      offset added to a sentence's index within the paragraph to form
 *                             the sentence number used in {@code mentionheadPositions}
 * @param dict                 dictionaries, passed on to {@code findNextParagraphSpeaker}
 * @return the result of {@code findNextParagraphSpeaker} for this paragraph
 */
private String findParagraphSpeaker(List<CoreMap> paragraph, int paragraphUtterIndex,
        String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
    if (!speakers.containsKey(paragraphUtterIndex)) {
        if (nextParagraphSpeaker.isEmpty()) {
            // find the speaker of this paragraph (John, nbc news)
            int lastSentIdx = paragraph.size() - 1;
            CoreMap lastSent = paragraph.get(lastSentIdx);
            List<CoreLabel> tokens = lastSent.get(CoreAnnotations.TokensAnnotation.class);
            int sentNum = lastSentIdx + paragraphOffset;

            String speakerId = "";
            boolean verbSeen = false;
            for (int tokenIdx = 0; tokenIdx < tokens.size(); tokenIdx++) {
                CoreLabel token = tokens.get(tokenIdx);
                String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String nerTag = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                if (posTag.startsWith("V")) {
                    verbSeen = true;
                    break;
                }
                if (!nerTag.startsWith("PER")) {
                    continue;
                }
                IntTuple headPosition = new IntTuple(2);
                headPosition.set(0, sentNum);
                headPosition.set(1, tokenIdx);
                if (mentionheadPositions.containsKey(headPosition)) {
                    speakerId = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
                }
            }
            if (!verbSeen && !speakerId.isEmpty()) {
                speakers.put(paragraphUtterIndex, speakerId);
            }
        } else {
            speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
        }
    }
    return findNextParagraphSpeaker(paragraph, paragraphOffset, dict);
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Looks in the last sentence of a paragraph for a person who is the subject of "report" or
 * "say", to be used as the speaker of the next paragraph.
 *
 * <p>For every token of the last sentence whose lemma is "report" or "say", the node matching
 * the token's text is looked up in the sentence's collapsed dependency graph. For each
 * {@code nsubj} child of that node, if a known mention is headed at the subject's position and
 * its NER string starts with "PER", that mention's ID becomes the result (a later match
 * replaces an earlier one).
 *
 * <p>Tokens without a lemma are skipped. If the verb cannot be found in the dependency graph a
 * warning is logged and the token is skipped, mirroring {@code findSpeaker}.
 *
 * @param paragraph       sentences of the paragraph
 * @param paragraphOffset offset added to a sentence's index within the paragraph to form the
 *                        sentence number used in {@code mentionheadPositions}
 * @param dict            dictionaries (not used by this method)
 * @return the mention ID of the speaker as a string, or "" if none was found
 */
private String findNextParagraphSpeaker(List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) {
    CoreMap lastSent = paragraph.get(paragraph.size() - 1);
    String speaker = "";
    for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) {
        String lemma = w.get(CoreAnnotations.LemmaAnnotation.class);
        // constant-first comparison so a token with no lemma is skipped instead of throwing
        if (!"report".equals(lemma) && !"say".equals(lemma)) {
            continue;
        }

        String word = w.get(CoreAnnotations.TextAnnotation.class);
        SemanticGraph dependency = lastSent
                .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        IndexedWord t = dependency.getNodeByWordPattern(word);
        if (t == null) {
            // the lookup can come back empty; do not hand null to childPairs
            SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
            continue;
        }

        for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) {
            if (child.first().getShortName().equals("nsubj")) {
                int subjectIndex = child.second().index(); // start from 1
                IntTuple headPosition = new IntTuple(2);
                headPosition.set(0, paragraph.size() - 1 + paragraphOffset);
                headPosition.set(1, subjectIndex - 1);
                if (mentionheadPositions.containsKey(headPosition)
                        && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) {
                    speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
                }
            }
        }
    }
    return speaker;
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Print logs for error analysis.
 *
 * <p>For every mention of {@code document} (in sentence order) this logs the mention and its
 * system cluster, then walks the candidate antecedents sentence by sentence, from the mention's
 * own sentence back to the first one. Each candidate with a known position is logged as
 * "Chosen" when it shares the mention's {@code corefClusterID}, and as "Correct" when that
 * decision matches the gold annotation (both are gold mentions with the same
 * {@code goldCorefClusterID}). Per mention, the first precision error and the first recall
 * error are additionally logged with context via {@code printLinkWithContext}.
 *
 * @param logger    destination of all output (levels fine, finer and finest are used)
 * @param document  document whose mentions, clusters and gold mentions are analysed
 * @param semantics passed through to {@code printLinkWithContext}
 */
public void printTopK(Logger logger, Document document, Semantics semantics) {

    List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions();
    Map<Integer, CorefCluster> corefClusters = document.corefClusters;
    Map<Mention, IntTuple> positions = document.allPositions;
    Map<Integer, Mention> golds = document.allGoldMentions;

    logger.fine("=======ERROR ANALYSIS=========================================================");

    // Temporary sieve for getting ordered antecedents
    DeterministicCorefSieve tmpSieve = new ExactStringMatch();
    for (int i = 0; i < orderedMentionsBySentence.size(); i++) {
        List<Mention> orderedMentions = orderedMentionsBySentence.get(i);
        for (int j = 0; j < orderedMentions.size(); j++) {
            Mention m = orderedMentions.get(j);
            logger.fine("=========Line: " + i + "\tmention: " + j
                    + "=======================================================");
            logger.fine(m.spanToString() + "\tmentionID: " + m.mentionID + "\tcorefClusterID: "
                    + m.corefClusterID + "\tgoldCorefClusterID: " + m.goldCorefClusterID);
            CorefCluster corefCluster = corefClusters.get(m.corefClusterID);
            if (corefCluster != null) {
                corefCluster.printCorefCluster(logger);
            } else {
                logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
            }
            logger.fine("-------------------------------------------------------");

            // per-mention state: at most one recall and one precision error are printed in full
            boolean oneRecallErrorPrinted = false;
            boolean onePrecisionErrorPrinted = false;
            boolean alreadyChoose = false;

            for (int sentJ = i; sentJ >= 0; sentJ--) {
                List<Mention> l = tmpSieve.getOrderedAntecedents(sentJ, i, orderedMentions,
                        orderedMentionsBySentence, m, j, corefClusters, dictionaries);

                // Sort mentions by length whenever we have two mentions beginning at the same position and having the same head
                for (int ii = 0; ii < l.size(); ii++) {
                    for (int jj = 0; jj < l.size(); jj++) {
                        if (l.get(ii).headString.equals(l.get(jj).headString)
                                && l.get(ii).startIndex == l.get(jj).startIndex
                                && l.get(ii).sameSentence(l.get(jj)) && jj > ii
                                && l.get(ii).spanToString().length() > l.get(jj).spanToString().length()) {
                            logger.finest("FLIPPED: " + l.get(ii).spanToString() + "(" + ii + "), "
                                    + l.get(jj).spanToString() + "(" + jj + ")");
                            // swap elements ii and jj (List.set returns the previous element)
                            l.set(jj, l.set(ii, l.get(jj)));
                        }
                    }
                }

                logger.finest("Candidates in sentence #" + sentJ + " for mention: " + m.spanToString());
                for (int ii = 0; ii < l.size(); ii++) {
                    logger.finest("\tCandidate #" + ii + ": " + l.get(ii).spanToString());
                }

                for (Mention antecedent : l) {
                    boolean chosen = (m.corefClusterID == antecedent.corefClusterID);

                    // candidates without a recorded position cannot be reported; skip them
                    IntTuple ant = positions.get(antecedent);
                    if (ant == null) {
                        continue;
                    }
                    IntTuple src = new IntTuple(2);
                    src.set(0, i);
                    src.set(1, j);

                    boolean coreferent = golds.containsKey(m.mentionID)
                            && golds.containsKey(antecedent.mentionID)
                            && (golds.get(m.mentionID).goldCorefClusterID == golds
                                    .get(antecedent.mentionID).goldCorefClusterID);
                    boolean correct = (chosen == coreferent);

                    String chosenness = chosen ? "Chosen" : "Not Chosen";
                    String correctness = correct ? "Correct" : "Incorrect";
                    logger.fine("\t" + correctness + "\t\t" + chosenness + "\t" + antecedent.spanToString());
                    CorefCluster mC = corefClusters.get(m.corefClusterID);
                    CorefCluster aC = corefClusters.get(antecedent.corefClusterID);

                    if (chosen && !correct && !onePrecisionErrorPrinted && !alreadyChoose) {
                        onePrecisionErrorPrinted = true;
                        printLinkWithContext(logger, "\nPRECISION ERROR ", src, ant, document, semantics);
                        logger.fine("END of PRECISION ERROR LOG");
                    }

                    if (!chosen && !correct && !oneRecallErrorPrinted
                            && (!alreadyChoose || onePrecisionErrorPrinted)) {
                        oneRecallErrorPrinted = true;
                        printLinkWithContext(logger, "\nRECALL ERROR ", src, ant, document, semantics);

                        logger.finer("cluster info: ");
                        if (mC != null) {
                            mC.printCorefCluster(logger);
                        } else {
                            logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
                        }
                        logger.finer("----------------------------------------------------------");
                        if (aC != null) {
                            aC.printCorefCluster(logger);
                        } else {
                            // report the antecedent's cluster ID: that is the lookup that failed
                            logger.finer("CANNOT find coref cluster for cluster " + antecedent.corefClusterID);
                        }
                        logger.finer("");
                        logger.fine("END of RECALL ERROR LOG");
                    }
                    if (chosen) {
                        alreadyChoose = true;
                    }
                }
            }
            logger.fine("\n");
        }
    }
    logger.fine("===============================================================================");
}