Example usage for the edu.stanford.nlp.util.IntTuple constructor IntTuple(int)

List of usage examples for the edu.stanford.nlp.util.IntTuple constructor IntTuple(int)

Introduction

On this page you can find example usages of the edu.stanford.nlp.util.IntTuple constructor IntTuple(int).

Prototype

public IntTuple(int num) 

Source Link

Usage

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Registers every predicted mention of the document.
 *
 * <p>Walking the predicted mentions sentence by sentence, each mention is stored in
 * {@code allPredictedMentions}, its (sentence index, mention index) pair is recorded in
 * {@code positions}, a singleton {@code CorefCluster} keyed by the mention ID is created in
 * {@code corefClusters}, and the mention is indexed in {@code mentionheadPositions} under its
 * (sentence index, head index) pair.
 */
private void initializeCorefCluster() {
    final int sentenceCount = predictedOrderedMentionsBySentence.size();
    for (int sentIdx = 0; sentIdx < sentenceCount; sentIdx++) {
        final List<Mention> sentenceMentions = predictedOrderedMentionsBySentence.get(sentIdx);
        for (int mentionIdx = 0; mentionIdx < sentenceMentions.size(); mentionIdx++) {
            final Mention mention = sentenceMentions.get(mentionIdx);
            final int mentionId = mention.mentionID;

            // Report a duplicate mention ID (old vs. new span) before the assertion below.
            if (allPredictedMentions.containsKey(mentionId)) {
                SieveCoreferenceSystem.logger.warning("WARNING: Already contain mention " + mentionId);
                final Mention previous = allPredictedMentions.get(mentionId);
                SieveCoreferenceSystem.logger.warning("OLD mention: " + previous.spanToString() + "["
                        + previous.startIndex + "," + previous.endIndex + "]");
                SieveCoreferenceSystem.logger.warning("NEW mention: " + mention.spanToString() + "["
                        + mention.startIndex + "," + mention.endIndex + "]");
            }
            assert (!allPredictedMentions.containsKey(mentionId));
            allPredictedMentions.put(mentionId, mention);

            // Position of the mention: (sentence index, index within the sentence).
            final IntTuple mentionPosition = new IntTuple(2);
            mentionPosition.set(0, sentIdx);
            mentionPosition.set(1, mentionIdx);
            positions.put(mention, mentionPosition);
            mention.sentNum = sentIdx;

            // Every mention starts out alone in a cluster that shares its ID.
            assert (!corefClusters.containsKey(mentionId));
            final CorefCluster singleton = new CorefCluster(mentionId,
                    Generics.newHashSet(Arrays.asList(mention)));
            corefClusters.put(mentionId, singleton);
            mention.corefClusterID = mentionId;

            // Index by head token: (sentence index, head index).
            final IntTuple headKey = new IntTuple(2);
            headKey.set(0, sentIdx);
            headKey.set(1, mention.headIndex);
            mentionheadPositions.put(headKey, mention);
        }
    }
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Extract gold coref link information.
 *
 * <p>Builds a list of (source position, antecedent position) pairs from the gold mentions and
 * stores it in {@code goldLinks}. A position is an {@code IntTuple} of
 * (sentence index, mention index within that sentence) into
 * {@code goldOrderedMentionsBySentence}.
 *
 * <p>Side effect: while handling cataphoric annotations, the {@code originalRef} fields of the
 * gold mentions involved are rewritten.
 *
 * @throws RuntimeException if a mention's {@code originalRef} names an ID that has no gold mention
 */
protected void extractGoldLinks() {
    //    List<List<Mention>> orderedMentionsBySentence = this.getOrderedMentions();
    List<Pair<IntTuple, IntTuple>> links = new ArrayList<Pair<IntTuple, IntTuple>>();

    // position of each mention in the input matrix, by id
    Map<Integer, IntTuple> positions = Generics.newHashMap();
    // positions of antecedents
    Map<Integer, List<IntTuple>> antecedents = Generics.newHashMap();
    // First pass: record every gold mention's position and give it an empty antecedent list.
    for (int i = 0; i < goldOrderedMentionsBySentence.size(); i++) {
        for (int j = 0; j < goldOrderedMentionsBySentence.get(i).size(); j++) {
            Mention m = goldOrderedMentionsBySentence.get(i).get(j);
            int id = m.mentionID;
            IntTuple pos = new IntTuple(2);
            pos.set(0, i);
            pos.set(1, j);
            positions.put(id, pos);
            antecedents.put(id, new ArrayList<IntTuple>());
        }
    }

    //    SieveCoreferenceSystem.debugPrintMentions(System.err, "", goldOrderedMentionsBySentence);
    // Second pass: turn each mention's originalRef into links, in document order.
    for (List<Mention> mentions : goldOrderedMentionsBySentence) {
        for (Mention m : mentions) {
            int id = m.mentionID;
            IntTuple src = positions.get(id);

            assert (src != null);
            // A negative originalRef means the mention has no referent; nothing to link.
            if (m.originalRef >= 0) {
                IntTuple dst = positions.get(m.originalRef);
                if (dst == null) {
                    throw new RuntimeException("Cannot find gold mention with ID=" + m.originalRef);
                }

                // to deal with cataphoric annotation
                // While the referent lies after this mention in document order, swap the
                // direction: this mention inherits the referent's own originalRef and the
                // referent is made to point back at this mention.
                // NOTE(review): dst is re-read from positions without a null check here, unlike
                // the lookup above — confirm an unknown ID cannot reach this point.
                while (dst.get(0) > src.get(0) || (dst.get(0) == src.get(0) && dst.get(1) > src.get(1))) {
                    Mention dstMention = goldOrderedMentionsBySentence.get(dst.get(0)).get(dst.get(1));
                    m.originalRef = dstMention.originalRef;
                    dstMention.originalRef = id;

                    if (m.originalRef < 0)
                        break;
                    dst = positions.get(m.originalRef);
                }
                // The swap may have left this mention without a referent.
                if (m.originalRef < 0)
                    continue;

                // A B C: if A<-B, A<-C => make a link B<-C
                // Scans every position from dst through src (inclusive); any position already
                // linked to dst also becomes an antecedent of this mention.
                // links is a List, so each contains() call is a linear scan.
                for (int k = dst.get(0); k <= src.get(0); k++) {
                    for (int l = 0; l < goldOrderedMentionsBySentence.get(k).size(); l++) {
                        // skip mentions located before dst in dst's sentence
                        if (k == dst.get(0) && l < dst.get(1))
                            continue;
                        // stop once past src in src's sentence
                        if (k == src.get(0) && l > src.get(1))
                            break;
                        IntTuple missed = new IntTuple(2);
                        missed.set(0, k);
                        missed.set(1, l);
                        if (links.contains(new Pair<IntTuple, IntTuple>(missed, dst))) {
                            antecedents.get(id).add(missed);
                            links.add(new Pair<IntTuple, IntTuple>(src, missed));
                        }
                    }
                }

                // The direct link to the referent itself.
                links.add(new Pair<IntTuple, IntTuple>(src, dst));

                assert (antecedents.get(id) != null);
                antecedents.get(id).add(dst);

                // Inherit every antecedent already recorded for the referent.
                List<IntTuple> ants = antecedents.get(m.originalRef);
                assert (ants != null);
                for (IntTuple ant : ants) {
                    antecedents.get(id).add(ant);
                    links.add(new Pair<IntTuple, IntTuple>(src, ant));
                }
            }
        }
    }
    goldLinks = links;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Looks for a reporting verb among tokens {@code [startIndex, endIndex)} of sentence
 * {@code sentNum} and, if one has an {@code nsubj} child in the collapsed dependencies, records
 * that subject as the speaker of utterance {@code utterNum}.
 *
 * <p>Only tokens whose utterance annotation is 0 are considered. The recorded speaker is the
 * mention ID (as a string) when a mention is indexed at the subject's head position, otherwise
 * the subject's word.
 *
 * @param utterNum   utterance index used as the key into {@code speakers}
 * @param sentNum    index of the sentence to inspect
 * @param sentences  sentences of the document
 * @param startIndex first token index to inspect (inclusive)
 * @param endIndex   last token index to inspect (exclusive)
 * @param dict       dictionaries providing the {@code reportVerb} lemma set
 * @return {@code true} if a speaker was recorded, {@code false} otherwise
 */
private boolean findSpeaker(int utterNum, int sentNum, List<CoreMap> sentences, int startIndex, int endIndex,
        Dictionaries dict) {
    CoreMap sentence = sentences.get(sentNum);
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    for (int tokenIdx = startIndex; tokenIdx < endIndex; tokenIdx++) {
        CoreLabel token = tokens.get(tokenIdx);
        if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
            continue;
        }
        String lemma = token.get(CoreAnnotations.LemmaAnnotation.class);
        String word = token.get(CoreAnnotations.TextAnnotation.class);
        if (!dict.reportVerb.contains(lemma)) {
            continue;
        }

        // Reporting verb found: locate its node in the dependency graph, then its subject.
        SemanticGraph dependency = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        IndexedWord verbNode = dependency.getNodeByWordPattern(word);
        if (verbNode == null) {
            SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
            continue;
        }

        for (Pair<GrammaticalRelation, IndexedWord> edge : dependency.childPairs(verbNode)) {
            if (!edge.first().getShortName().equals("nsubj")) {
                continue;
            }
            IndexedWord subject = edge.second();
            String subjectString = subject.word();
            int subjectIndex = subject.index(); // start from 1

            IntTuple headPosition = new IntTuple(2);
            headPosition.set(0, sentNum);
            headPosition.set(1, subjectIndex - 1);

            // Prefer the ID of a known mention headed at the subject; fall back to the raw word.
            String speaker = mentionheadPositions.containsKey(headPosition)
                    ? Integer.toString(mentionheadPositions.get(headPosition).mentionID)
                    : subjectString;
            speakers.put(utterNum, speaker);
            return true;
        }
    }
    return false;
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Assigns a speaker to the paragraph's utterance when none is recorded yet, then returns the
 * speaker suggested for the following paragraph.
 *
 * <p>If {@code nextParagraphSpeaker} is non-empty it is used directly. Otherwise the last
 * sentence of the paragraph is scanned up to its first verb (POS tag starting with "V"): the
 * last person-tagged token (NER tag starting with "PER") that heads a known mention supplies
 * the speaker, and it is only recorded when no verb was encountered (e.g. "John, nbc news").
 *
 * @param paragraph            sentences of the paragraph
 * @param paragraphUtterIndex  utterance index used as the key into {@code speakers}
 * @param nextParagraphSpeaker speaker carried over from the previous paragraph, or ""
 * @param paragraphOffset      added to the paragraph-local sentence index to form the sentence
 *                             index used in {@code mentionheadPositions}
 * @param dict                 dictionaries, forwarded to {@code findNextParagraphSpeaker}
 * @return the result of {@code findNextParagraphSpeaker} for this paragraph
 */
private String findParagraphSpeaker(List<CoreMap> paragraph, int paragraphUtterIndex,
        String nextParagraphSpeaker, int paragraphOffset, Dictionaries dict) {
    if (!speakers.containsKey(paragraphUtterIndex)) {
        if (nextParagraphSpeaker.isEmpty()) {
            // find the speaker of this paragraph (John, nbc news)
            CoreMap lastSent = paragraph.get(paragraph.size() - 1);
            List<CoreLabel> tokens = lastSent.get(CoreAnnotations.TokensAnnotation.class);
            int lastSentIndex = paragraph.size() - 1 + paragraphOffset;

            String candidate = "";
            boolean sawVerb = false;
            for (int tokenIdx = 0; tokenIdx < tokens.size(); tokenIdx++) {
                CoreLabel token = tokens.get(tokenIdx);
                String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String nerTag = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);
                if (posTag.startsWith("V")) {
                    sawVerb = true;
                    break;
                }
                if (nerTag.startsWith("PER")) {
                    IntTuple headPosition = new IntTuple(2);
                    headPosition.set(0, lastSentIndex);
                    headPosition.set(1, tokenIdx);
                    if (mentionheadPositions.containsKey(headPosition)) {
                        candidate = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
                    }
                }
            }
            if (!sawVerb && !candidate.isEmpty()) {
                speakers.put(paragraphUtterIndex, candidate);
            }
        } else {
            speakers.put(paragraphUtterIndex, nextParagraphSpeaker);
        }
    }
    return findNextParagraphSpeaker(paragraph, paragraphOffset, dict);
}

From source file:knu.univ.lingvo.coref.Document.java

License:Open Source License

/**
 * Determines the speaker suggested for the paragraph that follows {@code paragraph}.
 *
 * <p>Every token of the paragraph's last sentence whose lemma is "report" or "say" is looked up
 * in the sentence's collapsed dependency graph. For each {@code nsubj} child of such a verb, if
 * a mention is indexed at the subject's head position and its NER string starts with "PER",
 * that mention's ID (as a string) becomes the result; a later match overwrites an earlier one.
 *
 * <p>Tokens without a lemma are skipped. A verb that cannot be located in the dependency graph
 * is logged and skipped, matching the handling in {@code findSpeaker}, instead of passing
 * {@code null} on to {@code childPairs}.
 *
 * @param paragraph       sentences of the paragraph; must be non-empty
 * @param paragraphOffset added to the paragraph-local sentence index to form the sentence index
 *                        used in {@code mentionheadPositions}
 * @param dict            unused by this method; kept for signature compatibility
 * @return the mention ID of the detected speaker as a string, or "" if none was found
 */
private String findNextParagraphSpeaker(List<CoreMap> paragraph, int paragraphOffset, Dictionaries dict) {
    CoreMap lastSent = paragraph.get(paragraph.size() - 1);
    int lastSentIndex = paragraph.size() - 1 + paragraphOffset;
    String speaker = "";
    for (CoreLabel w : lastSent.get(CoreAnnotations.TokensAnnotation.class)) {
        String lemma = w.get(CoreAnnotations.LemmaAnnotation.class);
        // Constant-first comparison so a missing lemma is skipped rather than dereferenced.
        if (!"report".equals(lemma) && !"say".equals(lemma)) {
            continue;
        }

        String word = w.get(CoreAnnotations.TextAnnotation.class);
        SemanticGraph dependency = lastSent
                .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        IndexedWord t = dependency.getNodeByWordPattern(word);
        if (t == null) {
            // Same handling as findSpeaker: the verb has no node in the dependency graph.
            SieveCoreferenceSystem.logger.warning("Cannot find node in dependency for word " + word);
            continue;
        }

        for (Pair<GrammaticalRelation, IndexedWord> child : dependency.childPairs(t)) {
            if (child.first().getShortName().equals("nsubj")) {
                int subjectIndex = child.second().index(); // start from 1
                IntTuple headPosition = new IntTuple(2);
                headPosition.set(0, lastSentIndex);
                headPosition.set(1, subjectIndex - 1);
                if (mentionheadPositions.containsKey(headPosition)
                        && mentionheadPositions.get(headPosition).nerString.startsWith("PER")) {
                    speaker = Integer.toString(mentionheadPositions.get(headPosition).mentionID);
                }
            }
        }
    }
    return speaker;
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Print logs for error analysis.
 *
 * <p>For every mention of the document (in sentence order) this logs the mention, its coref
 * cluster, and each candidate antecedent returned by a temporary {@code ExactStringMatch}
 * sieve, marking the candidate as chosen/not chosen and correct/incorrect. A candidate is
 * "chosen" when it shares the mention's {@code corefClusterID}; it is "correct" when that
 * agrees with whether both are gold mentions with the same {@code goldCorefClusterID}.
 * At most one precision error and one recall error per mention are printed with context.
 *
 * @param logger    destination of all output (levels {@code fine}, {@code finer}, {@code finest})
 * @param document  document whose mentions, clusters, positions and gold mentions are inspected
 * @param semantics passed through to {@code printLinkWithContext}
 */
public void printTopK(Logger logger, Document document, Semantics semantics) {

    List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions();
    Map<Integer, CorefCluster> corefClusters = document.corefClusters;
    Map<Mention, IntTuple> positions = document.allPositions;
    Map<Integer, Mention> golds = document.allGoldMentions;

    logger.fine("=======ERROR ANALYSIS=========================================================");

    // Temporary sieve for getting ordered antecedents
    DeterministicCorefSieve tmpSieve = new ExactStringMatch();
    for (int i = 0; i < orderedMentionsBySentence.size(); i++) {
        List<Mention> orderedMentions = orderedMentionsBySentence.get(i);
        for (int j = 0; j < orderedMentions.size(); j++) {
            Mention m = orderedMentions.get(j);
            logger.fine("=========Line: " + i + "\tmention: " + j
                    + "=======================================================");
            logger.fine(m.spanToString() + "\tmentionID: " + m.mentionID + "\tcorefClusterID: "
                    + m.corefClusterID + "\tgoldCorefClusterID: " + m.goldCorefClusterID);
            CorefCluster corefCluster = corefClusters.get(m.corefClusterID);
            if (corefCluster != null) {
                corefCluster.printCorefCluster(logger);
            } else {
                logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
            }
            logger.fine("-------------------------------------------------------");

            // Per-mention state: report at most one recall and one precision error.
            boolean oneRecallErrorPrinted = false;
            boolean onePrecisionErrorPrinted = false;
            boolean alreadyChoose = false;

            // Walk candidate sentences from the mention's own sentence back to the first one.
            for (int sentJ = i; sentJ >= 0; sentJ--) {
                List<Mention> l = tmpSieve.getOrderedAntecedents(sentJ, i, orderedMentions,
                        orderedMentionsBySentence, m, j, corefClusters, dictionaries);

                // Sort mentions by length whenever we have two mentions beginning at the same position and having the same head
                for (int ii = 0; ii < l.size(); ii++) {
                    for (int jj = 0; jj < l.size(); jj++) {
                        if (l.get(ii).headString.equals(l.get(jj).headString)
                                && l.get(ii).startIndex == l.get(jj).startIndex
                                && l.get(ii).sameSentence(l.get(jj)) && jj > ii
                                && l.get(ii).spanToString().length() > l.get(jj).spanToString().length()) {
                            logger.finest("FLIPPED: " + l.get(ii).spanToString() + "(" + ii + "), "
                                    + l.get(jj).spanToString() + "(" + jj + ")");
                            // swap elements ii and jj in place
                            l.set(jj, l.set(ii, l.get(jj)));
                        }
                    }
                }

                logger.finest("Candidates in sentence #" + sentJ + " for mention: " + m.spanToString());
                for (int ii = 0; ii < l.size(); ii++) {
                    logger.finest("\tCandidate #" + ii + ": " + l.get(ii).spanToString());
                }

                for (Mention antecedent : l) {
                    boolean chosen = (m.corefClusterID == antecedent.corefClusterID);
                    IntTuple src = new IntTuple(2);
                    src.set(0, i);
                    src.set(1, j);

                    // Candidates without a recorded position are skipped.
                    IntTuple ant = positions.get(antecedent);
                    if (ant == null) {
                        continue;
                    }
                    //correct=(chosen==goldLinks.contains(new Pair<IntTuple, IntTuple>(src,ant)));
                    boolean coreferent = golds.containsKey(m.mentionID)
                            && golds.containsKey(antecedent.mentionID)
                            && (golds.get(m.mentionID).goldCorefClusterID == golds
                                    .get(antecedent.mentionID).goldCorefClusterID);
                    boolean correct = (chosen == coreferent);

                    String chosenness = chosen ? "Chosen" : "Not Chosen";
                    String correctness = correct ? "Correct" : "Incorrect";
                    logger.fine("\t" + correctness + "\t\t" + chosenness + "\t" + antecedent.spanToString());
                    CorefCluster mC = corefClusters.get(m.corefClusterID);
                    CorefCluster aC = corefClusters.get(antecedent.corefClusterID);

                    // Precision error: linked to this antecedent although gold disagrees.
                    if (chosen && !correct && !onePrecisionErrorPrinted && !alreadyChoose) {
                        onePrecisionErrorPrinted = true;
                        printLinkWithContext(logger, "\nPRECISION ERROR ", src, ant, document, semantics);
                        logger.fine("END of PRECISION ERROR LOG");
                    }

                    // Recall error: not linked to this antecedent although gold says coreferent.
                    if (!chosen && !correct && !oneRecallErrorPrinted
                            && (!alreadyChoose || (alreadyChoose && onePrecisionErrorPrinted))) {
                        oneRecallErrorPrinted = true;
                        printLinkWithContext(logger, "\nRECALL ERROR ", src, ant, document, semantics);

                        logger.finer("cluster info: ");
                        if (mC != null) {
                            mC.printCorefCluster(logger);
                        } else {
                            logger.finer("CANNOT find coref cluster for cluster " + m.corefClusterID);
                        }
                        logger.finer("----------------------------------------------------------");
                        if (aC != null) {
                            aC.printCorefCluster(logger);
                        } else {
                            // Report the antecedent's cluster ID: it is the antecedent's cluster
                            // (aC) that is missing here, not the mention's.
                            logger.finer("CANNOT find coref cluster for cluster " + antecedent.corefClusterID);
                        }
                        logger.finer("");
                        logger.fine("END of RECALL ERROR LOG");
                    }
                    if (chosen) {
                        alreadyChoose = true;
                    }
                }
            }
            logger.fine("\n");
        }
    }
    logger.fine("===============================================================================");
}