List of usage examples for edu.stanford.nlp.stats Counter getCount
double getCount(Object key);
From source file: knu.univ.lingvo.coref.ACEMentionExtractor.java
License: Open Source License
private static void printRawDoc(List<CoreMap> sentences, List<List<Mention>> allMentions, String filename, boolean gold) throws FileNotFoundException { StringBuilder doc = new StringBuilder(); int previousOffset = 0; Counter<Integer> mentionCount = new ClassicCounter<Integer>(); for (List<Mention> l : allMentions) { for (Mention m : l) { mentionCount.incrementCount(m.goldCorefClusterID); }/*from www. j a v a 2 s . co m*/ } for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); List<Mention> mentions = allMentions.get(i); String[] tokens = sentence.get(CoreAnnotations.TextAnnotation.class).split(" "); String sent = ""; List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class); if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) sent += "\n"; previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); Counter<Integer> startCounts = new ClassicCounter<Integer>(); Counter<Integer> endCounts = new ClassicCounter<Integer>(); Map<Integer, Set<Integer>> endID = Generics.newHashMap(); for (Mention m : mentions) { startCounts.incrementCount(m.startIndex); endCounts.incrementCount(m.endIndex); if (!endID.containsKey(m.endIndex)) endID.put(m.endIndex, Generics.<Integer>newHashSet()); endID.get(m.endIndex).add(m.goldCorefClusterID); } for (int j = 0; j < tokens.length; j++) { if (endID.containsKey(j)) { for (Integer id : endID.get(j)) { if (mentionCount.getCount(id) != 1 && gold) sent += "]_" + id; else sent += "]"; } } for (int k = 0; k < startCounts.getCount(j); k++) { if (!sent.endsWith("[")) sent += " "; sent += "["; } sent += " "; sent = sent + tokens[j]; } for (int k = 0; k < endCounts.getCount(tokens.length); k++) { sent += "]"; } sent += "\n"; doc.append(sent); } if (gold) logger.fine("New DOC: (GOLD MENTIONS) =================================================="); else logger.fine("New DOC: (Predicted Mentions) 
=================================================="); logger.fine(doc.toString()); }
From source file: knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License: Open Source License
/** * print a coref link information including context and parse tree *//*w w w . ja v a 2 s . com*/ private static void printLinkWithContext(Logger logger, String header, IntTuple src, IntTuple dst, Document document, Semantics semantics) { List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions(); List<List<Mention>> goldOrderedMentionsBySentence = document.goldOrderedMentionsBySentence; Mention srcMention = orderedMentionsBySentence.get(src.get(0)).get(src.get(1)); Mention dstMention = orderedMentionsBySentence.get(dst.get(0)).get(dst.get(1)); List<CoreLabel> srcSentence = srcMention.sentenceWords; List<CoreLabel> dstSentence = dstMention.sentenceWords; printLink(logger, header, src, dst, orderedMentionsBySentence); printList(logger, "Mention:" + srcMention.spanToString(), "Gender:" + srcMention.gender.toString(), "Number:" + srcMention.number.toString(), "Animacy:" + srcMention.animacy.toString(), "Person:" + srcMention.person.toString(), "NER:" + srcMention.nerString, "Head:" + srcMention.headString, "Type:" + srcMention.mentionType.toString(), "utter: " + srcMention.headWord.get(CoreAnnotations.UtteranceAnnotation.class), "speakerID: " + srcMention.headWord.get(CoreAnnotations.SpeakerAnnotation.class), "twinless:" + srcMention.twinless); logger.fine("Context:"); String p = ""; for (int i = 0; i < srcSentence.size(); i++) { if (i == srcMention.startIndex) { p += "["; } if (i == srcMention.endIndex) { p += "]"; } p += srcSentence.get(i).word() + " "; } logger.fine(p); StringBuilder golds = new StringBuilder(); golds.append("Gold mentions in the sentence:\n"); Counter<Integer> mBegin = new ClassicCounter<Integer>(); Counter<Integer> mEnd = new ClassicCounter<Integer>(); for (Mention m : goldOrderedMentionsBySentence.get(src.get(0))) { mBegin.incrementCount(m.startIndex); mEnd.incrementCount(m.endIndex); } List<CoreLabel> l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(src.get(0)) 
.get(CoreAnnotations.TokensAnnotation.class); for (int i = 0; i < l.size(); i++) { for (int j = 0; j < mEnd.getCount(i); j++) { golds.append("]"); } for (int j = 0; j < mBegin.getCount(i); j++) { golds.append("["); } golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class)); golds.append(" "); } logger.fine(golds.toString()); printList(logger, "\nAntecedent:" + dstMention.spanToString(), "Gender:" + dstMention.gender.toString(), "Number:" + dstMention.number.toString(), "Animacy:" + dstMention.animacy.toString(), "Person:" + dstMention.person.toString(), "NER:" + dstMention.nerString, "Head:" + dstMention.headString, "Type:" + dstMention.mentionType.toString(), "utter: " + dstMention.headWord.get(CoreAnnotations.UtteranceAnnotation.class), "speakerID: " + dstMention.headWord.get(CoreAnnotations.SpeakerAnnotation.class), "twinless:" + dstMention.twinless); logger.fine("Context:"); p = ""; for (int i = 0; i < dstSentence.size(); i++) { if (i == dstMention.startIndex) { p += "["; } if (i == dstMention.endIndex) { p += "]"; } p += dstSentence.get(i).word() + " "; } logger.fine(p); golds = new StringBuilder(); golds.append("Gold mentions in the sentence:\n"); mBegin = new ClassicCounter<Integer>(); mEnd = new ClassicCounter<Integer>(); for (Mention m : goldOrderedMentionsBySentence.get(dst.get(0))) { mBegin.incrementCount(m.startIndex); mEnd.incrementCount(m.endIndex); } l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(dst.get(0)) .get(CoreAnnotations.TokensAnnotation.class); for (int i = 0; i < l.size(); i++) { for (int j = 0; j < mEnd.getCount(i); j++) { golds.append("]"); } for (int j = 0; j < mBegin.getCount(i); j++) { golds.append("["); } golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class)); golds.append(" "); } logger.fine(golds.toString()); logger.finer("\nMention:: --------------------------------------------------------"); try { logger.finer(srcMention.dependency.toString()); } catch (Exception e) { } //throw new 
RuntimeException(e);} logger.finer("Parse:"); logger.finer(formatPennTree(srcMention.contextParseTree)); logger.finer("\nAntecedent:: -----------------------------------------------------"); try { logger.finer(dstMention.dependency.toString()); } catch (Exception e) { } //throw new RuntimeException(e);} logger.finer("Parse:"); logger.finer(formatPennTree(dstMention.contextParseTree)); }
From source file: knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License: Open Source License
/** * Print raw document for analysis// ww w. j a va 2s . c o m */ public static void printRawDoc(Document document, boolean gold) throws FileNotFoundException { List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class); List<List<Mention>> allMentions; if (gold) { allMentions = document.goldOrderedMentionsBySentence; } else { allMentions = document.predictedOrderedMentionsBySentence; } // String filename = document.annotation.get() StringBuilder doc = new StringBuilder(); int previousOffset = 0; for (int i = 0; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); List<Mention> mentions = allMentions.get(i); List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class); String[] tokens = new String[t.size()]; for (CoreLabel c : t) { tokens[c.index() - 1] = c.word(); } if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) { doc.append("\n"); } previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); Counter<Integer> startCounts = new ClassicCounter<Integer>(); Counter<Integer> endCounts = new ClassicCounter<Integer>(); Map<Integer, Set<Mention>> endMentions = Generics.newHashMap(); for (Mention m : mentions) { startCounts.incrementCount(m.startIndex); endCounts.incrementCount(m.endIndex); if (!endMentions.containsKey(m.endIndex)) { endMentions.put(m.endIndex, Generics.<Mention>newHashSet()); } endMentions.get(m.endIndex).add(m); } for (int j = 0; j < tokens.length; j++) { if (endMentions.containsKey(j)) { for (Mention m : endMentions.get(j)) { int corefChainId = (gold) ? 
m.goldCorefClusterID : m.corefClusterID; doc.append("]_").append(corefChainId); } } for (int k = 0; k < startCounts.getCount(j); k++) { if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') { doc.append(" "); } doc.append("["); } if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') { doc.append(" "); } doc.append(tokens[j]); } if (endMentions.containsKey(tokens.length)) { for (Mention m : endMentions.get(tokens.length)) { int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID; doc.append("]_").append(corefChainId); //append("_").append(m.mentionID); } } doc.append("\n"); } logger.fine(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)); if (gold) { logger.fine("New DOC: (GOLD MENTIONS) =================================================="); } else { logger.fine("New DOC: (Predicted Mentions) =================================================="); } logger.fine(doc.toString()); }