Example usage for edu.stanford.nlp.stats.Counter.incrementCount

Introduction

This page collects example usages of the incrementCount method of edu.stanford.nlp.stats.Counter.

Prototype

double incrementCount(E key);

Source Link

Document

Increments the count for this key by 1.0.

Usage

From source file:gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java

License:Open Source License

/**
 * Trains a sigmoid model on top of the SVM outputs so that raw classifier scores can be turned
 * into probabilities.
 *
 * <p>Each datum of {@code dataset} is scored by {@code classifier}; the resulting per-label scores
 * become the features of a new real-valued datum that keeps the original label. A linear
 * classifier with a {@code NULL} log prior is then trained on those datums.
 *
 * @param classifier the trained SVM whose scores are being calibrated
 * @param dataset the data used to fit the sigmoid
 * @return the linear classifier trained on the score vectors
 */
private LinearClassifier<L, L> fitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset) {
    final RVFDataset<L, L> scoreDataset = new RVFDataset<L, L>();
    int index = 0;
    while (index < dataset.size()) {
        final RVFDatum<L, F> datum = dataset.getRVFDatum(index);
        final Counter<L> labelScores = classifier.scoresOf((Datum<L, F>) datum);
        // Every score vector also gets the null key incremented by 1.0.
        // NOTE(review): presumably this serves as a constant bias feature -- confirm.
        labelScores.incrementCount(null);
        scoreDataset.add(new RVFDatum<L, L>(labelScores, datum.label()));
        index++;
    }

    final LinearClassifierFactory<L, L> sigmoidFactory = new LinearClassifierFactory<L, L>();
    sigmoidFactory.setPrior(new LogPrior(LogPrior.LogPriorType.NULL));
    return sigmoidFactory.trainClassifier(scoreDataset);
}

From source file:ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Generates the training examples by replaying the gold action sequence of every sentence.
 *
 * <p>For each gold action the currently applicable actions and the current feature vector are
 * read from {@code srparser}; a label vector with one slot per known transition is built
 * ({@code 1} for the gold action, {@code 0} for other applicable actions, {@code -1} otherwise)
 * and the example is added to the returned dataset. The gold action is then applied to the
 * parser. While doing so, every (feature id, position) pair is counted, and the
 * {@code config.numPreComputed} first entries of the list returned by
 * {@code Counters.toSortedList(counts, false)} are stored in {@code preComputed}
 * (presumably the most frequent ones -- confirm against the Counters API).
 *
 * <p>Progress and timing information is written to {@code System.err}.
 *
 * @param sents the sentences to process; {@code null} entries are skipped
 * @param trees not referenced by this method
 * @return the dataset holding all generated examples
 * @throws IOException declared by the signature
 */
public Dataset genTrainExamples(List<CCGJSentence> sents, List<CCGJTreeNode> trees) throws IOException {
    final int transitionCount = actsList.size();
    final Dataset examples = new Dataset(config.numTokens, transitionCount);
    final Counter<Integer> featureSlotCounts = new IntCounter<>();

    System.err.println(Config.SEPARATOR);
    System.err.println("Generate training examples...");
    System.err.println("With #transitions: " + transitionCount);
    final double startMillis = System.currentTimeMillis();
    System.err.println("Started at: " + new Date(System.currentTimeMillis()));

    for (int sentIdx = 0; sentIdx < sents.size(); sentIdx++) {
        // Progress output: a running index every 1000 sentences, a line break every 10000
        // sentences and after the last one. Nothing is printed for the very first sentence.
        final boolean pastFirst = sentIdx > 0;
        if (pastFirst && sentIdx % 1000 == 0) {
            System.err.print(sentIdx + " ");
        }
        if (pastFirst && (sentIdx % 10000 == 0 || sentIdx == sents.size() - 1)) {
            System.err.println();
        }

        final CCGJSentence sentence = sents.get(sentIdx);
        if (sentence == null) {
            continue;
        }

        srparser.initVars(sentence);
        // Gold details are looked up with a 1-based sentence key.
        final List<ArcJAction> goldActions = goldDetails.get(sentIdx + 1).getarcActs();
        for (ArcJAction goldAction : goldActions) {
            final ArrayList<ArcJAction> candidates = getAction(srparser);

            final int depth = srparser.stack.size();
            ArrayList<Integer> rightPeriphery = null;
            if (srparser.incalgo && depth > 1) {
                // Second-from-top stack node.
                final CCGJTreeNode leftNode = srparser.stack.get(depth - 2);
                final Integer leftVertex = leftNode.getConllNode().getNodeId();
                rightPeriphery = srparser.depGraph.getRightPer(leftVertex);
            }
            final List<Integer> features = getFeatures(srparser, rightPeriphery, sentence);

            // -1 = action not applicable, 0 = applicable but not gold, 1 = gold.
            final List<Integer> labels = new ArrayList<>(Collections.nCopies(transitionCount, -1));
            for (ArcJAction candidate : candidates) {
                final Integer actionId = actsMap.get(candidate);
                if (actionId == null) {
                    continue;
                }
                labels.set(actionId, candidate.equals(goldAction) ? 1 : 0);
            }
            examples.addExample(features, labels);

            // Encode (feature id, position) as a single integer key.
            for (int slot = 0; slot < features.size(); slot++) {
                featureSlotCounts.incrementCount(features.get(slot) * features.size() + slot);
            }

            srparser.applyAction(goldAction);
        }
    }

    System.err.println("#Train Examples: " + examples.n);
    final double endMillis = System.currentTimeMillis();
    System.err.println("Ended at : " + new Date(System.currentTimeMillis()) + " taking "
            + 0.001 * (endMillis - startMillis) + " secs");

    final List<Integer> rankedKeys = Counters.toSortedList(featureSlotCounts, false);
    final int keep = Math.min(config.numPreComputed, rankedKeys.size());
    preComputed = new ArrayList<>(rankedKeys.subList(0, keep));

    return examples;
}

From source file:knu.univ.lingvo.coref.ACEMentionExtractor.java

License:Open Source License

/**
 * Logs (at FINE level) a plain-text rendering of the document in which mention boundaries are
 * marked with square brackets.
 *
 * <p>Each sentence is rendered on its own line. An extra blank line is emitted when the first
 * token of a sentence starts more than two characters after the end of the previous sentence.
 * For every token index, the closing brackets of mentions ending there are written first, then
 * one opening bracket per mention starting there, then the token itself. A closing bracket is
 * suffixed with {@code _<clusterId>} when {@code gold} is true and the mention's gold cluster
 * does not contain exactly one mention. Mentions ending at {@code tokens.length} are closed
 * with a plain {@code ]} after the last token.
 *
 * <p>The text is assembled with {@link StringBuilder} instead of repeated string concatenation;
 * the logged output is unchanged.
 *
 * @param sentences the sentences of the document
 * @param allMentions the mentions of each sentence, indexed like {@code sentences}
 * @param filename not referenced by this method
 * @param gold whether the mentions are gold mentions; affects the header line and the id suffix
 * @throws FileNotFoundException declared by the signature; nothing in this method throws it
 */
private static void printRawDoc(List<CoreMap> sentences, List<List<Mention>> allMentions, String filename,
        boolean gold) throws FileNotFoundException {
    StringBuilder doc = new StringBuilder();
    int previousOffset = 0;

    // Number of mentions per gold cluster, used to decide whether a cluster id is worth printing.
    Counter<Integer> mentionCount = new ClassicCounter<Integer>();
    for (List<Mention> l : allMentions) {
        for (Mention m : l) {
            mentionCount.incrementCount(m.goldCorefClusterID);
        }
    }

    for (int i = 0; i < sentences.size(); i++) {
        CoreMap sentence = sentences.get(i);
        List<Mention> mentions = allMentions.get(i);

        String[] tokens = sentence.get(CoreAnnotations.TextAnnotation.class).split(" ");
        StringBuilder sent = new StringBuilder();
        List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);
        if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
            sent.append("\n");
        }
        previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);

        Counter<Integer> startCounts = new ClassicCounter<Integer>();
        Counter<Integer> endCounts = new ClassicCounter<Integer>();
        Map<Integer, Set<Integer>> endID = Generics.newHashMap();
        for (Mention m : mentions) {
            startCounts.incrementCount(m.startIndex);
            endCounts.incrementCount(m.endIndex);
            Set<Integer> idsEndingHere = endID.get(m.endIndex);
            if (idsEndingHere == null) {
                idsEndingHere = Generics.<Integer>newHashSet();
                endID.put(m.endIndex, idsEndingHere);
            }
            idsEndingHere.add(m.goldCorefClusterID);
        }

        for (int j = 0; j < tokens.length; j++) {
            Set<Integer> idsEndingHere = endID.get(j);
            if (idsEndingHere != null) {
                for (Integer id : idsEndingHere) {
                    if (mentionCount.getCount(id) != 1 && gold) {
                        sent.append("]_").append(id);
                    } else {
                        sent.append("]");
                    }
                }
            }
            for (int k = 0; k < startCounts.getCount(j); k++) {
                // Consecutive opening brackets are written without a separating space.
                if (sent.length() == 0 || sent.charAt(sent.length() - 1) != '[') {
                    sent.append(" ");
                }
                sent.append("[");
            }
            sent.append(" ");
            sent.append(tokens[j]);
        }
        // Mentions whose end index is one past the last token are closed after the sentence.
        for (int k = 0; k < endCounts.getCount(tokens.length); k++) {
            sent.append("]");
        }
        sent.append("\n");
        doc.append(sent);
    }

    if (gold) {
        logger.fine("New DOC: (GOLD MENTIONS) ==================================================");
    } else {
        logger.fine("New DOC: (Predicted Mentions) ==================================================");
    }
    logger.fine(doc.toString());
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Logs a coreference link together with its context: for both the mention and its antecedent
 * the mention attributes, the sentence with the mention span bracketed, and the gold mentions
 * of that sentence (at FINE level), followed by each mention's dependency graph and context
 * parse tree (at FINER level).
 *
 * @param logger the logger that receives all output
 * @param header passed through to {@code printLink}
 * @param src (sentence index, mention index) of the mention
 * @param dst (sentence index, mention index) of the antecedent
 * @param document the document both mentions belong to
 * @param semantics not referenced by this method
 */
private static void printLinkWithContext(Logger logger, String header, IntTuple src, IntTuple dst,
        Document document, Semantics semantics) {
    List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions();

    Mention srcMention = orderedMentionsBySentence.get(src.get(0)).get(src.get(1));
    Mention dstMention = orderedMentionsBySentence.get(dst.get(0)).get(dst.get(1));

    printLink(logger, header, src, dst, orderedMentionsBySentence);

    logMentionWithContext(logger, "Mention:", srcMention, src.get(0), document);
    logMentionWithContext(logger, "\nAntecedent:", dstMention, dst.get(0), document);

    logDependencyAndParse(logger, "\nMention:: --------------------------------------------------------",
            srcMention);
    logDependencyAndParse(logger, "\nAntecedent:: -----------------------------------------------------",
            dstMention);
}

/** Logs the attributes of one mention, its bracketed sentence and the sentence's gold mentions. */
private static void logMentionWithContext(Logger logger, String label, Mention mention, int sentenceIndex,
        Document document) {
    printList(logger, label + mention.spanToString(), "Gender:" + mention.gender.toString(),
            "Number:" + mention.number.toString(), "Animacy:" + mention.animacy.toString(),
            "Person:" + mention.person.toString(), "NER:" + mention.nerString,
            "Head:" + mention.headString, "Type:" + mention.mentionType.toString(),
            "utter: " + mention.headWord.get(CoreAnnotations.UtteranceAnnotation.class),
            "speakerID: " + mention.headWord.get(CoreAnnotations.SpeakerAnnotation.class),
            "twinless:" + mention.twinless);
    logger.fine("Context:");
    logger.fine(markMentionInSentence(mention));
    logger.fine(renderGoldMentions(document, sentenceIndex));
}

/** Renders the mention's sentence with "[" before its start index and "]" before its end index. */
private static String markMentionInSentence(Mention mention) {
    List<CoreLabel> words = mention.sentenceWords;
    StringBuilder context = new StringBuilder();
    for (int i = 0; i < words.size(); i++) {
        if (i == mention.startIndex) {
            context.append("[");
        }
        if (i == mention.endIndex) {
            context.append("]");
        }
        context.append(words.get(i).word()).append(" ");
    }
    return context.toString();
}

/** Renders one sentence of the document with every gold mention span bracketed. */
private static String renderGoldMentions(Document document, int sentenceIndex) {
    StringBuilder golds = new StringBuilder();
    golds.append("Gold mentions in the sentence:\n");

    // How many gold mentions begin / end at each token index.
    Counter<Integer> mBegin = new ClassicCounter<Integer>();
    Counter<Integer> mEnd = new ClassicCounter<Integer>();
    for (Mention m : document.goldOrderedMentionsBySentence.get(sentenceIndex)) {
        mBegin.incrementCount(m.startIndex);
        mEnd.incrementCount(m.endIndex);
    }

    List<CoreLabel> tokens = document.annotation.get(CoreAnnotations.SentencesAnnotation.class)
            .get(sentenceIndex).get(CoreAnnotations.TokensAnnotation.class);
    for (int i = 0; i < tokens.size(); i++) {
        for (int j = 0; j < mEnd.getCount(i); j++) {
            golds.append("]");
        }
        for (int j = 0; j < mBegin.getCount(i); j++) {
            golds.append("[");
        }
        golds.append(tokens.get(i).get(CoreAnnotations.TextAnnotation.class));
        golds.append(" ");
    }
    return golds.toString();
}

/** Logs (FINER) a section title, the mention's dependency graph if printable, and its parse tree. */
private static void logDependencyAndParse(Logger logger, String title, Mention mention) {
    logger.finer(title);
    // Printing the dependency graph is best effort: a missing graph is skipped, and a graph
    // that cannot be rendered must not prevent the parse tree from being logged.
    if (mention.dependency != null) {
        try {
            logger.finer(mention.dependency.toString());
        } catch (RuntimeException e) {
            logger.finer("Could not print dependency graph: " + e);
        }
    }
    logger.finer("Parse:");
    logger.finer(formatPennTree(mention.contextParseTree));
}

From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License:Open Source License

/**
 * Print raw document for analysis/* ww w  .j a  va 2s. c o  m*/
 */
public static void printRawDoc(Document document, boolean gold) throws FileNotFoundException {
    List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allMentions;
    if (gold) {
        allMentions = document.goldOrderedMentionsBySentence;
    } else {
        allMentions = document.predictedOrderedMentionsBySentence;
    }
    //    String filename = document.annotation.get()

    StringBuilder doc = new StringBuilder();
    int previousOffset = 0;

    for (int i = 0; i < sentences.size(); i++) {
        CoreMap sentence = sentences.get(i);
        List<Mention> mentions = allMentions.get(i);

        List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);
        String[] tokens = new String[t.size()];
        for (CoreLabel c : t) {
            tokens[c.index() - 1] = c.word();
        }
        if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
            doc.append("\n");
        }
        previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        Counter<Integer> startCounts = new ClassicCounter<Integer>();
        Counter<Integer> endCounts = new ClassicCounter<Integer>();
        Map<Integer, Set<Mention>> endMentions = Generics.newHashMap();
        for (Mention m : mentions) {
            startCounts.incrementCount(m.startIndex);
            endCounts.incrementCount(m.endIndex);
            if (!endMentions.containsKey(m.endIndex)) {
                endMentions.put(m.endIndex, Generics.<Mention>newHashSet());
            }
            endMentions.get(m.endIndex).add(m);
        }
        for (int j = 0; j < tokens.length; j++) {
            if (endMentions.containsKey(j)) {
                for (Mention m : endMentions.get(j)) {
                    int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID;
                    doc.append("]_").append(corefChainId);
                }
            }
            for (int k = 0; k < startCounts.getCount(j); k++) {
                if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') {
                    doc.append(" ");
                }
                doc.append("[");
            }
            if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') {
                doc.append(" ");
            }
            doc.append(tokens[j]);
        }
        if (endMentions.containsKey(tokens.length)) {
            for (Mention m : endMentions.get(tokens.length)) {
                int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID;
                doc.append("]_").append(corefChainId); //append("_").append(m.mentionID);
            }
        }

        doc.append("\n");
    }
    logger.fine(document.annotation.get(CoreAnnotations.DocIDAnnotation.class));
    if (gold) {
        logger.fine("New DOC: (GOLD MENTIONS) ==================================================");
    } else {
        logger.fine("New DOC: (Predicted Mentions) ==================================================");
    }
    logger.fine(doc.toString());
}