Example usage for edu.stanford.nlp.stats Counter incrementCount

List of usage examples for edu.stanford.nlp.stats Counter incrementCount

Introduction

In this page you can find the example usage for edu.stanford.nlp.stats Counter incrementCount.

Prototype

double incrementCount(E key);

Source Link

Document

Increments the count for this key by 1.0.

Usage

From source file: gr.aueb.cs.nlp.wordtagger.classifier.SVMWindows64Factory.java

License: Open Source License

/**
 * Builds a sigmoid model to turn the classifier outputs into probabilities.
 *
 * <p>Platt-style calibration: the raw SVM scores of every datum in
 * {@code dataset} become real-valued features of a new dataset, and a
 * linear classifier is trained on it to map scores to probabilities.
 *
 * @param classifier the trained SVM whose scores are to be calibrated
 * @param dataset    the data used to fit the calibration model
 * @return a linear classifier over label-score counters
 */
private LinearClassifier<L, L> fitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset) {
    RVFDataset<L, L> plattDataset = new RVFDataset<L, L>();
    for (int i = 0; i < dataset.size(); i++) {
        RVFDatum<L, F> d = dataset.getRVFDatum(i);
        Counter<L> scores = classifier.scoresOf((Datum<L, F>) d);
        // NOTE(review): the null key appears to serve as a constant bias
        // feature for the sigmoid fit -- confirm this is intentional and
        // that the Counter implementation tolerates null keys.
        scores.incrementCount(null);
        plattDataset.add(new RVFDatum<L, L>(scores, d.label()));
    }
    // NULL log-prior: no regularization when fitting the calibration weights.
    LinearClassifierFactory<L, L> factory = new LinearClassifierFactory<L, L>();
    factory.setPrior(new LogPrior(LogPrior.LogPriorType.NULL));
    return factory.trainClassifier(plattDataset);
}

From source file: ilcc.ccgparser.nnparser.IncNNParser.java

/**
 * Generates neural-network training examples by replaying the gold
 * shift-reduce derivation of each sentence.
 *
 * <p>For every parser state along the gold derivation, the current feature
 * vector is paired with a label vector over all transitions
 * (-1 = not applicable, 0 = applicable but not gold, 1 = gold action).
 * Feature/position frequencies are tallied so the most common embeddings
 * can be pre-computed.
 *
 * @param sents sentences to derive examples from
 * @param trees gold trees (not read directly here; gold actions come from
 *              {@code goldDetails} -- TODO confirm the parameter is needed)
 * @return the populated training {@code Dataset}
 * @throws IOException declared for API compatibility
 */
public Dataset genTrainExamples(List<CCGJSentence> sents, List<CCGJTreeNode> trees) throws IOException {
    int numTrans = actsList.size();
    Dataset ret = new Dataset(config.numTokens, numTrans);

    // Frequency of each encoded (feature value, position) pair; used at the
    // end to select embeddings worth pre-computing.
    Counter<Integer> tokPosCount = new IntCounter<>();
    System.err.println(Config.SEPARATOR);
    System.err.println("Generate training examples...");
    System.err.println("With #transitions: " + numTrans);
    // FIX: keep timestamps as long -- the original stored them in doubles
    // via redundant (long) casts -- and report the measured start/end times
    // instead of re-sampling the clock.
    long start = System.currentTimeMillis();
    System.err.println("Started at: " + new Date(start));

    for (int i = 0; i < sents.size(); ++i) {
        // Lightweight progress reporting to stderr.
        if (i > 0) {
            if (i % 1000 == 0)
                System.err.print(i + " ");
            if (i % 10000 == 0 || i == sents.size() - 1)
                System.err.println();
        }

        CCGJSentence sent = sents.get(i);
        if (sent == null)
            continue;
        srparser.initVars(sent);
        // Gold action sequence for this sentence (goldDetails is 1-indexed).
        List<ArcJAction> gActList = goldDetails.get(i + 1).getarcActs();
        for (ArcJAction gAct : gActList) {
            ArrayList<ArcJAction> acts = getAction(srparser);
            ArrayList<Integer> rightPerList = null;
            int stacksize = srparser.stack.size();
            if (srparser.incalgo && stacksize > 1) {
                CCGJTreeNode left = srparser.stack.get(stacksize - 2);
                Integer lvertex = left.getConllNode().getNodeId();
                rightPerList = srparser.depGraph.getRightPer(lvertex);
            }
            // Features must be extracted BEFORE applying the gold action,
            // since they describe the pre-transition parser state.
            List<Integer> feature = getFeatures(srparser, rightPerList, sent);

            List<Integer> label = new ArrayList<>(Collections.nCopies(numTrans, -1));
            for (ArcJAction act : acts) {
                Integer id = actsMap.get(act);
                if (id != null)
                    label.set(id, act.equals(gAct) ? 1 : 0);
            }
            ret.addExample(feature, label);
            // NOTE(review): encodes a (token id, position) pair as
            // id * feature.size() + j; collisions are possible if positions
            // can exceed the feature count -- confirm against config.numTokens.
            for (int j = 0; j < feature.size(); ++j)
                tokPosCount.incrementCount(feature.get(j) * feature.size() + j);
            srparser.applyAction(gAct);
        }
    }
    System.err.println("#Train Examples: " + ret.n);
    long end = System.currentTimeMillis();
    System.err.println("Ended at : " + new Date(end) + " taking " + 0.001 * (end - start)
            + " secs");

    // Keep only the most frequent pairs for embedding pre-computation.
    List<Integer> sortedTokens = Counters.toSortedList(tokPosCount, false);
    preComputed = new ArrayList<>(
            sortedTokens.subList(0, Math.min(config.numPreComputed, sortedTokens.size())));

    return ret;
}

From source file: knu.univ.lingvo.coref.ACEMentionExtractor.java

License: Open Source License

/**
 * Logs a raw document with mention boundaries bracketed for inspection.
 *
 * <p>Each mention opens with "[" and closes with "]"; when printing gold
 * mentions, non-singleton coref clusters are tagged as "]_&lt;clusterId&gt;".
 *
 * @param sentences   annotated sentences of the document
 * @param allMentions per-sentence mention lists, parallel to {@code sentences}
 * @param filename    source file name (currently unused -- TODO confirm)
 * @param gold        if true, tag closings with gold coref cluster ids
 * @throws FileNotFoundException declared for API compatibility
 */
private static void printRawDoc(List<CoreMap> sentences, List<List<Mention>> allMentions, String filename,
        boolean gold) throws FileNotFoundException {
    StringBuilder doc = new StringBuilder();
    int previousOffset = 0;
    // Mentions per gold cluster; singleton clusters are printed without ids.
    Counter<Integer> mentionCount = new ClassicCounter<Integer>();
    for (List<Mention> l : allMentions) {
        for (Mention m : l) {
            mentionCount.incrementCount(m.goldCorefClusterID);
        }
    }

    for (int i = 0; i < sentences.size(); i++) {
        CoreMap sentence = sentences.get(i);
        List<Mention> mentions = allMentions.get(i);

        String[] tokens = sentence.get(CoreAnnotations.TextAnnotation.class).split(" ");
        // FIX: build each sentence with a StringBuilder instead of repeated
        // String concatenation inside nested loops (accidental O(n^2)).
        StringBuilder sent = new StringBuilder();
        List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);
        // A gap in character offsets marks a paragraph break in the source text.
        if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class))
            sent.append("\n");
        previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        Counter<Integer> startCounts = new ClassicCounter<Integer>();
        Counter<Integer> endCounts = new ClassicCounter<Integer>();
        Map<Integer, Set<Integer>> endID = Generics.newHashMap();
        for (Mention m : mentions) {
            startCounts.incrementCount(m.startIndex);
            endCounts.incrementCount(m.endIndex);
            if (!endID.containsKey(m.endIndex))
                endID.put(m.endIndex, Generics.<Integer>newHashSet());
            endID.get(m.endIndex).add(m.goldCorefClusterID);
        }
        for (int j = 0; j < tokens.length; j++) {
            // Close every mention that ends before token j.
            if (endID.containsKey(j)) {
                for (Integer id : endID.get(j)) {
                    if (mentionCount.getCount(id) != 1 && gold)
                        sent.append("]_").append(id);
                    else
                        sent.append("]");
                }
            }
            // Open brackets for mentions starting at token j; avoid a space
            // directly after a previous "[".
            for (int k = 0; k < startCounts.getCount(j); k++) {
                if (sent.length() == 0 || sent.charAt(sent.length() - 1) != '[')
                    sent.append(" ");
                sent.append("[");
            }
            sent.append(" ").append(tokens[j]);
        }
        // Close mentions ending at the sentence boundary.
        for (int k = 0; k < endCounts.getCount(tokens.length); k++) {
            sent.append("]");
        }
        sent.append("\n");
        doc.append(sent);
    }
    if (gold)
        logger.fine("New DOC: (GOLD MENTIONS) ==================================================");
    else
        logger.fine("New DOC: (Predicted Mentions) ==================================================");
    logger.fine(doc.toString());
}

From source file: knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License: Open Source License

/**
 * Prints a coref link with full context: mention attributes, the bracketed
 * sentence, gold mentions in that sentence, and the dependency and
 * constituency parses for both the mention and its antecedent.
 *
 * <p>The original duplicated ~50 lines for the src and dst mentions; the
 * shared logic now lives in private helpers.
 *
 * @param logger    destination logger
 * @param header    header line passed through to {@code printLink}
 * @param src       (sentence, mention) index of the anaphoric mention
 * @param dst       (sentence, mention) index of the antecedent
 * @param document  document holding mentions and annotations
 * @param semantics unused here; kept for signature compatibility
 */
private static void printLinkWithContext(Logger logger, String header, IntTuple src, IntTuple dst,
        Document document, Semantics semantics) {
    List<List<Mention>> orderedMentionsBySentence = document.getOrderedMentions();
    List<List<Mention>> goldOrderedMentionsBySentence = document.goldOrderedMentionsBySentence;

    Mention srcMention = orderedMentionsBySentence.get(src.get(0)).get(src.get(1));
    Mention dstMention = orderedMentionsBySentence.get(dst.get(0)).get(dst.get(1));

    printLink(logger, header, src, dst, orderedMentionsBySentence);

    printMentionAttributes(logger, "Mention:", srcMention);
    printMentionContext(logger, srcMention, srcMention.sentenceWords);
    printGoldMentionsInSentence(logger, document, goldOrderedMentionsBySentence.get(src.get(0)), src.get(0));

    printMentionAttributes(logger, "\nAntecedent:", dstMention);
    printMentionContext(logger, dstMention, dstMention.sentenceWords);
    printGoldMentionsInSentence(logger, document, goldOrderedMentionsBySentence.get(dst.get(0)), dst.get(0));

    printMentionParses(logger, "\nMention:: --------------------------------------------------------",
            srcMention);
    printMentionParses(logger, "\nAntecedent:: -----------------------------------------------------",
            dstMention);
}

/** Logs one mention's grammatical attributes (at FINE) via {@code printList}. */
private static void printMentionAttributes(Logger logger, String label, Mention m) {
    printList(logger, label + m.spanToString(), "Gender:" + m.gender.toString(),
            "Number:" + m.number.toString(), "Animacy:" + m.animacy.toString(),
            "Person:" + m.person.toString(), "NER:" + m.nerString,
            "Head:" + m.headString, "Type:" + m.mentionType.toString(),
            "utter: " + m.headWord.get(CoreAnnotations.UtteranceAnnotation.class),
            "speakerID: " + m.headWord.get(CoreAnnotations.SpeakerAnnotation.class),
            "twinless:" + m.twinless);
}

/** Logs the mention's sentence with "[" / "]" marking its token span. */
private static void printMentionContext(Logger logger, Mention m, List<CoreLabel> sentence) {
    logger.fine("Context:");
    StringBuilder p = new StringBuilder();
    for (int i = 0; i < sentence.size(); i++) {
        if (i == m.startIndex) {
            p.append("[");
        }
        // Closing bracket goes before the token at endIndex (span is exclusive).
        if (i == m.endIndex) {
            p.append("]");
        }
        p.append(sentence.get(i).word()).append(" ");
    }
    logger.fine(p.toString());
}

/** Logs the gold mentions of sentence {@code sentIdx} with bracket markers. */
private static void printGoldMentionsInSentence(Logger logger, Document document, List<Mention> goldMentions,
        int sentIdx) {
    StringBuilder golds = new StringBuilder();
    golds.append("Gold mentions in the sentence:\n");
    Counter<Integer> mBegin = new ClassicCounter<Integer>();
    Counter<Integer> mEnd = new ClassicCounter<Integer>();
    for (Mention m : goldMentions) {
        mBegin.incrementCount(m.startIndex);
        mEnd.incrementCount(m.endIndex);
    }
    List<CoreLabel> l = document.annotation.get(CoreAnnotations.SentencesAnnotation.class).get(sentIdx)
            .get(CoreAnnotations.TokensAnnotation.class);
    for (int i = 0; i < l.size(); i++) {
        for (int j = 0; j < mEnd.getCount(i); j++) {
            golds.append("]");
        }
        for (int j = 0; j < mBegin.getCount(i); j++) {
            golds.append("[");
        }
        golds.append(l.get(i).get(CoreAnnotations.TextAnnotation.class));
        golds.append(" ");
    }
    logger.fine(golds.toString());
}

/** Logs (at FINER) the dependency graph and Penn-tree parse of a mention. */
private static void printMentionParses(Logger logger, String header, Mention m) {
    logger.finer(header);
    try {
        logger.finer(m.dependency.toString());
    } catch (Exception ignored) {
        // Best-effort: some mentions carry no dependency graph; the original
        // deliberately swallowed this instead of rethrowing.
    }
    logger.finer("Parse:");
    logger.finer(formatPennTree(m.contextParseTree));
}

From source file: knu.univ.lingvo.coref.SieveCoreferenceSystem.java

License: Open Source License

/**
 * Print raw document for analysis.
 *
 * <p>Logs every sentence with mention spans bracketed: each mention opens
 * with "[" and closes with "]_&lt;clusterId&gt;", where the cluster id is
 * the gold or predicted coref chain depending on {@code gold}.
 *
 * @param document the document whose sentences and mentions are printed
 * @param gold     if true print gold mentions, otherwise predicted ones
 * @throws FileNotFoundException declared for API compatibility
 */
public static void printRawDoc(Document document, boolean gold) throws FileNotFoundException {
    List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
    List<List<Mention>> allMentions;
    if (gold) {
        allMentions = document.goldOrderedMentionsBySentence;
    } else {
        allMentions = document.predictedOrderedMentionsBySentence;
    }
    //    String filename = document.annotation.get()

    StringBuilder doc = new StringBuilder();
    int previousOffset = 0;

    for (int i = 0; i < sentences.size(); i++) {
        CoreMap sentence = sentences.get(i);
        List<Mention> mentions = allMentions.get(i);

        // Rebuild the token array from token indices (CoreLabel.index() is 1-based).
        List<CoreLabel> t = sentence.get(CoreAnnotations.TokensAnnotation.class);
        String[] tokens = new String[t.size()];
        for (CoreLabel c : t) {
            tokens[c.index() - 1] = c.word();
        }
        // A gap in character offsets marks a paragraph break in the source text.
        if (previousOffset + 2 < t.get(0).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class)) {
            doc.append("\n");
        }
        previousOffset = t.get(t.size() - 1).get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
        // How many mentions start/end at each token index, plus the set of
        // mentions ending at each index (to tag closings with cluster ids).
        Counter<Integer> startCounts = new ClassicCounter<Integer>();
        Counter<Integer> endCounts = new ClassicCounter<Integer>();
        Map<Integer, Set<Mention>> endMentions = Generics.newHashMap();
        for (Mention m : mentions) {
            startCounts.incrementCount(m.startIndex);
            endCounts.incrementCount(m.endIndex);
            if (!endMentions.containsKey(m.endIndex)) {
                endMentions.put(m.endIndex, Generics.<Mention>newHashSet());
            }
            endMentions.get(m.endIndex).add(m);
        }
        for (int j = 0; j < tokens.length; j++) {
            // Close every mention whose (exclusive) end index is j.
            if (endMentions.containsKey(j)) {
                for (Mention m : endMentions.get(j)) {
                    int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID;
                    doc.append("]_").append(corefChainId);
                }
            }
            // Open brackets for mentions starting at j; avoid inserting a
            // space directly after a previous "[".
            for (int k = 0; k < startCounts.getCount(j); k++) {
                if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') {
                    doc.append(" ");
                }
                doc.append("[");
            }
            if (doc.length() > 0 && doc.charAt(doc.length() - 1) != '[') {
                doc.append(" ");
            }
            doc.append(tokens[j]);
        }
        // Close mentions ending at the sentence boundary.
        if (endMentions.containsKey(tokens.length)) {
            for (Mention m : endMentions.get(tokens.length)) {
                int corefChainId = (gold) ? m.goldCorefClusterID : m.corefClusterID;
                doc.append("]_").append(corefChainId); //append("_").append(m.mentionID);
            }
        }

        doc.append("\n");
    }
    logger.fine(document.annotation.get(CoreAnnotations.DocIDAnnotation.class));
    if (gold) {
        logger.fine("New DOC: (GOLD MENTIONS) ==================================================");
    } else {
        logger.fine("New DOC: (Predicted Mentions) ==================================================");
    }
    logger.fine(doc.toString());
}