Example usage for edu.stanford.nlp.stats Counter incrementCount

List of usage examples for edu.stanford.nlp.stats Counter incrementCount

Introduction

On this page you can find example usages of the incrementCount method of edu.stanford.nlp.stats.Counter.

Prototype

double incrementCount(E key, double value);

Source Link

Document

Increments the count for the given key by the given value.

Usage

From source file: lv.lnb.ner.MergeEntityInformation.java

License:Open Source License

/**
 * Cleans up one entity mention and adds its mention count to the shared counters.
 *
 * <p>A mention containing "," or " ." is split on that separator and each part is processed
 * by a recursive call. Otherwise a fixed list of spelling corrections is applied to both
 * {@code word} and {@code normalform}, filtered entries are dropped, and the count is added
 * to {@code counter}, {@code popular_forms} and {@code counterbydoc} (all declared outside
 * this method).
 *
 * <p>NOTE(review): many literals below contain '?' or look like they are missing letters,
 * presumably diacritics lost in an encoding conversion of this listing - confirm against
 * the original source before relying on them. Inside the {@code replaceAll} patterns a '?'
 * is a regex quantifier (it makes the preceding character optional), not a literal.
 *
 * @param word          surface form of the mention; counted per key in {@code popular_forms}
 * @param normalform    normalized form of the mention; becomes part of the counter key
 * @param category      category label; prefix of the counter key
 * @param doc           document identifier; appended to the key for {@code counterbydoc}
 * @param mention_count amount added to each counter
 */
private static void add_word(String word, String normalform, String category, String doc, int mention_count) {
    // Ignore mentions shorter than three characters after trimming.
    if (word.trim().length() < 3)
        return;
    // Comma-separated mention: process every part on its own, pairing word parts with
    // form parts by position.
    if (word.contains(",")) {
        String[] wordparts = word.split(",");
        String[] formparts = normalform.split(",");
        if (wordparts.length != formparts.length) {
            // Parts cannot be paired up; report and skip the whole mention.
            // (Message is in Latvian; presumably "comma counts do not match".)
            System.err.println(String.format("Nesakrt komatu skaits '%s' un '%s'.", word, normalform));
            return;
        }
        for (int i = 0; i < wordparts.length; i++) {
            add_word(wordparts[i].trim(), formparts[i].trim(), category, doc, mention_count);
        }
        return;
    }
    // Same treatment for the literal separator " ." (Pattern.quote keeps the dot literal).
    if (word.contains(" .")) {
        String[] wordparts = word.split(Pattern.quote(" ."));
        String[] formparts = normalform.split(Pattern.quote(" ."));
        if (wordparts.length != formparts.length) {
            // (Message is in Latvian; presumably "period counts do not match".)
            System.err.println(String.format("Nesakrt punktu skaits '%s' un '%s'.", word, normalform));
            return;
        }
        for (int i = 0; i < wordparts.length; i++) {
            add_word(wordparts[i].trim(), formparts[i].trim(), category, doc, mention_count);
        }
        return;
    }

    // Substring corrections on the surface form. replace() is literal; replaceAll() takes
    // a regex, and the patterns below are anchored at the end of the string with '$'.
    word = word.replace("CX", "CK");
    word = word.replace(" eela", " iela");
    word = word.replace("Kr .", "Kr.");
    word = word.replace("Riga", "Rga");
    word = word.replace("Kreemija", "Krievija");
    word = word.replace("Kreewija", "Krievija");
    word = word.replace("Wahzija", "V?cija");
    word = word.replace("Cehoslovakija", "ehoslovakija");
    word = word.replaceAll("[aA]ugst?k?s [pP]adomes$", "Augst?k? Padome");
    word = word.replaceAll("[fF]ederatvaj? [rR]epublik?$", "Federatv? Republika");
    word = word.replaceAll("[dD]emokr?tiskaj? [rR]epublik?$", "Demokr?tisk? Republika");
    word = word.replaceAll("[kK]omunistisk?s [pP]artijas$", "Komunistisk? Partija");
    word = word.replace(" lela", " iela");
    word = word.replace(" lels", " iela");
    word = word.replace("dzv. ", "");
    // Whole-string, case-insensitive rewrites of specific forms to a fixed replacement.
    if (word.equalsIgnoreCase("Hitlera"))
        word = "Hitlers";
    if (word.equalsIgnoreCase("Raia"))
        word = "Rainis";
    if (word.equalsIgnoreCase("Vtola"))
        word = "Vtols";
    if (word.equalsIgnoreCase("Mocarta"))
        word = "Mocarts";
    if (word.equalsIgnoreCase("aikovska"))
        word = "aikovskis";
    if (word.equalsIgnoreCase("Daugavpil"))
        word = "Daugavpils";
    if (word.equalsIgnoreCase("Liep?jas"))
        word = "Liep?ja";
    if (word.equalsIgnoreCase("Liep?j?"))
        word = "Liep?ja";
    if (word.equalsIgnoreCase("Csu"))
        word = "Csis";
    if (word.equalsIgnoreCase("Blaumaa"))
        word = "Blaumanis";
    if (word.equalsIgnoreCase("Lietavas"))
        word = "Lietava";
    if (word.equalsIgnoreCase("ns"))
        word = "na";
    if (word.equalsIgnoreCase("Strencis"))
        word = "Stren?i";
    if (word.equalsIgnoreCase("Rig?"))
        word = "Rga";
    if (word.equalsIgnoreCase("Kubs"))
        word = "Kuba";
    // NOTE(review): same test as two statements above; it can no longer match here, so
    // this check is redundant as written.
    if (word.equalsIgnoreCase("Rig?"))
        word = "Rga";
    if (word.equalsIgnoreCase("Kijevs"))
        word = "Kijeva";
    if (word.equalsIgnoreCase("Latmija"))
        word = "Latvija";
    if (word.equalsIgnoreCase("Trbat?"))
        word = "Trbata";
    if (word.equalsIgnoreCase("Sabil"))
        word = "Sabile";
    if (word.equalsIgnoreCase("Melluzis"))
        word = "Mellui";
    if (word.equalsIgnoreCase("Polijas") || word.equalsIgnoreCase("Polij?"))
        word = "Polija";
    // Hack for inflected forms that were not recognized: "...ijas" loses its last
    // character, and the last character of "...ij?" is replaced by "a".
    if (word.endsWith("ijas"))
        word = word.substring(0, word.length() - 1);
    if (word.endsWith("ij?"))
        word = word.substring(0, word.length() - 1) + "a";

    // Corresponding corrections on the normalized form, with lower-case literals. This list
    // is not identical to the one for 'word'. Any '|' is removed here; '|' is the separator
    // used when the counter keys are built below.
    normalform = normalform.replace("CX", "CK");
    normalform = normalform.replace("|", "");
    normalform = normalform.replace(" eela", " iela");
    normalform = normalform.replace("Kr .", "Kr.");
    normalform = normalform.replace("riga", "rga");
    normalform = normalform.replace("kreewija", "krievija");
    normalform = normalform.replace("kreemija", "krievija");
    normalform = normalform.replace("wahzija", "v?cija");
    normalform = normalform.replace(" lels", " iela");
    normalform = normalform.replace("dzv. ", "");
    if (normalform.equalsIgnoreCase("Hitlera"))
        normalform = "hitlers";
    if (normalform.equalsIgnoreCase("Raia"))
        normalform = "rainis";
    if (normalform.equalsIgnoreCase("Vtola"))
        normalform = "vtols";
    if (normalform.equalsIgnoreCase("Mocarta"))
        normalform = "mocarts";
    if (normalform.equalsIgnoreCase("aikovska"))
        normalform = "?aikovskis";
    if (normalform.equalsIgnoreCase("Daugavpil"))
        normalform = "daugavpils";
    if (normalform.equalsIgnoreCase("Liep?jas"))
        normalform = "liep?ja";
    if (normalform.equalsIgnoreCase("Liep?j?"))
        normalform = "liep?ja";
    if (normalform.equalsIgnoreCase("blaumaa"))
        normalform = "blaumanis";
    if (normalform.equalsIgnoreCase("ns"))
        normalform = "na";
    if (normalform.equalsIgnoreCase("Polijas") || normalform.equalsIgnoreCase("Polijs"))
        normalform = "polija";
    // Same suffix hack as applied to 'word' above.
    if (normalform.endsWith("ijas"))
        normalform = normalform.substring(0, normalform.length() - 1);
    if (normalform.endsWith("ij?"))
        normalform = normalform.substring(0, normalform.length() - 1) + "a";

    // Filters: drop the mention when either form is a key of the blacklist, when the
    // normalized form contains " un " without also containing "opera", or when the
    // normalized form is longer than 50 characters.
    if (blacklist.containsKey(word))
        return;
    if (blacklist.containsKey(normalform))
        return;
    if (normalform.contains(" un ") && !normalform.contains("opera"))
        return;
    if (normalform.length() > 50)
        return;

    // Total count per "category|normalform" key.
    String key = category + "|" + normalform;
    counter.incrementCount(key, mention_count);

    // Count of each surface form per key; the per-key counter is created on first use.
    Counter<String> forms = popular_forms.get(key);
    if (forms == null) {
        forms = new IntCounter<String>();
        popular_forms.put(key, forms);
    }
    forms.incrementCount(word, mention_count);

    // Count per "category|normalform|doc" key.
    counterbydoc.incrementCount(key + "|" + doc, mention_count);
}

From source file: nate.reading.SlotInducer.java

/**
 * Adds up the argument counts of all the given slots into one counter.
 *
 * <p>For each slot name, the per-argument counts returned by
 * {@code argCounts.getArgsForSlot} are added to the result. Slots for which that
 * lookup returns {@code null} contribute nothing.
 *
 * @param names     the slot names to look up; may be {@code null}
 * @param argCounts the source of per-slot argument counts; may be {@code null}
 * @return a new counter mapping each argument to its summed count; empty when either
 *         parameter is {@code null}
 */
private Counter<String> sumArgs(Collection<String> names, VerbArgCounts argCounts) {
    Counter<String> total = new IntCounter<String>();

    // Nothing to sum without both the slot names and the count source.
    if (names == null || argCounts == null) {
        return total;
    }

    for (String slot : names) {
        Map<String, Integer> perSlot = argCounts.getArgsForSlot(slot);
        if (perSlot == null) {
            continue;
        }
        for (Map.Entry<String, Integer> argCount : perSlot.entrySet()) {
            total.incrementCount(argCount.getKey(), argCount.getValue());
        }
    }
    return total;
}