List of usage examples for edu.stanford.nlp.stats.Counter#incrementCount
double incrementCount(E key, double value);
From source file:lv.lnb.ner.MergeEntityInformation.java
License:Open Source License
/**
 * Normalizes one entity mention and records it in the shared counters.
 *
 * <p>Steps, in the order the code performs them:
 * <ol>
 * <li>Returns immediately when the trimmed word is shorter than 3 characters.</li>
 * <li>If word contains ",", both word and normalform are split on "," and every trimmed
 *     pair is processed by a recursive call; the same is then done for the literal " ."
 *     separator. When the two splits produce a different number of parts, a message is
 *     printed to System.err and nothing is recorded.</li>
 * <li>A fixed sequence of literal replacements, regex replacements and whole-string
 *     substitutions is applied to word, then a similar (but not identical) sequence to
 *     normalform. The sequence is order-dependent: the generic "ijas" / "ij?" suffix
 *     handling runs only after the specific whole-string substitutions.</li>
 * <li>The mention is dropped when word or normalform is a key of blacklist, when normalform
 *     contains " un " without containing "opera", or when normalform is longer than 50
 *     characters.</li>
 * <li>Otherwise mention_count is added to counter under the key category + "|" + normalform,
 *     to the per-key counter of surface forms stored in popular_forms (created on first use),
 *     and to counterbydoc under key + "|" + doc.</li>
 * </ol>
 *
 * <p>NOTE(review): many string literals contain "?" or look truncated (for example "Rga",
 * "V?cija", "aikovska"), which suggests non-ASCII letters were lost in this copy of the
 * file; confirm against the original source. Inside the replaceAll patterns a "?" acts as
 * a regex quantifier, so those patterns do not match a literal question mark.
 *
 * <p>NOTE(review): the literal "dzv. " is interrupted by a physical line break in two places,
 * presumably an artifact of the same extraction; a raw line break inside a string literal is
 * not valid Java.
 *
 * <p>NOTE(review): in this collapsed layout the line comment "//hack nesaprastiem locjumiem"
 * (Latvian, roughly "hack for inflections that were not understood") extends to the end of
 * its physical line and therefore also covers the statements that follow it there; confirm
 * the original line structure.
 *
 * <p>NOTE(review): the check for "Rig?" appears twice in the word substitutions; the second
 * occurrence has no additional effect.
 *
 * @param word the mention text to normalize; presumably the surface form found in a document
 * @param normalform the normalized form paired with word; after clean-up it becomes part of
 *     the counter key
 * @param category prefix of the counter key
 * @param doc document identifier appended to the key used for counterbydoc
 * @param mention_count amount by which each affected counter is incremented
 */
private static void add_word(String word, String normalform, String category, String doc, int mention_count) { if (word.trim().length() < 3) return; if (word.contains(",")) { String[] wordparts = word.split(","); String[] formparts = normalform.split(","); if (wordparts.length != formparts.length) { System.err.println(String.format("Nesakrt komatu skaits '%s' un '%s'.", word, normalform)); return; } for (int i = 0; i < wordparts.length; i++) { add_word(wordparts[i].trim(), formparts[i].trim(), category, doc, mention_count); } return; } if (word.contains(" .")) { String[] wordparts = word.split(Pattern.quote(" .")); String[] formparts = normalform.split(Pattern.quote(" .")); if (wordparts.length != formparts.length) { System.err.println(String.format("Nesakrt punktu skaits '%s' un '%s'.", word, normalform)); return; } for (int i = 0; i < wordparts.length; i++) { add_word(wordparts[i].trim(), formparts[i].trim(), category, doc, mention_count); } return; } word = word.replace("CX", "CK"); word = word.replace(" eela", " iela"); word = word.replace("Kr .", "Kr."); word = word.replace("Riga", "Rga"); word = word.replace("Kreemija", "Krievija"); word = word.replace("Kreewija", "Krievija"); word = word.replace("Wahzija", "V?cija"); word = word.replace("Cehoslovakija", "ehoslovakija"); word = word.replaceAll("[aA]ugst?k?s [pP]adomes$", "Augst?k? Padome"); word = word.replaceAll("[fF]ederatvaj? [rR]epublik?$", "Federatv? Republika"); word = word.replaceAll("[dD]emokr?tiskaj? [rR]epublik?$", "Demokr?tisk? Republika"); word = word.replaceAll("[kK]omunistisk?s [pP]artijas$", "Komunistisk? Partija"); word = word.replace(" lela", " iela"); word = word.replace(" lels", " iela"); word = word.replace("dzv. 
", ""); if (word.equalsIgnoreCase("Hitlera")) word = "Hitlers"; if (word.equalsIgnoreCase("Raia")) word = "Rainis"; if (word.equalsIgnoreCase("Vtola")) word = "Vtols"; if (word.equalsIgnoreCase("Mocarta")) word = "Mocarts"; if (word.equalsIgnoreCase("aikovska")) word = "aikovskis"; if (word.equalsIgnoreCase("Daugavpil")) word = "Daugavpils"; if (word.equalsIgnoreCase("Liep?jas")) word = "Liep?ja"; if (word.equalsIgnoreCase("Liep?j?")) word = "Liep?ja"; if (word.equalsIgnoreCase("Csu")) word = "Csis"; if (word.equalsIgnoreCase("Blaumaa")) word = "Blaumanis"; if (word.equalsIgnoreCase("Lietavas")) word = "Lietava"; if (word.equalsIgnoreCase("ns")) word = "na"; if (word.equalsIgnoreCase("Strencis")) word = "Stren?i"; if (word.equalsIgnoreCase("Rig?")) word = "Rga"; if (word.equalsIgnoreCase("Kubs")) word = "Kuba"; if (word.equalsIgnoreCase("Rig?")) word = "Rga"; if (word.equalsIgnoreCase("Kijevs")) word = "Kijeva"; if (word.equalsIgnoreCase("Latmija")) word = "Latvija"; if (word.equalsIgnoreCase("Trbat?")) word = "Trbata"; if (word.equalsIgnoreCase("Sabil")) word = "Sabile"; if (word.equalsIgnoreCase("Melluzis")) word = "Mellui"; if (word.equalsIgnoreCase("Polijas") || word.equalsIgnoreCase("Polij?")) word = "Polija"; //hack nesaprastiem locjumiem if (word.endsWith("ijas")) word = word.substring(0, word.length() - 1); if (word.endsWith("ij?")) word = word.substring(0, word.length() - 1) + "a"; normalform = normalform.replace("CX", "CK"); normalform = normalform.replace("|", ""); normalform = normalform.replace(" eela", " iela"); normalform = normalform.replace("Kr .", "Kr."); normalform = normalform.replace("riga", "rga"); normalform = normalform.replace("kreewija", "krievija"); normalform = normalform.replace("kreemija", "krievija"); normalform = normalform.replace("wahzija", "v?cija"); normalform = normalform.replace(" lels", " iela"); normalform = normalform.replace("dzv. 
", ""); if (normalform.equalsIgnoreCase("Hitlera")) normalform = "hitlers"; if (normalform.equalsIgnoreCase("Raia")) normalform = "rainis"; if (normalform.equalsIgnoreCase("Vtola")) normalform = "vtols"; if (normalform.equalsIgnoreCase("Mocarta")) normalform = "mocarts"; if (normalform.equalsIgnoreCase("aikovska")) normalform = "?aikovskis"; if (normalform.equalsIgnoreCase("Daugavpil")) normalform = "daugavpils"; if (normalform.equalsIgnoreCase("Liep?jas")) normalform = "liep?ja"; if (normalform.equalsIgnoreCase("Liep?j?")) normalform = "liep?ja"; if (normalform.equalsIgnoreCase("blaumaa")) normalform = "blaumanis"; if (normalform.equalsIgnoreCase("ns")) normalform = "na"; if (normalform.equalsIgnoreCase("Polijas") || normalform.equalsIgnoreCase("Polijs")) normalform = "polija"; if (normalform.endsWith("ijas")) normalform = normalform.substring(0, normalform.length() - 1); if (normalform.endsWith("ij?")) normalform = normalform.substring(0, normalform.length() - 1) + "a"; if (blacklist.containsKey(word)) return; if (blacklist.containsKey(normalform)) return; if (normalform.contains(" un ") && !normalform.contains("opera")) return; if (normalform.length() > 50) return; String key = category + "|" + normalform; counter.incrementCount(key, mention_count); Counter<String> forms = popular_forms.get(key); if (forms == null) { forms = new IntCounter<String>(); popular_forms.put(key, forms); } forms.incrementCount(word, mention_count); counterbydoc.incrementCount(key + "|" + doc, mention_count); }
From source file:nate.reading.SlotInducer.java
/**
 * Adds up the argument counts of every given slot into one counter.
 *
 * @param names The slot names whose argument counts are summed; may be null.
 * @param argCounts The lookup from slot name to that slot's argument counts; may be null.
 * @return A new counter with, for each argument, its count summed over all slots in
 *         {@code names}. The counter is empty when either parameter is null.
 */
private Counter<String> sumArgs(Collection<String> names, VerbArgCounts argCounts) {
    Counter<String> totals = new IntCounter<String>();

    // Nothing to aggregate without both the slot names and the count source.
    if (names == null || argCounts == null) {
        return totals;
    }

    for (String slot : names) {
        Map<String, Integer> perArgument = argCounts.getArgsForSlot(slot);
        if (perArgument == null) {
            // No counts recorded for this slot.
            continue;
        }
        for (Map.Entry<String, Integer> argumentCount : perArgument.entrySet()) {
            String argument = argumentCount.getKey();
            Integer occurrences = argumentCount.getValue();
            totals.incrementCount(argument, occurrences);
        }
    }

    return totals;
}