List of usage examples for com.google.common.collect.Multiset#add
int add(@Nullable E element, int occurrences);
From source file: com.github.pffy.chinese.freq.ImmutableChineseFrequency.java
/** * Builds this object with an input text. Expecting Chinese characters. * /*from ww w . j a va 2 s.c o m*/ * @param input Chinese text for frequency analysis */ public ImmutableChineseFrequency(String input) { init(); // Counts int inputCount, removedCount, hanziCount, uniqueHanziCount, processedCount; Scanner sc; // Output data String csvOutput = ""; String tsvOutput = ""; String txtOutput = ""; String csv, tsv, txt; String summaryString = ""; String hz, py; int freq; // Google Guava magic String hanzi; Multiset<String> hanziSet = HashMultiset.create(); Iterable<Multiset.Entry<String>> hanziSortedByCount; Iterator<Multiset.Entry<String>> keys; Multiset.Entry<String> key; ImmutableList<Multiset.Entry<String>> hanziList; if (input == null || input.isEmpty()) { throw new NullPointerException(this.MSG_EMPTYNULL_STRING); } inputCount = input.length(); input = retainHanzi(input); removedCount = inputCount - input.length(); hanziCount = input.length(); sc = new Scanner(input); sc.useDelimiter(""); // accumulate: counts occurrences while (sc.hasNext()) { hanzi = (String) sc.next(); hanziSet.add(hanzi, 1); } sc.close(); uniqueHanziCount = hanziSet.elementSet().size(); processedCount = 0; hanziSortedByCount = Multisets.copyHighestCountFirst(hanziSet).entrySet(); hanziList = Multisets.copyHighestCountFirst(hanziSet).entrySet().asList(); keys = hanziSortedByCount.iterator(); while (keys.hasNext()) { key = (Multiset.Entry<String>) keys.next(); hz = (String) key.getElement().replaceAll("x \\d{1,}", ""); py = (String) this.hpdx.get(hz); freq = (int) key.getCount(); // check null first to avoid NullPointerException. lazy code. if (py == null || py.isEmpty()) { // not mapped yet. that is okay move on. 
continue; } csv = this.CRLF + hz + "," + py + "," + freq; csvOutput += csv; tsv = this.CRLF + hz + "\t" + py + "\t" + freq; tsvOutput += tsv; txt = this.CRLF + padSummary(hz + " [" + py + "]", this.PADSIZE_SUMMARY) + freq; txtOutput += txt; processedCount++; } summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount; summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount; summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount; summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount; summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount; if (processedCount > 0) { csvOutput = this.HEADER_ROW_CSV + csvOutput; tsvOutput = this.HEADER_ROW_TSV + tsvOutput; txtOutput = this.HEADER_ROW_TXT + txtOutput; } this.input = input; this.inputCount = inputCount; this.removedCount = removedCount; this.hanziCount = hanziCount; this.uniqueHanziCount = uniqueHanziCount; this.processedCount = processedCount; this.summary = summaryString; this.hanziList = hanziList; this.csvOutput = csvOutput; this.tsvOutput = tsvOutput; this.txtOutput = txtOutput; }