List of usage examples for com.google.common.collect.Multiset#elementSet()
Set<E> elementSet();
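Before the project examples, here is a minimal standalone sketch (not taken from any of the files below; it only assumes Guava on the classpath) showing what elementSet() returns: the set of distinct elements of the multiset, as a live view backed by it.

import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import java.util.Set;

public class ElementSetDemo {
    public static void main(String[] args) {
        Multiset<String> words = HashMultiset.create();
        words.add("apple");
        words.add("apple");
        words.add("banana");

        // elementSet() holds each distinct element once, regardless of its count.
        Set<String> distinct = words.elementSet();
        System.out.println(distinct);              // [apple, banana] (iteration order not guaranteed)
        System.out.println(words.count("apple"));  // 2

        // The set is a view: removing an element here drops all of its occurrences.
        distinct.remove("apple");
        System.out.println(words.size());          // 1
    }
}

Iterating elementSet() together with count(element), as most of the examples below do, visits each distinct element exactly once instead of once per occurrence.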
From source file:com.b2international.snowowl.snomed.reasoner.server.diff.SourceConceptNamespaceAndModuleAssigner.java
@Override
public void allocateRelationshipIdsAndModules(Multiset<String> conceptIds, final SnomedEditingContext editingContext) {
    Multiset<String> reservedIdsByNamespace = HashMultiset.create();

    for (Multiset.Entry<String> conceptIdWithCount : conceptIds.entrySet()) {
        String namespace = SnomedIdentifiers.getNamespace(conceptIdWithCount.getElement());
        reservedIdsByNamespace.add(namespace, conceptIdWithCount.getCount());
    }

    ISnomedIdentifierService identifierService = getServiceForClass(ISnomedIdentifierService.class);

    for (Multiset.Entry<String> namespaceWithCount : reservedIdsByNamespace.entrySet()) {
        Collection<String> reservedIds = identifierService.reserve(namespaceWithCount.getElement(),
                ComponentCategory.RELATIONSHIP, namespaceWithCount.getCount());
        this.reservedIds.addAll(reservedIds);
        namespaceToRelationshipIdMap.put(namespaceWithCount.getElement(), reservedIds.iterator());
    }

    for (String conceptId : conceptIds.elementSet()) {
        Concept concept = editingContext.lookup(conceptId, Concept.class);
        conceptIdToRelationshipModuleMap.put(conceptId, concept.getModule());
    }
}
From source file:com.cloudera.knittingboar.records.RCV1RecordFactory.java
public static void ScanFile(String file, int debug_break_cnt) throws IOException {
    ConstantValueEncoder encoder_test = new ConstantValueEncoder("test");
    BufferedReader reader = null;
    // Collection<String> words
    int line_count = 0;

    Multiset<String> class_count = ConcurrentHashMultiset.create();
    Multiset<String> namespaces = ConcurrentHashMultiset.create();

    try {
        // System.out.println( newsgroup );
        reader = new BufferedReader(new FileReader(file));

        String line = reader.readLine();
        while (line != null && line.length() > 0) {
            // shard_writer.write(line + "\n");
            // out += line;

            String[] parts = line.split(" ");
            // System.out.println( "Class: " + parts[0] );

            class_count.add(parts[0]);
            namespaces.add(parts[1]);

            line = reader.readLine();
            line_count++;

            Vector v = new RandomAccessSparseVector(FEATURES);

            for (int x = 2; x < parts.length; x++) {
                // encoder_test.addToVector(parts[x], v);
                // System.out.println( parts[x] );
                String[] feature = parts[x].split(":");
                int index = Integer.parseInt(feature[0]) % FEATURES;
                double val = Double.parseDouble(feature[1]);
                // System.out.println( feature[1] + " = " + val );

                if (index < FEATURES) {
                    v.set(index, val);
                } else {
                    System.out.println("Could Hash: " + index + " to " + (index % FEATURES));
                }
            }

            Utils.PrintVectorSectionNonZero(v, 10);
            System.out.println("###");

            if (line_count > debug_break_cnt) {
                break;
            }
        }

        System.out.println("Total Rec Count: " + line_count);
        System.out.println("-------------------- ");

        System.out.println("Classes");
        for (String word : class_count.elementSet()) {
            System.out.println("Class " + word + ": " + class_count.count(word) + " ");
        }

        System.out.println("-------------------- ");
        System.out.println("NameSpaces:");
        for (String word : namespaces.elementSet()) {
            System.out.println("Namespace " + word + ": " + namespaces.count(word) + " ");
        }

        /*
         * TokenStream ts = analyzer.tokenStream("text", reader);
         * ts.addAttribute(CharTermAttribute.class);
         *
         * // for each word in the stream, minus non-word stuff, add word to
         * collection while (ts.incrementToken()) { String s =
         * ts.getAttribute(CharTermAttribute.class).toString();
         * //System.out.print( " " + s ); //words.add(s); out += s + " "; }
         */
    } finally {
        // Guard against a NullPointerException if the FileReader could not be opened.
        if (reader != null) {
            reader.close();
        }
    }
    // return out + "\n";
}
From source file:com.github.pffy.chinese.freq.ImmutableChineseFrequency.java
/**
 * Builds this object with an input text. Expecting Chinese characters.
 *
 * @param input Chinese text for frequency analysis
 */
public ImmutableChineseFrequency(String input) {
    init();

    // Counts
    int inputCount, removedCount, hanziCount, uniqueHanziCount, processedCount;

    Scanner sc;

    // Output data
    String csvOutput = "";
    String tsvOutput = "";
    String txtOutput = "";
    String csv, tsv, txt;
    String summaryString = "";

    String hz, py;
    int freq;

    // Google Guava magic
    String hanzi;
    Multiset<String> hanziSet = HashMultiset.create();
    Iterable<Multiset.Entry<String>> hanziSortedByCount;
    Iterator<Multiset.Entry<String>> keys;
    Multiset.Entry<String> key;
    ImmutableList<Multiset.Entry<String>> hanziList;

    if (input == null || input.isEmpty()) {
        throw new NullPointerException(this.MSG_EMPTYNULL_STRING);
    }

    inputCount = input.length();
    input = retainHanzi(input);
    removedCount = inputCount - input.length();
    hanziCount = input.length();

    sc = new Scanner(input);
    sc.useDelimiter("");

    // accumulate: counts occurrences
    while (sc.hasNext()) {
        hanzi = (String) sc.next();
        hanziSet.add(hanzi, 1);
    }
    sc.close();

    uniqueHanziCount = hanziSet.elementSet().size();
    processedCount = 0;

    hanziSortedByCount = Multisets.copyHighestCountFirst(hanziSet).entrySet();
    hanziList = Multisets.copyHighestCountFirst(hanziSet).entrySet().asList();

    keys = hanziSortedByCount.iterator();
    while (keys.hasNext()) {
        key = (Multiset.Entry<String>) keys.next();

        hz = (String) key.getElement().replaceAll("x \\d{1,}", "");
        py = (String) this.hpdx.get(hz);
        freq = (int) key.getCount();

        // check null first to avoid NullPointerException. lazy code.
        if (py == null || py.isEmpty()) {
            // not mapped yet. that is okay move on.
            continue;
        }

        csv = this.CRLF + hz + "," + py + "," + freq;
        csvOutput += csv;

        tsv = this.CRLF + hz + "\t" + py + "\t" + freq;
        tsvOutput += tsv;

        txt = this.CRLF + padSummary(hz + " [" + py + "]", this.PADSIZE_SUMMARY) + freq;
        txtOutput += txt;

        processedCount++;
    }

    summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount;
    summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount;
    summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount;

    if (processedCount > 0) {
        csvOutput = this.HEADER_ROW_CSV + csvOutput;
        tsvOutput = this.HEADER_ROW_TSV + tsvOutput;
        txtOutput = this.HEADER_ROW_TXT + txtOutput;
    }

    this.input = input;
    this.inputCount = inputCount;
    this.removedCount = removedCount;
    this.hanziCount = hanziCount;
    this.uniqueHanziCount = uniqueHanziCount;
    this.processedCount = processedCount;
    this.summary = summaryString;
    this.hanziList = hanziList;
    this.csvOutput = csvOutput;
    this.tsvOutput = tsvOutput;
    this.txtOutput = txtOutput;
}
From source file:org.eclipse.sirius.ui.debug.SiriusDebugView.java
private void addShowPayloadAccessLogAction() {
    addAction("Show Payload Access Log", new Runnable() {
        @Override
        public void run() {
            int max = 50;
            List<FeatureAccess> log = PayloadMarkerAdapter.INSTANCE.getAccessLog();
            int totalSize = log.size();
            int shown = Math.min(totalSize, max);

            TabularReport tr = new TabularReport(/* "Timestamp", */ "EObject", "Feature");
            try {
                PayloadMarkerAdapter.INSTANCE.setEnable(false);
                for (int i = log.size() > max ? log.size() - max : 0; i < log.size(); i++) {
                    FeatureAccess featureAccess = log.get(i);
                    tr.addLine(Arrays.asList(
                            /* String.format("%tT", featureAccess.timestamp), */
                            ((Component) featureAccess.setting.getEObject()).getName(),
                            featureAccess.setting.getEStructuralFeature().getName()));
                }
            } finally {
                PayloadMarkerAdapter.INSTANCE.setEnable(true);
            }

            StringBuilder sb = new StringBuilder();
            sb.append("Showing " + shown + " of " + totalSize + " accesses.\n");

            Multiset<String> contexts = PayloadMarkerAdapter.INSTANCE.getUniqueContexts();
            sb.append("Unique contexts: " + contexts.elementSet().size()).append("\n\n");
            int i = 0;
            for (String stack : contexts.elementSet()) {
                int count = contexts.count(stack);
                sb.append("Context #" + i++ + " (" + count + " occurrences)").append("\n");
                sb.append(stack).append("\n");
            }

            sb.append("\n").append(tr.print()).append("\n");
            setText(sb.toString());
        }
    });
}
From source file:fabric.worker.transaction.DeadlockDetectorThread.java
/**
 * Resolves deadlocks by aborting transactions.
 *
 * @param cycles
 *          the set of deadlocks, represented by the logs of transactions
 *          involved in waits-for cycles.
 */
private void resolveDeadlocks(Set<Set<Log>> cycles) {
    // Turn the set of cycles into a map from top-level TIDs to sorted multisets
    // of transaction logs. The multisets are sorted by transaction depth, outer
    // transactions first.
    LongKeyMap<Multiset<Log>> logsByTopLevelTid = new LongKeyHashMap<Multiset<Log>>();
    for (Set<Log> cycle : cycles) {
        for (Log log : cycle) {
            long topLevelTid = log.getTid().topTid;

            Multiset<Log> logs = logsByTopLevelTid.get(topLevelTid);
            if (logs == null) {
                logs = TreeMultiset.create(LOG_COMPARATOR);
                logsByTopLevelTid.put(topLevelTid, logs);
            }

            logs.add(log);
        }
    }

    // Abort transactions to break up cycles. Transactions involved in more
    // cycles are aborted first.
    while (!cycles.isEmpty()) {
        // Figure out which top-level transaction(s) is involved in the most number
        // of deadlocks.
        int curMax = 0;
        LongSet abortCandidates = new LongHashSet();
        for (LongKeyMap.Entry<Multiset<Log>> entry : logsByTopLevelTid.entrySet()) {
            int curSize = entry.getValue().size();
            if (curMax > curSize)
                continue;

            if (curMax < curSize) {
                curMax = curSize;
                abortCandidates.clear();
            }

            abortCandidates.add(entry.getKey());
        }

        // Figure out which transaction to abort. (Pick the newest one.)
        Log toAbort = null;
        Multiset<Log> abortSet = null;
        for (LongIterator it = abortCandidates.iterator(); it.hasNext();) {
            long curTopLevelTid = it.next();
            Multiset<Log> curCandidateSet = logsByTopLevelTid.get(curTopLevelTid);
            Log curCandidate = curCandidateSet.iterator().next();

            if (toAbort == null || toAbort.startTime < curCandidate.startTime) {
                toAbort = curCandidate;
                abortSet = curCandidateSet;
            }
        }

        // Abort the transaction.
        WORKER_DEADLOCK_LOGGER.log(Level.FINE, "Aborting {0}", toAbort);
        toAbort.flagRetry();

        // Fix up our data structures to reflect the aborted transaction.
        for (Iterator<Set<Log>> cycleIt = cycles.iterator(); cycleIt.hasNext();) {
            Set<Log> cycle = cycleIt.next();

            // Check if the cycle has a transaction that was aborted.
            if (!haveCommonElements(cycle, abortSet.elementSet()))
                continue;

            // Cycle was broken, so remove from the set of cycles.
            cycleIt.remove();

            // Fix up logsByTopLevelTid.
            for (Log log : cycle) {
                long topLevelTid = log.getTid().topTid;
                Multiset<Log> logs = logsByTopLevelTid.get(topLevelTid);
                logs.remove(log);
                if (logs.isEmpty()) {
                    logsByTopLevelTid.remove(topLevelTid);
                }
            }
        }
    }
}
From source file:org.bridgedb.tools.qc.PatternChecker.java
public void run(File f) throws SQLException, IDMapperException {
    String database = "" + f;

    //TODO: we can use the new Iterator interface here...
    DBConnector con = new DataDerby();
    Connection sqlcon = null;
    sqlcon = con.createConnection(database, 0);

    Multimap<DataSource, String> missExamples = HashMultimap.create();
    Multiset<DataSource> misses = HashMultiset.create();
    Multiset<DataSource> totals = HashMultiset.create();
    Map<DataSource, Pattern> patterns = DataSourcePatterns.getPatterns();

    // String url = "jdbc:derby:jar:(" + f + ")database";
    // IDMapperRdb gdb = SimpleGdbFactory.createInstance("" + f, url);

    Statement st = sqlcon.createStatement();
    ResultSet rs = st.executeQuery("select id, code from datanode");

    while (rs.next()) {
        String id = rs.getString(1);
        String syscode = rs.getString(2);
        if (DataSource.systemCodeExists(syscode)) {
            DataSource ds = DataSource.getExistingBySystemCode(syscode);
            if (patterns.get(ds) == null)
                continue; // skip if there is no pattern defined.

            Set<DataSource> matches = DataSourcePatterns.getDataSourceMatches(id);
            if (!matches.contains(ds)) {
                if (missExamples.get(ds).size() < 10)
                    missExamples.put(ds, id);
                misses.add(ds);
            }
            totals.add(ds);
        }
    }
    // String code = rs.getString (2);
    // System.out.println (id + "\t" + code);

    for (DataSource ds : totals.elementSet()) {
        int miss = misses.count(ds);
        int total = totals.count(ds);

        if (miss > 0) {
            String severity = miss < (total / 25) ? "WARNING" : "ERROR";
            System.out.println(severity + ": " + miss + "/" + total + " (" + miss * 100 / total
                    + "%) ids do not match expected pattern for " + ds);
            System.out.println(severity + ": expected pattern is '" + patterns.get(ds) + "'");
            boolean first = true;
            for (String id : missExamples.get(ds)) {
                System.out.print(first ? severity + ": aberrant ids are e.g. " : ", ");
                first = false;
                System.out.print("'" + id + "'");
            }
            System.out.println();
        }
    }

    allMisses.addAll(misses);
    allTotals.addAll(totals);
}
From source file:org.apache.mahout.classifier.sgd.NewsgroupHelper.java
Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts)
        throws IOException {
    long date = (long) (1000 * (DATE_REFERENCE + actual * MONTH + 1 * WEEK * rand.nextDouble()));
    Multiset<String> words = ConcurrentHashMultiset.create();

    BufferedReader reader = Files.newReader(file, Charsets.UTF_8);
    try {
        String line = reader.readLine();
        Reader dateString = new StringReader(DATE_FORMATS[leakType % 3].format(new Date(date)));
        countWords(analyzer, words, dateString, overallCounts);
        while (line != null && !line.isEmpty()) {
            boolean countHeader = (line.startsWith("From:") || line.startsWith("Subject:")
                    || line.startsWith("Keywords:") || line.startsWith("Summary:")) && leakType < 6;
            do {
                Reader in = new StringReader(line);
                if (countHeader) {
                    countWords(analyzer, words, in, overallCounts);
                }
                line = reader.readLine();
            } while (line != null && line.startsWith(" "));
        }
        if (leakType < 3) {
            countWords(analyzer, words, reader, overallCounts);
        }
    } finally {
        Closeables.closeQuietly(reader);
    }

    Vector v = new RandomAccessSparseVector(FEATURES);
    bias.addToVector("", 1, v);
    for (String word : words.elementSet()) {
        encoder.addToVector(word, Math.log1p(words.count(word)), v);
    }
    return v;
}
From source file:org.apache.mahout.classifier.NewsgroupHelper.java
public Vector encodeFeatureVector(File file, int actual, int leakType, Multiset<String> overallCounts)
        throws IOException {
    long date = (long) (1000 * (DATE_REFERENCE + actual * MONTH + 1 * WEEK * rand.nextDouble()));
    Multiset<String> words = ConcurrentHashMultiset.create();

    BufferedReader reader = Files.newReader(file, Charsets.UTF_8);
    try {
        String line = reader.readLine();
        Reader dateString = new StringReader(DATE_FORMATS[leakType % 3].format(new Date(date)));
        countWords(analyzer, words, dateString, overallCounts);
        while (line != null && !line.isEmpty()) {
            boolean countHeader = (line.startsWith("From:") || line.startsWith("Subject:")
                    || line.startsWith("Keywords:") || line.startsWith("Summary:")) && leakType < 6;
            do {
                Reader in = new StringReader(line);
                if (countHeader) {
                    countWords(analyzer, words, in, overallCounts);
                }
                line = reader.readLine();
            } while (line != null && line.startsWith(" "));
        }
        if (leakType < 3) {
            countWords(analyzer, words, reader, overallCounts);
        }
    } finally {
        Closeables.close(reader, true);
    }

    Vector v = new RandomAccessSparseVector(FEATURES);
    bias.addToVector("", 1, v);
    for (String word : words.elementSet()) {
        encoder.addToVector(word, Math.log1p(words.count(word)), v);
    }
    return v;
}
From source file:org.eclipse.incquery.runtime.base.core.NavigationHelperImpl.java
@Override
public Set<EObject> getHoldersOfFeature(EStructuralFeature _feature) {
    Object feature = toKey(_feature);
    Multiset<EObject> holders = contentAdapter.getFeatureToHolderMap().get(feature);
    if (holders == null) {
        return Collections.emptySet();
    } else {
        return Collections.unmodifiableSet(holders.elementSet());
    }
}
From source file:BibTex.IOmethods.java
public void writeConnectedCategories(Set<BibTexRef> refs) throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "connected categories.csv"));
    StringBuilder sb = new StringBuilder();
    int maxCountCategory = 0;

    sb.append("Source,Target,Type,Weight").append("\n");

    //creation of convenient data structures for I/O
    Multiset<Edge> edges = HashMultiset.create();
    Multiset<String> multisetCategoryNames = HashMultiset.create();

    for (BibTexRef ref : refs) {
        Set<Category> categories = ref.getCategories();
        Set<String> categoriesNames = new HashSet<String>();

        for (Category category : categories) {
            categoriesNames.add(category.getCategoryName());
            multisetCategoryNames.add(category.getCategoryName());
        }

        FindAllPairs findAllPairs = new FindAllPairs();
        List<Pair<String>> pairs = findAllPairs.getAllUndirectedPairsAsList(categoriesNames);

        for (Pair<String> pair : pairs) {
            Edge edge = new Edge();
            edge.setNode1(pair.getLeft());
            edge.setNode2(pair.getRight());
            edges.add(edge);
        }
    }

    //finding the max count for a category, for normalization purposes
    for (String string : multisetCategoryNames.elementSet()) {
        if (maxCountCategory < multisetCategoryNames.count(string)) {
            maxCountCategory = multisetCategoryNames.count(string);
        }
    }

    //writing one CSV line per distinct edge: source, target, type and normalized weight
    for (Edge edge : edges.elementSet()) {
        //we devalue the weight of an edge by how frequent the 2 nodes of the edge are.
        float weight = edges.count(edge) / (float) (multisetCategoryNames.count(edge.getNode1())
                * multisetCategoryNames.count(edge.getNode2()));
        // float weight = edges.count(edge);

        //normalization to a 0 -> 10 scale to visualize the weight on Gephi
        weight = weight * 10 / (float) maxCountCategory * 100000;

        sb.append(edge.getNode1()).append(",").append(edge.getNode2()).append(",Undirected,").append(weight);
        sb.append("\n");
    }

    bw.write(sb.toString());
    bw.close();
}