List of usage examples for com.google.common.collect Multiset count
int count(@Nullable Object element);
From source file:org.eclipse.sirius.diagram.sequence.ui.tool.internal.edit.validator.PositionsChecker.java
/** * Inspect all sequence events and check that there is no conflicts. * //from w w w .j a v a 2s.co m * @return a collection of invalid positions. */ public Collection<Integer> getInvalidPositions() { final Multiset<Integer> positions = HashMultiset.create(); // Check conflicts for (ISequenceEvent ise : diagram.getAllOrderedDelimitedSequenceEvents()) { Range futureRange = rangeFunction.apply(ise); int futureLowerBound = futureRange.getLowerBound(); int futureUpperBound = futureRange.getUpperBound(); if (ise instanceof Execution) { Execution exec = (Execution) ise; if (!exec.getStartMessage().some()) { positions.add(futureLowerBound); } if (!exec.getEndMessage().some()) { positions.add(futureUpperBound); } } else if (ise instanceof Operand) { positions.add(futureLowerBound); } else if (ise instanceof Message) { positions.add(futureLowerBound); if (((Message) ise).isReflective()) { positions.add(futureUpperBound); } } else if (ise instanceof State && ise.isLogicallyInstantaneous()) { positions.add(futureRange.middleValue()); } else { positions.add(futureLowerBound); positions.add(futureUpperBound); } } Set<Integer> invalidPositions = Sets.newHashSet(); Iterables.addAll(invalidPositions, Iterables.filter(positions, new Predicate<Integer>() { public boolean apply(Integer input) { int count = positions.count(input); return count != 1; } })); return invalidPositions; }
From source file:BibTex.IOmethods.java
public void writeCategoriesPerYearInCsv(Set<BibTexRef> refs) throws IOException { BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "years and categories.csv")); StringBuilder sb = new StringBuilder(); String sep = "|"; //creation of 2 convenient data structures for I/O Map<String, Multiset<String>> yearsToCategories = new HashMap(); List<String> categoryNames = new ArrayList(); for (BibTexRef ref : refs) { String year = ref.getYear(); Set<Category> categories = ref.getCategories(); for (Category category : categories) { if (!categoryNames.contains(category.getCategoryName())) { categoryNames.add(category.getCategoryName()); }/* w w w.j ava 2 s .c om*/ } if (yearsToCategories.containsKey(year)) { for (Category category : categories) { yearsToCategories.get(year).add(category.getCategoryName()); } } else { Multiset<String> categoriesForOneYear = HashMultiset.create(); for (Category category : categories) { categoriesForOneYear.add(category.getCategoryName()); } yearsToCategories.put(year, categoriesForOneYear); } } //writing of the first line of the csv: headers of the categories. // first cell is empty. This is the first column, and will serve for the headers of the rows (which are the years) sb.append(sep); for (String categoryName : categoryNames) { sb.append(categoryName); sb.append(sep); } sb.append("\n"); //writing of all subsequent lines: one per year for (String year : yearsToCategories.keySet()) { sb.append(year).append(sep); Multiset<String> categoriesForOneYear = yearsToCategories.get(year); for (String categoryName : categoryNames) { int count = categoriesForOneYear.count(categoryName); sb.append(count).append(sep); } sb.append("\n"); } bw.write(sb.toString()); bw.close(); }
From source file:BibTex.IOmethods.java
public void writeConnectedCategories(Set<BibTexRef> refs) throws IOException { BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "connected categories.csv")); StringBuilder sb = new StringBuilder(); int maxCountCategory = 0; sb.append("Source,Target,Type,Weight").append("\n"); //creation of convenient data structures for I/O Multiset<Edge> edges = HashMultiset.create(); Multiset<String> multisetCategoryNames = HashMultiset.create(); for (BibTexRef ref : refs) { Set<Category> categories = ref.getCategories(); Set<String> categoriesNames = new HashSet(); for (Category category : categories) { categoriesNames.add(category.getCategoryName()); multisetCategoryNames.add(category.getCategoryName()); }/*from www .ja v a 2s . c om*/ FindAllPairs findAllPairs = new FindAllPairs(); List<Pair<String>> pairs = findAllPairs.getAllUndirectedPairsAsList(categoriesNames); for (Pair<String> pair : pairs) { Edge edge = new Edge(); edge.setNode1(pair.getLeft()); edge.setNode2(pair.getRight()); edges.add(edge); } } //finding the max number for a category, for normalization purposes for (String string : multisetCategoryNames.elementSet()) { if (maxCountCategory < multisetCategoryNames.count(string)) { maxCountCategory = multisetCategoryNames.count(string); } } //writing of the first line of the csv: headers of the categories. for (Edge edge : edges.elementSet()) { //we devalue the weight of an edge by how frequent the 2 nodes of the edge are. float weight = edges.count(edge) / (float) (multisetCategoryNames.count(edge.getNode1()) * multisetCategoryNames.count(edge.getNode2())); // float weight = edges.count(edge); //normalization to a 0 -> 10 scale to visualize the weight on Gephi weight = weight * 10 / (float) maxCountCategory * 100000; sb.append(edge.getNode1()).append(",").append(edge.getNode2()).append(",Undirected,").append(weight); sb.append("\n"); } bw.write(sb.toString()); bw.close(); }
From source file:BibTex.IOmethods.java
public void writeSuperCategoriesPerYearInCsv(Set<BibTexRef> refs) throws IOException { BufferedWriter bw = new BufferedWriter(new FileWriter(folder + "years and suppercategories.csv")); StringBuilder sb = new StringBuilder(); String sep = "|"; //creation of 2 convenient data structures for I/O Map<String, Multiset<String>> yearsToCategories = new HashMap(); List<String> categoryNames = new ArrayList(); for (BibTexRef ref : refs) { String year = ref.getYear(); Set<Category> categories = ref.getCategories(); for (Category category : categories) { if (!categoryNames.contains(category.getSuperCategory())) { categoryNames.add(category.getSuperCategory()); }/* w ww.j a va 2 s .c o m*/ } if (yearsToCategories.containsKey(year)) { for (Category category : categories) { yearsToCategories.get(year).add(category.getSuperCategory()); } } else { Multiset<String> categoriesForOneYear = HashMultiset.create(); for (Category category : categories) { categoriesForOneYear.add(category.getSuperCategory()); } yearsToCategories.put(year, categoriesForOneYear); } } //writing of the first line of the csv: headers of the categories. // first cell is empty. This is the first column, and will serve for the headers of the rows (which are the years) sb.append(sep); for (String categoryName : categoryNames) { sb.append(categoryName); sb.append(sep); } sb.append("\n"); //writing of all subsequent lines: one per year for (String year : yearsToCategories.keySet()) { sb.append(year).append(sep); Multiset<String> categoriesForOneYear = yearsToCategories.get(year); for (String categoryName : categoryNames) { int count = categoriesForOneYear.count(categoryName); sb.append(count).append(sep); } sb.append("\n"); } bw.write(sb.toString()); bw.close(); }
From source file:itemsetmining.itemset.ItemsetTree.java
/**
 * Pearson's chi-squared test for itemset independence. This tests the
 * empirical itemset distribution against the independence model.
 *
 * <p>
 * N.B. the chi-squared distribution has one degree of freedom.
 *
 * @param n           index of the next item to assign to the current cell
 * @param cell        bitset marking which of the first n items are "in"
 * @param sortedItems the itemset's items
 * @param singletons  per-item support counts
 * @return the chi-squared contribution of all cells reachable from this prefix
 *
 * @see S. Brin et al. Beyond Market Baskets: Generalizing Association Rules
 *      to Correlations
 */
private double recursiveChiSquared(final int n, final BitSet cell, final int[] sortedItems,
        final Multiset<Integer> singletons) {

    // Not a full cell yet: branch on item n being "in" or "out" and sum both
    // subtrees.
    if (n != sortedItems.length) {
        final BitSet cellWithN = (BitSet) cell.clone();
        cellWithN.set(n);
        return recursiveChiSquared(n + 1, cellWithN, sortedItems, singletons)
                + recursiveChiSquared(n + 1, cell, sortedItems, singletons);
    }

    // Leaf: compute (pEmp - pInd)^2 / pInd for this fully-specified cell.
    // pInd is the expected count under the independence model.
    double pInd = noTransactions;
    final int[] inItems = new int[cell.cardinality()];
    final int[] outItems = new int[n - cell.cardinality()];
    int nextIn = 0;
    int nextOut = 0;
    for (int k = 0; k < n; k++) {
        final double support = singletons.count(sortedItems[k]);
        if (cell.get(k)) {
            inItems[nextIn++] = sortedItems[k];
            pInd *= support / noTransactions;
        } else {
            outItems[nextOut++] = sortedItems[k];
            pInd *= (noTransactions - support) / noTransactions;
        }
    }
    final double pEmp = countEmpirical(inItems, outItems, root, new int[0]);
    return (pEmp - pInd) * (pEmp - pInd) / pInd;
}
From source file:com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory.java
/** * Processes single line of input into: - target variable - Feature vector * //from ww w . ja va 2 s . co m * @throws Exception */ public int processLine(String line, Vector v) throws Exception { String[] parts = line.split(this.class_id_split_string); if (parts.length < 2) { throw new Exception("wtf: line not formed well."); } String newsgroup_name = parts[0]; String msg = parts[1]; // p.269 --------------------------------------------------------- Map<String, Set<Integer>> traceDictionary = new TreeMap<String, Set<Integer>>(); // encodes the text content in both the subject and the body of the email FeatureVectorEncoder encoder = new StaticWordValueEncoder("body"); encoder.setProbes(2); encoder.setTraceDictionary(traceDictionary); // provides a constant offset that the model can use to encode the average // frequency // of each class FeatureVectorEncoder bias = new ConstantValueEncoder("Intercept"); bias.setTraceDictionary(traceDictionary); int actual = newsGroups.intern(newsgroup_name); // newsGroups.values().contains(arg0) // System.out.println( "> newsgroup name: " + newsgroup_name ); // System.out.println( "> newsgroup id: " + actual ); Multiset<String> words = ConcurrentHashMultiset.create(); /* * // System.out.println("record: "); for ( int x = 1; x < parts.length; x++ * ) { //String s = ts.getAttribute(CharTermAttribute.class).toString(); // * System.out.print( " " + parts[x] ); String foo = parts[x].trim(); * System.out.print( " " + foo ); words.add( foo ); * * } // System.out.println("\nEOR"); System.out.println( "\nwords found: " + * (parts.length - 1) ); System.out.println( "words in set: " + words.size() * + ", " + words.toString() ); */ StringReader in = new StringReader(msg); countWords(analyzer, words, in); // ----- p.271 ----------- // Vector v = new RandomAccessSparseVector(FEATURES); // original value does nothing in a ContantValueEncoder bias.addToVector("", 1, v); // original value does nothing in a ContantValueEncoder // 
lines.addToVector("", lineCount / 30, v); // original value does nothing in a ContantValueEncoder // logLines.addToVector("", Math.log(lineCount + 1), v); // now scan through all the words and add them // System.out.println( "############### " + words.toArray().length); for (String word : words.elementSet()) { encoder.addToVector(word, Math.log(1 + words.count(word)), v); // System.out.print( words.count(word) + " " ); } // System.out.println("\nEOL\n"); return actual; }
From source file:fr.ens.transcriptome.aozan.collectors.UndeterminedIndexesProcessThreads.java
/** * Compute for a sample the number of clusters that can be recovered. * @param sampleName sample name//www . j av a 2s . c o m * @param indicesCounts multiset that contain data to process * @param resultKeySuffix the suffix for the run data key entry * @return the number of cluster that can be recovered for the sample */ private int computeRecoverableSampleClusterCount(final String sampleName, final Multiset<String> indicesCounts, final String resultKeySuffix) { int recoverableClusterCount = 0; if (!this.isSkipProcessResult) { // Sum the number of cluster that can be recovered if (this.newSamplesIndexes.containsKey(sampleName)) { for (final String newIndex : this.newSamplesIndexes.get(sampleName)) { if (indicesCounts.contains(newIndex)) { final int count = indicesCounts.count(newIndex); recoverableClusterCount += count; } } } } // Set the result for the sample getResults().put("undeterminedindices" + ".lane" + this.lane + ".sample." + sampleName + resultKeySuffix, recoverableClusterCount); return recoverableClusterCount; }
From source file:com.music.service.text.TimelineToMusicService.java
private Variation getVariation(List<Tweet> tweets, TimelineMusic meta) { Morphology morphology = new Morphology(new StringReader("")); Multiset<String> words = HashMultiset.create(); for (Tweet tweet : tweets) { String tweetText = tweet.getText().toLowerCase(); List<String> urls = TimelineToMusicService.extractUrls(tweetText); for (String url : urls) { tweetText = tweetText.replace(url, ""); }/*from w w w . j ava 2 s . c om*/ List<String> usernames = TimelineToMusicService.extractMentionedUsernames(tweetText); for (String username : usernames) { tweetText = tweetText.replace(username, "").replace("rt", ""); } String[] wordsInTweet = tweetText.split("[^\\p{L}&&[^']]+"); for (String word : wordsInTweet) { try { words.add(morphology.stem(word)); } catch (Exception ex) { words.add(word); } } } words.removeAll(stopwords); // if a word is mentioned more times than is 4% of the tweets, it's considered a topic double topicThreshold = tweets.size() * 4 / 100; for (Iterator<String> it = words.iterator(); it.hasNext();) { String word = it.next(); // remove stopwords not in the list (e.g. in a different language). // We consider all words less than 4 characters to be stop words if (word == null || word.length() < 4) { it.remove(); } else if (words.count(word) < topicThreshold) { it.remove(); } } meta.setTopKeywords(new HashSet<>(words.elementSet())); // the more topics you have, the more variative music if (meta.getTopKeywords().size() > 40) { return Variation.EXTREMELY_VARIATIVE; } else if (meta.getTopKeywords().size() > 30) { return Variation.VERY_VARIATIVE; } else if (meta.getTopKeywords().size() > 20) { return Variation.MOVING; } else if (meta.getTopKeywords().size() > 10) { return Variation.AVERAGE; } else { return Variation.MONOTONOUS; } }
From source file:org.sonar.server.component.ws.ComponentAppAction.java
/**
 * Appends the "measures" JSON object for a component: formatted key metrics
 * plus the total issue count and one per-severity issue count.
 *
 * @param json                  writer positioned inside the enclosing object
 * @param measuresByMetricKey   the component's measures, indexed by metric key
 * @param severitiesAggregation multiset of issue severities (one occurrence per issue)
 * @param periodIndex           variation period to format against, or null for raw values
 */
private void appendMeasures(JsonWriter json, Map<String, MeasureDto> measuresByMetricKey,
        Multiset<String> severitiesAggregation, Integer periodIndex) {
    // Hoisted: the original fetched the locale from UserSession once per
    // formatted count.
    java.util.Locale locale = UserSession.get().locale();

    json.name("measures").beginObject();
    json.prop("fNcloc", formatMeasureOrVariation(measuresByMetricKey.get(CoreMetrics.NCLOC_KEY), periodIndex));
    json.prop("fCoverage", formatMeasureOrVariation(coverageMeasure(measuresByMetricKey), periodIndex));
    json.prop("fDuplicationDensity", formatMeasureOrVariation(
            measuresByMetricKey.get(CoreMetrics.DUPLICATED_LINES_DENSITY_KEY), periodIndex));
    json.prop("fDebt",
            formatMeasureOrVariation(measuresByMetricKey.get(CoreMetrics.TECHNICAL_DEBT_KEY), periodIndex));
    json.prop("fSqaleRating",
            formatMeasureOrVariation(measuresByMetricKey.get(CoreMetrics.SQALE_RATING_KEY), periodIndex));
    json.prop("fSqaleDebtRatio",
            formatMeasureOrVariation(measuresByMetricKey.get(CoreMetrics.SQALE_DEBT_RATIO_KEY), periodIndex));
    json.prop("fTests", formatMeasureOrVariation(measuresByMetricKey.get(CoreMetrics.TESTS_KEY), periodIndex));

    // Total issue count, then one "f<Severity>Issues" property per severity present.
    json.prop("fIssues", i18n.formatInteger(locale, severitiesAggregation.size()));
    for (String severity : severitiesAggregation.elementSet()) {
        json.prop("f" + StringUtils.capitalize(severity.toLowerCase()) + "Issues",
                i18n.formatInteger(locale, severitiesAggregation.count(severity)));
    }
    json.endObject();
}
From source file:tv.dyndns.kishibe.qmaclone.server.database.DirectDatabase.java
@Override public Map<Integer, Integer> getUserCodeToIndicatedProblems() throws DatabaseException { final Map<Integer, Integer> problemIdToUserCode = Maps.newHashMap(); try {//from w ww . ja v a 2 s .co m runner.query( "SELECT problem.ID, creation_log.USER_CODE, creation_log.DATE FROM problem, creation_log " + "WHERE INDICATION != 0 AND problem.ID = creation_log.PROBLEM_ID", new ResultSetHandler<Void>() { @Override public Void handle(ResultSet resultSet) throws SQLException { while (resultSet.next()) { int problemId = resultSet.getInt("ID"); int userCode = resultSet.getInt("creation_log.USER_CODE"); // ???????????? problemIdToUserCode.put(problemId, userCode); } return null; } }); } catch (SQLException e) { throw new DatabaseException(e); } Multiset<Integer> userCodes = HashMultiset.create(problemIdToUserCode.values()); Map<Integer, Integer> userCodeToIndicatedProblems = Maps.newHashMap(); for (int userCode : userCodes) { userCodeToIndicatedProblems.put(userCode, userCodes.count(userCode)); } return userCodeToIndicatedProblems; }