List of usage examples for com.google.common.collect Multisets copyHighestCountFirst
@Beta public static <E> ImmutableMultiset<E> copyHighestCountFirst(Multiset<E> multiset)
From source file:com.android.tools.idea.editors.theme.attributes.AttributesModelColorPaletteModel.java
/**
 * Rebuilds {@code myColorList} from the colors currently referenced by the table model,
 * ordered by how often each color occurs (most frequent first). Also records, per color,
 * which style items reference it in {@code myColorReferences}.
 */
private void loadColors() {
    // Without a resolver we cannot resolve any color references; expose an empty palette.
    if (myResourceResolver == null) {
        myColorList = Collections.emptyList();
        return;
    }

    Multiset<Color> colorOccurrences = HashMultiset.create();
    int rowCount = myModel.getRowCount();
    for (int row = 0; row < rowCount; row++) {
        // Only rows whose first column holds a Color contribute to the palette.
        if (myModel.getCellClass(row, 0) != Color.class) {
            continue;
        }
        EditedStyleItem styleItem = (EditedStyleItem) myModel.getValueAt(row, 0);
        // One item may resolve to several colors (e.g. state lists); count each occurrence.
        for (Color resolved : ResourceHelper.resolveMultipleColors(myResourceResolver,
                styleItem.getSelectedValue(), myProject)) {
            myColorReferences.put(resolved, styleItem);
            colorOccurrences.add(resolved);
        }
    }

    // copyHighestCountFirst orders elements by descending count; elementSet() keeps
    // that order while deduplicating, so the palette lists popular colors first.
    myColorList = ImmutableList.copyOf(Multisets.copyHighestCountFirst(colorOccurrences).elementSet());
}
From source file:pl.polzone.classifier.Classifier.java
/**
 * Predicts the most likely category for the given word list.
 *
 * <p>Each stemmed word votes for every category it has been observed in; words that are
 * too common (seen in more than half of all feeds) are skipped as uninformative. When the
 * winning category scores more than twice the runner-up, the input is fed back as a
 * training example for that category (self-training).
 *
 * @param words tokens to classify
 * @return the highest-scoring category, or {@code null} when no word produced a score
 */
public String predict(java.util.List<String> words) {
    final Multiset<String> categoryScores = HashMultiset.create();

    for (String raw : words) {
        String word = stem(raw);
        // Skip overly common words: they appear in most feeds and carry no signal.
        if (wordCount.getCount(word) > feedCount / 2) {
            continue;
        }
        if (occurences.containsKey(word)) {
            for (Object category : occurences.get(word).uniqueSet()) {
                // Weight: per-category occurrences plus a rarity bonus for the word.
                int weight = occurences.get(word).getCount(category)
                        + (feedCount - wordCount.getCount(word));
                categoryScores.add((String) category, weight);
            }
        }
    }

    if (categoryScores.isEmpty()) {
        return null;
    }

    Iterator<Entry<String>> ranked = Multisets.copyHighestCountFirst(categoryScores).entrySet().iterator();
    String best = ranked.next().getElement();
    if (ranked.hasNext()) {
        String second = ranked.next().getElement();
        // Confident win (more than double the runner-up): reinforce the model.
        if (categoryScores.count(best) > categoryScores.count(second) * 2) {
            feed(best, words);
        }
    }
    return best;
}
From source file:cpw.mods.inventorysorter.SortingHandler.java
/**
 * Compacts the slot range described by {@code context.slotMapping}, writing item stacks
 * back in descending order of total item count (most numerous item type first).
 *
 * <p>Works through {@code itemcounts} one entry at a time, filling each valid slot with up
 * to a full stack of the current item type until that type's count is exhausted, then
 * moving to the next entry. Slots left over once all items are placed are cleared
 * (receive a {@code null} stack).
 *
 * @param context    action context holding the player, open container and slot range
 * @param itemcounts total count per distinct item type to be redistributed
 */
private void compactInventory(Action.ActionContext context, Multiset<ItemStackHolder> itemcounts) {
    final UnmodifiableIterator<Multiset.Entry<ItemStackHolder>> itemsIterator;
    try {
        // Order item types by descending total count so the biggest piles are placed first.
        itemsIterator = Multisets.copyHighestCountFirst(itemcounts).entrySet().iterator();
    } catch (Exception e) {
        // Defensive: sorting failures abort the compaction rather than corrupt the inventory.
        FMLLog.log(Level.WARN, e, "Weird, the sorting didn't quite work!");
        return;
    }
    int slotLow = context.slotMapping.begin;
    int slotHigh = context.slotMapping.end + 1; // end is inclusive; loop below is exclusive
    // Current item type being placed and how many of it remain to place.
    Multiset.Entry<ItemStackHolder> stackHolder = itemsIterator.hasNext() ? itemsIterator.next() : null;
    int itemCount = stackHolder != null ? stackHolder.getCount() : 0;
    for (int i = slotLow; i < slotHigh; i++) {
        final Slot slot = context.player.openContainer.getSlot(i);
        ItemStack target = null;
        if (itemCount > 0 && stackHolder != null) {
            // Copy the template stack and cap its size at the stack limit.
            target = stackHolder.getElement().is.copy();
            target.stackSize = itemCount > target.getMaxStackSize() ? target.getMaxStackSize() : itemCount;
        }
        // Skip slots that reject this item or cannot be modified by this player.
        if ((target != null && !slot.isItemValid(target)) || !slot.canTakeStack(context.player))
            continue;
        // target may be null here, which clears the slot.
        slot.putStack(target);
        itemCount -= (target != null ? target.stackSize : 0);
        if (itemCount == 0) {
            // Current item type fully placed; advance to the next entry (or null when done).
            stackHolder = itemsIterator.hasNext() ? itemsIterator.next() : null;
            itemCount = stackHolder != null ? stackHolder.getCount() : 0;
        }
    }
}
From source file:org.datacleaner.components.machinelearning.impl.MLFeatureUtils.java
public static Set<String> sanitizeFeatureVectorSet(Multiset<String> values, MLTrainingConstraints constraints) { final Set<String> resultSet; final int maxFeatures = constraints.getMaxFeatures(); if (maxFeatures > 0) { resultSet = new TreeSet<>(); final Iterator<String> highestCountFirst = Multisets.copyHighestCountFirst(values).elementSet() .iterator();//from w ww .j av a2s .c o m // populate "resultSet" using "highestCountFirst" for (int i = 0; i < maxFeatures; i++) { if (highestCountFirst.hasNext()) { final String value = highestCountFirst.next(); resultSet.add(value); } } } else { resultSet = new TreeSet<>(values.elementSet()); } final boolean includeFeaturesForUniqueValues = constraints.isIncludeFeaturesForUniqueValues(); if (!includeFeaturesForUniqueValues) { // remove uniques in "values" from "resultSet". for (Iterator<String> it = resultSet.iterator(); it.hasNext();) { final String value = it.next(); if (values.count(value) == 1) { it.remove(); } } } return resultSet; }
From source file:webreduce.indexing.luceneSearcher.java
public List<Dataset> search() throws IOException { List<Dataset> resultList; resultList = new ArrayList<>(); BooleanQuery.Builder finalQueryBuilder = new BooleanQuery.Builder(); BooleanQuery.Builder entityQueryBuilder = new BooleanQuery.Builder(); BooleanQuery.Builder attributeQueryBuilder = new BooleanQuery.Builder(); //gives me queries QueryParser qpa = new QueryParser(ATTRIBUTES_FIELD, new CustomAnalyzer()); QueryParser qpe = new QueryParser(ENTITIES_FIELD, new CustomAnalyzer()); //QueryWrapperFilter queryFilter = new QueryWrapperFilter(query); //CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter); //CachingWrapperQuery typeFilterR = new CachingWrapperFilter(new TermsFilter(new Term(TABLE_TYPE_FIELD, "RELATION"))); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); QueryBuilder queryBuilder = new QueryBuilder(new CustomAnalyzer()); System.out.println("Attributes: \n" + Arrays.deepToString(attributes)); System.out.println("Entities: \n" + Arrays.deepToString(entities)); //add attributes one by one for (String a : attributes) { Query qa;/* w w w . 
j ava2 s .c o m*/ try { qa = qpa.parse("\"" + a + "\""); attributeQueryBuilder.add(qa, BooleanClause.Occur.SHOULD); } catch (ParseException ex) { } } //end of for loop //remove null HashSet<String> entitySet; entitySet = new HashSet<>(Arrays.asList(entities)); entitySet.remove(null); entities = entitySet.toArray(new String[entitySet.size()]); System.out.println("Entities after null removal \n" + Arrays.deepToString(entities)); Multiset<Integer> docNoCount; docNoCount = HashMultiset.create(); //Take only top 50 entities; String[] entities50 = new String[50]; System.arraycopy(entities, 0, entities50, 0, 50); System.out.println(Arrays.deepToString(entities50)); for (String e : entities50) { System.out.println(e); if (e == null) { continue; } Query qe; try { qe = qpe.parse(QueryParserBase.escape(e)); //Query qeph = qpe.parse("\"" + QueryParserBase.escape(e) + "\""); finalQueryBuilder.add(qe, BooleanClause.Occur.MUST); //add entities boolean query finalQueryBuilder.add(attributeQueryBuilder.build(), BooleanClause.Occur.MUST); //add attributes query TopDocs td = searcher.search(finalQueryBuilder.build(), numberOfResults * 10); for (ScoreDoc sd : td.scoreDocs) { int docNo = sd.doc; docNoCount.add(docNo); } } catch (ParseException ex) { } System.out.println("Top Doc id: \n" + Multisets.copyHighestCountFirst(docNoCount).entrySet().iterator().next().getElement()); } //Sort the returned docs by their frequency and store it in docNoSorted ImmutableMultiset<Integer> docNoSorted = Multisets.copyHighestCountFirst(docNoCount); //Get the entry set of the frequency ordered document set ImmutableSet<Multiset.Entry<Integer>> entrySet = Multisets.copyHighestCountFirst(docNoCount).entrySet(); //Get the iterator for the sorted entry set UnmodifiableIterator<Multiset.Entry<Integer>> iterator = entrySet.iterator(); int bestDocId = iterator.next().getElement(); System.out.println("first count" + iterator.next()); // Set<Integer> elementSet = docNoSorted.elementSet(); Integer next = 
elementSet.iterator().next(); System.out.println("Most frequent document id: " + next); int resultSetSize; resultSetSize = docNoSorted.elementSet().size(); System.out.println("Entry Set Size: " + resultSetSize + " Cardinality: " + docNoSorted.size()); Set<Integer> elementSet1 = Multisets.copyHighestCountFirst(docNoSorted).elementSet(); List<Integer> t = new ArrayList<Integer>(elementSet1); List<Integer> subList = t.subList(0, numberOfResults); //ArrayList subArrayList = new ArrayList(subList); Iterator<Integer> subListIterator = subList.iterator(); //we have all the web table doc IDs //We snould take while (subListIterator.hasNext()) { int docID = subListIterator.next(); Document doc; doc = searcher.doc(docID); String jsonString = doc.get("full_result"); Dataset er = Dataset.fromJson(jsonString); resultList.add(er); } return resultList; }
From source file:qa.qcri.nadeef.core.pipeline.EquivalentClass.java
/** * {@inheritDoc}/*from w w w . j av a 2 s. c o m*/ */ @Override public Collection<Fix> decide(Collection<Fix> fixes) { List<HashSet<Cell>> clusters = Lists.newArrayList(); // a map between a cell and n HashMap<Cell, HashSet<Cell>> clusterMap = Maps.newHashMap(); HashMap<Cell, String> assignMap = Maps.newHashMap(); // a map between cell and fix, used for getting the original vid. HashMap<Cell, Fix> fixMap = Maps.newHashMap(); // Clustering all the fixes. int count = 0; for (Fix fix : fixes) { Cell leftCell = fix.getLeft(); fixMap.put(leftCell, fix); if (fix.isRightConstant()) { // TODO: do a statistic on the assign count. assignMap.put(leftCell, fix.getRightValue()); continue; } Cell rightCell = fix.getRight(); fixMap.put(rightCell, fix); if (assignMap.containsKey(leftCell)) { assignMap.remove(leftCell); } if (assignMap.containsKey(rightCell)) { assignMap.remove(rightCell); } HashSet<Cell> leftCluster = null; HashSet<Cell> rightCluster = null; // when the left column is already in a cluster if (clusterMap.containsKey(leftCell)) { leftCluster = clusterMap.get(leftCell); if (!leftCluster.contains(rightCell)) { // union of two cluster of cell sets. if (clusterMap.containsKey(rightCell)) { rightCluster = clusterMap.get(rightCell); for (Cell cell : rightCluster) { leftCluster.add(cell); clusterMap.put(cell, leftCluster); } rightCluster.clear(); clusters.remove(rightCluster); } else { clusterMap.put(rightCell, leftCluster); leftCluster.add(rightCell); } } } else if (clusterMap.containsKey(rightCell)) { // when the right column is already in the cluster rightCluster = clusterMap.get(rightCell); if (!rightCluster.contains(leftCell)) { // union of two cluster of cell sets. 
if (clusterMap.containsKey(leftCell)) { leftCluster = clusterMap.get(leftCell); for (Cell cell : leftCluster) { rightCluster.add(cell); clusterMap.put(cell, rightCluster); } for (Cell cell : leftCluster) { leftCluster.remove(cell); } clusters.remove(leftCluster); } else { clusterMap.put(leftCell, rightCluster); rightCluster.add(leftCell); } } } else { // both left and right are not in any of the cluster // create a new cluster of containing both. HashSet<Cell> cluster = Sets.newHashSet(); cluster.add(leftCell); cluster.add(rightCell); clusterMap.put(leftCell, cluster); clusterMap.put(rightCell, cluster); clusters.add(cluster); } } // start to count each cluster and decide the final fix based on // percentage. List<Fix> result = Lists.newArrayList(); // for final execution of all the fixes, we use 0 as default as the fix id. Fix.Builder fixBuilder = new Fix.Builder(); count = 0; for (HashSet<Cell> cluster : clusters) { Multiset<Object> countSet = HashMultiset.create(); for (Cell cell : cluster) { countSet.add(cell.getValue()); } countSet = Multisets.copyHighestCountFirst(countSet); Object value = countSet.iterator().next(); for (Cell cell : cluster) { if (cell.getValue().equals(value)) { // skip the correct value. continue; } Fix originalFix = fixMap.get(cell); Fix newFix = fixBuilder.vid(originalFix.getVid()).left(cell).right(value.toString()).build(); result.add(newFix); } count++; } // collect the remaining constant assign fix. Set<Map.Entry<Cell, String>> entries = assignMap.entrySet(); for (Map.Entry<Cell, String> entry : entries) { Fix newFix = fixBuilder.left(entry.getKey()).right(entry.getValue()).build(); result.add(newFix); } setPercentage(1.0f); return result; }
From source file:com.github.pffy.chinese.freq.ImmutableChineseFrequency.java
/**
 * Builds this object with an input text. Expecting Chinese characters.
 *
 * <p>Counts occurrences of each hanzi in {@code input}, orders them by descending
 * frequency, and renders CSV/TSV/plain-text reports plus a summary of the counts.
 * Hanzi without a pinyin mapping in {@code hpdx} are counted but excluded from the
 * rendered output (tracked via {@code processedCount}).
 *
 * @param input Chinese text for frequency analysis
 * @throws NullPointerException if {@code input} is null or empty
 */
public ImmutableChineseFrequency(String input) {
    init();

    // Counts gathered during analysis.
    int inputCount, removedCount, hanziCount, uniqueHanziCount, processedCount;
    Scanner sc;

    // Output accumulators for the three report formats.
    String csvOutput = "";
    String tsvOutput = "";
    String txtOutput = "";
    String csv, tsv, txt;
    String summaryString = "";
    String hz, py;
    int freq;

    // Guava multiset does the frequency counting and highest-count-first ordering.
    String hanzi;
    Multiset<String> hanziSet = HashMultiset.create();
    Iterable<Multiset.Entry<String>> hanziSortedByCount;
    Iterator<Multiset.Entry<String>> keys;
    Multiset.Entry<String> key;
    ImmutableList<Multiset.Entry<String>> hanziList;

    // NOTE(review): empty (non-null) input also triggers this NullPointerException —
    // an IllegalArgumentException would arguably fit better; kept as-is.
    if (input == null || input.isEmpty()) {
        throw new NullPointerException(this.MSG_EMPTYNULL_STRING);
    }

    inputCount = input.length();
    // Strip everything that is not a hanzi; the removed count is reported in the summary.
    input = retainHanzi(input);
    removedCount = inputCount - input.length();
    hanziCount = input.length();

    // Empty delimiter makes the scanner emit one character (hanzi) per token.
    sc = new Scanner(input);
    sc.useDelimiter("");

    // Accumulate: count occurrences of each hanzi.
    while (sc.hasNext()) {
        hanzi = (String) sc.next();
        hanziSet.add(hanzi, 1);
    }
    sc.close();

    uniqueHanziCount = hanziSet.elementSet().size();
    processedCount = 0;
    // Entries ordered by descending occurrence count.
    hanziSortedByCount = Multisets.copyHighestCountFirst(hanziSet).entrySet();
    hanziList = Multisets.copyHighestCountFirst(hanziSet).entrySet().asList();
    keys = hanziSortedByCount.iterator();

    while (keys.hasNext()) {
        key = (Multiset.Entry<String>) keys.next();
        // Strip any "x <count>" suffix from the element before the pinyin lookup.
        hz = (String) key.getElement().replaceAll("x \\d{1,}", "");
        py = (String) this.hpdx.get(hz);
        freq = (int) key.getCount();

        // Check null first to avoid NullPointerException. lazy code.
        if (py == null || py.isEmpty()) {
            // Not mapped yet. that is okay, move on.
            continue;
        }

        csv = this.CRLF + hz + "," + py + "," + freq;
        csvOutput += csv;
        tsv = this.CRLF + hz + "\t" + py + "\t" + freq;
        tsvOutput += tsv;
        txt = this.CRLF + padSummary(hz + " [" + py + "]", this.PADSIZE_SUMMARY) + freq;
        txtOutput += txt;
        processedCount++;
    }

    // Build the human-readable summary block, one padded line per metric.
    summaryString += padSummary(this.MSG_TOTAL_COUNT, this.PADSIZE_SUMMARY) + inputCount;
    summaryString += this.CRLF + padSummary(this.MSG_REMOVED_COUNT, this.PADSIZE_SUMMARY) + removedCount;
    summaryString += this.CRLF + padSummary(this.MSG_HANZI_COUNT, this.PADSIZE_SUMMARY) + hanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_UNIQUE_COUNT, this.PADSIZE_SUMMARY) + uniqueHanziCount;
    summaryString += this.CRLF + padSummary(this.MSG_PROCESSED_COUNT, this.PADSIZE_SUMMARY) + processedCount;

    // Only prepend header rows when at least one hanzi was rendered.
    if (processedCount > 0) {
        csvOutput = this.HEADER_ROW_CSV + csvOutput;
        tsvOutput = this.HEADER_ROW_TSV + tsvOutput;
        txtOutput = this.HEADER_ROW_TXT + txtOutput;
    }

    // Publish results to the immutable instance fields.
    this.input = input;
    this.inputCount = inputCount;
    this.removedCount = removedCount;
    this.hanziCount = hanziCount;
    this.uniqueHanziCount = uniqueHanziCount;
    this.processedCount = processedCount;
    this.summary = summaryString;
    this.hanziList = hanziList;
    this.csvOutput = csvOutput;
    this.tsvOutput = tsvOutput;
    this.txtOutput = txtOutput;
}
From source file:com.seajas.search.codex.service.social.SocialProfileService.java
@Override public TwitterProfileSummaryDto getTwitterProfileSummary(final long twitterProfileId) { List<Tweet> tweets = socialFacade.getUserTimeline(twitterProfileId); SocialProfileDto socialProfileDto = null; TwitterProfile twitterProfile = socialFacade.getTwitterProfile(twitterProfileId); if (twitterProfile != null) { socialProfileDto = SocialProfileDto.translate(twitterProfile); socialProfileDto//from ww w . j a v a 2 s. c o m .setProfileImageMediaUrl(this.storeImageOnMediaServer(twitterProfile.getProfileImageUrl())); } Multiset<Long> mentionedCounter = HashMultiset.create(); Multiset<String> hashTagCounter = HashMultiset.create(); this.countTwitterEntities(tweets, mentionedCounter, hashTagCounter); mentionedCounter = Multisets.copyHighestCountFirst(mentionedCounter); hashTagCounter = Multisets.copyHighestCountFirst(hashTagCounter); List<MentionedDto> mentions = this.buildTwitterMentionedList(mentionedCounter); List<HashTagDto> hashTagList = Lists.newArrayList(); for (String hashTag : hashTagCounter.elementSet()) { hashTagList.add(new HashTagDto(hashTag, hashTagCounter.count(hashTag))); } return new TwitterProfileSummaryDto(socialProfileDto, hashTagList, mentions); }
From source file:org.mule.module.extension.internal.util.MuleExtensionUtils.java
private static Set<String> collectRepeatedNames(Collection<? extends Described> describedCollection) { if (CollectionUtils.isEmpty(describedCollection)) { return ImmutableSet.of(); }// w w w . j a va 2 s. c o m Multiset<String> names = LinkedHashMultiset.create(); for (Described described : describedCollection) { if (described == null) { throw new IllegalArgumentException("A null described was provided"); } names.add(described.getName()); } names = Multisets.copyHighestCountFirst(names); Set<String> repeatedNames = new HashSet<>(); for (String name : names) { if (names.count(name) == 1) { break; } repeatedNames.add(name); } return repeatedNames; }
From source file:net.betaengine.smartconfig.device.decoder.Solver.java
private int getNibbleCount() { Iterator<Multiset.Entry<Integer>> i = Multisets.copyHighestCountFirst(lengths).entrySet().iterator(); Multiset.Entry<Integer> entry = i.next(); int minLength = entry.getElement(); int count = entry.getCount(); while (i.hasNext()) { entry = i.next();//w w w . j av a 2s . co m if (entry.getCount() < count) { break; } minLength = Math.min(minLength, entry.getElement()); } return minLength * 2; }