Usage examples for `com.google.common.collect.ImmutableMultiset#elementSet()`:
Set<E> elementSet();
From source file:org.trnltk.experiment.morphology.ambiguity.AmbiguityClassifier.java
/**
 * Experiment driver: reads morphological parse results for a corpus split,
 * computes the pairwise {@code ParseResultDifference} of every two parse
 * results of the same word, and prints aggregate ambiguity statistics.
 */
public static void main(String[] args) throws IOException, JSONException {
    int numberOfWords = 0;
    int numberOfParseResults = 0;
    // Every pairwise difference encountered (multiset: one entry per pair).
    final Multiset<ParseResultDifference> differenceSet = HashMultiset.create();
    // Differences that have a part difference but no root difference.
    final Multiset<ParseResultDifference> differenceSetWithoutRootDifferences = HashMultiset.create();
    // NOTE(review): hard-coded local Windows path — not portable; parameterize before reuse.
    final File folder = new File("D:\\devl\\data\\1MSentences\\split");
    final File[] files = folder.listFiles();
    if (files == null)
        throw new RuntimeException();
    final StopWatch stopWatch = new StopWatch();
    stopWatch.start();
    for (int fileIndex = 0; fileIndex < files.length; fileIndex++) {
        File file = files[fileIndex];
        System.out.println("Processing file " + file);
        //            final BufferedReader reader = new BufferedReader(new FileReader(file));
        //            while (reader.ready()) {
        //                reader.readLine();
        //            }
        final ParseResultReader parseResultReader = new ParseResultReader();
        final ParseResultDiffTool parseResultDiffTool = new ParseResultDiffTool();
        final List<WordParseResultEntry> parseResultEntries = parseResultReader
                .getParseResultEntries(Files.newReader(file, Charsets.UTF_8));
        numberOfWords += parseResultEntries.size();
        //noinspection ForLoopReplaceableByForEach
        for (int parseResultEntryIndex = 0; parseResultEntryIndex < parseResultEntries
                .size(); parseResultEntryIndex++) {
            WordParseResultEntry parseResultEntry = parseResultEntries.get(parseResultEntryIndex);
            final List<ParseResult> parseResults = parseResultEntry.getParseResults();
            numberOfParseResults += parseResults.size();
            // Diff every unordered pair (i, j), i < j, of this word's parse results.
            for (int i = 0; i < parseResults.size(); i++) {
                final ParseResult leftParseResult = parseResults.get(i);
                for (int j = i + 1; j < parseResults.size(); j++) {
                    final ParseResult rightParseResult = parseResults.get(j);
                    final ParseResultDifference difference = parseResultDiffTool.findDifference(leftParseResult,
                            rightParseResult);
                    // NOTE(review): Guava's Multiset.add(E) always returns true, so
                    // `added` never filters anything here — verify whether Set
                    // semantics (add-if-absent) were actually intended.
                    final boolean added = differenceSet.add(difference);
                    if (added && difference.hasNoRootDifference() && difference.hasPartDifference())
                        differenceSetWithoutRootDifferences.add(difference);
                }
            }
        }
        //noinspection ConstantConditions
        // Deliberate early exit: only the first split file is processed (experiment mode).
        if (fileIndex == 0)
            break;
    }
    stopWatch.stop();
    final long time = stopWatch.getTime();
    System.out.println(stopWatch);
    // NOTE(review): magic divisor 51 — presumably the total number of split files,
    // used to extrapolate per-file timing; confirm.
    System.out.println(Long.valueOf(time).doubleValue() / (51));
    System.out.println("Number of words : " + numberOfWords);
    System.out.println("Number of parseResults : " + numberOfParseResults);
    System.out.println("Number of distinct differences : " + differenceSet.elementSet().size());
    System.out.println("numberOfDistinctDifferencesWithoutRootDifference : "
            + differenceSetWithoutRootDifferences.elementSet().size());
    // Order the differences by descending occurrence count.
    final ImmutableMultiset<ParseResultDifference> sortedDifferenceSetWithoutRootDifferences = Multisets
            .copyHighestCountFirst(differenceSetWithoutRootDifferences);
    // Report only differences that occur more than 100 times.
    for (ParseResultDifference parseResultDifference : sortedDifferenceSetWithoutRootDifferences.elementSet()) {
        final int count = sortedDifferenceSetWithoutRootDifferences.count(parseResultDifference);
        if (count > 100) {
            System.out.println(count);
            System.out.println(parseResultDifference);
        }
    }
}
From source file:com.publictransitanalytics.scoregenerator.output.ComparativeSectorReachInformation.java
private static Map<SimplePath, Integer> getPathCounts(final Set<MovementPath> bestPaths) throws InterruptedException { final Map<SimplePath, Integer> pathCounts; final TreeMultimap<Integer, SimplePath> frequencyMap = TreeMultimap.create(Integer::compareTo, (p1, p2) -> p1.toString().compareTo(p2.toString())); if (bestPaths != null) { final ImmutableMultiset.Builder<SimplePath> bestSimplePathsBuilder = ImmutableMultiset.builder(); for (final MovementPath bestPath : bestPaths) { bestSimplePathsBuilder.add(new SimplePath(bestPath)); }// w w w . ja va2 s .c om final ImmutableMultiset<SimplePath> bestSimplePaths = bestSimplePathsBuilder.build(); for (final SimplePath path : bestSimplePaths.elementSet()) { frequencyMap.put(bestSimplePaths.count(path), path); } pathCounts = new LinkedHashMap<>(); for (final Integer frequency : frequencyMap.keySet().descendingSet()) { final NavigableSet<SimplePath> pathsForFrequency = frequencyMap.get(frequency); for (final SimplePath pathForFrequency : pathsForFrequency) { pathCounts.put(pathForFrequency, frequency); } } } else { pathCounts = null; } return pathCounts; }
From source file:com.publictransitanalytics.scoregenerator.output.SectorReachInformation.java
public SectorReachInformation(final Set<MovementPath> bestPaths, final int count, final Set<LocalDateTime> reachTimes) throws InterruptedException { reachCount = count;//from w ww . j av a 2 s .co m this.reachTimes = reachTimes.stream().map(time -> time.toLocalTime().toString()) .collect(Collectors.toSet()); final TreeMultimap<Integer, SimplePath> frequencyMap = TreeMultimap.create(Integer::compareTo, (p1, p2) -> p1.toString().compareTo(p2.toString())); if (bestPaths != null) { final ImmutableMultiset.Builder<SimplePath> bestSimplePathsBuilder = ImmutableMultiset.builder(); for (final MovementPath bestPath : bestPaths) { bestSimplePathsBuilder.add(new SimplePath(bestPath)); } final ImmutableMultiset<SimplePath> bestSimplePaths = bestSimplePathsBuilder.build(); for (final SimplePath path : bestSimplePaths.elementSet()) { frequencyMap.put(bestSimplePaths.count(path), path); } pathCounts = new LinkedHashMap<>(); for (final Integer frequency : frequencyMap.keySet().descendingSet()) { final NavigableSet<SimplePath> pathsForFrequency = frequencyMap.get(frequency); for (final SimplePath pathForFrequency : pathsForFrequency) { pathCounts.put(pathForFrequency, frequency); } } } else { pathCounts = null; } }
From source file:de.topobyte.osm4j.utils.executables.OsmContributorHistogram.java
/**
 * Streams every entity, tallies contributions per user id in {@code counter}
 * and records each distinct username per id in {@code map}, then prints a
 * histogram sorted most-active-contributor-first with a cumulative
 * percentage of all entities covered.
 */
private void run() throws IOException {
    long processed = 0;
    OsmIterator iterator = createIterator();
    while (iterator.hasNext()) {
        EntityContainer container = iterator.next();
        OsmEntity entity = container.getEntity();
        OsmMetadata metadata = entity.getMetadata();
        processed++;
        if (metadata == null) {
            continue;
        }
        long uid = metadata.getUid();
        String username = metadata.getUser();
        counter.add(uid);
        // Remember every distinct username observed for this uid.
        if (!map.containsEntry(uid, username)) {
            map.put(uid, username);
        }
    }
    if (counter.isEmpty()) {
        System.out.println("No metadata found");
        return;
    }
    // Order user ids by descending contribution count.
    ImmutableMultiset<Long> histogram = Multisets.copyHighestCountFirst(counter);
    // The widest count (the first one) fixes the column width of the output.
    long topUid = histogram.iterator().next();
    int topCount = histogram.count(topUid);
    int width = String.format("%d", topCount).length();
    String pattern = String.format("[%%6.2f%%%%] %%%dd: %%d (%%s)", width);
    long cumulative = 0;
    for (long id : histogram.elementSet()) {
        int count = histogram.count(id);
        cumulative += count;
        double done = cumulative / (double) processed;
        List<String> names = new ArrayList<>(map.get(id));
        Collections.sort(names);
        System.out.println(String.format(pattern, done * 100, count, id, Joiner.on(",").join(names)));
    }
    finish();
}
From source file:org.trnltk.apps.experiments.AmbiguityMatrixApp.java
/**
 * Returns the distinct surface forms in {@code wordCountSet} that occur
 * strictly more than five times (the legacy threshold), in the multiset's
 * element iteration order.
 */
private List<String> getDistinctWordsWithEnoughOccurrences(ImmutableMultiset<String> wordCountSet) {
    // Delegate to the generalized form; 5 preserves the original behavior.
    return getDistinctWordsWithEnoughOccurrences(wordCountSet, 5);
}

/**
 * Generalized form: returns the distinct surface forms occurring strictly
 * more than {@code minCount} times.
 *
 * @param wordCountSet surface forms with their occurrence counts
 * @param minCount     exclusive lower bound on the occurrence count
 */
private List<String> getDistinctWordsWithEnoughOccurrences(ImmutableMultiset<String> wordCountSet, int minCount) {
    final List<String> frequentWords = new ArrayList<String>();
    for (String surface : wordCountSet.elementSet()) {
        if (wordCountSet.count(surface) > minCount) {
            frequentWords.add(surface);
        }
    }
    return frequentWords;
}
From source file:webreduce.indexing.luceneSearcher.java
public List<Dataset> search() throws IOException { List<Dataset> resultList; resultList = new ArrayList<>(); BooleanQuery.Builder finalQueryBuilder = new BooleanQuery.Builder(); BooleanQuery.Builder entityQueryBuilder = new BooleanQuery.Builder(); BooleanQuery.Builder attributeQueryBuilder = new BooleanQuery.Builder(); //gives me queries QueryParser qpa = new QueryParser(ATTRIBUTES_FIELD, new CustomAnalyzer()); QueryParser qpe = new QueryParser(ENTITIES_FIELD, new CustomAnalyzer()); //QueryWrapperFilter queryFilter = new QueryWrapperFilter(query); //CachingWrapperFilter cachingFilter = new CachingWrapperFilter(queryFilter); //CachingWrapperQuery typeFilterR = new CachingWrapperFilter(new TermsFilter(new Term(TABLE_TYPE_FIELD, "RELATION"))); IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir))); IndexSearcher searcher = new IndexSearcher(reader); QueryBuilder queryBuilder = new QueryBuilder(new CustomAnalyzer()); System.out.println("Attributes: \n" + Arrays.deepToString(attributes)); System.out.println("Entities: \n" + Arrays.deepToString(entities)); //add attributes one by one for (String a : attributes) { Query qa;/*from ww w . ja va2 s. 
c o m*/ try { qa = qpa.parse("\"" + a + "\""); attributeQueryBuilder.add(qa, BooleanClause.Occur.SHOULD); } catch (ParseException ex) { } } //end of for loop //remove null HashSet<String> entitySet; entitySet = new HashSet<>(Arrays.asList(entities)); entitySet.remove(null); entities = entitySet.toArray(new String[entitySet.size()]); System.out.println("Entities after null removal \n" + Arrays.deepToString(entities)); Multiset<Integer> docNoCount; docNoCount = HashMultiset.create(); //Take only top 50 entities; String[] entities50 = new String[50]; System.arraycopy(entities, 0, entities50, 0, 50); System.out.println(Arrays.deepToString(entities50)); for (String e : entities50) { System.out.println(e); if (e == null) { continue; } Query qe; try { qe = qpe.parse(QueryParserBase.escape(e)); //Query qeph = qpe.parse("\"" + QueryParserBase.escape(e) + "\""); finalQueryBuilder.add(qe, BooleanClause.Occur.MUST); //add entities boolean query finalQueryBuilder.add(attributeQueryBuilder.build(), BooleanClause.Occur.MUST); //add attributes query TopDocs td = searcher.search(finalQueryBuilder.build(), numberOfResults * 10); for (ScoreDoc sd : td.scoreDocs) { int docNo = sd.doc; docNoCount.add(docNo); } } catch (ParseException ex) { } System.out.println("Top Doc id: \n" + Multisets.copyHighestCountFirst(docNoCount).entrySet().iterator().next().getElement()); } //Sort the returned docs by their frequency and store it in docNoSorted ImmutableMultiset<Integer> docNoSorted = Multisets.copyHighestCountFirst(docNoCount); //Get the entry set of the frequency ordered document set ImmutableSet<Multiset.Entry<Integer>> entrySet = Multisets.copyHighestCountFirst(docNoCount).entrySet(); //Get the iterator for the sorted entry set UnmodifiableIterator<Multiset.Entry<Integer>> iterator = entrySet.iterator(); int bestDocId = iterator.next().getElement(); System.out.println("first count" + iterator.next()); // Set<Integer> elementSet = docNoSorted.elementSet(); Integer next = 
elementSet.iterator().next(); System.out.println("Most frequent document id: " + next); int resultSetSize; resultSetSize = docNoSorted.elementSet().size(); System.out.println("Entry Set Size: " + resultSetSize + " Cardinality: " + docNoSorted.size()); Set<Integer> elementSet1 = Multisets.copyHighestCountFirst(docNoSorted).elementSet(); List<Integer> t = new ArrayList<Integer>(elementSet1); List<Integer> subList = t.subList(0, numberOfResults); //ArrayList subArrayList = new ArrayList(subList); Iterator<Integer> subListIterator = subList.iterator(); //we have all the web table doc IDs //We snould take while (subListIterator.hasNext()) { int docID = subListIterator.next(); Document doc; doc = searcher.doc(docID); String jsonString = doc.get("full_result"); Dataset er = Dataset.fromJson(jsonString); resultList.add(er); } return resultList; }