Usage examples for com.google.common.collect.Iterators.mergeSorted
@Beta public static <T> UnmodifiableIterator<T> mergeSorted(Iterable<? extends Iterator<? extends T>> iterators, Comparator<? super T> comparator)
From source file:com.spotify.heroic.metric.MetricCollection.java
/**
 * Builds a collection of the given type from several metric lists by merging them into a
 * single list ordered by {@code Metric.comparator()}.
 *
 * <p>The merge is a k-way merge over the lists' iterators, so each input list is expected to
 * already be ordered by that same comparator.
 *
 * @param type the metric type handed to {@code build}
 * @param values the individual metric lists to merge
 * @return the collection produced by {@code build} for the merged data
 */
public static MetricCollection mergeSorted(
    final MetricType type, final List<List<? extends Metric>> values
) {
    // One iterator per input list, merged lazily and then snapshotted into an immutable list.
    final List<Metric> merged = ImmutableList.copyOf(
        Iterators.mergeSorted(
            ImmutableList.copyOf(values.stream().map(List::iterator).iterator()),
            Metric.comparator()));

    return build(type, merged);
}
From source file:org.broad.igv.data.CombinedDataSource.java
public List<LocusScore> getSummaryScoresForRange(String chr, int startLocation, int endLocation, int zoom) { List<LocusScore> outerScores = this.source0.getSummaryScores(chr, startLocation, endLocation, zoom); List<LocusScore> innerScores = this.source1.getSummaryScores(chr, startLocation, endLocation, zoom); int initialSize = outerScores.size() + innerScores.size(); List<LocusScore> combinedScoresList = new ArrayList<LocusScore>(initialSize); if (initialSize == 0) return combinedScoresList; //TODO We assume that having no data from one source is the identity operation, that may not be true if (innerScores.size() == 0) return outerScores; if (outerScores.size() == 0) return innerScores; /**/*from w w w . j a va 2 s.c om*/ * We first generate the chunks which will need to be calculated separately * This is the set of all start/end positions of outerScores and innerScores * We could be a bit smarter, but this is simpler and there's no problem with * skipping over intervals which don't have data later. * * Following that, for each interval generated, we search outerScores and innerScores * for the unique LocusScore which contains the generated interval. 
*/ //Generate the boundaries for the new combined regions Set<Integer> boundariesSet = new LinkedHashSet<Integer>(2 * initialSize); Iterator<LocusScore> dualIter = Iterators.mergeSorted( Arrays.asList(innerScores.iterator(), outerScores.iterator()), new Comparator<LocusScore>() { @Override public int compare(LocusScore o1, LocusScore o2) { return o1.getStart() - o2.getStart(); } }); while (dualIter.hasNext()) { LocusScore score = dualIter.next(); boundariesSet.add(score.getStart()); boundariesSet.add(score.getEnd()); } Integer[] boundariesArray = boundariesSet.toArray(new Integer[0]); Arrays.sort(boundariesArray); int outerScoreInd = 0; int innerScoreInd = 0; //Calculate value for each interval for (int bb = 0; bb < boundariesArray.length - 1; bb++) { int start = boundariesArray[bb]; int end = boundariesArray[bb + 1]; //It shouldn't be possible for more than one LocusScore of either //tracks to overlap each interval, since the start/ends //were based on all start/ends of the inputs outerScoreInd = findContains(start, end, outerScores, Math.max(outerScoreInd, 0)); innerScoreInd = findContains(start, end, innerScores, Math.max(innerScoreInd, 0)); LocusScore outerScore = getContains(outerScores, outerScoreInd); LocusScore innerScore = getContains(innerScores, innerScoreInd); if (outerScore == null && innerScore == null) continue; float score = combineScores(outerScore, innerScore); BasicScore newScore = new BasicScore(start, end, score); combinedScoresList.add(newScore); } return combinedScoresList; }
From source file:org.kiji.schema.impl.cassandra.CassandraKijiRowScanner.java
/** * Creates a KijiRowScanner over a CassandraKijiTable. * * @param table being scanned./*from w w w . j a v a 2 s. c o m*/ * @param dataRequest of scan. * @param cellDecoderProvider of table being scanned. * @param resultSets of scan. * @throws IOException if there is a problem creating the row scanner. */ public CassandraKijiRowScanner(CassandraKijiTable table, KijiDataRequest dataRequest, CellDecoderProvider cellDecoderProvider, List<ResultSet> resultSets) throws IOException { mConstructorStack = CLEANUP_LOG.isDebugEnabled() ? Debug.getStackTrace() : ""; mDataRequest = dataRequest; mLayout = table.getLayout(); mTable = table; mCellDecoderProvider = cellDecoderProvider; mEntityIdFactory = EntityIdFactory.getFactory(mTable.getLayout()); final State oldState = mState.getAndSet(State.OPEN); Preconditions.checkState(oldState == State.UNINITIALIZED, "Cannot open KijiRowScanner instance in state %s.", oldState); // Create an iterator to hold the Row objects returned by the column scans. The iterator should // return Row objects in order of token and then EntityID, so to that Kiji entities have their // Row objects contiguously served by the iterator. List<Iterator<Row>> rowIterators = Lists.newArrayList(); for (ResultSet resultSet : resultSets) { Iterator<Row> rowIterator = resultSet.iterator(); rowIterators.add(Iterators.peekingIterator(rowIterator)); } mRowsIterator = Iterators .peekingIterator(Iterators.mergeSorted(rowIterators, new RowComparator(mTable.getLayout()))); }
From source file:com.yandex.yoctodb.v1.immutable.V1CompositeDatabase.java
/**
 * Runs the query against every database, feeds the resulting documents (after skip and limit)
 * to the processor, and reports how many documents matched the filter in total.
 *
 * <p>Processing stops as soon as the processor returns {@code false}. The returned count is
 * accumulated from the per-database filter cardinalities, so skip and limit do not affect it.
 *
 * @param query     the query to execute
 * @param processor receives each document together with its database
 * @return the total number of filtered documents across all databases, or {@code 0} if no
 *         database produced any
 */
@Override
public int executeAndUnlimitedCount(
        @NotNull final Query query,
        @NotNull final DocumentProcessor processor) {
    int matched = 0;
    final Iterator<ScoredDocument<?>> documents;

    if (query.hasSorting()) {
        // Sorted query: every database yields a sorted iterator and these get merged.
        final List<Iterator<? extends ScoredDocument<?>>> sortedPerDatabase =
                new ArrayList<>(databases.size());
        for (IndexedDatabase database : databases) {
            final BitSet hits = query.filteredUnlimited(database, bitSetPool);
            if (hits == null) {
                continue;
            }
            assert !hits.isEmpty();

            final int total = database.getDocumentCount();
            final int cardinality = hits.cardinality();
            // When every document matched, a read-only all-ones bit set is used instead.
            final BitSet filter = cardinality == total ? new ReadOnlyOneBitSet(total) : hits;

            sortedPerDatabase.add(query.sortedUnlimited(filter, database, bitSetPool));
            matched += cardinality;
        }

        if (sortedPerDatabase.isEmpty()) {
            return 0;
        }

        documents = Iterators.mergeSorted(sortedPerDatabase, SCORED_DOCUMENT_COMPARATOR);
    } else {
        // Unsorted query: the per-database results are simply concatenated.
        final List<QueryContext> contexts = new ArrayList<>(databases.size());
        for (IndexedDatabase database : databases) {
            final BitSet hits = query.filteredUnlimited(database, bitSetPool);
            if (hits == null) {
                continue;
            }
            assert !hits.isEmpty();

            final int total = database.getDocumentCount();
            final int cardinality = hits.cardinality();
            final BitSet filter = cardinality == total ? new ReadOnlyOneBitSet(total) : hits;

            contexts.add(new QueryContext(filter, database, bitSetPool));
            matched += cardinality;
        }

        if (contexts.isEmpty()) {
            return 0;
        }

        documents = Iterators.concat(new SortResultIterator(query, contexts.iterator()));
    }

    // Drop the leading documents the query asked to skip.
    if (query.getSkip() != 0) {
        Iterators.advance(documents, query.getSkip());
    }

    // Integer.MAX_VALUE is treated as "no limit".
    final Iterator<ScoredDocument<?>> page =
            query.getLimit() == Integer.MAX_VALUE
                    ? documents
                    : Iterators.limit(documents, query.getLimit());

    while (page.hasNext()) {
        final ScoredDocument<?> current = page.next();
        final boolean keepGoing =
                processor.process(current.getDocument(), current.getDatabase());
        if (!keepGoing) {
            break;
        }
    }

    return matched;
}
From source file:org.locationtech.geogig.api.RevTreeImpl.java
/**
 * Returns the direct children of this tree: its tree nodes and feature nodes.
 *
 * <p>May only be called when no buckets are present. When both kinds of children exist they
 * are merged into one sequence using {@code ordering}; otherwise the iterator of whichever list
 * is non-empty (or of the empty feature list) is returned directly.
 *
 * @return an iterator over the child nodes
 * @throws IllegalStateException if buckets are present
 */
@Override
public Iterator<Node> children() {
    Preconditions.checkState(!buckets().isPresent());

    final ImmutableList<Node> treeNodes = trees().or(ImmutableList.<Node>of());
    final ImmutableList<Node> featureNodes = features().or(ImmutableList.<Node>of());

    if (!treeNodes.isEmpty() && !featureNodes.isEmpty()) {
        // Both kinds present: interleave them according to the ordering.
        return Iterators.mergeSorted(
                ImmutableList.of(treeNodes.iterator(), featureNodes.iterator()), ordering);
    }

    // At most one of the lists has content, so no merge is needed.
    return treeNodes.isEmpty() ? featureNodes.iterator() : treeNodes.iterator();
}
From source file:org.kiji.schema.impl.cassandra.CassandraKijiResultScanner.java
/** * Get an iterator of the entity IDs in a list of Cassandra Kiji tables that correspond to a * subset of cassandra tables in a Kiji table. * * @param tables The Cassandra tables to get Entity IDs from. * @param options The scan options. May specify start and stop tokens. * @param table The Kiji Cassandra table which the Cassandra tables belong to. * @param layout The layout of the Kiji Cassandra table. * @return An iterator of Entity IDs.// ww w . ja v a2 s . c o m */ public static Iterator<EntityId> getEntityIDs(final List<CassandraTableName> tables, final CassandraKijiScannerOptions options, final CassandraKijiTable table, final KijiTableLayout layout) { final List<ResultSetFuture> localityGroupFutures = FluentIterable.from(tables) .transform(new Function<CassandraTableName, Statement>() { /** {@inheritDoc} */ @Override public Statement apply(final CassandraTableName tableName) { return CQLUtils.getEntityIDScanStatement(layout, tableName, options); } }).transform(new Function<Statement, ResultSetFuture>() { /** {@inheritDoc} */ @Override public ResultSetFuture apply(final Statement statement) { return table.getAdmin().executeAsync(statement); } }) // Force futures to execute by sending results to a list .toList(); // We can use the DISTINCT optimization iff the entity ID contains only hashed components RowKeyFormat2 keyFormat = (RowKeyFormat2) layout.getDesc().getKeysFormat(); final boolean deduplicateComponents = keyFormat.getRangeScanStartIndex() != keyFormat.getComponents() .size(); if (deduplicateComponents) { LOG.warn( "Scanning a Cassandra Kiji table with non-hashed entity ID components is" + " inefficient. Consider hashing all entity ID components. 
Table: {}.", table.getURI()); } final List<Iterator<TokenRowKeyComponents>> tokenRowKeyStreams = FluentIterable.from(localityGroupFutures) .transform(new Function<ResultSetFuture, Iterator<Row>>() { /** {@inheritDoc} */ @Override public Iterator<Row> apply(final ResultSetFuture future) { return CassandraKijiResult.unwrapFuture(future).iterator(); } }).transform(new Function<Iterator<Row>, Iterator<TokenRowKeyComponents>>() { /** {@inheritDoc} */ @Override public Iterator<TokenRowKeyComponents> apply(final Iterator<Row> rows) { return Iterators.transform(rows, RowDecoders.getRowKeyDecoderFunction(layout)); } }).transform(new Function<Iterator<TokenRowKeyComponents>, Iterator<TokenRowKeyComponents>>() { /** {@inheritDoc} */ @Override public Iterator<TokenRowKeyComponents> apply(final Iterator<TokenRowKeyComponents> components) { if (deduplicateComponents) { return IteratorUtils.deduplicatingIterator(components); } else { return components; } } }).toList(); return Iterators.transform( IteratorUtils.deduplicatingIterator( Iterators.mergeSorted(tokenRowKeyStreams, TokenRowKeyComponentsComparator.getInstance())), RowDecoders.getEntityIdFunction(table)); }
From source file:org.apache.jackrabbit.oak.query.UnionQueryImpl.java
@Override public Iterator<ResultRowImpl> getRows() { prepare();/*from w ww .jav a 2s.co m*/ if (explain) { String plan = getPlan(); columns = new ColumnImpl[] { new ColumnImpl("explain", "plan", "plan") }; ResultRowImpl r = new ResultRowImpl(this, Tree.EMPTY_ARRAY, new PropertyValue[] { PropertyValues.newString(plan) }, null, null); return Arrays.asList(r).iterator(); } if (LOG.isDebugEnabled()) { if (isInternal) { LOG.trace("query union plan {}", getPlan()); } else { LOG.debug("query union plan {}", getPlan()); } } boolean distinct = !unionAll; Comparator<ResultRowImpl> orderBy = ResultRowImpl.getComparator(orderings); Iterator<ResultRowImpl> it; final Iterator<ResultRowImpl> leftRows = left.getRows(); final Iterator<ResultRowImpl> rightRows = right.getRows(); Iterator<ResultRowImpl> leftIter = leftRows; Iterator<ResultRowImpl> rightIter = rightRows; // if measure retrieve the backing delegate iterator instead if (measure) { leftIter = ((MeasuringIterator) leftRows).getDelegate(); rightIter = ((MeasuringIterator) rightRows).getDelegate(); } // Since sorted by index use a merge iterator if (isSortedByIndex()) { it = FilterIterators.newCombinedFilter( Iterators.mergeSorted(ImmutableList.of(leftIter, rightIter), orderBy), distinct, limit, offset, null, settings); } else { it = FilterIterators.newCombinedFilter(Iterators.concat(leftIter, rightIter), distinct, limit, offset, orderBy, settings); } if (measure) { // return the measuring iterator for the union it = new MeasuringIterator(this, it) { MeasuringIterator left = (MeasuringIterator) leftRows; MeasuringIterator right = (MeasuringIterator) rightRows; @Override protected void setColumns(ColumnImpl[] cols) { columns = cols; left.setColumns(cols); right.setColumns(cols); } @Override protected Map<String, Long> getSelectorScanCount() { // Merge the 2 maps from the left and right queries to get the selector counts Map<String, Long> leftSelectorScan = left.getSelectorScanCount(); Map<String, Long> rightSelectorScan = 
right.getSelectorScanCount(); Map<String, Long> unionScan = Maps.newHashMap(leftSelectorScan); for (String key : rightSelectorScan.keySet()) { if (unionScan.containsKey(key)) { unionScan.put(key, rightSelectorScan.get(key) + unionScan.get(key)); } else { unionScan.put(key, rightSelectorScan.get(key)); } } return unionScan; } @Override protected long getReadCount() { return left.getReadCount() + right.getReadCount(); } }; } return it; }
From source file:com.google.cloud.genomics.localrepo.QueryEngine.java
/**
 * Produces one page of reads by merging the per-file record iterators in natural order.
 *
 * <p>At most {@code pageSize} records are taken from the merged sequence; only those whose
 * readset ID passes {@code readsetFilter} are turned into reads. Afterwards, every iterator
 * that still has elements contributes a start position (reference name, alignment start and
 * skip of its next record) to the query descriptor used as the second argument of the
 * response. If no iterator has elements left, that argument is {@code null}.
 *
 * @param iterators     the peeking record iterators, keyed by file
 * @param end           passed to {@code QueryDescriptor.create}
 * @param readsetFilter selects which readset IDs are included in the page
 * @return the response holding the collected reads and the follow-up descriptor string, if any
 */
private SearchReadsResponse searchReads(
    Map<File, PeekingIterator<SAMRecordWithSkip>> iterators,
    final int end,
    Predicate<String> readsetFilter) {
  List<Read> reads = new ArrayList<>();

  Iterator<SAMRecordWithSkip> page =
      Iterators.limit(Iterators.mergeSorted(iterators.values(), Comparator.naturalOrder()), pageSize);
  while (page.hasNext()) {
    SAMRecordWithSkip current = page.next();
    String readsetId = getReadsetId(current.file, current.record);
    if (!readsetFilter.test(readsetId)) {
      continue;
    }
    reads.add(read(readsetId, current.record));
  }

  // Only files that still have records left take part in the follow-up descriptor.
  Map<File, PeekingIterator<SAMRecordWithSkip>> remaining =
      Maps.filterValues(iterators, pending -> pending.hasNext());

  String nextDescriptor = null;
  if (!remaining.isEmpty()) {
    nextDescriptor = QueryDescriptor.create(
        new HashMap<>(Maps.transformValues(remaining,
            new Function<PeekingIterator<SAMRecordWithSkip>, QueryDescriptor.Start>() {
              @Override
              public QueryDescriptor.Start apply(PeekingIterator<SAMRecordWithSkip> pending) {
                // Peek rather than consume: the next record stays in the iterator.
                SAMRecordWithSkip head = pending.peek();
                return QueryDescriptor.Start.create(
                    head.record.getReferenceName(),
                    head.record.getAlignmentStart(),
                    head.skip);
              }
            })),
        end).toString();
  }

  return SearchReadsResponse.create(reads, nextDescriptor);
}
From source file:com.simiacryptus.text.CharTrie.java
private <T extends Comparable<T>> Stream<TrieNode> max(Function<TrieNode, T> fn, int maxResults, TrieNode node) {//from w w w.j a va 2 s .co m return StreamSupport.stream(Spliterators.spliteratorUnknownSize( Iterators.mergeSorted(Stream .concat(Stream.of(Stream.of(node)), node.getChildren().map(x -> max(fn, maxResults, x))) .map(x -> x.iterator()).collect(Collectors.toList()), Comparator.comparing(fn).reversed()), Spliterator.ORDERED), false).limit(maxResults).collect(Collectors.toList()).stream(); }