Example usage for com.google.common.collect Iterators mergeSorted

Introduction

On this page you can find example usages of com.google.common.collect Iterators mergeSorted, collected from open-source projects.

Prototype

@Beta
public static <T> UnmodifiableIterator<T> mergeSorted(Iterable<? extends Iterator<? extends T>> iterators,
        Comparator<? super T> comparator) 

Document

Returns an iterator over the merged contents of all given iterators, traversing every element of the input iterators.
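
To make the contract concrete, here is a minimal, self-contained sketch (not drawn from the projects below; the class name is illustrative). Each input iterator must already be sorted by the supplied comparator, since mergeSorted only merges and never sorts; equivalent entries are not de-duplicated.

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;

import java.util.Comparator;
import java.util.Iterator;

public class MergeSortedSketch {
    public static void main(String[] args) {
        // Both inputs are already sorted ascending; mergeSorted merges them lazily.
        Iterator<Integer> a = ImmutableList.of(1, 4, 7).iterator();
        Iterator<Integer> b = ImmutableList.of(2, 4, 9).iterator();

        Iterator<Integer> merged = Iterators.mergeSorted(
                ImmutableList.of(a, b), Comparator.<Integer>naturalOrder());

        while (merged.hasNext()) {
            System.out.print(merged.next() + " "); // prints: 1 2 4 4 7 9
        }
    }
}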

Usage

From source file:com.spotify.heroic.metric.MetricCollection.java

public static MetricCollection mergeSorted(final MetricType type, final List<List<? extends Metric>> values) {
    final List<Metric> data = ImmutableList.copyOf(Iterators.mergeSorted(
            ImmutableList.copyOf(values.stream().map(Iterable::iterator).iterator()), Metric.comparator()));
    return build(type, data);
}

From source file:org.broad.igv.data.CombinedDataSource.java

public List<LocusScore> getSummaryScoresForRange(String chr, int startLocation, int endLocation, int zoom) {

    List<LocusScore> outerScores = this.source0.getSummaryScores(chr, startLocation, endLocation, zoom);
    List<LocusScore> innerScores = this.source1.getSummaryScores(chr, startLocation, endLocation, zoom);

    int initialSize = outerScores.size() + innerScores.size();
    List<LocusScore> combinedScoresList = new ArrayList<LocusScore>(initialSize);

    if (initialSize == 0)
        return combinedScoresList;

    //TODO We assume that having no data from one source is the identity operation, that may not be true
    if (innerScores.size() == 0)
        return outerScores;
    if (outerScores.size() == 0)
        return innerScores;

    /*
     * We first generate the chunks which will need to be calculated separately
     * This is the set of all start/end positions of outerScores and innerScores
     * We could be a bit smarter, but this is simpler and there's no problem with
     * skipping over intervals which don't have data later.
     *
     * Following that, for each interval generated, we search outerScores and innerScores
     * for the unique LocusScore which contains the generated interval.
     */

    //Generate the boundaries for the new combined regions
    Set<Integer> boundariesSet = new LinkedHashSet<Integer>(2 * initialSize);
    Iterator<LocusScore> dualIter = Iterators.mergeSorted(
            Arrays.asList(innerScores.iterator(), outerScores.iterator()), new Comparator<LocusScore>() {

                @Override
                public int compare(LocusScore o1, LocusScore o2) {
                    return o1.getStart() - o2.getStart();
                }
            });
    while (dualIter.hasNext()) {
        LocusScore score = dualIter.next();
        boundariesSet.add(score.getStart());
        boundariesSet.add(score.getEnd());
    }
    Integer[] boundariesArray = boundariesSet.toArray(new Integer[0]);
    Arrays.sort(boundariesArray);

    int outerScoreInd = 0;
    int innerScoreInd = 0;
    //Calculate value for each interval
    for (int bb = 0; bb < boundariesArray.length - 1; bb++) {
        int start = boundariesArray[bb];
        int end = boundariesArray[bb + 1];
        //It shouldn't be possible for more than one LocusScore of either
        //track to overlap each interval, since the start/ends
        //were based on all start/ends of the inputs
        outerScoreInd = findContains(start, end, outerScores, Math.max(outerScoreInd, 0));
        innerScoreInd = findContains(start, end, innerScores, Math.max(innerScoreInd, 0));
        LocusScore outerScore = getContains(outerScores, outerScoreInd);
        LocusScore innerScore = getContains(innerScores, innerScoreInd);

        if (outerScore == null && innerScore == null)
            continue;
        float score = combineScores(outerScore, innerScore);
        BasicScore newScore = new BasicScore(start, end, score);
        combinedScoresList.add(newScore);
    }
    return combinedScoresList;
}

From source file:org.kiji.schema.impl.cassandra.CassandraKijiRowScanner.java

/**
 * Creates a KijiRowScanner over a CassandraKijiTable.
 *
 * @param table being scanned.
 * @param dataRequest of scan.
 * @param cellDecoderProvider of table being scanned.
 * @param resultSets of scan.
 * @throws IOException if there is a problem creating the row scanner.
 */
public CassandraKijiRowScanner(CassandraKijiTable table, KijiDataRequest dataRequest,
        CellDecoderProvider cellDecoderProvider, List<ResultSet> resultSets) throws IOException {

    mConstructorStack = CLEANUP_LOG.isDebugEnabled() ? Debug.getStackTrace() : "";

    mDataRequest = dataRequest;
    mLayout = table.getLayout();
    mTable = table;
    mCellDecoderProvider = cellDecoderProvider;
    mEntityIdFactory = EntityIdFactory.getFactory(mTable.getLayout());

    final State oldState = mState.getAndSet(State.OPEN);
    Preconditions.checkState(oldState == State.UNINITIALIZED,
            "Cannot open KijiRowScanner instance in state %s.", oldState);

    // Create an iterator to hold the Row objects returned by the column scans.  The iterator should
    // return Row objects in order of token and then EntityID, so that Kiji entities have their
    // Row objects contiguously served by the iterator.
    List<Iterator<Row>> rowIterators = Lists.newArrayList();
    for (ResultSet resultSet : resultSets) {
        Iterator<Row> rowIterator = resultSet.iterator();
        rowIterators.add(Iterators.peekingIterator(rowIterator));
    }

    mRowsIterator = Iterators
            .peekingIterator(Iterators.mergeSorted(rowIterators, new RowComparator(mTable.getLayout())));
}
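
Wrapping mergeSorted in a peeking iterator, as above, lets the caller consume all Row objects for one entity as a contiguous run. Below is a stand-alone sketch of that grouping idiom, using hypothetical string keys in place of Kiji rows (illustrative, not from the Kiji codebase).

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;
import com.google.common.collect.PeekingIterator;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class GroupedMergeSketch {
    public static void main(String[] args) {
        // Two shards, each sorted by the key before the colon; a key may appear in both.
        PeekingIterator<String> merged = Iterators.peekingIterator(Iterators.mergeSorted(
                ImmutableList.of(
                        ImmutableList.of("a:1", "c:1").iterator(),
                        ImmutableList.of("a:2", "b:1").iterator()),
                Comparator.comparing((String s) -> s.substring(0, s.indexOf(':')))));

        // The global sort order guarantees that entries sharing a key arrive
        // contiguously, so each group can be collected in a single pass.
        while (merged.hasNext()) {
            String key = merged.peek().substring(0, merged.peek().indexOf(':'));
            List<String> group = new ArrayList<>();
            while (merged.hasNext() && merged.peek().startsWith(key + ":")) {
                group.add(merged.next());
            }
            System.out.println(key + " -> " + group);
        }
    }
}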

From source file:com.yandex.yoctodb.v1.immutable.V1CompositeDatabase.java

@Override
public int executeAndUnlimitedCount(@NotNull final Query query, @NotNull final DocumentProcessor processor) {
    int result = 0;
    final Iterator<ScoredDocument<?>> iterator;

    // Doing merging iff there is sorting
    if (query.hasSorting()) {
        final List<Iterator<? extends ScoredDocument<?>>> results = new ArrayList<>(databases.size());
        for (IndexedDatabase db : databases) {
            final BitSet docs = query.filteredUnlimited(db, bitSetPool);
            if (docs != null) {
                assert !docs.isEmpty();

                final int dbSize = db.getDocumentCount();
                final int count = docs.cardinality();
                final BitSet filter;
                if (count == dbSize) {
                    filter = new ReadOnlyOneBitSet(dbSize);
                } else {
                    filter = docs;
                }
                results.add(query.sortedUnlimited(filter, db, bitSetPool));
                result += count;
            }
        }

        if (results.isEmpty()) {
            return 0;
        }

        iterator = Iterators.mergeSorted(results, SCORED_DOCUMENT_COMPARATOR);
    } else {
        final List<QueryContext> results = new ArrayList<>(databases.size());
        for (IndexedDatabase db : databases) {
            final BitSet docs = query.filteredUnlimited(db, bitSetPool);
            if (docs != null) {
                assert !docs.isEmpty();

                final int dbSize = db.getDocumentCount();
                final int count = docs.cardinality();
                final BitSet filter;
                if (count == dbSize) {
                    filter = new ReadOnlyOneBitSet(dbSize);
                } else {
                    filter = docs;
                }
                results.add(new QueryContext(filter, db, bitSetPool));
                result += count;
            }
        }

        if (results.isEmpty()) {
            return 0;
        }

        iterator = Iterators.concat(new SortResultIterator(query, results.iterator()));
    }

    // Skipping values
    if (query.getSkip() != 0) {
        Iterators.advance(iterator, query.getSkip());
    }

    // Limited
    final Iterator<ScoredDocument<?>> limitedIterator;
    if (query.getLimit() == Integer.MAX_VALUE) {
        limitedIterator = iterator;
    } else {
        limitedIterator = Iterators.limit(iterator, query.getLimit());
    }

    while (limitedIterator.hasNext()) {
        final ScoredDocument<?> document = limitedIterator.next();
        if (!processor.process(document.getDocument(), document.getDatabase())) {
            return result;
        }
    }

    return result;
}
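
The tail of this method shows a common paging idiom over the merged result: Iterators.advance implements the skip and Iterators.limit caps the page size. A stand-alone sketch with illustrative values:

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;

import java.util.Comparator;
import java.util.Iterator;

public class PagedMergeSketch {
    public static void main(String[] args) {
        // Two ascending inputs merged lazily into the sequence 1..8.
        Iterator<Integer> merged = Iterators.mergeSorted(
                ImmutableList.of(
                        ImmutableList.of(1, 3, 5, 7).iterator(),
                        ImmutableList.of(2, 4, 6, 8).iterator()),
                Comparator.<Integer>naturalOrder());

        // Page 2 with a page size of 3: advance past the first page, then cap the next.
        Iterators.advance(merged, 3);
        Iterator<Integer> page = Iterators.limit(merged, 3);
        while (page.hasNext()) {
            System.out.print(page.next() + " "); // prints: 4 5 6
        }
    }
}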

From source file:org.locationtech.geogig.api.RevTreeImpl.java

@Override
public Iterator<Node> children() {
    Preconditions.checkState(!buckets().isPresent());
    ImmutableList<Node> trees = trees().or(ImmutableList.<Node>of());
    ImmutableList<Node> features = features().or(ImmutableList.<Node>of());
    if (trees.isEmpty()) {
        return features.iterator();
    }
    if (features.isEmpty()) {
        return trees.iterator();
    }
    return Iterators.mergeSorted(ImmutableList.of(trees.iterator(), features.iterator()), ordering);
}

From source file:org.kiji.schema.impl.cassandra.CassandraKijiResultScanner.java

/**
 * Get an iterator of the entity IDs in a list of Cassandra Kiji tables that correspond to a
 * subset of cassandra tables in a Kiji table.
 *
 * @param tables The Cassandra tables to get Entity IDs from.
 * @param options The scan options. May specify start and stop tokens.
 * @param table The Kiji Cassandra table which the Cassandra tables belong to.
 * @param layout The layout of the Kiji Cassandra table.
 * @return An iterator of Entity IDs.
 */
public static Iterator<EntityId> getEntityIDs(final List<CassandraTableName> tables,
        final CassandraKijiScannerOptions options, final CassandraKijiTable table,
        final KijiTableLayout layout) {

    final List<ResultSetFuture> localityGroupFutures = FluentIterable.from(tables)
            .transform(new Function<CassandraTableName, Statement>() {
                /** {@inheritDoc} */
                @Override
                public Statement apply(final CassandraTableName tableName) {
                    return CQLUtils.getEntityIDScanStatement(layout, tableName, options);
                }
            }).transform(new Function<Statement, ResultSetFuture>() {
                /** {@inheritDoc} */
                @Override
                public ResultSetFuture apply(final Statement statement) {
                    return table.getAdmin().executeAsync(statement);
                }
            })
            // Force futures to execute by sending results to a list
            .toList();

    // We can use the DISTINCT optimization iff the entity ID contains only hashed components
    RowKeyFormat2 keyFormat = (RowKeyFormat2) layout.getDesc().getKeysFormat();
    final boolean deduplicateComponents = keyFormat.getRangeScanStartIndex() != keyFormat.getComponents()
            .size();

    if (deduplicateComponents) {
        LOG.warn(
                "Scanning a Cassandra Kiji table with non-hashed entity ID components is"
                        + " inefficient.  Consider hashing all entity ID components. Table: {}.",
                table.getURI());
    }

    final List<Iterator<TokenRowKeyComponents>> tokenRowKeyStreams = FluentIterable.from(localityGroupFutures)
            .transform(new Function<ResultSetFuture, Iterator<Row>>() {
                /** {@inheritDoc} */
                @Override
                public Iterator<Row> apply(final ResultSetFuture future) {
                    return CassandraKijiResult.unwrapFuture(future).iterator();
                }
            }).transform(new Function<Iterator<Row>, Iterator<TokenRowKeyComponents>>() {
                /** {@inheritDoc} */
                @Override
                public Iterator<TokenRowKeyComponents> apply(final Iterator<Row> rows) {
                    return Iterators.transform(rows, RowDecoders.getRowKeyDecoderFunction(layout));
                }
            }).transform(new Function<Iterator<TokenRowKeyComponents>, Iterator<TokenRowKeyComponents>>() {
                /** {@inheritDoc} */
                @Override
                public Iterator<TokenRowKeyComponents> apply(final Iterator<TokenRowKeyComponents> components) {
                    if (deduplicateComponents) {
                        return IteratorUtils.deduplicatingIterator(components);
                    } else {
                        return components;
                    }
                }
            }).toList();

    return Iterators.transform(
            IteratorUtils.deduplicatingIterator(
                    Iterators.mergeSorted(tokenRowKeyStreams, TokenRowKeyComponentsComparator.getInstance())),
            RowDecoders.getEntityIdFunction(table));
}

From source file:org.apache.jackrabbit.oak.query.UnionQueryImpl.java

@Override
public Iterator<ResultRowImpl> getRows() {
    prepare();
    if (explain) {
        String plan = getPlan();
        columns = new ColumnImpl[] { new ColumnImpl("explain", "plan", "plan") };
        ResultRowImpl r = new ResultRowImpl(this, Tree.EMPTY_ARRAY,
                new PropertyValue[] { PropertyValues.newString(plan) }, null, null);
        return Arrays.asList(r).iterator();
    }
    if (LOG.isDebugEnabled()) {
        if (isInternal) {
            LOG.trace("query union plan {}", getPlan());
        } else {
            LOG.debug("query union plan {}", getPlan());
        }
    }
    boolean distinct = !unionAll;
    Comparator<ResultRowImpl> orderBy = ResultRowImpl.getComparator(orderings);

    Iterator<ResultRowImpl> it;
    final Iterator<ResultRowImpl> leftRows = left.getRows();
    final Iterator<ResultRowImpl> rightRows = right.getRows();
    Iterator<ResultRowImpl> leftIter = leftRows;
    Iterator<ResultRowImpl> rightIter = rightRows;

    // if measure retrieve the backing delegate iterator instead
    if (measure) {
        leftIter = ((MeasuringIterator) leftRows).getDelegate();
        rightIter = ((MeasuringIterator) rightRows).getDelegate();
    }
    // Since sorted by index use a merge iterator
    if (isSortedByIndex()) {
        it = FilterIterators.newCombinedFilter(
                Iterators.mergeSorted(ImmutableList.of(leftIter, rightIter), orderBy), distinct, limit, offset,
                null, settings);
    } else {
        it = FilterIterators.newCombinedFilter(Iterators.concat(leftIter, rightIter), distinct, limit, offset,
                orderBy, settings);
    }

    if (measure) {
        // return the measuring iterator for the union
        it = new MeasuringIterator(this, it) {
            MeasuringIterator left = (MeasuringIterator) leftRows;
            MeasuringIterator right = (MeasuringIterator) rightRows;

            @Override
            protected void setColumns(ColumnImpl[] cols) {
                columns = cols;
                left.setColumns(cols);
                right.setColumns(cols);
            }

            @Override
            protected Map<String, Long> getSelectorScanCount() {
                // Merge the 2 maps from the left and right queries to get the selector counts
                Map<String, Long> leftSelectorScan = left.getSelectorScanCount();
                Map<String, Long> rightSelectorScan = right.getSelectorScanCount();
                Map<String, Long> unionScan = Maps.newHashMap(leftSelectorScan);
                for (String key : rightSelectorScan.keySet()) {
                    if (unionScan.containsKey(key)) {
                        unionScan.put(key, rightSelectorScan.get(key) + unionScan.get(key));
                    } else {
                        unionScan.put(key, rightSelectorScan.get(key));
                    }
                }
                return unionScan;
            }

            @Override
            protected long getReadCount() {
                return left.getReadCount() + right.getReadCount();
            }
        };
    }

    return it;
}

From source file:com.google.cloud.genomics.localrepo.QueryEngine.java

private SearchReadsResponse searchReads(Map<File, PeekingIterator<SAMRecordWithSkip>> iterators, final int end,
        Predicate<String> readsetFilter) {
    List<Read> reads = new ArrayList<>();
    for (Iterator<SAMRecordWithSkip> iterator = Iterators
            .limit(Iterators.mergeSorted(iterators.values(), Comparator.naturalOrder()), pageSize); iterator
                    .hasNext();) {
        SAMRecordWithSkip next = iterator.next();
        String readsetId = getReadsetId(next.file, next.record);
        if (readsetFilter.test(readsetId)) {
            reads.add(read(readsetId, next.record));
        }
    }
    Map<File, PeekingIterator<SAMRecordWithSkip>> nonEmptyIterators = Maps.filterValues(iterators,
            iterator -> iterator.hasNext());
    return SearchReadsResponse.create(reads,
            nonEmptyIterators.isEmpty() ? null
                    : QueryDescriptor.create(new HashMap<>(Maps.transformValues(nonEmptyIterators,
                            new Function<PeekingIterator<SAMRecordWithSkip>, QueryDescriptor.Start>() {
                                @Override
                                public QueryDescriptor.Start apply(
                                        PeekingIterator<SAMRecordWithSkip> iterator) {
                                    SAMRecordWithSkip peek = iterator.peek();
                                    SAMRecord record = peek.record;
                                    return QueryDescriptor.Start.create(record.getReferenceName(),
                                            record.getAlignmentStart(), peek.skip);
                                }
                            })), end).toString());
}

From source file:com.simiacryptus.text.CharTrie.java

private <T extends Comparable<T>> Stream<TrieNode> max(Function<TrieNode, T> fn, int maxResults,
        TrieNode node) {
    return StreamSupport.stream(Spliterators.spliteratorUnknownSize(
            Iterators.mergeSorted(Stream
                    .concat(Stream.of(Stream.of(node)), node.getChildren().map(x -> max(fn, maxResults, x)))
                    .map(x -> x.iterator()).collect(Collectors.toList()), Comparator.comparing(fn).reversed()),
            Spliterator.ORDERED), false).limit(maxResults).collect(Collectors.toList()).stream();
}
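
The CharTrie snippet packs several steps into a single expression. Unpacked, the pattern is: merge descending-sorted iterators with a reversed key comparator, bridge the merged iterator into a Stream, and truncate to the top k. A stand-alone sketch with illustrative data (not from CharTrie):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterators;

import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

public class TopKMergeSketch {
    public static void main(String[] args) {
        // Each source is already sorted descending by length.
        List<Iterator<String>> sources = ImmutableList.of(
                ImmutableList.of("bbbb", "aa").iterator(),
                ImmutableList.of("ccccc", "d").iterator());

        // Merge descending by length, bridge to a Stream, and keep the top 3 overall.
        Comparator<String> byLengthDesc = Comparator.comparing(String::length).reversed();
        Iterator<String> merged = Iterators.mergeSorted(sources, byLengthDesc);
        List<String> top3 = StreamSupport.stream(
                Spliterators.spliteratorUnknownSize(merged, Spliterator.ORDERED), false)
                .limit(3)
                .collect(Collectors.toList());
        System.out.println(top3); // prints: [ccccc, bbbb, aa]
    }
}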