Example usage for com.google.common.collect.Lists.partition

Introduction

On this page you can find example usages of com.google.common.collect.Lists.partition.

Prototype

public static <T> List<List<T>> partition(List<T> list, int size)

Source Link

Document

Returns consecutive sublists of a list (views obtained via List#subList(int, int)), each of the same size (the final list may be smaller).

Usage

From source file:com.b2international.snowowl.snomed.api.impl.SnomedBrowserService.java

/**
 * Builds a single commit request that contains the create/update requests for all given concepts.
 * <p>
 * The concepts are processed in batches of 1000: for each batch the currently stored versions are
 * loaded in bulk, then every incoming concept is either turned into a create request (when no stored
 * version exists and creation is allowed) or diffed against its stored version via {@code update}.
 *
 * @param branchPath    the branch the existing concepts are read from
 * @param concepts      the incoming concepts to create or update
 * @param allowCreate   whether concepts without a stored version may be created; {@code null} is
 *                      treated as {@code false}
 * @param userId        the user id recorded on the commit
 * @param locales       the locales used when loading the existing concepts and computing updates
 * @param commitComment the comment recorded on the commit
 * @return the commit request builder wrapping the collected bulk request
 * @throws ComponentNotFoundException if a concept has no stored version and creation is not allowed
 */
private RepositoryCommitRequestBuilder createBulkCommit(String branchPath,
        List<? extends ISnomedBrowserConcept> concepts, Boolean allowCreate, String userId,
        List<ExtendedLocale> locales, final String commitComment) {

    final Stopwatch watch = Stopwatch.createStarted();

    final BulkRequestBuilder<TransactionContext> bulkRequest = BulkRequest.create();

    final InputFactory inputFactory = new InputFactory(getBranch(branchPath));

    // allowCreate is a boxed Boolean: resolve it once, null-safely, so that a null flag does not
    // surface as a bare NullPointerException from auto-unboxing in the middle of the loop.
    final boolean creationAllowed = Boolean.TRUE.equals(allowCreate);

    // Process concepts in batches of 1000
    for (List<? extends ISnomedBrowserConcept> updatedConceptsBatch : Lists.partition(concepts, 1000)) {

        // Load existing versions in bulk; concepts without an id cannot have a stored version
        final Set<String> conceptIds = updatedConceptsBatch.stream()
                .map(ISnomedBrowserConcept::getConceptId)
                .filter(Objects::nonNull)
                .collect(Collectors.toSet());

        final Set<ISnomedBrowserConcept> existingConcepts = getConceptDetailsInBulk(branchPath, conceptIds,
                locales);

        final Map<String, ISnomedBrowserConcept> existingConceptsMap = existingConcepts.stream()
                .collect(Collectors.toMap(ISnomedBrowserConcept::getConceptId, concept -> concept));

        // For each concept add component updates to the bulk request
        for (ISnomedBrowserConcept concept : updatedConceptsBatch) {

            final ISnomedBrowserConcept existingConcept = existingConceptsMap.get(concept.getConceptId());

            if (existingConcept != null) {
                update(concept, existingConcept, userId, locales, bulkRequest, inputFactory);
                continue;
            }

            if (!creationAllowed) {
                // If one existing concept is not found fail the whole commit
                throw new ComponentNotFoundException("Snomed Concept", concept.getConceptId());
            }

            final SnomedConceptCreateRequest req = inputFactory.createComponentInput(concept,
                    SnomedConceptCreateRequest.class);
            bulkRequest.add(req);
        }
    }

    // Commit everything at once; the time spent building the request is reported as preparation time
    return SnomedRequests.prepareCommit().setUserId(userId)
            .setCommitComment(commitComment).setPreparationTime(watch.elapsed(TimeUnit.MILLISECONDS))
            .setBody(bulkRequest);
}

From source file:com.netflix.metacat.connector.hive.sql.DirectSqlGetPartition.java

/**
 * Looks up the parameters for the given ids and adds them to {@code params}.
 * <p>
 * Lists of more than 5000 ids are queried in consecutive chunks of at most 5000 ids;
 * smaller lists are queried with a single call.
 *
 * @param ids    the ids to look up
 * @param sql    the query passed through to {@code getParameters}
 * @param idName the id name passed through to {@code getParameters}
 * @param params the map receiving the results of every lookup
 * @return always {@code null}
 */
private Void populateParameters(final List<Long> ids, final String sql, final String idName,
        final Map<Long, Map<String, String>> params) {
    // Small enough for one query: look everything up directly
    if (ids.size() <= 5000) {
        params.putAll(this.getParameters(ids, sql, idName));
        return null;
    }

    // Otherwise query chunk by chunk and accumulate the results
    for (final List<Long> idChunk : Lists.partition(ids, 5000)) {
        params.putAll(this.getParameters(idChunk, sql, idName));
    }
    return null;
}

From source file:com.palantir.atlasdb.keyvalue.cassandra.CassandraKeyValueService.java

/**
 * Creates the multiget tasks needed to load the given cells from a single host.
 * <p>
 * The cells are grouped by column name; every group is split into batches of at most
 * {@code fetchBatchCount} cells (taken from the configuration), and one task is created per batch.
 * Each task, when called, runs a multiget for the batch's row names on {@code host} and hands the
 * result to the visitor {@code v}.
 *
 * @param host        the host the tasks will run against
 * @param tableName   the table to read from
 * @param cells       the cells to load
 * @param startTs     timestamp used (minus one) to build the start of the slice range
 * @param loadAllTs   if {@code true} the slice count is {@code Integer.MAX_VALUE}, otherwise 1
 * @param v           the visitor receiving the results of each multiget
 * @param consistency the consistency level passed to the multiget
 * @return the tasks; nothing is executed by this method itself
 */
private List<Callable<Void>> getLoadWithTsTasksForSingleHost(final InetAddress host, final String tableName,
        Collection<Cell> cells, final long startTs, final boolean loadAllTs, final ThreadSafeResultVisitor v,
        final ConsistencyLevel consistency) throws Exception {
    final ColumnParent colFam = new ColumnParent(internalTableName(tableName));

    // Group the requested cells by column name
    TreeMultimap<byte[], Cell> cellsGroupedByColumn = TreeMultimap.create(
            UnsignedBytes.lexicographicalComparator(), Ordering.natural());
    for (Cell requested : cells) {
        cellsGroupedByColumn.put(requested.getColumnName(), requested);
    }

    List<Callable<Void>> tasks = Lists.newArrayList();
    final int fetchBatchCount = configManager.getConfig().fetchBatchCount();

    for (final byte[] col : cellsGroupedByColumn.keySet()) {
        Collection<Cell> cellsForColumn = cellsGroupedByColumn.get(col);

        // An oversized group is still served, but it is logged before being re-batched
        if (cellsForColumn.size() > fetchBatchCount) {
            log.warn(
                    "Re-batching in getLoadWithTsTasksForSingleHost a call to {} for table {} that attempted to "
                            + "multiget {} rows; this may indicate overly-large batching on a higher level.\n{}",
                    host, tableName, cellsForColumn.size(),
                    CassandraKeyValueServices.getFilteredStackTrace("com.palantir"));
        }

        for (final List<Cell> rowBatch : Lists.partition(ImmutableList.copyOf(cellsForColumn),
                fetchBatchCount)) {
            tasks.add(new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    return clientPool.runWithPooledResourceOnHost(host,
                            new FunctionCheckedException<Client, Void, Exception>() {
                                @Override
                                public Void apply(Client client) throws Exception {
                                    // Slice over this column, from (startTs - 1) to -1
                                    ByteBuffer sliceStart = CassandraKeyValueServices
                                            .makeCompositeBuffer(col, startTs - 1);
                                    ByteBuffer sliceEnd = CassandraKeyValueServices
                                            .makeCompositeBuffer(col, -1);
                                    int sliceCount = loadAllTs ? Integer.MAX_VALUE : 1;
                                    SlicePredicate pred = new SlicePredicate();
                                    pred.setSlice_range(
                                            new SliceRange(sliceStart, sliceEnd, false, sliceCount));

                                    // One row key per cell of the batch
                                    List<ByteBuffer> rowNames = Lists
                                            .newArrayListWithCapacity(rowBatch.size());
                                    for (Cell batched : rowBatch) {
                                        rowNames.add(ByteBuffer.wrap(batched.getRowName()));
                                    }

                                    Map<ByteBuffer, List<ColumnOrSuperColumn>> results = multigetInternal(
                                            client, tableName, rowNames, colFam, pred, consistency);
                                    v.visit(results);
                                    return null;
                                }

                                @Override
                                public String toString() {
                                    return "multiget_slice(" + host + ", " + colFam + ", " + rowBatch.size()
                                            + " rows" + ")";
                                }
                            });
                }
            });
        }
    }
    return tasks;
}

From source file:org.nd4j.linalg.dataset.DataSet.java

/**
 * Partitions a dataset in to mini batches where
 * each dataset in each list is of the specified number of examples
 *
 * @param num the number to split by/* ww  w  .  ja  v  a 2  s  .c o m*/
 * @return the partitioned datasets
 */
@Override
public List<DataSet> batchBy(int num) {
    List<DataSet> batched = Lists.newArrayList();
    for (List<DataSet> splitBatch : Lists.partition(asList(), num)) {
        batched.add(DataSet.merge(splitBatch));
    }
    return batched;
}

From source file:com.facebook.presto.hive.metastore.glue.GlueHiveMetastore.java

/**
 * Fetches the named partitions of a table from Glue in pages of at most
 * {@code BATCH_GET_PARTITION_MAX_PAGE_SIZE} partitions.
 * <p>
 * All page requests are submitted asynchronously first; the responses are then awaited in
 * submission order and converted with {@code GlueToPrestoConverter}.
 *
 * @param databaseName   the database of the table
 * @param tableName      the table whose partitions are requested
 * @param partitionNames the names of the partitions to fetch
 * @return the converted partitions of all pages
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if a request fails or the wait is
 *                         interrupted (the interrupt flag is restored in that case)
 */
private List<Partition> batchGetPartition(String databaseName, String tableName, List<String> partitionNames) {
    try {
        // Translate every partition name into the value-list form used by the request
        List<PartitionValueList> requestedValues = new ArrayList<>(partitionNames.size());
        for (String partitionName : partitionNames) {
            requestedValues.add(new PartitionValueList().withValues(toPartitionValues(partitionName)));
        }

        // Submit one asynchronous request per page before waiting on any of them
        List<Future<BatchGetPartitionResult>> pendingPages = new ArrayList<>();
        for (List<PartitionValueList> page : Lists.partition(requestedValues,
                BATCH_GET_PARTITION_MAX_PAGE_SIZE)) {
            BatchGetPartitionRequest request = new BatchGetPartitionRequest()
                    .withDatabaseName(databaseName)
                    .withTableName(tableName)
                    .withPartitionsToGet(page);
            pendingPages.add(glueClient.batchGetPartitionAsync(request));
        }

        // Collect the responses in submission order
        List<Partition> converted = new ArrayList<>();
        for (Future<BatchGetPartitionResult> pendingPage : pendingPages) {
            pendingPage.get().getPartitions().forEach(
                    gluePartition -> converted.add(GlueToPrestoConverter.convertPartition(gluePartition)));
        }
        return converted;
    } catch (InterruptedException e) {
        // Restore the interrupt flag before translating the exception
        Thread.currentThread().interrupt();
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    } catch (AmazonServiceException | ExecutionException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}

From source file:com.netflix.metacat.main.services.search.ElasticSearchMetacatRefresh.java

/**
 * Process the list of tables in batches.
 *
 * @param databaseName database name/* w ww.  j  a va2  s . c  o m*/
 * @param tableNames   table names
 * @return A future containing the tasks
 */
private ListenableFuture<Void> processTables(final QualifiedName databaseName,
        final List<QualifiedName> tableNames) {
    final List<List<QualifiedName>> tableNamesBatches = Lists.partition(tableNames, 500);
    final List<ListenableFuture<Void>> processTablesBatchFutures = tableNamesBatches.stream()
            .map(subTableNames -> _processTables(databaseName, subTableNames)).collect(Collectors.toList());

    return Futures.transform(Futures.successfulAsList(processTablesBatchFutures), Functions.constant(null));
}

From source file:com.facebook.presto.hive.metastore.glue.GlueHiveMetastore.java

/**
 * Creates the given partitions in Glue in batches of at most
 * {@code BATCH_CREATE_PARTITION_MAX_PAGE_SIZE} partitions.
 * <p>
 * All batch requests are submitted asynchronously first; afterwards each response is awaited in
 * submission order and its reported errors are passed to
 * {@code propagatePartitionErrorToPrestoException}.
 *
 * @param databaseName the database of the table
 * @param tableName    the table receiving the partitions
 * @param partitions   the partitions to create
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if a request fails or the wait is
 *                         interrupted (the interrupt flag is restored in that case)
 */
@Override
public void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    try {
        // Submit one asynchronous create request per batch
        List<Future<BatchCreatePartitionResult>> pendingBatches = new ArrayList<>();
        for (List<PartitionWithStatistics> batch : Lists.partition(partitions,
                BATCH_CREATE_PARTITION_MAX_PAGE_SIZE)) {
            List<PartitionInput> batchInputs = new ArrayList<>(batch.size());
            for (PartitionWithStatistics partition : batch) {
                batchInputs.add(GlueInputConverter.convertPartition(partition));
            }

            BatchCreatePartitionRequest request = new BatchCreatePartitionRequest()
                    .withDatabaseName(databaseName)
                    .withTableName(tableName)
                    .withPartitionInputList(batchInputs);
            pendingBatches.add(glueClient.batchCreatePartitionAsync(request));
        }

        // Wait for every batch and surface the errors it reports
        for (Future<BatchCreatePartitionResult> pendingBatch : pendingBatches) {
            propagatePartitionErrorToPrestoException(databaseName, tableName, pendingBatch.get().getErrors());
        }
    } catch (InterruptedException e) {
        // Restore the interrupt flag before translating the exception
        Thread.currentThread().interrupt();
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    } catch (AmazonServiceException | ExecutionException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}

From source file:org.nd4j.linalg.dataset.DataSet.java

/**
 * Partitions the data transform by the specified number.
 *
 * @param num the number to split by/*  w ww.  j a v  a 2  s  .  c om*/
 * @return the partitioned data transform
 */
@Override
public List<DataSet> dataSetBatches(int num) {
    List<List<DataSet>> list = Lists.partition(asList(), num);
    List<DataSet> ret = new ArrayList<>();
    for (List<DataSet> l : list)
        ret.add(DataSet.merge(l));
    return ret;

}

From source file:com.netflix.metacat.connector.hive.sql.DirectSqlGetPartition.java

/**
 * Runs the partition query, splitting the partition id filter into chunks when it is too large.
 * <p>
 * With no partition ids, or at most 5000 of them, the query is run once and its result is returned
 * as is. With more than 5000 ids the query is run once per chunk of at most 5000 ids and the chunk
 * results are concatenated in order. All other arguments are passed through unchanged to
 * {@code getSubHandlerResultsFromQuery}.
 */
private <T> List<T> getHandlerResults(final String databaseName, final String tableName,
        @Nullable final String filterExpression, @Nullable final List<String> partitionIds, final String sql,
        final ResultSetExtractor resultSetExtractor, @Nullable final String joinSql,
        @Nullable final String filterSql, @Nullable final List<Object> filterParams, @Nullable final Sort sort,
        @Nullable final Pageable pageable, final boolean forceDisableAudit) {
    //
    // The IN clause is capped at 5000 part names: with part_name at 767 bytes, going beyond roughly 5400
    // names exceeds the maximum sql query length (max_allowed_packet for our RDS).
    //
    if (partitionIds == null || partitionIds.size() <= 5000) {
        return this.getSubHandlerResultsFromQuery(databaseName, tableName, filterExpression, partitionIds,
                sql, resultSetExtractor, joinSql, filterSql, filterParams, sort, pageable, forceDisableAudit);
    }

    final List<T> combined = Lists.newArrayList();
    for (final List<String> partitionIdChunk : Lists.partition(partitionIds, 5000)) {
        combined.addAll(this.getSubHandlerResultsFromQuery(databaseName, tableName, filterExpression,
                partitionIdChunk, sql, resultSetExtractor, joinSql, filterSql, filterParams, sort, pageable,
                forceDisableAudit));
    }
    return combined;
}

From source file:org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore.java

/**
 * Try to apply all the {@link UpdateOp}s with as few MongoDB requests as
 * possible. The return value is the list of the old documents (before
 * applying changes). The mechanism is as follows:
 *
 * <ol>
 * <li>For each UpdateOp try to read the assigned document from the cache.
 *     Add them to {@code oldDocs}.</li>
 * <li>Prepare a list of all UpdateOps that don't have their documents and
 *     read them in one find() call. Add results to {@code oldDocs}.</li>
 * <li>Prepare a bulk update. For each remaining UpdateOp add following
 *     operation:
 *   <ul>
 *   <li>Find document with the same id and the same mod_count as in the
 *       {@code oldDocs}.</li>
 *   <li>Apply changes from the UpdateOps.</li>
 *   </ul>
 * </li>
 * <li>Execute the bulk update.</li>
 * </ol>
 *
 * If some other process modifies the target documents between points 2 and
 * 3, the mod_count will be increased as well and the bulk update will fail
 * for the concurrently modified docs. The method will then remove the
 * failed documents from the {@code oldDocs} and restart the process from
 * point 2. The bulk phase runs at most {@code bulkRetries + 1} times and is
 * skipped as soon as two or fewer operations remain; whatever is left
 * afterwards (including operations with a duplicate id) is applied one
 * operation at a time.
 *
 * @param collection the collection the operations are applied to
 * @param updateOps  the operations to apply; each must be unconditional
 * @return one entry per given operation, in the order given; an entry
 *         stays {@code null} unless an old document was recorded for it
 */
@SuppressWarnings("unchecked")
@CheckForNull
@Override
public <T extends Document> List<T> createOrUpdate(Collection<T> collection, List<UpdateOp> updateOps) {
    log("createOrUpdate", updateOps);

    // First operation seen for each id; these are candidates for the bulk update
    Map<String, UpdateOp> operationsToCover = new LinkedHashMap<String, UpdateOp>();
    // Further operations on an id that is already covered; applied sequentially later
    List<UpdateOp> duplicates = new ArrayList<UpdateOp>();
    // Old document per (cloned) operation, kept in the order the operations were given
    Map<UpdateOp, T> results = new LinkedHashMap<UpdateOp, T>();

    final Stopwatch watch = startWatch();
    try {
        for (UpdateOp updateOp : updateOps) {
            UpdateUtils.assertUnconditional(updateOp);
            // Work on a copy so the caller's operation object is not the one tracked here
            UpdateOp clone = updateOp.copy();
            if (operationsToCover.containsKey(updateOp.getId())) {
                duplicates.add(clone);
            } else {
                operationsToCover.put(updateOp.getId(), clone);
            }
            // Reserve the result slot now so the result order follows the input order
            results.put(clone, null);
        }

        Map<String, T> oldDocs = new HashMap<String, T>();
        // Cached documents are only looked up for the NODES collection
        if (collection == Collection.NODES) {
            oldDocs.putAll((Map<String, T>) getCachedNodes(operationsToCover.keySet()));
        }

        for (int i = 0; i <= bulkRetries; i++) {
            if (operationsToCover.size() <= 2) {
                // bulkUpdate() method invokes Mongo twice, so sending 2 updates
                // in bulk mode wouldn't result in any performance gain
                break;
            }
            // Partition a snapshot copy: operationsToCover is modified inside the loop
            for (List<UpdateOp> partition : Lists.partition(Lists.newArrayList(operationsToCover.values()),
                    bulkSize)) {
                Map<UpdateOp, T> successfulUpdates = bulkUpdate(collection, partition, oldDocs);
                results.putAll(successfulUpdates);
                // Operations that succeeded no longer need to be covered
                operationsToCover.values().removeAll(successfulUpdates.keySet());
            }
        }

        // if there are some changes left, we'll apply them one after another
        Iterator<UpdateOp> it = Iterators.concat(operationsToCover.values().iterator(), duplicates.iterator());
        while (it.hasNext()) {
            UpdateOp op = it.next();
            // Drop the operation from its backing collection before applying it
            it.remove();
            T oldDoc = createOrUpdate(collection, op);
            if (oldDoc != null) {
                results.put(op, oldDoc);
            }
        }
    } finally {
        // Stats are reported even if an exception is thrown above
        stats.doneCreateOrUpdate(watch.elapsed(TimeUnit.NANOSECONDS), collection,
                Lists.transform(updateOps, new Function<UpdateOp, String>() {
                    @Override
                    public String apply(UpdateOp input) {
                        return input.getId();
                    }
                }));
    }
    List<T> resultList = new ArrayList<T>(results.values());
    log("createOrUpdate returns", resultList);
    return resultList;
}