Example usage for com.google.common.collect Lists partition

List of usage examples for com.google.common.collect Lists partition

Introduction

On this page you can find example usage for com.google.common.collect Lists.partition.

Prototype

public static <T> List<List<T>> partition(List<T> list, int size) 

Document

Returns consecutive List#subList(int,int) sublists of a list, each of the same size (the final list may be smaller).
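
As a quick illustration before the real-world examples below, here is a minimal, self-contained sketch of how partition behaves (the sample data is made up for demonstration):

import com.google.common.collect.Lists;

import java.util.Arrays;
import java.util.List;

public class PartitionDemo {
    public static void main(String[] args) {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7);

        // Split into consecutive chunks of 3; the last chunk may be smaller.
        List<List<Integer>> chunks = Lists.partition(numbers, 3);

        System.out.println(chunks); // prints [[1, 2, 3], [4, 5, 6], [7]]
    }
}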

Usage

From source file:com.b2international.snowowl.snomed.api.impl.SnomedBrowserService.java

private RepositoryCommitRequestBuilder createBulkCommit(String branchPath,
        List<? extends ISnomedBrowserConcept> concepts, Boolean allowCreate, String userId,
        List<ExtendedLocale> locales, final String commitComment) {

    final Stopwatch watch = Stopwatch.createStarted();

    final BulkRequestBuilder<TransactionContext> bulkRequest = BulkRequest.create();

    InputFactory inputFactory = new InputFactory(getBranch(branchPath));

    // Process concepts in batches of 1000
    for (List<? extends ISnomedBrowserConcept> updatedConceptsBatch : Lists.partition(concepts, 1000)) {

        // Load existing versions in bulk
        Set<String> conceptIds = updatedConceptsBatch.stream().map(ISnomedBrowserConcept::getConceptId)
                .filter(Objects::nonNull).collect(Collectors.toSet());

        Set<ISnomedBrowserConcept> existingConcepts = getConceptDetailsInBulk(branchPath, conceptIds, locales);

        Map<String, ISnomedBrowserConcept> existingConceptsMap = existingConcepts.stream()
                .collect(Collectors.toMap(ISnomedBrowserConcept::getConceptId, concept -> concept));

        // For each concept add component updates to the bulk request
        for (ISnomedBrowserConcept concept : updatedConceptsBatch) {

            ISnomedBrowserConcept existingConcept = existingConceptsMap.get(concept.getConceptId());

            if (existingConcept == null) {

                if (allowCreate) {

                    final SnomedConceptCreateRequest req = inputFactory.createComponentInput(concept,
                            SnomedConceptCreateRequest.class);
                    bulkRequest.add(req);

                } else {
                    // If an existing concept is not found, fail the whole commit
                    throw new ComponentNotFoundException("Snomed Concept", concept.getConceptId());
                }

            } else {
                update(concept, existingConcept, userId, locales, bulkRequest, inputFactory);
            }

        }
    }

    // Commit everything at once
    final RepositoryCommitRequestBuilder commit = SnomedRequests.prepareCommit().setUserId(userId)
            .setCommitComment(commitComment).setPreparationTime(watch.elapsed(TimeUnit.MILLISECONDS))
            .setBody(bulkRequest);

    return commit;
}

From source file:com.netflix.metacat.connector.hive.sql.DirectSqlGetPartition.java

private Void populateParameters(final List<Long> ids, final String sql, final String idName,
        final Map<Long, Map<String, String>> params) {
    if (ids.size() > 5000) {
        final List<List<Long>> subFilterPartitionNamesList = Lists.partition(ids, 5000);
        subFilterPartitionNamesList
                .forEach(subPartitions -> params.putAll(this.getParameters(subPartitions, sql, idName)));
    } else {
        params.putAll(this.getParameters(ids, sql, idName));
    }
    return null;
}

From source file:com.palantir.atlasdb.keyvalue.cassandra.CassandraKeyValueService.java

private List<Callable<Void>> getLoadWithTsTasksForSingleHost(final InetAddress host, final String tableName,
        Collection<Cell> cells, final long startTs, final boolean loadAllTs, final ThreadSafeResultVisitor v,
        final ConsistencyLevel consistency) throws Exception {
    final ColumnParent colFam = new ColumnParent(internalTableName(tableName));
    TreeMultimap<byte[], Cell> cellsByCol = TreeMultimap.create(UnsignedBytes.lexicographicalComparator(),
            Ordering.natural());
    for (Cell cell : cells) {
        cellsByCol.put(cell.getColumnName(), cell);
    }
    List<Callable<Void>> tasks = Lists.newArrayList();
    int fetchBatchCount = configManager.getConfig().fetchBatchCount();
    for (final byte[] col : cellsByCol.keySet()) {
        if (cellsByCol.get(col).size() > fetchBatchCount) {
            log.warn(
                    "Re-batching in getLoadWithTsTasksForSingleHost a call to {} for table {} that attempted to "
                            + "multiget {} rows; this may indicate overly-large batching on a higher level.\n{}",
                    host, tableName, cellsByCol.get(col).size(),
                    CassandraKeyValueServices.getFilteredStackTrace("com.palantir"));
        }
        for (final List<Cell> partition : Lists.partition(ImmutableList.copyOf(cellsByCol.get(col)),
                fetchBatchCount)) {
            tasks.add(new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    return clientPool.runWithPooledResourceOnHost(host,
                            new FunctionCheckedException<Client, Void, Exception>() {
                                @Override
                                public Void apply(Client client) throws Exception {
                                    ByteBuffer start = CassandraKeyValueServices.makeCompositeBuffer(col,
                                            startTs - 1);
                                    ByteBuffer end = CassandraKeyValueServices.makeCompositeBuffer(col, -1);
                                    SliceRange slice = new SliceRange(start, end, false,
                                            loadAllTs ? Integer.MAX_VALUE : 1);
                                    SlicePredicate pred = new SlicePredicate();
                                    pred.setSlice_range(slice);

                                    List<ByteBuffer> rowNames = Lists
                                            .newArrayListWithCapacity(partition.size());
                                    for (Cell c : partition) {
                                        rowNames.add(ByteBuffer.wrap(c.getRowName()));
                                    }
                                    Map<ByteBuffer, List<ColumnOrSuperColumn>> results = multigetInternal(
                                            client, tableName, rowNames, colFam, pred, consistency);
                                    v.visit(results);
                                    return null;
                                }

                                @Override
                                public String toString() {
                                    return "multiget_slice(" + host + ", " + colFam + ", " + partition.size()
                                            + " rows" + ")";
                                }
                            });
                }
            });
        }
    }
    return tasks;
}

From source file:org.nd4j.linalg.dataset.DataSet.java

/**
 * Partitions a dataset into mini-batches, where each dataset
 * in the returned list contains the specified number of examples.
 *
 * @param num the number of examples per batch
 * @return the partitioned datasets
 */
@Override
public List<DataSet> batchBy(int num) {
    List<DataSet> batched = Lists.newArrayList();
    for (List<DataSet> splitBatch : Lists.partition(asList(), num)) {
        batched.add(DataSet.merge(splitBatch));
    }
    return batched;
}

From source file:com.facebook.presto.hive.metastore.glue.GlueHiveMetastore.java

private List<Partition> batchGetPartition(String databaseName, String tableName, List<String> partitionNames) {
    try {
        List<PartitionValueList> partitionValueLists = partitionNames.stream()
                .map(partitionName -> new PartitionValueList().withValues(toPartitionValues(partitionName)))
                .collect(toList());

        List<List<PartitionValueList>> batchedPartitionValueLists = Lists.partition(partitionValueLists,
                BATCH_GET_PARTITION_MAX_PAGE_SIZE);
        List<Future<BatchGetPartitionResult>> batchGetPartitionFutures = new ArrayList<>();
        List<Partition> result = new ArrayList<>();

        for (List<PartitionValueList> partitions : batchedPartitionValueLists) {
            batchGetPartitionFutures.add(glueClient.batchGetPartitionAsync(new BatchGetPartitionRequest()
                    .withDatabaseName(databaseName).withTableName(tableName).withPartitionsToGet(partitions)));
        }

        for (Future<BatchGetPartitionResult> future : batchGetPartitionFutures) {
            future.get().getPartitions()
                    .forEach(partition -> result.add(GlueToPrestoConverter.convertPartition(partition)));
        }

        return result;
    } catch (AmazonServiceException | InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}

From source file:com.netflix.metacat.main.services.search.ElasticSearchMetacatRefresh.java

/**
 * Process the list of tables in batches.
 *
 * @param databaseName database name
 * @param tableNames   table names
 * @return A future containing the tasks
 */
private ListenableFuture<Void> processTables(final QualifiedName databaseName,
        final List<QualifiedName> tableNames) {
    final List<List<QualifiedName>> tableNamesBatches = Lists.partition(tableNames, 500);
    final List<ListenableFuture<Void>> processTablesBatchFutures = tableNamesBatches.stream()
            .map(subTableNames -> _processTables(databaseName, subTableNames)).collect(Collectors.toList());

    return Futures.transform(Futures.successfulAsList(processTablesBatchFutures), Functions.constant(null));
}

From source file:com.facebook.presto.hive.metastore.glue.GlueHiveMetastore.java

@Override
public void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitions) {
    try {
        List<List<PartitionWithStatistics>> batchedPartitions = Lists.partition(partitions,
                BATCH_CREATE_PARTITION_MAX_PAGE_SIZE);
        List<Future<BatchCreatePartitionResult>> futures = new ArrayList<>();

        for (List<PartitionWithStatistics> partitionBatch : batchedPartitions) {
            List<PartitionInput> partitionInputs = partitionBatch.stream()
                    .map(GlueInputConverter::convertPartition).collect(toList());
            futures.add(glueClient
                    .batchCreatePartitionAsync(new BatchCreatePartitionRequest().withDatabaseName(databaseName)
                            .withTableName(tableName).withPartitionInputList(partitionInputs)));
        }

        for (Future<BatchCreatePartitionResult> future : futures) {
            BatchCreatePartitionResult result = future.get();
            propagatePartitionErrorToPrestoException(databaseName, tableName, result.getErrors());
        }
    } catch (AmazonServiceException | InterruptedException | ExecutionException e) {
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}

From source file:org.nd4j.linalg.dataset.DataSet.java

/**
 * Partitions the dataset into batches of the specified number of examples.
 *
 * @param num the number of examples per batch
 * @return the partitioned datasets
 */
@Override
public List<DataSet> dataSetBatches(int num) {
    List<List<DataSet>> list = Lists.partition(asList(), num);
    List<DataSet> ret = new ArrayList<>();
    for (List<DataSet> l : list)
        ret.add(DataSet.merge(l));
    return ret;

}

From source file:com.netflix.metacat.connector.hive.sql.DirectSqlGetPartition.java

private <T> List<T> getHandlerResults(final String databaseName, final String tableName,
        @Nullable final String filterExpression, @Nullable final List<String> partitionIds, final String sql,
        final ResultSetExtractor resultSetExtractor, @Nullable final String joinSql,
        @Nullable final String filterSql, @Nullable final List<Object> filterParams, @Nullable final Sort sort,
        @Nullable final Pageable pageable, final boolean forceDisableAudit) {
    //
    // Limiting the IN clause to 5000 part names because the SQL query with the IN clause for part_name (767 bytes)
    // will hit the max SQL query length (max_allowed_packet for our RDS) if we use more than 5400 or so.
    //
    List<T> partitions = Lists.newArrayList();
    if (partitionIds != null && partitionIds.size() > 5000) {
        final List<List<String>> subFilterPartitionNamesList = Lists.partition(partitionIds, 5000);
        final List<T> finalPartitions = partitions;
        subFilterPartitionNamesList.forEach(
                subPartitionIds -> finalPartitions.addAll(this.getSubHandlerResultsFromQuery(databaseName,
                        tableName, filterExpression, subPartitionIds, sql, resultSetExtractor, joinSql,
                        filterSql, filterParams, sort, pageable, forceDisableAudit)));
    } else {
        partitions = this.getSubHandlerResultsFromQuery(databaseName, tableName, filterExpression, partitionIds,
                sql, resultSetExtractor, joinSql, filterSql, filterParams, sort, pageable, forceDisableAudit);
    }
    return partitions;
}
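
The comment in getHandlerResults above explains why partition ids are chunked at 5000 before being placed in an IN clause. As a generic sketch of that same pattern using plain JDBC (the table name, column names, and helper method below are invented for illustration and are not part of DirectSqlGetPartition):

import com.google.common.collect.Lists;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class ChunkedInClauseQuery {

    // Hypothetical chunk size; the safe value depends on max_allowed_packet and row width.
    private static final int BATCH_SIZE = 5000;

    // Loads names for the given ids in chunks so that no single IN clause
    // grows past the database's maximum query size.
    public static List<String> loadNames(Connection connection, List<Long> ids) throws SQLException {
        List<String> names = new ArrayList<>();
        for (List<Long> batch : Lists.partition(ids, BATCH_SIZE)) {
            // Build "?, ?, ..." with one placeholder per id in this chunk.
            String placeholders = String.join(", ", Collections.nCopies(batch.size(), "?"));
            String sql = "SELECT name FROM example_table WHERE id IN (" + placeholders + ")";
            try (PreparedStatement statement = connection.prepareStatement(sql)) {
                for (int i = 0; i < batch.size(); i++) {
                    statement.setLong(i + 1, batch.get(i));
                }
                try (ResultSet resultSet = statement.executeQuery()) {
                    while (resultSet.next()) {
                        names.add(resultSet.getString("name"));
                    }
                }
            }
        }
        return names;
    }
}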

From source file:org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore.java

/**
 * Tries to apply all the {@link UpdateOp}s with as few MongoDB requests as
 * possible. The return value is the list of the old documents (before
 * applying changes). The mechanism is as follows:
 *
 * <ol>
 * <li>For each UpdateOp try to read the assigned document from the cache.
 *     Add them to {@code oldDocs}.</li>
 * <li>Prepare a list of all UpdateOps that don't have their documents yet and
 *     read them in one find() call. Add results to {@code oldDocs}.</li>
 * <li>Prepare a bulk update. For each remaining UpdateOp add the following
 *     operation:
 *   <ul>
 *   <li>Find document with the same id and the same mod_count as in the
 *       {@code oldDocs}.</li>
 *   <li>Apply changes from the UpdateOps.</li>
 *   </ul>
 * </li>
 * <li>Execute the bulk update.</li>
 * </ol>
 *
 * If some other process modifies the target documents between points 2 and
 * 3, the mod_count will be increased as well and the bulk update will fail
 * for the concurrently modified docs. The method will then remove the
 * failed documents from {@code oldDocs} and restart the process from
 * point 2. It will stop after the 3rd iteration.
 */
@SuppressWarnings("unchecked")
@CheckForNull
@Override
public <T extends Document> List<T> createOrUpdate(Collection<T> collection, List<UpdateOp> updateOps) {
    log("createOrUpdate", updateOps);

    Map<String, UpdateOp> operationsToCover = new LinkedHashMap<String, UpdateOp>();
    List<UpdateOp> duplicates = new ArrayList<UpdateOp>();
    Map<UpdateOp, T> results = new LinkedHashMap<UpdateOp, T>();

    final Stopwatch watch = startWatch();
    try {
        for (UpdateOp updateOp : updateOps) {
            UpdateUtils.assertUnconditional(updateOp);
            UpdateOp clone = updateOp.copy();
            if (operationsToCover.containsKey(updateOp.getId())) {
                duplicates.add(clone);
            } else {
                operationsToCover.put(updateOp.getId(), clone);
            }
            results.put(clone, null);
        }

        Map<String, T> oldDocs = new HashMap<String, T>();
        if (collection == Collection.NODES) {
            oldDocs.putAll((Map<String, T>) getCachedNodes(operationsToCover.keySet()));
        }

        for (int i = 0; i <= bulkRetries; i++) {
            if (operationsToCover.size() <= 2) {
                // bulkUpdate() method invokes Mongo twice, so sending 2 updates
                // in bulk mode wouldn't result in any performance gain
                break;
            }
            for (List<UpdateOp> partition : Lists.partition(Lists.newArrayList(operationsToCover.values()),
                    bulkSize)) {
                Map<UpdateOp, T> successfulUpdates = bulkUpdate(collection, partition, oldDocs);
                results.putAll(successfulUpdates);
                operationsToCover.values().removeAll(successfulUpdates.keySet());
            }
        }

        // if there are some changes left, we'll apply them one after another
        Iterator<UpdateOp> it = Iterators.concat(operationsToCover.values().iterator(), duplicates.iterator());
        while (it.hasNext()) {
            UpdateOp op = it.next();
            it.remove();
            T oldDoc = createOrUpdate(collection, op);
            if (oldDoc != null) {
                results.put(op, oldDoc);
            }
        }
    } finally {
        stats.doneCreateOrUpdate(watch.elapsed(TimeUnit.NANOSECONDS), collection,
                Lists.transform(updateOps, new Function<UpdateOp, String>() {
                    @Override
                    public String apply(UpdateOp input) {
                        return input.getId();
                    }
                }));
    }
    List<T> resultList = new ArrayList<T>(results.values());
    log("createOrUpdate returns", resultList);
    return resultList;
}