Usage examples for com.google.common.collect.Lists.partition
public static <T> List<List<T>> partition(List<T> list, int size)
From source file:com.b2international.snowowl.snomed.api.impl.SnomedBrowserService.java
private RepositoryCommitRequestBuilder createBulkCommit(String branchPath, List<? extends ISnomedBrowserConcept> concepts, Boolean allowCreate, String userId, List<ExtendedLocale> locales, final String commitComment) { final Stopwatch watch = Stopwatch.createStarted(); final BulkRequestBuilder<TransactionContext> bulkRequest = BulkRequest.create(); InputFactory inputFactory = new InputFactory(getBranch(branchPath)); // Process concepts in batches of 1000 for (List<? extends ISnomedBrowserConcept> updatedConceptsBatch : Lists.partition(concepts, 1000)) { // Load existing versions in bulk Set<String> conceptIds = updatedConceptsBatch.stream().map(ISnomedBrowserConcept::getConceptId) .filter(Objects::nonNull).collect(Collectors.toSet()); Set<ISnomedBrowserConcept> existingConcepts = getConceptDetailsInBulk(branchPath, conceptIds, locales); Map<String, ISnomedBrowserConcept> existingConceptsMap = existingConcepts.stream() .collect(Collectors.toMap(ISnomedBrowserConcept::getConceptId, concept -> concept)); // For each concept add component updates to the bulk request for (ISnomedBrowserConcept concept : updatedConceptsBatch) { ISnomedBrowserConcept existingConcept = existingConceptsMap.get(concept.getConceptId()); if (existingConcept == null) { if (allowCreate) { final SnomedConceptCreateRequest req = inputFactory.createComponentInput(concept, SnomedConceptCreateRequest.class); bulkRequest.add(req); } else { // If one existing concept is not found fail the whole commit throw new ComponentNotFoundException("Snomed Concept", concept.getConceptId()); }//w w w . jav a 2s.c o m } else { update(concept, existingConcept, userId, locales, bulkRequest, inputFactory); } } } // Commit everything at once final RepositoryCommitRequestBuilder commit = SnomedRequests.prepareCommit().setUserId(userId) .setCommitComment(commitComment).setPreparationTime(watch.elapsed(TimeUnit.MILLISECONDS)) .setBody(bulkRequest); return commit; }
From source file:com.netflix.metacat.connector.hive.sql.DirectSqlGetPartition.java
/**
 * Loads the parameters for the given ids and merges them into {@code params}.
 *
 * <p>Up to 5000 ids are fetched with a single {@code getParameters} call; longer id lists are
 * split into chunks of at most 5000 ids and fetched chunk by chunk.
 *
 * @param ids ids to load parameters for
 * @param sql query passed through to {@code getParameters}
 * @param idName id name passed through to {@code getParameters}
 * @param params target map that receives all loaded parameters
 * @return always {@code null}
 */
private Void populateParameters(final List<Long> ids, final String sql, final String idName,
        final Map<Long, Map<String, String>> params) {
    if (ids.size() <= 5000) {
        params.putAll(this.getParameters(ids, sql, idName));
        return null;
    }
    for (final List<Long> idChunk : Lists.partition(ids, 5000)) {
        params.putAll(this.getParameters(idChunk, sql, idName));
    }
    return null;
}
From source file:com.palantir.atlasdb.keyvalue.cassandra.CassandraKeyValueService.java
private List<Callable<Void>> getLoadWithTsTasksForSingleHost(final InetAddress host, final String tableName, Collection<Cell> cells, final long startTs, final boolean loadAllTs, final ThreadSafeResultVisitor v, final ConsistencyLevel consistency) throws Exception { final ColumnParent colFam = new ColumnParent(internalTableName(tableName)); TreeMultimap<byte[], Cell> cellsByCol = TreeMultimap.create(UnsignedBytes.lexicographicalComparator(), Ordering.natural());//from ww w . j a v a 2 s. c o m for (Cell cell : cells) { cellsByCol.put(cell.getColumnName(), cell); } List<Callable<Void>> tasks = Lists.newArrayList(); int fetchBatchCount = configManager.getConfig().fetchBatchCount(); for (final byte[] col : cellsByCol.keySet()) { if (cellsByCol.get(col).size() > fetchBatchCount) { log.warn( "Re-batching in getLoadWithTsTasksForSingleHost a call to {} for table {} that attempted to " + "multiget {} rows; this may indicate overly-large batching on a higher level.\n{}", host, tableName, cellsByCol.get(col).size(), CassandraKeyValueServices.getFilteredStackTrace("com.palantir")); } for (final List<Cell> partition : Lists.partition(ImmutableList.copyOf(cellsByCol.get(col)), fetchBatchCount)) { tasks.add(new Callable<Void>() { @Override public Void call() throws Exception { return clientPool.runWithPooledResourceOnHost(host, new FunctionCheckedException<Client, Void, Exception>() { @Override public Void apply(Client client) throws Exception { ByteBuffer start = CassandraKeyValueServices.makeCompositeBuffer(col, startTs - 1); ByteBuffer end = CassandraKeyValueServices.makeCompositeBuffer(col, -1); SliceRange slice = new SliceRange(start, end, false, loadAllTs ? 
Integer.MAX_VALUE : 1); SlicePredicate pred = new SlicePredicate(); pred.setSlice_range(slice); List<ByteBuffer> rowNames = Lists .newArrayListWithCapacity(partition.size()); for (Cell c : partition) { rowNames.add(ByteBuffer.wrap(c.getRowName())); } Map<ByteBuffer, List<ColumnOrSuperColumn>> results = multigetInternal( client, tableName, rowNames, colFam, pred, consistency); v.visit(results); return null; } @Override public String toString() { return "multiget_slice(" + host + ", " + colFam + ", " + partition.size() + " rows" + ")"; } }); } }); } } return tasks; }
From source file:org.nd4j.linalg.dataset.DataSet.java
/** * Partitions a dataset in to mini batches where * each dataset in each list is of the specified number of examples * * @param num the number to split by/* ww w . ja v a 2 s .c o m*/ * @return the partitioned datasets */ @Override public List<DataSet> batchBy(int num) { List<DataSet> batched = Lists.newArrayList(); for (List<DataSet> splitBatch : Lists.partition(asList(), num)) { batched.add(DataSet.merge(splitBatch)); } return batched; }
From source file:com.facebook.presto.hive.metastore.glue.GlueHiveMetastore.java
/**
 * Fetches the named partitions from Glue in batches and converts them to Presto partitions.
 *
 * <p>The partition names are converted to value lists and split into batches of at most
 * {@code BATCH_GET_PARTITION_MAX_PAGE_SIZE}. All batch requests are submitted asynchronously
 * before the first result is awaited; results are then collected in submission order.
 *
 * @param databaseName database of the table
 * @param tableName table whose partitions are fetched
 * @param partitionNames names of the partitions to fetch
 * @return the converted partitions of all batches
 * @throws PrestoException with {@code HIVE_METASTORE_ERROR} if a request fails or the wait is
 *         interrupted (the interrupt flag is restored first)
 */
private List<Partition> batchGetPartition(String databaseName, String tableName, List<String> partitionNames) {
    try {
        final List<PartitionValueList> partitionValueLists = partitionNames.stream()
                .map(partitionName -> new PartitionValueList().withValues(toPartitionValues(partitionName)))
                .collect(toList());

        // Submit every batch first so the requests can be in flight concurrently.
        final List<Future<BatchGetPartitionResult>> pendingBatches = new ArrayList<>();
        for (List<PartitionValueList> batch
                : Lists.partition(partitionValueLists, BATCH_GET_PARTITION_MAX_PAGE_SIZE)) {
            final BatchGetPartitionRequest request = new BatchGetPartitionRequest()
                    .withDatabaseName(databaseName)
                    .withTableName(tableName)
                    .withPartitionsToGet(batch);
            pendingBatches.add(glueClient.batchGetPartitionAsync(request));
        }

        final List<Partition> converted = new ArrayList<>();
        for (Future<BatchGetPartitionResult> pending : pendingBatches) {
            pending.get().getPartitions().stream()
                    .map(GlueToPrestoConverter::convertPartition)
                    .forEachOrdered(converted::add);
        }
        return converted;
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    } catch (AmazonServiceException | ExecutionException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
From source file:com.netflix.metacat.main.services.search.ElasticSearchMetacatRefresh.java
/** * Process the list of tables in batches. * * @param databaseName database name/* w ww. j a va2 s . c o m*/ * @param tableNames table names * @return A future containing the tasks */ private ListenableFuture<Void> processTables(final QualifiedName databaseName, final List<QualifiedName> tableNames) { final List<List<QualifiedName>> tableNamesBatches = Lists.partition(tableNames, 500); final List<ListenableFuture<Void>> processTablesBatchFutures = tableNamesBatches.stream() .map(subTableNames -> _processTables(databaseName, subTableNames)).collect(Collectors.toList()); return Futures.transform(Futures.successfulAsList(processTablesBatchFutures), Functions.constant(null)); }
From source file:com.facebook.presto.hive.metastore.glue.GlueHiveMetastore.java
@Override public void addPartitions(String databaseName, String tableName, List<PartitionWithStatistics> partitions) { try {// w w w .java2s . co m List<List<PartitionWithStatistics>> batchedPartitions = Lists.partition(partitions, BATCH_CREATE_PARTITION_MAX_PAGE_SIZE); List<Future<BatchCreatePartitionResult>> futures = new ArrayList<>(); for (List<PartitionWithStatistics> partitionBatch : batchedPartitions) { List<PartitionInput> partitionInputs = partitionBatch.stream() .map(GlueInputConverter::convertPartition).collect(toList()); futures.add(glueClient .batchCreatePartitionAsync(new BatchCreatePartitionRequest().withDatabaseName(databaseName) .withTableName(tableName).withPartitionInputList(partitionInputs))); } for (Future<BatchCreatePartitionResult> future : futures) { BatchCreatePartitionResult result = future.get(); propagatePartitionErrorToPrestoException(databaseName, tableName, result.getErrors()); } } catch (AmazonServiceException | InterruptedException | ExecutionException e) { if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } throw new PrestoException(HIVE_METASTORE_ERROR, e); } }
From source file:org.nd4j.linalg.dataset.DataSet.java
/** * Partitions the data transform by the specified number. * * @param num the number to split by/* w ww. j a v a 2 s . c om*/ * @return the partitioned data transform */ @Override public List<DataSet> dataSetBatches(int num) { List<List<DataSet>> list = Lists.partition(asList(), num); List<DataSet> ret = new ArrayList<>(); for (List<DataSet> l : list) ret.add(DataSet.merge(l)); return ret; }
From source file:com.netflix.metacat.connector.hive.sql.DirectSqlGetPartition.java
private <T> List<T> getHandlerResults(final String databaseName, final String tableName, @Nullable final String filterExpression, @Nullable final List<String> partitionIds, final String sql, final ResultSetExtractor resultSetExtractor, @Nullable final String joinSql, @Nullable final String filterSql, @Nullable final List<Object> filterParams, @Nullable final Sort sort, @Nullable final Pageable pageable, final boolean forceDisableAudit) { ////from w ww . j a va 2s . c o m // Limiting the in clause to 5000 part names because the sql query with the IN clause for part_name(767 bytes) // will hit the max sql query length(max_allowed_packet for our RDS) if we use more than 5400 or so // List<T> partitions = Lists.newArrayList(); if (partitionIds != null && partitionIds.size() > 5000) { final List<List<String>> subFilterPartitionNamesList = Lists.partition(partitionIds, 5000); final List<T> finalPartitions = partitions; subFilterPartitionNamesList.forEach( subPartitionIds -> finalPartitions.addAll(this.getSubHandlerResultsFromQuery(databaseName, tableName, filterExpression, subPartitionIds, sql, resultSetExtractor, joinSql, filterSql, filterParams, sort, pageable, forceDisableAudit))); } else { partitions = this.getSubHandlerResultsFromQuery(databaseName, tableName, filterExpression, partitionIds, sql, resultSetExtractor, joinSql, filterSql, filterParams, sort, pageable, forceDisableAudit); } return partitions; }
From source file:org.apache.jackrabbit.oak.plugins.document.mongo.MongoDocumentStore.java
/** * Try to apply all the {@link UpdateOp}s with at least MongoDB requests as * possible. The return value is the list of the old documents (before * applying changes). The mechanism is as follows: * * <ol>//from w w w . j a v a2 s . com * <li>For each UpdateOp try to read the assigned document from the cache. * Add them to {@code oldDocs}.</li> * <li>Prepare a list of all UpdateOps that doesn't have their documents and * read them in one find() call. Add results to {@code oldDocs}.</li> * <li>Prepare a bulk update. For each remaining UpdateOp add following * operation: * <ul> * <li>Find document with the same id and the same mod_count as in the * {@code oldDocs}.</li> * <li>Apply changes from the UpdateOps.</li> * </ul> * </li> * <li>Execute the bulk update.</li> * </ol> * * If some other process modifies the target documents between points 2 and * 3, the mod_count will be increased as well and the bulk update will fail * for the concurrently modified docs. The method will then remove the * failed documents from the {@code oldDocs} and restart the process from * point 2. It will stop after 3rd iteration. 
*/ @SuppressWarnings("unchecked") @CheckForNull @Override public <T extends Document> List<T> createOrUpdate(Collection<T> collection, List<UpdateOp> updateOps) { log("createOrUpdate", updateOps); Map<String, UpdateOp> operationsToCover = new LinkedHashMap<String, UpdateOp>(); List<UpdateOp> duplicates = new ArrayList<UpdateOp>(); Map<UpdateOp, T> results = new LinkedHashMap<UpdateOp, T>(); final Stopwatch watch = startWatch(); try { for (UpdateOp updateOp : updateOps) { UpdateUtils.assertUnconditional(updateOp); UpdateOp clone = updateOp.copy(); if (operationsToCover.containsKey(updateOp.getId())) { duplicates.add(clone); } else { operationsToCover.put(updateOp.getId(), clone); } results.put(clone, null); } Map<String, T> oldDocs = new HashMap<String, T>(); if (collection == Collection.NODES) { oldDocs.putAll((Map<String, T>) getCachedNodes(operationsToCover.keySet())); } for (int i = 0; i <= bulkRetries; i++) { if (operationsToCover.size() <= 2) { // bulkUpdate() method invokes Mongo twice, so sending 2 updates // in bulk mode wouldn't result in any performance gain break; } for (List<UpdateOp> partition : Lists.partition(Lists.newArrayList(operationsToCover.values()), bulkSize)) { Map<UpdateOp, T> successfulUpdates = bulkUpdate(collection, partition, oldDocs); results.putAll(successfulUpdates); operationsToCover.values().removeAll(successfulUpdates.keySet()); } } // if there are some changes left, we'll apply them one after another Iterator<UpdateOp> it = Iterators.concat(operationsToCover.values().iterator(), duplicates.iterator()); while (it.hasNext()) { UpdateOp op = it.next(); it.remove(); T oldDoc = createOrUpdate(collection, op); if (oldDoc != null) { results.put(op, oldDoc); } } } finally { stats.doneCreateOrUpdate(watch.elapsed(TimeUnit.NANOSECONDS), collection, Lists.transform(updateOps, new Function<UpdateOp, String>() { @Override public String apply(UpdateOp input) { return input.getId(); } })); } List<T> resultList = new 
ArrayList<T>(results.values()); log("createOrUpdate returns", resultList); return resultList; }