List of usage examples for com.google.common.collect.Lists.partition
public static <T> List<List<T>> partition(List<T> list, int size)
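For orientation, here is a minimal, self-contained sketch of the call (the class and variable names are illustrative only): partition returns consecutive sublists of the source list, each of the requested size except possibly the last, and the sublists are views backed by the original list.

import com.google.common.collect.Lists;

import java.util.Arrays;
import java.util.List;

public class PartitionDemo {
    public static void main(String[] args) {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7);
        // consecutive chunks of size 3; the last chunk holds the remainder
        List<List<Integer>> chunks = Lists.partition(numbers, 3);
        System.out.println(chunks); // [[1, 2, 3], [4, 5, 6], [7]]
    }
}

Because the chunks are views, mutating the source list changes their contents; copy a chunk if it must outlive the source. The examples below show how real projects use this to keep SQL IN clauses, AWS filters, Solr queries, and batch inserts under hard size limits.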
From source file: org.sonar.db.DatabaseUtils.java
/**
 * Partitions a list of inputs into groups of 1,000 elements and executes a consumer on each part.
 *
 * The goal is to prevent issues with Oracle when there are more than 1,000 elements in an
 * "in ('X', 'Y', ...)" clause, and with MS SQL when there are more than 2,000 parameters in a query.
 */
public static <T> void executeLargeInputsWithoutOutput(Collection<T> input, Consumer<List<T>> consumer) {
    if (input.isEmpty()) {
        return;
    }
    List<List<T>> partitions = Lists.partition(newArrayList(input), PARTITION_SIZE_FOR_ORACLE);
    for (List<T> partition : partitions) {
        consumer.accept(partition);
    }
}
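A hypothetical call site for the helper above (the loader and mapper names are assumptions for illustration, not SonarQube code): each consumer invocation receives at most PARTITION_SIZE_FOR_ORACLE elements, so an IN clause built from one partition stays under the database limits described in the Javadoc.

// hypothetical usage: act on an arbitrarily large id set in bounded chunks
List<String> componentUuids = loadAllComponentUuids(); // may hold tens of thousands of ids
DatabaseUtils.executeLargeInputsWithoutOutput(componentUuids,
        partition -> purgeMapper.deleteComponentsByUuids(partition));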
From source file: org.apache.druid.indexing.overlord.autoscaling.ec2.EC2AutoScaler.java
@Override
public List<String> idToIpLookup(List<String> nodeIds) {
    final List<String> retVal = FluentIterable
            // chunk requests to avoid hitting default AWS limits on filters
            .from(Lists.partition(nodeIds, MAX_AWS_FILTER_VALUES))
            .transformAndConcat(new Function<List<String>, Iterable<Reservation>>() {
                @Override
                public Iterable<Reservation> apply(List<String> input) {
                    return amazonEC2Client.describeInstances(
                            new DescribeInstancesRequest().withFilters(new Filter("instance-id", input)))
                            .getReservations();
                }
            })
            .transformAndConcat(new Function<Reservation, Iterable<Instance>>() {
                @Override
                public Iterable<Instance> apply(Reservation reservation) {
                    return reservation.getInstances();
                }
            })
            .transform(new Function<Instance, String>() {
                @Override
                public String apply(Instance instance) {
                    return instance.getPrivateIpAddress();
                }
            })
            .toList();
    log.debug("Performing lookup: %s --> %s", nodeIds, retVal);
    return retVal;
}
From source file: net.orzo.Calculation.java
private List<List<String>> groupResults(IntermediateResults originalResults, int numGroups) {
    /*
     * Note: In terms of performance, it is essential to split the
     * keys so that each worker has roughly the same number of items to
     * process. Unfortunately, the current solution does not contain
     * any such optimization.
     */
    List<String> keys = new ArrayList<>(originalResults.keys());
    // cannot use more workers than keys
    int calcNumGroups = Math.min(numGroups, keys.size());
    if (keys.size() > 0) {
        // e.g. 10 keys and 4 groups: itemsPerChunk = ceil(10 / 4.0) = 3,
        // yielding partitions of sizes 3, 3, 3 and 1
        int itemsPerChunk = (int) Math.ceil(keys.size() / (float) calcNumGroups);
        return Lists.partition(keys, itemsPerChunk);
    } else {
        List<List<String>> ans = new ArrayList<>();
        ans.add(keys);
        return ans;
    }
}
From source file: com.ibm.watson.developer_cloud.professor_languo.data_model.QuestionAnswerSet.java
/**
 * Divides up a train set into folds, for cross-fold validation and/or cross-fold ensembling. If
 * the number of folds is less than or equal to 0, then this generates a single fold with all of
 * the questions being both in AND out of the fold (e.g., the set becomes both train and test for
 * cross-fold validation). Otherwise, numFolds folds are generated, and for each fold every
 * question in the input set is either in the fold or out of the fold (and none is in both).
 * Questions are assigned randomly to folds. All questions are in exactly one fold.
 *
 * @param numFolds Number of folds if positive. Otherwise, produce one superfold with all
 *        questions in AND out.
 * @return A list of folds.
 */
public List<Fold> fold(int numFolds) {
    List<Fold> retval = new ArrayList<Fold>();
    if (numFolds <= 0) {
        retval.add(new Fold(this, this));
    } else {
        List<Question> questions = new ArrayList<>(getQuestions());
        Collections.shuffle(questions, random);
        int numQuestions = questions.size();
        int foldSize = numQuestions / numFolds;
        List<List<Question>> allFoldQuestions = Lists.partition(questions, foldSize);
        for (List<Question> inFold : allFoldQuestions) {
            List<Question> outOfFold = new ArrayList<>(getQuestions());
            outOfFold.removeAll(inFold);
            Fold fold = new Fold(makeSet(inFold, source), makeSet(outOfFold, source));
            retval.add(fold);
        }
    }
    return retval;
}
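One subtlety the source does not handle: foldSize uses integer division, so when numQuestions is not a multiple of numFolds, Lists.partition produces one extra, smaller trailing fold. A quick standalone illustration of the arithmetic (names are illustrative only):

import com.google.common.collect.Lists;

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class FoldSizeDemo {
    public static void main(String[] args) {
        // 10 questions into 3 requested folds: foldSize = 10 / 3 = 3,
        // so partition() yields FOUR sublists (3 + 3 + 3 + 1)
        List<Integer> questions = IntStream.rangeClosed(1, 10).boxed().collect(Collectors.toList());
        List<List<Integer>> folds = Lists.partition(questions, 10 / 3);
        System.out.println(folds.size()); // 4, one more fold than requested
    }
}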
From source file: org.jboss.hal.ballroom.dataprovider.DataProvider.java
private List<T> paged(List<T> values) {
    List<List<T>> pages = Lists.partition(values, pageInfo.getPageSize());
    return pages.get(min(pageInfo.getPage(), pages.size() - 1));
}
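Note that an empty values list leaves pages empty, so pages.get(min(page, -1)) would throw IndexOutOfBoundsException; presumably callers guarantee non-empty data. A defensive variant (a sketch under that assumption, not the project's actual code) could short-circuit first:

private List<T> paged(List<T> values) {
    if (values.isEmpty()) {
        return values; // no pages to index into
    }
    List<List<T>> pages = Lists.partition(values, pageInfo.getPageSize());
    return pages.get(min(pageInfo.getPage(), pages.size() - 1));
}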
From source file: nl.knaw.huygens.timbuctoo.index.solr.SolrIndex.java
@Override
public List<Map<String, Object>> getDataByIds(List<String> ids, List<SortParameter> sort)
        throws SearchException {
    final int maxNumberOfIdsSolrSupports = 1000;
    List<List<String>> idsPart = Lists.partition(ids, maxNumberOfIdsSolrSupports);
    List<Map<String, Object>> results = Lists.newArrayList();
    List<SolrQuery.SortClause> sortClauses = Lists.newArrayList();
    for (SortParameter sortParameter : sort) {
        SolrQuery.ORDER order = SolrQuery.ORDER.desc;
        if (SortDirection.ASCENDING.equals(sortParameter.getDirection())) {
            order = SolrQuery.ORDER.asc;
        }
        sortClauses.add(new SolrQuery.SortClause(sortParameter.getFieldname(), order));
    }
    for (List<String> part : idsPart) {
        addResultsOfPartialQuery(part, results, sortClauses);
    }
    return results;
}
From source file: hu.bme.mit.trainbenchmark.benchmark.fourstore.driver.FourStoreDriver.java
public void insertEdgesWithVertex(final Multimap<String, String> edges, final String edgeType,
        final String targetVertexType) throws IOException {
    if (edges.isEmpty()) {
        return;
    }
    final ArrayList<String> sourceVertices = new ArrayList<>(edges.keySet());
    final List<List<String>> sourceVerticesPartitions = Lists.partition(sourceVertices, PARTITION_SIZE);
    for (final List<String> sourceVerticesPartition : sourceVerticesPartitions) {
        final Multimap<String, String> edgePartition = ArrayListMultimap.create();
        for (final String sourceVertexURI : sourceVerticesPartition) {
            final Collection<String> targetVertexURIs = edges.get(sourceVertexURI);
            edgePartition.putAll(sourceVertexURI, targetVertexURIs);
        }
        insertEdgesWithVertexPartition(edgePartition, edgeType, targetVertexType);
    }
}
From source file: edu.cmu.lti.oaqa.baseqa.providers.ml.classifiers.ClassifierProvider.java
default List<List<String>> crossTrainPredict(List<Map<String, Double>> X, List<String> Y,
        ResampleType resampleType, int limit) throws AnalysisEngineProcessException {
    Set<Integer> indexes = IntStream.range(0, X.size()).boxed().collect(toSet());
    List<Integer> indexList = new ArrayList<>(indexes);
    Collections.shuffle(indexList);
    // despite its name, nfolds is the chunk size: ceil(n / 10.0) items per
    // partition yields roughly ten cross-validation folds
    int nfolds = (int) Math.ceil(indexList.size() / 10.0);
    List<List<String>> ret = IntStream.range(0, X.size()).mapToObj(i -> new ArrayList<String>())
            .collect(toList());
    for (List<Integer> cvTestIndexes : Lists.partition(indexList, nfolds)) {
        List<Map<String, Double>> cvTrainX = new ArrayList<>();
        List<String> cvTrainY = new ArrayList<>();
        Sets.difference(indexes, new HashSet<>(cvTestIndexes)).forEach(cvTrainIndex -> {
            cvTrainX.add(X.get(cvTrainIndex));
            cvTrainY.add(Y.get(cvTrainIndex));
        });
        train(cvTrainX, cvTrainY, resampleType, false);
        for (int cvTestIndex : cvTestIndexes) {
            List<String> result = predict(X.get(cvTestIndex), limit).stream().collect(toList());
            ret.set(cvTestIndex, result);
        }
    }
    return ret;
}
From source file: com.cloudant.sync.replication.PullStrategy.java
private int processOneChangesBatch(ChangesResultWrapper changeFeeds)
        throws ExecutionException, InterruptedException, DocumentException {
    String feed = String.format("Change feed: { last_seq: %s, change size: %s}", changeFeeds.getLastSeq(),
            changeFeeds.getResults().size());
    logger.info(feed);

    Multimap<String, String> openRevs = changeFeeds.openRevisions(0, changeFeeds.size());
    Map<String, Collection<String>> missingRevisions = this.targetDb.getDbCore().revsDiff(openRevs);

    int changesProcessed = 0;

    // Process the changes in batches
    List<String> ids = Lists.newArrayList(missingRevisions.keySet());
    List<List<String>> batches = Lists.partition(ids, this.insertBatchSize);
    for (List<String> batch : batches) {
        List<BatchItem> batchesToInsert = new ArrayList<BatchItem>();
        if (this.state.cancel) {
            break;
        }
        try {
            Iterable<DocumentRevsList> result = createTask(batch, missingRevisions);
            for (DocumentRevsList revsList : result) {
                // We promise not to insert documents after cancel is set
                if (this.state.cancel) {
                    break;
                }

                // attachments, keyed by docId and revId, so that
                // we can add the attachments to the correct leaf
                // nodes
                HashMap<String[], List<PreparedAttachment>> atts =
                        new HashMap<String[], List<PreparedAttachment>>();

                // now put together a list of attachments we need to download
                if (!this.pullAttachmentsInline) {
                    try {
                        for (DocumentRevs documentRevs : revsList) {
                            Map<String, Object> attachments = documentRevs.getAttachments();
                            // keep track of attachments we are going to prepare
                            ArrayList<PreparedAttachment> preparedAtts = new ArrayList<PreparedAttachment>();
                            atts.put(new String[] { documentRevs.getId(), documentRevs.getRev() },
                                    preparedAtts);

                            for (Map.Entry<String, Object> entry : attachments.entrySet()) {
                                Map attachmentMetadata = (Map) entry.getValue();
                                int revpos = (Integer) attachmentMetadata.get("revpos");
                                String contentType = (String) attachmentMetadata.get("content_type");
                                String encoding = (String) attachmentMetadata.get("encoding");
                                long length = (Integer) attachmentMetadata.get("length");
                                // encodedLength can default to 0 if it's not encoded
                                long encodedLength = 0;
                                if (Attachment.getEncodingFromString(encoding) != Attachment.Encoding.Plain) {
                                    encodedLength = (Integer) attachmentMetadata.get("encoded_length");
                                }

                                // do we already have the attachment @ this revpos?
                                // look back up the tree for this document and see:
                                // if we already have it, then we don't need to fetch it
                                DocumentRevs.Revisions revs = documentRevs.getRevisions();
                                int offset = revs.getStart() - revpos;
                                if (offset >= 0 && offset < revs.getIds().size()) {
                                    String revId = String.valueOf(revpos) + "-" + revs.getIds().get(offset);
                                    Attachment a = this.targetDb.getDbCore()
                                            .getAttachment(documentRevs.getId(), revId, entry.getKey());
                                    if (a != null) {
                                        // skip attachment, already got it
                                        continue;
                                    }
                                }
                                UnsavedStreamAttachment usa = this.sourceDb.getAttachmentStream(
                                        documentRevs.getId(), documentRevs.getRev(), entry.getKey(),
                                        contentType, encoding);
                                // by preparing the attachment here, it is downloaded outside
                                // of the database transaction
                                preparedAtts.add(this.targetDb.prepareAttachment(usa, length, encodedLength));
                            }
                        }
                    } catch (Exception e) {
                        logger.log(Level.SEVERE, "There was a problem downloading an attachment to the"
                                + " datastore, terminating replication", e);
                        this.state.cancel = true;
                    }
                }

                if (this.state.cancel) {
                    break;
                }

                batchesToInsert.add(new BatchItem(revsList, atts));
                changesProcessed++;
            }
            this.targetDb.bulkInsert(batchesToInsert, this.pullAttachmentsInline);
        } catch (Exception e) {
            throw new ExecutionException(e);
        }
    }

    if (!this.state.cancel) {
        try {
            this.targetDb.putCheckpoint(this.getReplicationId(), changeFeeds.getLastSeq());
        } catch (DatastoreException e) {
            logger.log(Level.WARNING, "Failed to put checkpoint doc, next replication will "
                    + "start from previous checkpoint", e);
        }
    }

    return changesProcessed;
}
From source file: org.eclipse.sirius.common.ui.tools.api.navigator.GroupingContentProvider.java
private Object[] defaultGroupChildren(Object parent, Object[] children) {
    if (children.length > getTriggerSize()) {
        // e.g. 2,500 children with groupSize 1,000 become three GroupingItem
        // nodes at offsets 0, 1000 and 2000
        List<List<Object>> partition = Lists.partition(Arrays.asList(children), config.groupSize);
        Object[] result = new Object[partition.size()];
        int indexOfResult = 0;
        for (List<Object> indexedChildren : partition) {
            int currentOffset = indexOfResult * config.groupSize;
            GroupingItem currentGroup = new GroupingItem(currentOffset, parent, indexedChildren);
            result[indexOfResult] = currentGroup;
            indexOfResult++;
        }
        return result;
    } else {
        return children;
    }
}