List of usage examples for com.google.common.util.concurrent.Futures.successfulAsList
@Beta @CheckReturnValue public static <V> ListenableFuture<List<V>> successfulAsList(Iterable<? extends ListenableFuture<? extends V>> futures)
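Before the project-sized examples below, here is a minimal, self-contained sketch of the core behavior (the class and variable names and the use of immediate futures are illustrative, not taken from the examples): successfulAsList combines the input futures into a single future whose value is a list of results in the same order as the inputs, and the slot for any input that failed or was cancelled is filled with null rather than failing the combined future.

import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import java.util.List;

public class SuccessfulAsListExample
{
  public static void main(String[] args) throws Exception
  {
    ListenableFuture<String> ok = Futures.immediateFuture("first");
    ListenableFuture<String> failed =
        Futures.immediateFailedFuture(new IllegalStateException("simulated failure"));

    // The combined future still succeeds; the failed input is reported as null, order preserved.
    List<String> results = Futures.successfulAsList(ImmutableList.of(ok, failed)).get();
    System.out.println(results); // prints [first, null]
  }
}

This null-for-failure contract is why the Druid examples below check each element of the returned list for null (or empty) before using it: a null slot means the corresponding task's future failed or was cancelled.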
From source file: io.druid.indexing.kafka.supervisor.KafkaSupervisor.java
private KafkaSupervisorReport generateReport(boolean includeOffsets)
{
  int numPartitions = 0;
  for (Map<Integer, Long> partitionGroup : partitionGroups.values()) {
    numPartitions += partitionGroup.size();
  }

  KafkaSupervisorReport report = new KafkaSupervisorReport(
      dataSource,
      DateTime.now(),
      ioConfig.getTopic(),
      numPartitions,
      ioConfig.getReplicas(),
      ioConfig.getTaskDuration().getMillis() / 1000
  );

  List<TaskReportData> taskReports = Lists.newArrayList();
  List<ListenableFuture<Map<Integer, Long>>> futures = Lists.newArrayList();

  try {
    for (TaskGroup taskGroup : taskGroups.values()) {
      for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) {
        String taskId = entry.getKey();
        DateTime startTime = entry.getValue().startTime;
        Long remainingSeconds = null;
        if (startTime != null) {
          remainingSeconds = Math.max(
              0,
              ioConfig.getTaskDuration().getMillis() - (DateTime.now().getMillis() - startTime.getMillis())
          ) / 1000;
        }

        taskReports.add(
            new TaskReportData(
                taskId,
                (includeOffsets ? taskGroup.partitionOffsets : null),
                null,
                startTime,
                remainingSeconds,
                TaskReportData.TaskType.ACTIVE
            )
        );

        if (includeOffsets) {
          futures.add(taskClient.getCurrentOffsetsAsync(taskId, false));
        }
      }
    }

    for (List<TaskGroup> taskGroups : pendingCompletionTaskGroups.values()) {
      for (TaskGroup taskGroup : taskGroups) {
        for (Map.Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) {
          String taskId = entry.getKey();
          DateTime startTime = entry.getValue().startTime;
          Long remainingSeconds = null;
          if (taskGroup.completionTimeout != null) {
            remainingSeconds = Math.max(0, taskGroup.completionTimeout.getMillis() - DateTime.now().getMillis()) / 1000;
          }

          taskReports.add(
              new TaskReportData(
                  taskId,
                  (includeOffsets ? taskGroup.partitionOffsets : null),
                  null,
                  startTime,
                  remainingSeconds,
                  TaskReportData.TaskType.PUBLISHING
              )
          );

          if (includeOffsets) {
            futures.add(taskClient.getCurrentOffsetsAsync(taskId, false));
          }
        }
      }
    }

    List<Map<Integer, Long>> results = Futures.successfulAsList(futures).get();
    for (int i = 0; i < taskReports.size(); i++) {
      TaskReportData reportData = taskReports.get(i);
      if (includeOffsets) {
        reportData.setCurrentOffsets(results.get(i));
      }
      report.addTask(reportData);
    }
  }
  catch (Exception e) {
    log.warn(e, "Failed to generate status report");
  }

  return report;
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
private void checkTaskDuration() throws InterruptedException, ExecutionException, TimeoutException
{
  final List<ListenableFuture<Map<Integer, Long>>> futures = Lists.newArrayList();
  final List<Integer> futureGroupIds = Lists.newArrayList();

  for (Entry<Integer, TaskGroup> entry : taskGroups.entrySet()) {
    Integer groupId = entry.getKey();
    TaskGroup group = entry.getValue();

    // find the longest running task from this group
    DateTime earliestTaskStart = DateTimes.nowUtc();
    for (TaskData taskData : group.tasks.values()) {
      // startTime can be null if kafkaSupervisor is stopped gracefully before processing any runNotice
      if (taskData.startTime != null && earliestTaskStart.isAfter(taskData.startTime)) {
        earliestTaskStart = taskData.startTime;
      }
    }

    // if this task has run longer than the configured duration, signal all tasks in the group to persist
    if (earliestTaskStart.plus(ioConfig.getTaskDuration()).isBeforeNow()) {
      log.info("Task group [%d] has run for [%s]", groupId, ioConfig.getTaskDuration());
      futureGroupIds.add(groupId);
      futures.add(checkpointTaskGroup(group, true));
    }
  }

  List<Map<Integer, Long>> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
  for (int j = 0; j < results.size(); j++) {
    Integer groupId = futureGroupIds.get(j);
    TaskGroup group = taskGroups.get(groupId);
    Map<Integer, Long> endOffsets = results.get(j);

    if (endOffsets != null) {
      // set a timeout and put this group in pendingCompletionTaskGroups so that it can be monitored for completion
      group.completionTimeout = DateTimes.nowUtc().plus(ioConfig.getCompletionTimeout());
      pendingCompletionTaskGroups.computeIfAbsent(groupId, k -> new CopyOnWriteArrayList<>()).add(group);

      // set endOffsets as the next startOffsets
      for (Entry<Integer, Long> entry : endOffsets.entrySet()) {
        partitionGroups.get(groupId).put(entry.getKey(), entry.getValue());
      }
    } else {
      log.warn(
          "All tasks in group [%s] failed to transition to publishing state, killing tasks [%s]",
          groupId,
          group.taskIds()
      );
      for (String id : group.taskIds()) {
        killTask(id);
      }
      // clear partitionGroups, so that latest offsets from db is used as start offsets not the stale ones
      // if tasks did some successful incremental handoffs
      partitionGroups.get(groupId).replaceAll((partition, offset) -> NOT_SET);
    }

    // remove this task group from the list of current task groups now that it has been handled
    taskGroups.remove(groupId);
  }
}
From source file: io.druid.indexing.jdbc.supervisor.JDBCSupervisor.java
private void updateCurrentOffsets() throws InterruptedException, ExecutionException, TimeoutException
{
  log.info("updateCurrentOffsets called");
  final List<ListenableFuture<Void>> futures = Stream
      .concat(
          taskGroups.values().stream().flatMap(taskGroup -> taskGroup.tasks.entrySet().stream()),
          pendingCompletionTaskGroups.values().stream().flatMap(List::stream)
              .flatMap(taskGroup -> taskGroup.tasks.entrySet().stream())
      )
      .map(task -> Futures.transform(
          taskClient.getCurrentOffsetsAsync(task.getKey(), false),
          (Function<Map<Integer, Long>, Void>) (currentOffsets) -> {
            log.info("TaskClient currentOffsets is [%s]", currentOffsets);
            if (currentOffsets != null && !currentOffsets.isEmpty()) {
              task.getValue().currentOffsets = currentOffsets;
              log.info("task.getValue().currentOffsets is " + task.getValue().currentOffsets);
            }
            return null;
          }
      ))
      .collect(Collectors.toList());

  log.info("CurrentOffsets size is " + taskGroups.values().size());
  Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
private ListenableFuture<Map<Integer, Long>> checkpointTaskGroup(final TaskGroup taskGroup, final boolean finalize)
{
  if (finalize) {
    // 1) Check if any task completed (in which case we're done) and kill unassigned tasks
    Iterator<Entry<String, TaskData>> i = taskGroup.tasks.entrySet().iterator();
    while (i.hasNext()) {
      Entry<String, TaskData> taskEntry = i.next();
      String taskId = taskEntry.getKey();
      TaskData task = taskEntry.getValue();

      // task.status can be null if kafkaSupervisor is stopped gracefully before processing any runNotice.
      if (task.status != null) {
        if (task.status.isSuccess()) {
          // If any task in this group has already completed, stop the rest of the tasks in the group and return.
          // This will cause us to create a new set of tasks next cycle that will start from the offsets in
          // metadata store (which will have advanced if we succeeded in publishing and will remain the same if
          // publishing failed and we need to re-ingest)
          return Futures.transform(stopTasksInGroup(taskGroup), new Function<Object, Map<Integer, Long>>()
          {
            @Nullable
            @Override
            public Map<Integer, Long> apply(@Nullable Object input)
            {
              return null;
            }
          });
        }

        if (task.status.isRunnable()) {
          if (taskInfoProvider.getTaskLocation(taskId).equals(TaskLocation.unknown())) {
            log.info("Killing task [%s] which hasn't been assigned to a worker", taskId);
            killTask(taskId);
            i.remove();
          }
        }
      }
    }
  }

  // 2) Pause running tasks
  final List<ListenableFuture<Map<Integer, Long>>> pauseFutures = Lists.newArrayList();
  final List<String> pauseTaskIds = ImmutableList.copyOf(taskGroup.taskIds());
  for (final String taskId : pauseTaskIds) {
    pauseFutures.add(taskClient.pauseAsync(taskId));
  }

  return Futures.transform(
      Futures.successfulAsList(pauseFutures),
      new Function<List<Map<Integer, Long>>, Map<Integer, Long>>()
      {
        @Nullable
        @Override
        public Map<Integer, Long> apply(List<Map<Integer, Long>> input)
        {
          // 3) Build a map of the highest offset read by any task in the group for each partition
          final Map<Integer, Long> endOffsets = new HashMap<>();
          for (int i = 0; i < input.size(); i++) {
            Map<Integer, Long> result = input.get(i);

            if (result == null || result.isEmpty()) { // kill tasks that didn't return a value
              String taskId = pauseTaskIds.get(i);
              log.warn("Task [%s] failed to respond to [pause] in a timely manner, killing task", taskId);
              killTask(taskId);
              taskGroup.tasks.remove(taskId);
            } else { // otherwise build a map of the highest offsets seen
              for (Entry<Integer, Long> offset : result.entrySet()) {
                if (!endOffsets.containsKey(offset.getKey())
                    || endOffsets.get(offset.getKey()).compareTo(offset.getValue()) < 0) {
                  endOffsets.put(offset.getKey(), offset.getValue());
                }
              }
            }
          }

          // 4) Set the end offsets for each task to the values from step 3 and resume the tasks. All the tasks should
          //    finish reading and start publishing within a short period, depending on how in sync the tasks were.
          final List<ListenableFuture<Boolean>> setEndOffsetFutures = Lists.newArrayList();
          final List<String> setEndOffsetTaskIds = ImmutableList.copyOf(taskGroup.taskIds());

          if (setEndOffsetTaskIds.isEmpty()) {
            log.info("All tasks in taskGroup [%d] have failed, tasks will be re-created", taskGroup.groupId);
            return null;
          }

          try {
            if (endOffsets.equals(taskGroup.sequenceOffsets.lastEntry().getValue())) {
              log.warn(
                  "Checkpoint [%s] is same as the start offsets [%s] of latest sequence for the task group [%d]",
                  endOffsets,
                  taskGroup.sequenceOffsets.lastEntry().getValue(),
                  taskGroup.groupId
              );
            }

            log.info("Setting endOffsets for tasks in taskGroup [%d] to %s and resuming", taskGroup.groupId, endOffsets);
            for (final String taskId : setEndOffsetTaskIds) {
              setEndOffsetFutures.add(taskClient.setEndOffsetsAsync(taskId, endOffsets, finalize));
            }

            List<Boolean> results = Futures.successfulAsList(setEndOffsetFutures)
                .get(futureTimeoutInSeconds, TimeUnit.SECONDS);
            for (int i = 0; i < results.size(); i++) {
              if (results.get(i) == null || !results.get(i)) {
                String taskId = setEndOffsetTaskIds.get(i);
                log.warn("Task [%s] failed to respond to [set end offsets] in a timely manner, killing task", taskId);
                killTask(taskId);
                taskGroup.tasks.remove(taskId);
              }
            }
          }
          catch (Exception e) {
            log.error("Something bad happened [%s]", e.getMessage());
            Throwables.propagate(e);
          }

          if (taskGroup.tasks.isEmpty()) {
            log.info("All tasks in taskGroup [%d] have failed, tasks will be re-created", taskGroup.groupId);
            return null;
          }

          return endOffsets;
        }
      },
      workerExec
  );
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
/**
 * Monitors [pendingCompletionTaskGroups] for tasks that have completed. If any task in a task group has completed, we
 * can safely stop the rest of the tasks in that group. If a task group has exceeded its publishing timeout, then
 * we need to stop all tasks in not only that task group but also 1) any subsequent task group that is also pending
 * completion and 2) the current task group that is running, because the assumption that we have handled up to the
 * starting offset for subsequent task groups is no longer valid, and subsequent tasks would fail as soon as they
 * attempted to publish because of the contiguous range consistency check.
 */
private void checkPendingCompletionTasks() throws ExecutionException, InterruptedException, TimeoutException
{
  List<ListenableFuture<?>> futures = Lists.newArrayList();

  for (Entry<Integer, CopyOnWriteArrayList<TaskGroup>> pendingGroupList : pendingCompletionTaskGroups.entrySet()) {
    boolean stopTasksInTaskGroup = false;
    Integer groupId = pendingGroupList.getKey();
    CopyOnWriteArrayList<TaskGroup> taskGroupList = pendingGroupList.getValue();
    List<TaskGroup> toRemove = Lists.newArrayList();

    for (TaskGroup group : taskGroupList) {
      boolean foundSuccess = false, entireTaskGroupFailed = false;

      if (stopTasksInTaskGroup) {
        // One of the earlier groups that was handling the same partition set timed out before the segments were
        // published so stop any additional groups handling the same partition set that are pending completion.
        futures.add(stopTasksInGroup(group));
        toRemove.add(group);
        continue;
      }

      Iterator<Entry<String, TaskData>> iTask = group.tasks.entrySet().iterator();
      while (iTask.hasNext()) {
        final Entry<String, TaskData> entry = iTask.next();
        final String taskId = entry.getKey();
        final TaskData taskData = entry.getValue();

        Preconditions.checkNotNull(taskData.status, "WTH? task[%s] has a null status", taskId);

        if (taskData.status.isFailure()) {
          iTask.remove(); // remove failed task
          if (group.tasks.isEmpty()) {
            // if all tasks in the group have failed, just nuke all task groups with this partition set and restart
            entireTaskGroupFailed = true;
            break;
          }
        }

        if (taskData.status.isSuccess()) {
          // If one of the pending completion tasks was successful, stop the rest of the tasks in the group as
          // we no longer need them to publish their segment.
          log.info("Task [%s] completed successfully, stopping tasks %s", taskId, group.taskIds());
          futures.add(stopTasksInGroup(group));
          foundSuccess = true;
          toRemove.add(group); // remove the TaskGroup from the list of pending completion task groups
          break; // skip iterating the rest of the tasks in this group as they've all been stopped now
        }
      }

      if ((!foundSuccess && group.completionTimeout.isBeforeNow()) || entireTaskGroupFailed) {
        if (entireTaskGroupFailed) {
          log.warn("All tasks in group [%d] failed to publish, killing all tasks for these partitions", groupId);
        } else {
          log.makeAlert(
              "No task in [%s] for taskGroup [%d] succeeded before the completion timeout elapsed [%s]!",
              group.taskIds(),
              groupId,
              ioConfig.getCompletionTimeout()
          ).emit();
        }

        // reset partitions offsets for this task group so that they will be re-read from metadata storage
        partitionGroups.get(groupId).replaceAll((partition, offset) -> NOT_SET);
        // kill all the tasks in this pending completion group
        killTasksInGroup(group);
        // set a flag so the other pending completion groups for this set of partitions will also stop
        stopTasksInTaskGroup = true;
        // kill all the tasks in the currently reading task group and remove the bad task group
        killTasksInGroup(taskGroups.remove(groupId));
        toRemove.add(group);
      }
    }

    taskGroupList.removeAll(toRemove);
  }

  // wait for all task shutdowns to complete before returning
  Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
private void checkCurrentTaskState() throws ExecutionException, InterruptedException, TimeoutException
{
  List<ListenableFuture<?>> futures = Lists.newArrayList();
  Iterator<Entry<Integer, TaskGroup>> iTaskGroups = taskGroups.entrySet().iterator();
  while (iTaskGroups.hasNext()) {
    Entry<Integer, TaskGroup> taskGroupEntry = iTaskGroups.next();
    Integer groupId = taskGroupEntry.getKey();
    TaskGroup taskGroup = taskGroupEntry.getValue();

    // Iterate the list of known tasks in this group and:
    //   1) Kill any tasks which are not "current" (have the partitions, starting offsets, and minimumMessageTime
    //      & maximumMessageTime (if applicable) in [taskGroups])
    //   2) Remove any tasks that have failed from the list
    //   3) If any task completed successfully, stop all the tasks in this group and move to the next group

    log.debug("Task group [%d] pre-pruning: %s", groupId, taskGroup.taskIds());

    Iterator<Entry<String, TaskData>> iTasks = taskGroup.tasks.entrySet().iterator();
    while (iTasks.hasNext()) {
      Entry<String, TaskData> task = iTasks.next();
      String taskId = task.getKey();
      TaskData taskData = task.getValue();

      // stop and remove bad tasks from the task group
      if (!isTaskCurrent(groupId, taskId)) {
        log.info("Stopping task [%s] which does not match the expected offset range and ingestion spec", taskId);
        futures.add(stopTask(taskId, false));
        iTasks.remove();
        continue;
      }

      Preconditions.checkNotNull(taskData.status, "WTH? task[%s] has a null status", taskId);

      // remove failed tasks
      if (taskData.status.isFailure()) {
        iTasks.remove();
        continue;
      }

      // check for successful tasks, and if we find one, stop all tasks in the group and remove the group so it can
      // be recreated with the next set of offsets
      if (taskData.status.isSuccess()) {
        futures.add(stopTasksInGroup(taskGroup));
        iTaskGroups.remove();
        break;
      }
    }
    log.debug("Task group [%d] post-pruning: %s", groupId, taskGroup.taskIds());
  }

  // wait for all task shutdowns to complete before returning
  Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
private ListenableFuture<?> stopTasksInGroup(@Nullable TaskGroup taskGroup)
{
  if (taskGroup == null) {
    return Futures.immediateFuture(null);
  }

  final List<ListenableFuture<Void>> futures = Lists.newArrayList();
  for (Entry<String, TaskData> entry : taskGroup.tasks.entrySet()) {
    final String taskId = entry.getKey();
    final TaskData taskData = entry.getValue();
    if (taskData.status == null) {
      killTask(taskId);
    } else if (!taskData.status.isComplete()) {
      futures.add(stopTask(taskId, false));
    }
  }

  return Futures.successfulAsList(futures);
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
private void updateCurrentOffsets() throws InterruptedException, ExecutionException, TimeoutException
{
  final List<ListenableFuture<Void>> futures = Stream
      .concat(
          taskGroups.values().stream().flatMap(taskGroup -> taskGroup.tasks.entrySet().stream()),
          pendingCompletionTaskGroups.values().stream().flatMap(List::stream)
              .flatMap(taskGroup -> taskGroup.tasks.entrySet().stream())
      )
      .map(task -> Futures.transform(
          taskClient.getCurrentOffsetsAsync(task.getKey(), false),
          (Function<Map<Integer, Long>, Void>) (currentOffsets) -> {
            if (currentOffsets != null && !currentOffsets.isEmpty()) {
              task.getValue().currentOffsets = currentOffsets;
            }
            return null;
          }
      ))
      .collect(Collectors.toList());

  Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
}
From source file: org.apache.druid.indexing.kafka.supervisor.KafkaSupervisor.java
/**
 * Collect row ingestion stats from all tasks managed by this supervisor.
 *
 * @return A map of groupId->taskId->task row stats
 *
 * @throws InterruptedException
 * @throws ExecutionException
 * @throws TimeoutException
 */
private Map<String, Map<String, Object>> getCurrentTotalStats()
    throws InterruptedException, ExecutionException, TimeoutException
{
  Map<String, Map<String, Object>> allStats = Maps.newHashMap();
  final List<ListenableFuture<StatsFromTaskResult>> futures = new ArrayList<>();
  final List<Pair<Integer, String>> groupAndTaskIds = new ArrayList<>();

  for (int groupId : taskGroups.keySet()) {
    TaskGroup group = taskGroups.get(groupId);
    for (String taskId : group.taskIds()) {
      futures.add(
          Futures.transform(
              taskClient.getMovingAveragesAsync(taskId),
              (Function<Map<String, Object>, StatsFromTaskResult>) (currentStats) -> {
                return new StatsFromTaskResult(groupId, taskId, currentStats);
              }
          )
      );
      groupAndTaskIds.add(new Pair<>(groupId, taskId));
    }
  }

  for (int groupId : pendingCompletionTaskGroups.keySet()) {
    TaskGroup group = taskGroups.get(groupId);
    for (String taskId : group.taskIds()) {
      futures.add(
          Futures.transform(
              taskClient.getMovingAveragesAsync(taskId),
              (Function<Map<String, Object>, StatsFromTaskResult>) (currentStats) -> {
                return new StatsFromTaskResult(groupId, taskId, currentStats);
              }
          )
      );
      groupAndTaskIds.add(new Pair<>(groupId, taskId));
    }
  }

  List<StatsFromTaskResult> results = Futures.successfulAsList(futures).get(futureTimeoutInSeconds, TimeUnit.SECONDS);
  for (int i = 0; i < results.size(); i++) {
    StatsFromTaskResult result = results.get(i);
    if (result != null) {
      Map<String, Object> groupMap = allStats.computeIfAbsent(result.getGroupId(), k -> Maps.newHashMap());
      groupMap.put(result.getTaskId(), result.getStats());
    } else {
      Pair<Integer, String> groupAndTaskId = groupAndTaskIds.get(i);
      log.error("Failed to get stats for group[%d]-task[%s]", groupAndTaskId.lhs, groupAndTaskId.rhs);
    }
  }

  return allStats;
}