/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.indexing.kafka;

import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Charsets;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.metamx.emitter.EmittingLogger;
import com.metamx.http.client.HttpClient;
import com.metamx.http.client.Request;
import com.metamx.http.client.response.FullResponseHandler;
import com.metamx.http.client.response.FullResponseHolder;
import io.druid.concurrent.Execs;
import io.druid.indexing.common.RetryPolicy;
import io.druid.indexing.common.RetryPolicyConfig;
import io.druid.indexing.common.RetryPolicyFactory;
import io.druid.indexing.common.TaskInfoProvider;
import io.druid.indexing.common.TaskLocation;
import io.druid.indexing.common.TaskStatus;
import io.druid.java.util.common.IAE;
import io.druid.java.util.common.IOE;
import io.druid.java.util.common.ISE;
import io.druid.java.util.common.StringUtils;
import io.druid.segment.realtime.firehose.ChatHandlerResource;
import org.jboss.netty.channel.ChannelException;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.joda.time.DateTime;
import org.joda.time.Duration;
import org.joda.time.Period;

import javax.ws.rs.core.MediaType;
import java.io.IOException;
import java.net.Socket;
import java.net.URI;
import java.util.Map;
import java.util.concurrent.Callable;

public class KafkaIndexTaskClient
{
  public static class NoTaskLocationException extends RuntimeException
  {
    public NoTaskLocationException(String message)
    {
      super(message);
    }
  }

  public static class TaskNotRunnableException extends RuntimeException
  {
    public TaskNotRunnableException(String message)
    {
      super(message);
    }
  }

  public static final int MAX_RETRY_WAIT_SECONDS = 10;

  private static final int MIN_RETRY_WAIT_SECONDS = 2;
  private static final EmittingLogger log = new EmittingLogger(KafkaIndexTaskClient.class);
  private static final String BASE_PATH = "/druid/worker/v1/chat";
  private static final int TASK_MISMATCH_RETRY_DELAY_SECONDS = 5;

  private final HttpClient httpClient;
  private final ObjectMapper jsonMapper;
  private final TaskInfoProvider taskInfoProvider;
  private final Duration httpTimeout;
  private final RetryPolicyFactory retryPolicyFactory;
  private final ListeningExecutorService executorService;
  private final long numRetries;

  public KafkaIndexTaskClient(
      HttpClient httpClient,
      ObjectMapper jsonMapper,
      TaskInfoProvider taskInfoProvider,
      String dataSource,
      int numThreads,
      Duration httpTimeout,
      long numRetries
  )
  {
    this.httpClient = httpClient;
    this.jsonMapper = jsonMapper;
    this.taskInfoProvider = taskInfoProvider;
    this.httpTimeout = httpTimeout;
    this.numRetries = numRetries;
    this.retryPolicyFactory = createRetryPolicyFactory();

    this.executorService = MoreExecutors.listeningDecorator(
        Execs.multiThreaded(numThreads, StringUtils.format("KafkaIndexTaskClient-%s-%%d", dataSource))
    );
  }

  public void close()
  {
    executorService.shutdownNow();
  }

  public boolean stop(final String id, final boolean publish)
  {
    log.debug("Stop task[%s] publish[%s]", id, publish);

    try {
      final FullResponseHolder response = submitRequest(
          id,
          HttpMethod.POST,
          "stop",
          publish ? "publish=true" : null,
          true
      );
      return response.getStatus().getCode() / 100 == 2;
    }
    catch (NoTaskLocationException e) {
      return false;
    }
    catch (TaskNotRunnableException e) {
      log.info("Task [%s] couldn't be stopped because it is no longer running", id);
      return true;
    }
    catch (Exception e) {
      log.warn(e, "Exception while stopping task [%s]", id);
      return false;
    }
  }

  public boolean resume(final String id)
  {
    log.debug("Resume task[%s]", id);

    try {
      final FullResponseHolder response = submitRequest(id, HttpMethod.POST, "resume", null, true);
      return response.getStatus().getCode() / 100 == 2;
    }
    catch (NoTaskLocationException e) {
      return false;
    }
  }

  public Map<Integer, Long> pause(final String id)
  {
    return pause(id, 0);
  }

  public Map<Integer, Long> pause(final String id, final long timeout)
  {
    log.debug("Pause task[%s] timeout[%d]", id, timeout);

    try {
      final FullResponseHolder response = submitRequest(
          id,
          HttpMethod.POST,
          "pause",
          timeout > 0 ? StringUtils.format("timeout=%d", timeout) : null,
          true
      );

      if (response.getStatus().equals(HttpResponseStatus.OK)) {
        log.info("Task [%s] paused successfully", id);
        return jsonMapper.readValue(response.getContent(), new TypeReference<Map<Integer, Long>>() {});
      }

      final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
      while (true) {
        if (getStatus(id) == KafkaIndexTask.Status.PAUSED) {
          return getCurrentOffsets(id, true);
        }

        final Duration delay = retryPolicy.getAndIncrementRetryDelay();
        if (delay == null) {
          log.error("Task [%s] failed to pause, aborting", id);
          throw new ISE("Task [%s] failed to pause, aborting", id);
        } else {
          final long sleepTime = delay.getMillis();
          log.info(
              "Still waiting for task [%s] to pause; will try again in [%s]",
              id,
              new Duration(sleepTime).toString()
          );
          Thread.sleep(sleepTime);
        }
      }
    }
    catch (NoTaskLocationException e) {
      log.error("Exception [%s] while pausing Task [%s]", e.getMessage(), id);
      return ImmutableMap.of();
    }
    catch (IOException | InterruptedException e) {
      log.error("Exception [%s] while pausing Task [%s]", e.getMessage(), id);
      throw Throwables.propagate(e);
    }
  }

  public KafkaIndexTask.Status getStatus(final String id)
  {
    log.debug("GetStatus task[%s]", id);

    try {
      final FullResponseHolder response = submitRequest(id, HttpMethod.GET, "status", null, true);
      return jsonMapper.readValue(response.getContent(), KafkaIndexTask.Status.class);
    }
    catch (NoTaskLocationException e) {
      return KafkaIndexTask.Status.NOT_STARTED;
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  public DateTime getStartTime(final String id)
  {
    log.debug("GetStartTime task[%s]", id);

    try {
      final FullResponseHolder response = submitRequest(id, HttpMethod.GET, "time/start", null, true);
      return response.getContent() == null || response.getContent().isEmpty()
             ? null
             : jsonMapper.readValue(response.getContent(), DateTime.class);
    }
    catch (NoTaskLocationException e) {
      return null;
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  public Map<Integer, Long> getCurrentOffsets(final String id, final boolean retry)
  {
    log.debug("GetCurrentOffsets task[%s] retry[%s]", id, retry);

    try {
      final FullResponseHolder response = submitRequest(id, HttpMethod.GET, "offsets/current", null, retry);
      return jsonMapper.readValue(response.getContent(), new TypeReference<Map<Integer, Long>>() {});
    }
    catch (NoTaskLocationException e) {
      return ImmutableMap.of();
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  public Map<Integer, Long> getEndOffsets(final String id)
  {
    log.debug("GetEndOffsets task[%s]", id);

    try {
      final FullResponseHolder response = submitRequest(id, HttpMethod.GET, "offsets/end", null, true);
      return jsonMapper.readValue(response.getContent(), new TypeReference<Map<Integer, Long>>() {});
    }
    catch (NoTaskLocationException e) {
      return ImmutableMap.of();
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  public boolean setEndOffsets(final String id, final Map<Integer, Long> endOffsets)
  {
    return setEndOffsets(id, endOffsets, false);
  }

  public boolean setEndOffsets(final String id, final Map<Integer, Long> endOffsets, final boolean resume)
  {
    log.debug("SetEndOffsets task[%s] endOffsets[%s] resume[%s]", id, endOffsets, resume);

    try {
      final FullResponseHolder response = submitRequest(
          id,
          HttpMethod.POST,
          "offsets/end",
          resume ? "resume=true" : null,
          jsonMapper.writeValueAsBytes(endOffsets),
          true
      );
      return response.getStatus().getCode() / 100 == 2;
    }
    catch (NoTaskLocationException e) {
      return false;
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  public ListenableFuture<Boolean> stopAsync(final String id, final boolean publish)
  {
    return executorService.submit(
        new Callable<Boolean>()
        {
          @Override
          public Boolean call() throws Exception
          {
            return stop(id, publish);
          }
        }
    );
  }

  public ListenableFuture<Boolean> resumeAsync(final String id)
  {
    return executorService.submit(
        new Callable<Boolean>()
        {
          @Override
          public Boolean call() throws Exception
          {
            return resume(id);
          }
        }
    );
  }

  public ListenableFuture<Map<Integer, Long>> pauseAsync(final String id)
  {
    return pauseAsync(id, 0);
  }

  public ListenableFuture<Map<Integer, Long>> pauseAsync(final String id, final long timeout)
  {
    return executorService.submit(
        new Callable<Map<Integer, Long>>()
        {
          @Override
          public Map<Integer, Long> call() throws Exception
          {
            return pause(id, timeout);
          }
        }
    );
  }

  public ListenableFuture<KafkaIndexTask.Status> getStatusAsync(final String id)
  {
    return executorService.submit(
        new Callable<KafkaIndexTask.Status>()
        {
          @Override
          public KafkaIndexTask.Status call() throws Exception
          {
            return getStatus(id);
          }
        }
    );
  }

  public ListenableFuture<DateTime> getStartTimeAsync(final String id)
  {
    return executorService.submit(
        new Callable<DateTime>()
        {
          @Override
          public DateTime call() throws Exception
          {
            return getStartTime(id);
          }
        }
    );
  }

  public ListenableFuture<Map<Integer, Long>> getCurrentOffsetsAsync(final String id, final boolean retry)
  {
    return executorService.submit(
        new Callable<Map<Integer, Long>>()
        {
          @Override
          public Map<Integer, Long> call() throws Exception
          {
            return getCurrentOffsets(id, retry);
          }
        }
    );
  }

  public ListenableFuture<Map<Integer, Long>> getEndOffsetsAsync(final String id)
  {
    return executorService.submit(
        new Callable<Map<Integer, Long>>()
        {
          @Override
          public Map<Integer, Long> call() throws Exception
          {
            return getEndOffsets(id);
          }
        }
    );
  }

  public ListenableFuture<Boolean> setEndOffsetsAsync(final String id, final Map<Integer, Long> endOffsets)
  {
    return setEndOffsetsAsync(id, endOffsets, false);
  }

  public ListenableFuture<Boolean> setEndOffsetsAsync(
      final String id,
      final Map<Integer, Long> endOffsets,
      final boolean resume
  )
  {
    return executorService.submit(
        new Callable<Boolean>()
        {
          @Override
          public Boolean call() throws Exception
          {
            return setEndOffsets(id, endOffsets, resume);
          }
        }
    );
  }

  @VisibleForTesting
  RetryPolicyFactory createRetryPolicyFactory()
  {
    // Retries [numRetries] times before giving up; this should be set long enough to handle any temporary
    // unresponsiveness such as network issues, if a task is still in the process of starting up, or if the task is in
    // the middle of persisting to disk and doesn't respond immediately.
    return new RetryPolicyFactory(
        new RetryPolicyConfig()
            .setMinWait(Period.seconds(MIN_RETRY_WAIT_SECONDS))
            .setMaxWait(Period.seconds(MAX_RETRY_WAIT_SECONDS))
            .setMaxRetryCount(numRetries)
    );
  }

  @VisibleForTesting
  void checkConnection(String host, int port) throws IOException
  {
    new Socket(host, port).close();
  }

  private FullResponseHolder submitRequest(String id, HttpMethod method, String pathSuffix, String query, boolean retry)
  {
    return submitRequest(id, method, pathSuffix, query, new byte[0], retry);
  }

  private FullResponseHolder submitRequest(
      String id,
      HttpMethod method,
      String pathSuffix,
      String query,
      byte[] content,
      boolean retry
  )
  {
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    while (true) {
      FullResponseHolder response = null;
      Request request = null;
      TaskLocation location = TaskLocation.unknown();
      String path = StringUtils.format("%s/%s/%s", BASE_PATH, id, pathSuffix);

      Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(id);
      if (!status.isPresent() || !status.get().isRunnable()) {
        throw new TaskNotRunnableException(
            StringUtils.format("Aborting request because task [%s] is not runnable", id)
        );
      }

      String host = location.getHost();
      String scheme = "";
      int port = -1;

      try {
        location = taskInfoProvider.getTaskLocation(id);
        if (location.equals(TaskLocation.unknown())) {
          throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", id));
        }

        host = location.getHost();
        scheme = location.getTlsPort() >= 0 ? "https" : "http";
        port = location.getTlsPort() >= 0 ? location.getTlsPort() : location.getPort();

        // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
        // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
        checkConnection(host, port);

        try {
          URI serviceUri = new URI(scheme, null, host, port, path, query, null);
          request = new Request(method, serviceUri.toURL());

          // used to validate that we are talking to the correct worker
          request.addHeader(ChatHandlerResource.TASK_ID_HEADER, id);

          if (content.length > 0) {
            request.setContent(MediaType.APPLICATION_JSON, content);
          }

          log.debug("HTTP %s: %s", method.getName(), serviceUri.toString());
          response = httpClient.go(request, new FullResponseHandler(Charsets.UTF_8), httpTimeout).get();
        }
        catch (Exception e) {
          Throwables.propagateIfInstanceOf(e.getCause(), IOException.class);
          Throwables.propagateIfInstanceOf(e.getCause(), ChannelException.class);
          throw Throwables.propagate(e);
        }

        int responseCode = response.getStatus().getCode();
        if (responseCode / 100 == 2) {
          return response;
        } else if (responseCode == 400) {
          // don't bother retrying if it's a bad request
          throw new IAE("Received 400 Bad Request with body: %s", response.getContent());
        } else {
          throw new IOE("Received status [%d]", responseCode);
        }
      }
      catch (IOException | ChannelException e) {

        // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
        // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
        // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
        // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
        // we will wait for a short period then retry the request indefinitely, expecting the task's location to
        // eventually be updated.

        final Duration delay;
        if (response != null && response.getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
          String headerId = response.getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER);
          if (headerId != null && !headerId.equals(id)) {
            log.warn(
                "Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s",
                id,
                headerId,
                TASK_MISMATCH_RETRY_DELAY_SECONDS
            );
            delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
          } else {
            delay = retryPolicy.getAndIncrementRetryDelay();
          }
        } else {
          delay = retryPolicy.getAndIncrementRetryDelay();
        }

        String urlForLog = (request != null
                            ? request.getUrl().toString()
                            : StringUtils.format("%s://%s:%d%s", scheme, host, port, path));
        if (!retry) {
          // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
          // for informational purposes only) so don't log a scary stack trace
          log.info("submitRequest failed for [%s], with message [%s]", urlForLog, e.getMessage());
          Throwables.propagate(e);
        } else if (delay == null) {
          log.warn(e, "Retries exhausted for [%s], last exception:", urlForLog);
          Throwables.propagate(e);
        } else {
          try {
            final long sleepTime = delay.getMillis();
            log.debug(
                "Bad response HTTP [%s] from [%s]; will try again in [%s] (body/exception: [%s])",
                (response != null ? response.getStatus().getCode() : "no response"),
                urlForLog,
                new Duration(sleepTime).toString(),
                (response != null ? response.getContent() : e.getMessage())
            );
            Thread.sleep(sleepTime);
          }
          catch (InterruptedException e2) {
            Throwables.propagate(e2);
          }
        }
      }
      catch (NoTaskLocationException e) {
        log.info(
            "No TaskLocation available for task [%s], this task may not have been assigned to a worker yet or "
            + "may have already completed",
            id
        );
        throw e;
      }
      catch (Exception e) {
        log.warn(e, "Exception while sending request");
        throw e;
      }
    }
  }
}
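
For context, here is a minimal usage sketch showing how a caller might drive this client. The httpClient, jsonMapper, and taskInfoProvider instances, the data source name, and the task id below are illustrative assumptions (in Druid they would be supplied by the surrounding supervisor/service), not part of this file.

// Sketch only: assumes httpClient, jsonMapper, and taskInfoProvider are provided by the caller.
KafkaIndexTaskClient client = new KafkaIndexTaskClient(
    httpClient,                      // com.metamx.http.client.HttpClient (assumed injected)
    jsonMapper,                      // com.fasterxml.jackson.databind.ObjectMapper
    taskInfoProvider,                // resolves task status and location for a task id
    "wikipedia",                     // dataSource, used only to name the client's worker threads
    2,                               // numThreads backing the *Async methods
    Duration.standardSeconds(10),    // httpTimeout per request
    3                                // numRetries before the retry policy gives up
);

// Synchronous call: pause the task and get back the offsets it paused at
// (an empty map if the task has no known location).
Map<Integer, Long> pausedOffsets = client.pause("index_kafka_wikipedia_0");

// Asynchronous variant: the HTTP round trip runs on the client's executor.
ListenableFuture<KafkaIndexTask.Status> statusFuture = client.getStatusAsync("index_kafka_wikipedia_0");

// Shut down the executor once the client is no longer needed.
client.close();

The synchronous methods translate transport problems into conservative defaults (for example, stop() returns false and getCurrentOffsets() returns an empty map when no task location is available), so callers typically only need to handle the returned values rather than catching the exceptions defined above.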