gobblin.util.ExecutorsUtils.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.util.ExecutorsUtils.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.util;

import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;

import com.google.common.base.Function;
import com.google.common.base.Optional;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.ListeningScheduledExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import gobblin.util.executors.MDCPropagatingCallable;
import gobblin.util.executors.MDCPropagatingExecutorService;
import gobblin.util.executors.MDCPropagatingRunnable;
import gobblin.util.executors.MDCPropagatingScheduledExecutorService;

/**
 * A utility class to use with {@link java.util.concurrent.Executors} in cases such as when creating new thread pools.
 *
 * @author Yinan Li
 */
public class ExecutorsUtils {

    /** Shared factory built with no logger and no thread name format; handed out by {@link #defaultThreadFactory()}. */
    private static final ThreadFactory DEFAULT_THREAD_FACTORY = newThreadFactory(Optional.<Logger>absent());

    /** Default shutdown timeout value used by {@link #shutdownExecutorService(ExecutorService, Optional)}. */
    public static final long EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT = 60;
    /** Unit in which {@link #EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT} is expressed. */
    public static final TimeUnit EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT_TIMEUNIT = TimeUnit.SECONDS;

    /**
     * Returns the shared default {@link ThreadFactory} of this class.
     *
     * <p>
     *   Every call returns the same instance, which was created without a logger and without a thread
     *   name format.
     * </p>
     *
     * @return the shared default {@link ThreadFactory}
     */
    public static ThreadFactory defaultThreadFactory() {
        return ExecutorsUtils.DEFAULT_THREAD_FACTORY;
    }

    /**
     * Builds a {@link ThreadFactory} whose threads report uncaught exceptions through a
     * {@link LoggingUncaughtExceptionHandler}, without setting a thread name format.
     *
     * @param logger an {@link Optional} wrapping the {@link Logger} handed to the
     *               {@link LoggingUncaughtExceptionHandler}
     * @return a newly built {@link ThreadFactory}
     */
    public static ThreadFactory newThreadFactory(Optional<Logger> logger) {
        // No custom naming requested: delegate with an absent name format
        Optional<String> noNameFormat = Optional.absent();
        return newThreadFactory(logger, noNameFormat);
    }

    /**
     * Builds a {@link ThreadFactory} whose threads report uncaught exceptions through a
     * {@link LoggingUncaughtExceptionHandler} and, when a name format is supplied, are named after it.
     *
     * @param logger an {@link Optional} wrapping the {@link Logger} handed to the
     *               {@link LoggingUncaughtExceptionHandler}
     * @param nameFormat an {@link Optional} wrapping the thread naming format; ignored when absent
     * @return a newly built {@link ThreadFactory}
     */
    public static ThreadFactory newThreadFactory(Optional<Logger> logger, Optional<String> nameFormat) {
        ThreadFactoryBuilder plainBuilder = new ThreadFactoryBuilder();
        return newThreadFactory(plainBuilder, logger, nameFormat);
    }

    /**
     * Builds a {@link ThreadFactory} that produces daemon threads, reports uncaught exceptions through a
     * {@link LoggingUncaughtExceptionHandler} and, when a name format is supplied, names threads after it.
     *
     * @param logger an {@link Optional} wrapping the {@link Logger} handed to the
     *               {@link LoggingUncaughtExceptionHandler}
     * @param nameFormat an {@link Optional} wrapping the thread naming format; ignored when absent
     * @return a newly built {@link ThreadFactory} producing daemon threads
     */
    public static ThreadFactory newDaemonThreadFactory(Optional<Logger> logger, Optional<String> nameFormat) {
        ThreadFactoryBuilder daemonBuilder = new ThreadFactoryBuilder().setDaemon(true);
        return newThreadFactory(daemonBuilder, logger, nameFormat);
    }

    /**
     * Finishes configuring the given builder and builds the {@link ThreadFactory}.
     *
     * @param builder the pre-configured {@link ThreadFactoryBuilder} to complete
     * @param logger an {@link Optional} wrapping the {@link Logger} handed to the
     *               {@link LoggingUncaughtExceptionHandler}
     * @param nameFormat an {@link Optional} wrapping the thread naming format; only applied when present
     * @return the {@link ThreadFactory} built from the builder
     */
    private static ThreadFactory newThreadFactory(ThreadFactoryBuilder builder, Optional<Logger> logger,
            Optional<String> nameFormat) {
        if (nameFormat.isPresent()) {
            builder.setNameFormat(nameFormat.get());
        }
        LoggingUncaughtExceptionHandler uncaughtHandler = new LoggingUncaughtExceptionHandler(logger);
        builder.setUncaughtExceptionHandler(uncaughtHandler);
        return builder.build();
    }

    /**
     * Wraps an {@link ExecutorService} in an {@link MDCPropagatingExecutorService}, so that its
     * {@code submit} and {@code execute} methods propagate the MDC information across thread boundaries.
     *
     * @param executorService the {@link ExecutorService} to wrap
     * @return a new {@link ListeningExecutorService} wrapping the given executor
     */
    public static ListeningExecutorService loggingDecorator(ExecutorService executorService) {
        ListeningExecutorService mdcAware = new MDCPropagatingExecutorService(executorService);
        return mdcAware;
    }

    /**
     * Wraps a {@link ScheduledExecutorService} in an {@link MDCPropagatingScheduledExecutorService}, so that
     * its {@code submit}, {@code execute}, {@code schedule}, {@code scheduleAtFixedRate}, and
     * {@code scheduleWithFixedDelay} methods propagate the MDC information across thread boundaries.
     *
     * @param scheduledExecutorService the {@link ScheduledExecutorService} to wrap
     * @return a new {@link ListeningScheduledExecutorService} wrapping the given executor
     */
    public static ListeningScheduledExecutorService loggingDecorator(
            ScheduledExecutorService scheduledExecutorService) {
        ListeningScheduledExecutorService mdcAware =
                new MDCPropagatingScheduledExecutorService(scheduledExecutorService);
        return mdcAware;
    }

    /**
     * Wraps a {@link Runnable} in an {@link MDCPropagatingRunnable} so that the MDC information is
     * propagated across thread boundaries.
     *
     * @param runnable the {@link Runnable} to wrap
     * @return the given instance if it already is an {@link MDCPropagatingRunnable}, otherwise a new
     *         {@link MDCPropagatingRunnable} wrapping it
     */
    public static Runnable loggingDecorator(Runnable runnable) {
        // Already decorated instances are returned untouched to avoid double wrapping
        return (runnable instanceof MDCPropagatingRunnable) ? runnable : new MDCPropagatingRunnable(runnable);
    }

    /**
     * Wraps a {@link Callable} in an {@link MDCPropagatingCallable} so that the MDC information is
     * propagated across thread boundaries.
     *
     * @param callable the {@link Callable} to wrap
     * @param <T> the result type of the callable
     * @return the given instance if it already is an {@link MDCPropagatingCallable}, otherwise a new
     *         {@link MDCPropagatingCallable} wrapping it
     */
    public static <T> Callable<T> loggingDecorator(Callable<T> callable) {
        // Only wrap callables that are not decorated yet
        if (!(callable instanceof MDCPropagatingCallable)) {
            return new MDCPropagatingCallable<T>(callable);
        }
        return callable;
    }

    /**
     * Shuts down an {@link ExecutorService} in two phases: new submissions are rejected first, and tasks
     * that are still running after half of the timeout are cancelled.
     *
     * <p>
     *   The implementation follows Guava's MoreExecutors.shutdownAndAwaitTermination, which is available
     *   since version 17.0. Guava 17.0 or later cannot be used directly because it does not work with
     *   Hadoop 2.6.0 or later due to the issue reported in HADOOP-10961.
     * </p>
     *
     * <p>
     *   If the calling thread is interrupted while waiting, its interrupt status is restored and the
     *   executor is cancelled immediately.
     * </p>
     *
     * @param executorService the {@link ExecutorService} to shut down
     * @param logger an {@link Optional} wrapping the {@link Logger} used to report shutdown progress and
     *               a failure to stop all threads; nothing is logged when absent
     * @param timeout the maximum time to wait for the {@code ExecutorService} to terminate; half of it is
     *                spent before cancelling tasks and half after
     * @param unit the time unit of the timeout argument; must not be {@code null}
     */
    public static void shutdownExecutorService(ExecutorService executorService, Optional<Logger> logger,
            long timeout, TimeUnit unit) {
        Preconditions.checkNotNull(unit);
        // Reject new submissions; tasks that were already accepted keep running
        executorService.shutdown();

        final boolean canLog = logger.isPresent();
        if (canLog) {
            logger.get().info("Attempting to shutdown ExecutorService: " + executorService);
        }

        try {
            // Each phase (graceful wait, wait after cancellation) gets half of the overall timeout
            final long halfWindowNanos = unit.toNanos(timeout) / 2;

            if (executorService.awaitTermination(halfWindowNanos, TimeUnit.NANOSECONDS)) {
                if (canLog) {
                    logger.get().info("Successfully shutdown ExecutorService: " + executorService);
                }
                return;
            }

            // Graceful phase expired: cancel whatever is still executing
            executorService.shutdownNow();
            if (canLog) {
                logger.get().info("Shutdown un-successful, attempting shutdownNow of ExecutorService: "
                        + executorService);
            }

            // Give cancelled tasks the second half of the timeout to react
            final boolean terminated = executorService.awaitTermination(halfWindowNanos, TimeUnit.NANOSECONDS);
            if (!terminated && canLog) {
                logger.get().error("Could not shutdown all threads in ExecutorService: " + executorService);
            }
        } catch (InterruptedException ie) {
            // Keep the interrupt visible to the caller
            Thread.currentThread().interrupt();
            // Cancel (again, if the second phase had already started) since we can no longer wait
            executorService.shutdownNow();

            if (canLog) {
                logger.get().info("Attempting to shutdownNow ExecutorService: " + executorService);
            }
        }
    }

    /**
     * Shuts down an {@link ExecutorService} in two phases using the default timeout: new submissions are
     * rejected first, and tasks that are still running are cancelled later.
     *
     * <p>
     *   Delegates to {@link #shutdownExecutorService(ExecutorService, Optional, long, TimeUnit)} with the
     *   timeout {@link #EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT} expressed in
     *   {@link #EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT_TIMEUNIT}.
     * </p>
     *
     * @param executorService the {@link ExecutorService} to shut down
     * @param logger an {@link Optional} wrapping a {@link Logger} to be used during shutdown
     */
    public static void shutdownExecutorService(ExecutorService executorService, Optional<Logger> logger) {
        final long defaultTimeout = ExecutorsUtils.EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT;
        final TimeUnit defaultUnit = ExecutorsUtils.EXECUTOR_SERVICE_SHUTDOWN_TIMEOUT_TIMEUNIT;
        shutdownExecutorService(executorService, logger, defaultTimeout, defaultUnit);
    }

    /**
     * A utility method to parallelize loops. Applies the {@link Function} to every element in the {@link List} in
     * parallel by spawning threads. A list containing the result obtained by applying the function is returned. The
     * method is a blocking call and will wait for all the elements in the list to be processed or timeoutInSecs,
     * whichever is earlier.
     * <p>
     * <b>NOTE: The method is an all or none implementation. Meaning, if any of the threads fails, the method will
     * throw an {@link ExecutionException} even if other threads completed successfully.</b>
     * </p>
     * <p>
     * Computations that have not completed once the shutdown window has elapsed (including tasks that were still
     * queued and therefore never started) are cancelled and reported as an {@link ExecutionException} whose cause
     * is a {@link CancellationException}, instead of being waited on indefinitely.
     * </p>
     *
     * <ul>
     * <li>Uses a fixed thread pool of size threadCount.</li>
     * <li>Uses {@link #shutdownExecutorService(ExecutorService, Optional, long, TimeUnit)} to shutdown the executor
     * service.</li>
     * <li>All threads are daemon threads.</li>
     * </ul>
     *
     * @param list input list on which the function is applied in parallel
     * @param function to be applied on every element of the list
     * @param threadCount number of threads used to process the list; must be positive
     * @param timeoutInSecs to wait for all the threads to complete
     * @param logger an {@link Optional} wrapping a {@link Logger} to be used during shutdown
     * @param <F> the type of the input elements
     * @param <T> the type of the results
     *
     * @return a list containing the result obtained by applying the function on each element of the input list in the
     *         same order
     *
     * @throws IllegalArgumentException if input list or function is null, or if threadCount is not positive
     * @throws ExecutionException <ul>
     *           <li>if any computation threw an exception</li>
     *           <li>if any computation was cancelled because it did not complete in time</li>
     *           <li>if the calling thread was interrupted while waiting; its interrupt status is preserved</li>
     *           </ul>
     */
    public static <F, T> List<T> parallelize(final List<F> list, final Function<F, T> function, int threadCount,
            int timeoutInSecs, Optional<Logger> logger) throws ExecutionException {

        Preconditions.checkArgument(list != null, "Input list can not be null");
        Preconditions.checkArgument(function != null, "Function can not be null");

        final List<T> results = Lists.newArrayListWithCapacity(list.size());
        List<Future<T>> futures = Lists.newArrayListWithCapacity(list.size());

        // NOTE(review): per Guava's documentation getExitingExecutorService switches the pool to daemon threads and
        // registers a JVM shutdown hook on every call -- confirm that accumulating one hook per invocation is
        // acceptable for frequent callers.
        ExecutorService executorService = MoreExecutors.getExitingExecutorService((ThreadPoolExecutor) Executors
                .newFixedThreadPool(threadCount, ExecutorsUtils.newThreadFactory(logger)), 2, TimeUnit.MINUTES);

        for (final F l : list) {
            futures.add(executorService.submit(new Callable<T>() {
                @Override
                public T call() throws Exception {
                    return function.apply(l);
                }
            }));
        }

        // Blocks until all tasks finished or the timeout elapsed (after which running tasks are interrupted and
        // queued tasks are dropped by shutdownNow())
        ExecutorsUtils.shutdownExecutorService(executorService, logger, timeoutInSecs, TimeUnit.SECONDS);

        for (Future<T> future : futures) {
            try {
                // A future still pending here belongs to a task that was dropped from the queue (it would never
                // complete, so get() would block forever) or that ignored its interruption. Cancel it so get()
                // fails fast. When the caller itself was interrupted, leave the future alone so that get()
                // reports the interruption instead.
                if (!future.isDone() && !Thread.currentThread().isInterrupted()) {
                    future.cancel(true);
                }
                results.add(future.get());
            } catch (InterruptedException e) {
                // Preserve interrupt status for the caller before translating the exception
                Thread.currentThread().interrupt();
                throw new ExecutionException("Thread interrupted", e);
            } catch (CancellationException e) {
                throw new ExecutionException("Computation was cancelled because it did not complete within "
                        + timeoutInSecs + " seconds", e);
            }
        }

        return results;
    }
}