metlos.executors.batch.BatchExecutor.java Source code

Introduction

Here is the source code for metlos.executors.batch.BatchExecutor.java
Source

/*
 * RHQ Management Platform
 * Copyright (C) 2005-2012 Red Hat, Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

package metlos.executors.batch;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.RunnableFuture;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import metlos.executors.support.QueueBlockingDecorator;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * This is an extension of the {@link ThreadPoolExecutor} that adds 4 new methods:
 * <ul>
 * <li> {@link #invokeAllWithin(Collection, long, TimeUnit)}
 * <li> {@link #executeAllWithin(Collection, long, TimeUnit)}
 * <li> {@link #submitWithPreferedDuration(Collection, long, TimeUnit)}
 * <li> {@link #submitWithPreferedDurationAndFixedDelay(Collection, long, long, long, TimeUnit)}
 * </ul>
 * <p>
 * Those methods ensure that given collection of tasks is executed in a time as close as possible
 * to provided duration.
 * 
 * @author Lukas Krejci
 */
public class BatchExecutor extends ThreadPoolExecutor {

    /** Logger shared by the executor, its tasks and its queue; used for the trace-level scheduling diagnostics. */
    private static final Log LOG = LogFactory.getLog(BatchExecutor.class);

    /**
     * The handler handed to the superclass by the constructors that do not accept
     * an explicit {@link RejectedExecutionHandler}. It is an {@link AbortPolicy} instance.
     */
    protected static final RejectedExecutionHandler DEFAULT_REJECTED_EXECUTION_HANDLER = new AbortPolicy();

    /**
     * Mutable bookkeeping shared by all the tasks that were submitted together as one batch.
     * The tasks update it as they run and the task queue reads it to decide whether the next
     * task of the batch may be handed out already.
     */
    protected static class BatchRecord {
        //the number of tasks of this batch that are inside their run() method at the moment
        AtomicInteger currentlyRunningTasks = new AtomicInteger();
        //the number of tasks of this batch that have finished running so far
        AtomicInteger elementsRan = new AtomicInteger();
        //the sum of the measured run durations (in nanoseconds) of the finished tasks
        AtomicLong cumulativeExecutionTime = new AtomicLong();
        //the time (on the time scale of now()) before which the queue does not hand out
        //another task of this batch
        AtomicLong nextElementStartTime = new AtomicLong();
        //the time (on the time scale of now()) by which the whole batch should be finished
        long finishTimeNanos;
        //the number of tasks the batch consists of
        int nofElements;
    }

    /**
     * The data needed to submit a batch again once its current run has completed.
     * Only the tasks of repeating batches carry a non-null instance of this class.
     */
    protected static class RepetitionRecord {
        //the collection of commands to submit again for the next run
        Collection<? extends Runnable> tasks;
        //the delay (in nanoseconds) between the end of one run and the start of the next
        long delayNanos;
        //the preferred duration (in nanoseconds) of each run of the batch
        long durationNanos;
    }

    /**
     * A {@link RunnableFuture} that knows the batch it belongs to and that can be ordered
     * against other such futures.
     *
     * @param <T> the type of the result of the future
     */
    protected interface BatchedRunnableFuture<T> extends RunnableFuture<T>, Comparable<BatchedRunnableFuture<T>> {

        /**
         * @return the record of the batch this task belongs to or null if the task is not a part of any batch
         */
        BatchRecord getBatchRecord();

        /**
         * @return the time (in nanoseconds) this task should ideally finish at; used for ordering the tasks
         */
        long getIdealFinishTimeNanos();

        /**
         * @return the number used to order the tasks that have the same ideal finish time
         */
        long getSequenceNumber();
    }

    /**
     * The task wrapper used for everything this executor runs. In addition to being
     * a {@link FutureTask}, it keeps a reference to the (optional) batch it belongs to
     * and, for repeating batches, to the data needed to submit the batch again.
     * <p>
     * While running, a batched task measures its own duration and uses it to update
     * the time at which the next task of the same batch is allowed to start.
     *
     * @param <T> the type of the result of the task
     */
    protected class BatchReferringRunnable<T> extends FutureTask<T> implements BatchedRunnableFuture<T> {

        /** The batch this task belongs to or null for a task submitted on its own. */
        protected final BatchRecord batchRecord;

        //these two are used for ordering purposes and bear no significance wrt
        //the actual time when the task gets executed.
        protected final long idealFinishTimeNanos;
        protected final long sequenceNumber;

        /** The repetition data of the batch or null if the batch is to be run only once. */
        protected final RepetitionRecord repetitionRecord;

        /**
         * @param callable the payload to run
         * @param batchRecord the batch the task belongs to or null
         * @param repetitionRecord the repetition data of the batch or null
         * @param idealFinishTimeNanos the primary ordering key of the task
         */
        public BatchReferringRunnable(Callable<T> callable, BatchRecord batchRecord,
                RepetitionRecord repetitionRecord, long idealFinishTimeNanos) {
            super(callable);
            this.batchRecord = batchRecord;
            this.repetitionRecord = repetitionRecord;
            this.idealFinishTimeNanos = idealFinishTimeNanos;
            sequenceNumber = SEQUENCER.incrementAndGet();
        }

        /**
         * @param runnable the payload to run
         * @param returnValue the value the future returns once the payload finishes
         * @param batchRecord the batch the task belongs to or null
         * @param repetitionRecord the repetition data of the batch or null
         * @param idealFinishTimeNanos the primary ordering key of the task
         */
        public BatchReferringRunnable(Runnable runnable, T returnValue, BatchRecord batchRecord,
                RepetitionRecord repetitionRecord, long idealFinishTimeNanos) {
            super(runnable, returnValue);
            this.batchRecord = batchRecord;
            this.repetitionRecord = repetitionRecord;
            this.idealFinishTimeNanos = idealFinishTimeNanos;
            sequenceNumber = SEQUENCER.incrementAndGet();
        }

        @Override
        public BatchRecord getBatchRecord() {
            return batchRecord;
        }

        public RepetitionRecord getRepetitionRecord() {
            return repetitionRecord;
        }

        @Override
        public long getIdealFinishTimeNanos() {
            return idealFinishTimeNanos;
        }

        @Override
        public long getSequenceNumber() {
            return sequenceNumber;
        }

        /**
         * Runs the payload. If the task is a part of a batch, the run is timed and the batch
         * record is updated with the time the next task of the batch should start at.
         * The task that completes the batch also triggers the next repetition, if any.
         */
        @Override
        public void run() {
            if (batchRecord == null) {
                //a stand-alone task - there is nothing to measure or reschedule
                super.run();
                return;
            }

            long startTime = now();
            //we need to get the number of running tasks now, before we actually run our
            //payload. That is because at this very moment, this number reflects the reality
            //much better than after running the payload where we get much more variance due
            //to different durations payloads take to finish.
            //this number is important because we use it to compute the time gaps between running
            //the payloads and we need to have an exact idea about how many tasks were
            //executing concurrently at any given time.
            int runningTasks = batchRecord.currentlyRunningTasks.incrementAndGet();

            try {
                super.run();
            } finally {
                long duration = now() - startTime;

                if (LOG.isTraceEnabled()) {
                    LOG.trace("Task " + this + " took " + duration + "ns.");
                }

                long executionTime = batchRecord.cumulativeExecutionTime.addAndGet(duration);
                int elementsRan = batchRecord.elementsRan.incrementAndGet();

                batchRecord.nextElementStartTime
                        .set(getNextIdealStartTime(runningTasks, executionTime, elementsRan));

                if (LOG.isTraceEnabled()) {
                    LOG.trace("Setting the next task execution time to "
                            + batchRecord.nextElementStartTime.get());
                }

                batchRecord.currentlyRunningTasks.decrementAndGet();

                //The value returned by incrementAndGet() above is unique to this task, so exactly
                //one task of the batch observes the count reaching the size of the batch. At that
                //point the payloads of all the tasks have finished. Deciding on this local value
                //(instead of re-reading the shared counters) makes sure that two tasks finishing
                //at the same time can't both resubmit the batch.
                if (repetitionRecord != null && elementsRan == batchRecord.nofElements) {
                    rescheduleIfNeeded();
                }
            }
        }

        /**
         * Submits the batch for its next repetition, provided all the elements of the current
         * run have finished. The delay of the repetition is used as the initial delay of the next run.
         * <p>
         * {@link #run()} calls this from exactly one task of the batch - the one that completes it.
         */
        protected void rescheduleIfNeeded() {
            if (batchRecord.nofElements <= batchRecord.elementsRan.get()) {
                BatchExecutor.this.submitWithPreferedDurationAndFixedDelay(repetitionRecord.tasks,
                        repetitionRecord.delayNanos, repetitionRecord.durationNanos, repetitionRecord.delayNanos,
                        TimeUnit.NANOSECONDS);
            }
        }

        /**
         * Computes the time the next task of the batch should start at so that the remaining
         * tasks are spread over the time left until the batch is supposed to finish.
         *
         * @param currentlyRunningTasks the number of tasks of the batch that were running when this task started
         * @param executionTime the cumulative execution time of the finished tasks of the batch (in nanoseconds)
         * @param elementsRan the number of tasks of the batch that have finished
         * @return the ideal start time of the next task on the time scale of {@link BatchExecutor#now()}
         */
        protected long getNextIdealStartTime(int currentlyRunningTasks, long executionTime, int elementsRan) {
            long now = now();
            long tasks2Go = batchRecord.nofElements - elementsRan;

            if (tasks2Go <= 0) {
                //nothing is left to be delayed. Returning early also avoids the division by zero
                //below, which would produce an infinite or NaN gap and overflow the result.
                return now;
            }

            long time2Go = batchRecord.finishTimeNanos - now;

            double avgExecutionTime = ((double) executionTime) / elementsRan;

            //make the next task run after a longer delay if there is more than 1 of tasks 
            //running at this very moment.
            double idealExecutionTime = ((double) time2Go) / tasks2Go * currentlyRunningTasks;

            //idealExecutionTime - avgExecution is the "time gap" between the tasks.
            //note that idealExecutionTime - avgExecutionTime can be negative, which would
            //set the next element start time in the past.
            //but that's ok - it is a sign of the batch running late and will only make the
            //elements further down the line run without any delay.
            return now + (long) (idealExecutionTime - avgExecutionTime);
        }

        /**
         * Orders the tasks by their ideal finish time and then by their sequence number.
         * <p>
         * The differences are evaluated as longs and only their sign is returned. Narrowing
         * a nanosecond difference to an int would give a wrong sign (or a false "equal")
         * for tasks that are more than approx. 2.1s apart.
         */
        @Override
        public int compareTo(BatchedRunnableFuture<T> o) {
            if (this == o) {
                return 0;
            }

            long diff = idealFinishTimeNanos - o.getIdealFinishTimeNanos();
            if (diff == 0) {
                diff = sequenceNumber - o.getSequenceNumber();
            }

            if (diff < 0) {
                return -1;
            } else if (diff > 0) {
                return 1;
            } else {
                return 0;
            }
        }
    }

    /**
     * System.nanoTime() is not required to be positive, so let's establish
     * a base from which to start counting the time. The values returned by
     * {@link #now()} are relative to this base.
     * (inspired by java.util.concurrent.ScheduledThreadPoolExecutor)
     */
    private static final long EPOCH_START = System.nanoTime();

    /**
     * In a rare case when two tasks would be scheduled to be executed at exactly the same
     * time (in nanoseconds), they are going to have unique sequence numbers which
     * we can base their ordering on. This "sequencer" is used to obtain such unique
     * sequence numbers. Every task takes the next number when it is constructed.
     */
    private static final AtomicLong SEQUENCER = new AtomicLong();

    /**
     * The work queue of the executor. It decorates another queue (a {@link PriorityQueue}
     * by default) and hands out its head only once the batch of the head allows the next
     * task to start, i.e. once the <code>nextElementStartTime</code> of the batch has passed.
     * Tasks that do not belong to any batch are considered ready immediately.
     *
     * @param <T> the type of the tasks in the queue
     */
    protected static class TaskQueue<T extends BatchReferringRunnable<?>> extends QueueBlockingDecorator<T> {

        public TaskQueue() {
            this(new PriorityQueue<T>());
        }

        protected TaskQueue(Queue<T> q) {
            super(q);
        }

        /**
         * @return the head of the queue if there is one and it is ready to be executed, null otherwise
         */
        @Override
        public T peek() {
            getLock().lock();
            try {
                T ret = getDecoratedQueue().peek();
                if (ret == null) {
                    //the decorated queue is empty (or its head is a null). We must not ask for
                    //the waiting time of a null, that would end in a NullPointerException.
                    return null;
                }
                return getWaitingTime(ret) <= 0 ? ret : null;
            } finally {
                getLock().unlock();
            }
        }

        /**
         * Removes and returns the head of the queue if it is ready to be executed.
         * This method never waits.
         *
         * @return the head of the queue or null if the queue is empty or the head is not ready yet
         */
        @Override
        public T poll() {
            getLock().lock();
            try {
                Queue<T> q = getDecoratedQueue();
                T ret = q.peek();

                if (ret == null) {
                    //either the underlying queue is empty or it contains a null.
                    //let the underlying queue deal with it.
                    return q.poll();
                }

                long waitTimeNanos = getWaitingTime(ret);
                if (waitTimeNanos <= 0) {
                    ret = q.poll();
                    if (!q.isEmpty()) {
                        //wake up the other waiting threads so that they re-examine the new head
                        getAvailabilityCondition().signalAll();
                    }

                    if (LOG.isTraceEnabled()) {
                        LOG.trace(" Polling task " + ret + " for execution.");
                    }

                    return ret;
                } else {
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Task " + ret + " not ready for execution yet.");
                    }

                    return null;
                }
            } finally {
                getLock().unlock();
            }
        }

        /**
         * Removes and returns the head of the queue, waiting until the queue is not empty
         * and its head is ready to be executed.
         *
         * @return the head of the queue
         * @throws InterruptedException if the thread is interrupted while waiting
         */
        @Override
        public T take() throws InterruptedException {
            getLock().lockInterruptibly();
            try {
                Queue<T> q = getDecoratedQueue();

                while (true) {
                    if (q.isEmpty()) {
                        getAvailabilityCondition().await();
                    } else {

                        T ret = q.peek();

                        if (ret == null) {
                            //the underlying queue contains a null;
                            return q.poll();
                        }

                        long waitTimeNanos = getWaitingTime(ret);
                        if (waitTimeNanos <= 0) {
                            ret = q.poll();
                            if (!q.isEmpty()) {
                                //wake up the other waiting threads so that they re-examine the new head
                                getAvailabilityCondition().signalAll();
                            }

                            if (LOG.isTraceEnabled()) {
                                LOG.trace(" Polling task " + ret + " for execution.");
                            }

                            return ret;
                        } else {
                            if (LOG.isTraceEnabled()) {
                                LOG.trace("Task " + ret + " not ready for execution yet, waiting " + waitTimeNanos
                                        + "ns.");
                            }
                            //the head is re-examined after the wait because it could have changed
                            //in the meantime.
                            getAvailabilityCondition().awaitNanos(waitTimeNanos);
                        }
                    }
                }
            } finally {
                getLock().unlock();
            }
        }

        /**
         * @param element the task to check, must not be null
         * @return the number of nanoseconds the task still has to wait before it may be executed.
         * Zero or a negative number means the task is ready.
         */
        private long getWaitingTime(T element) {
            if (LOG.isTraceEnabled()) {
                String message = "Checking for ready state of " + element + ": batch is " + element.getBatchRecord()
                        + ", ";
                if (element.getBatchRecord() != null) {
                    message += "start time is " + element.getBatchRecord().nextElementStartTime.get() + ", ";
                }
                message += "now is " + now();
                LOG.trace(message);
            }
            return element.getBatchRecord() == null ? 0
                    : element.getBatchRecord().nextElementStartTime.get() - now();
        }
    }

    /**
     * @return the work queue of this executor typed as the {@link TaskQueue} it was constructed with
     */
    @SuppressWarnings("unchecked")
    protected TaskQueue<BatchReferringRunnable<?>> getTaskQueue() {
        //go through the wildcard type, a direct cast is not allowed by the compiler
        BlockingQueue<?> untyped = getQueue();
        return (TaskQueue<BatchReferringRunnable<?>>) untyped;
    }

    /**
     * Converts a {@link TaskQueue} to a {@link BlockingQueue} of runnables, which the compiler
     * would not allow to do using a direct cast. This is safe to do in constructors, because
     * this class is the only one that ever adds to the queue and it makes sure that each inserted
     * runnable is in fact a BatchReferringRunnable.
     * 
     * @param q the task queue to convert
     * @return the very same queue instance typed as a queue of runnables
     */
    @SuppressWarnings("unchecked")
    private static BlockingQueue<Runnable> asQueueOfRunnables(TaskQueue<?> q) {
        //the detour through the wildcard type is what makes the cast legal
        BlockingQueue<?> untyped = (BlockingQueue<?>) q;
        return (BlockingQueue<Runnable>) untyped;
    }

    /**
     * Creates an executor that uses {@link Executors#defaultThreadFactory()}, the
     * {@link #DEFAULT_REJECTED_EXECUTION_HANDLER} and a new {@link TaskQueue} as its work queue.
     *
     * @param corePoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param maximumPoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param keepAliveTime passed on to the {@link ThreadPoolExecutor} constructor
     * @param unit the time unit of the <code>keepAliveTime</code>
     */
    public BatchExecutor(int corePoolSize, int maximumPoolSize, long keepAliveTime, TimeUnit unit) {
        this(corePoolSize, maximumPoolSize, keepAliveTime, unit, Executors.defaultThreadFactory(),
                DEFAULT_REJECTED_EXECUTION_HANDLER, new TaskQueue<BatchReferringRunnable<?>>());
    }

    /**
     * Creates an executor that uses the provided thread factory, the
     * {@link #DEFAULT_REJECTED_EXECUTION_HANDLER} and a new {@link TaskQueue} as its work queue.
     *
     * @param corePoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param maximumPoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param keepAliveTime passed on to the {@link ThreadPoolExecutor} constructor
     * @param unit the time unit of the <code>keepAliveTime</code>
     * @param threadFactory the factory passed on to the {@link ThreadPoolExecutor} constructor
     */
    public BatchExecutor(int corePoolSize, int maximumPoolSize, long keepAliveTime, TimeUnit unit,
            ThreadFactory threadFactory) {
        this(corePoolSize, maximumPoolSize, keepAliveTime, unit, threadFactory, DEFAULT_REJECTED_EXECUTION_HANDLER,
                new TaskQueue<BatchReferringRunnable<?>>());
    }

    /**
     * Creates an executor that uses the provided thread factory and rejected execution handler
     * and a new {@link TaskQueue} as its work queue.
     *
     * @param corePoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param maximumPoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param keepAliveTime passed on to the {@link ThreadPoolExecutor} constructor
     * @param unit the time unit of the <code>keepAliveTime</code>
     * @param threadFactory the factory passed on to the {@link ThreadPoolExecutor} constructor
     * @param handler the handler passed on to the {@link ThreadPoolExecutor} constructor
     */
    public BatchExecutor(int corePoolSize, int maximumPoolSize, long keepAliveTime, TimeUnit unit,
            ThreadFactory threadFactory, RejectedExecutionHandler handler) {
        this(corePoolSize, maximumPoolSize, keepAliveTime, unit, threadFactory, handler,
                new TaskQueue<BatchReferringRunnable<?>>());
    }

    /**
     * The constructor all the other constructors delegate to. It hands the provided task queue
     * to the superclass as the work queue and then calls {@link #init()}.
     *
     * @param corePoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param maximumPoolSize passed on to the {@link ThreadPoolExecutor} constructor
     * @param keepAliveTime passed on to the {@link ThreadPoolExecutor} constructor
     * @param unit the time unit of the <code>keepAliveTime</code>
     * @param threadFactory the factory passed on to the {@link ThreadPoolExecutor} constructor
     * @param handler the handler passed on to the {@link ThreadPoolExecutor} constructor
     * @param queue the task queue to use as the work queue of the executor
     */
    protected <T extends BatchReferringRunnable<?>> BatchExecutor(int corePoolSize, int maximumPoolSize,
            long keepAliveTime, TimeUnit unit, ThreadFactory threadFactory, RejectedExecutionHandler handler,
            TaskQueue<T> queue) {
        super(corePoolSize, maximumPoolSize, keepAliveTime, unit, asQueueOfRunnables(queue), threadFactory,
                handler);
        //note that init() is overridable and runs before the constructors of the subclasses finish
        init();
    }

    /**
     * Wraps the command in a task that does not belong to any batch and hands it over
     * to the superclass for execution.
     */
    @Override
    public void execute(Runnable command) {
        super.execute(newTaskFor(command, null));
    }

    /**
     * Wraps the callable in a task that does not belong to any batch and executes it.
     * The task is passed directly to the <code>execute</code> of the superclass.
     */
    @Override
    public <T> Future<T> submit(Callable<T> task) {
        RunnableFuture<T> wrapped = newTaskFor(task);
        super.execute(wrapped);
        return wrapped;
    }

    /**
     * Wraps the runnable in a task that does not belong to any batch and executes it.
     * The returned future yields null once the runnable finishes.
     */
    @Override
    public Future<?> submit(Runnable task) {
        RunnableFuture<?> wrapped = newTaskFor(task, null);
        super.execute(wrapped);
        return wrapped;
    }

    /**
     * Wraps the runnable in a task that does not belong to any batch and executes it.
     * The returned future yields the provided result once the runnable finishes.
     */
    @Override
    public <T> Future<T> submit(Runnable task, T result) {
        RunnableFuture<T> wrapped = newTaskFor(task, result);
        super.execute(wrapped);
        return wrapped;
    }

    /**
     * This schedules the given collection of commands so that all commands are finished executing
     * before the given time. The commands are scheduled so that the overall execution time is as close
     * to the target time as possible, but the overall time can also exceed the given time if the CPU
     * is under heavy load for example.
     * <p>
     * This of course assumes that the commands are "similar" in their computational requirements, i.e.
     * that they form a "batch" of similar tasks to execute. During execution, an average execution time
     * is used to compute the delays between individual runs. This approach would fail horribly if 
     * the tasks had very different computational demands and took wildly different durations to complete.
     * <p>
     * This method is therefore different from {@link #invokeAll(Collection, long, TimeUnit)} which can
     * cause some tasks to not execute if the timeout occurs.
     * <p>
     * If the collection of commands is empty, nothing is scheduled and an empty list is returned.
     *
     * @param commands the commands to execute. All of them should have similar computational requirements
     * for this method to be able to spread their execution in the given duration.
     * @param duration the duration all the tasks should execute in. This is fulfilled only on best-effort
     * basis and cannot be guaranteed.
     * @param unit the time unit of the duration
     * 
     * @return the list of futures each corresponding to the execution of a single Runnable from 
     * the collection of commands in the same sequential order. Note that the tasks are not 
     * guaranteed to have finished upon return from this method (unlike in the 
     * {@link #invokeAll(Collection)} method).
     */
    public List<Future<?>> executeAllWithin(Collection<? extends Runnable> commands, long duration, TimeUnit unit) {
        //read the size only once so that the batch record and the checks below agree
        int nofCommands = commands.size();
        if (nofCommands == 0) {
            //an empty batch would cause a division by zero when computing the increment below
            return new ArrayList<Future<?>>();
        }

        BatchRecord batchRecord = createNewBatchRecord(nofCommands, unit, duration, 0);

        //it actually is ok if this is negative - we've got so little time to execute
        //the tasks that we are late already :) - because the execution time of the tasks
        //will be in past, they will be scheduled with no further delays
        long idealFinishTime = batchRecord.nextElementStartTime.get();
        long increment = (batchRecord.finishTimeNanos - idealFinishTime) / nofCommands;
        List<Future<?>> ret = new ArrayList<Future<?>>(nofCommands);
        for (Runnable command : commands) {
            RunnableFuture<?> task = newTaskFor(command, null, batchRecord, null, idealFinishTime);
            super.execute(task);
            ret.add(task);
            idealFinishTime += increment;
        }

        return ret;
    }

    /**
     * Akin to {@link #executeAllWithin(Collection, long, TimeUnit)} but doesn't collect the futures.
     * <p>
     * This method is more appropriate if you don't need to know the results of the commands or if you
     * submit a large number of them and are memory-constrained.
     * <p>
     * If the collection of commands is empty, this method does nothing.
     * 
     * @param commands the commands to execute
     * @param duration the duration all the commands should execute in (best-effort only)
     * @param unit the time unit of the duration
     * 
     * @see #executeAllWithin(Collection, long, TimeUnit)
     */
    public void submitWithPreferedDuration(Collection<? extends Runnable> commands, long duration, TimeUnit unit) {
        //read the size only once so that the batch record and the check below agree
        int nofCommands = commands.size();
        if (nofCommands == 0) {
            //an empty batch would cause a division by zero when computing the increment below
            return;
        }

        BatchRecord batchRecord = createNewBatchRecord(nofCommands, unit, duration, 0);

        long idealFinishTime = batchRecord.nextElementStartTime.get();
        long increment = (batchRecord.finishTimeNanos - idealFinishTime) / nofCommands;
        for (Runnable command : commands) {
            RunnableFuture<?> task = newTaskFor(command, null, batchRecord, null, idealFinishTime);
            super.execute(task);
            idealFinishTime += increment;
        }
    }

    /**
     * Another variation on {@link #invokeAllWithin(Collection, long, TimeUnit)}. The commands
     * will be run repeatedly (forever) with each "batch" executing with given duration.
     * There will be a given delay between two consecutive executions of the command sets.
     * <p>
     * Each execution takes a snapshot of the provided collection and executes only the commands
     * present in the collection at the time of the call of this method. Once all the commands executed,
     * another snapshot of the collection is taken and the commands are rescheduled.
     * <p>
     * This means that if you intend the collection of the commands to be mutable and change over time once
     * it has been submitted to the executor, the collection <b>MUST</b> be able to handle concurrent
     * access and modification.
     * <p>
     * If the collection is empty at the time this method is called (either by the user or when
     * the commands are being rescheduled), nothing is scheduled. Because the next repetition is
     * triggered by the commands finishing, this also means that the repetition ends.
     * 
     * @param commands the collection of commands to repeatedly execute
     * @param initialDelay the initial delay before the execution starts (in the provided time unit)
     * @param duration the expected duration of the execution of all commands
     * @param delay the delay between two consecutive executions of the command sets
     * @param unit the time unit of the time related parameters
     */
    public void submitWithPreferedDurationAndFixedDelay(Collection<? extends Runnable> commands, long initialDelay,
            long duration, long delay, TimeUnit unit) {
        prepareForNextRepetition(commands);

        //read the size only once so that the batch record and the check below agree
        int nofCommands = commands.size();
        if (nofCommands == 0) {
            //an empty batch would cause a division by zero when computing the increment below.
            //When rescheduling, that exception would be thrown from within a worker thread.
            if (LOG.isDebugEnabled()) {
                LOG.debug("No commands to execute, not scheduling the batch (nor any further repetitions of it).");
            }
            return;
        }

        BatchRecord batchRecord = createNewBatchRecord(nofCommands, unit, duration, initialDelay);

        RepetitionRecord repetitionRecord = new RepetitionRecord();
        repetitionRecord.tasks = commands;
        repetitionRecord.delayNanos = unit.toNanos(delay);
        repetitionRecord.durationNanos = unit.toNanos(duration);

        long idealFinishTime = batchRecord.nextElementStartTime.get();
        long increment = (batchRecord.finishTimeNanos - idealFinishTime) / nofCommands;
        for (Runnable command : commands) {
            RunnableFuture<?> task = newTaskFor(command, null, batchRecord, repetitionRecord, idealFinishTime);
            super.execute(task);
            idealFinishTime += increment;
        }
    }

    /**
     * Akin to {@link #executeAllWithin(Collection, long, TimeUnit)} but using Callables.
     * <p>
     * If the collection of commands is empty, nothing is scheduled and an empty list is returned.
     *
     * @param commands the callables to execute. All of them should have similar computational requirements.
     * @param duration the duration all the callables should execute in (best-effort only)
     * @param unit the time unit of the duration
     *
     * @return the list of futures each corresponding to a single callable from the collection
     * in the same sequential order. The callables are not guaranteed to have finished upon return
     * from this method.
     */
    public <T> List<Future<T>> invokeAllWithin(Collection<? extends Callable<T>> commands, long duration,
            TimeUnit unit) {
        //read the size only once so that the batch record and the checks below agree
        int nofCommands = commands.size();
        if (nofCommands == 0) {
            //an empty batch would cause a division by zero when computing the increment below
            return new ArrayList<Future<T>>();
        }

        BatchRecord batchRecord = createNewBatchRecord(nofCommands, unit, duration, 0);

        //it actually is ok if this is negative - we've got so little time to execute
        //the tasks that we are late already :) - because the execution time of the tasks
        //will be in past, they will be scheduled with no further delays
        long idealFinishTime = batchRecord.nextElementStartTime.get();
        //base the increment on the start time of the batch (and not on the current time)
        //to be consistent with the other batch submitting methods
        long increment = (batchRecord.finishTimeNanos - idealFinishTime) / nofCommands;
        List<Future<T>> ret = new ArrayList<Future<T>>(nofCommands);
        for (Callable<T> command : commands) {
            RunnableFuture<T> task = newTaskFor(command, batchRecord, null, idealFinishTime);
            super.execute(task);
            ret.add(task);
            idealFinishTime += increment;
        }

        return ret;
    }

    /**
     * Creates the task wrapping the provided callable.
     *
     * @param callable the payload of the task
     * @param batchRecord the batch the task belongs to or null
     * @param repetitionRecord the repetition data of the batch or null
     * @param idealFinishTime the ideal finish time of the task used for ordering the tasks
     * @return the new task
     */
    protected <T> BatchReferringRunnable<T> newTaskFor(Callable<T> callable, BatchRecord batchRecord,
            RepetitionRecord repetitionRecord, long idealFinishTime) {
        BatchReferringRunnable<T> task = new BatchReferringRunnable<T>(callable, batchRecord, repetitionRecord,
                idealFinishTime);
        return task;
    }

    /**
     * Creates the task wrapping the provided runnable.
     *
     * @param runnable the payload of the task
     * @param result the value the task yields once the runnable finishes
     * @param batchRecord the batch the task belongs to or null
     * @param repetitionRecord the repetition data of the batch or null
     * @param idealFinishTime the ideal finish time of the task used for ordering the tasks
     * @return the new task
     */
    protected <T> BatchReferringRunnable<T> newTaskFor(Runnable runnable, T result, BatchRecord batchRecord,
            RepetitionRecord repetitionRecord, long idealFinishTime) {
        BatchReferringRunnable<T> task = new BatchReferringRunnable<T>(runnable, result, batchRecord,
                repetitionRecord, idealFinishTime);
        return task;
    }

    //this method and its Runnable counterpart are implemented for the correct interoperability
    //with #invokeAny and other not-overriden methods.
    /**
     * Creates a task that is not a part of any batch and that uses the current time
     * as its ideal finish time.
     */
    @Override
    protected <T> BatchReferringRunnable<T> newTaskFor(Callable<T> callable) {
        long creationTime = now();
        return newTaskFor(callable, null, null, creationTime);
    }

    /**
     * Creates a task that is not a part of any batch and that uses the current time
     * as its ideal finish time.
     */
    @Override
    protected <T> BatchReferringRunnable<T> newTaskFor(Runnable runnable, T value) {
        long creationTime = now();
        return newTaskFor(runnable, value, null, null, creationTime);
    }

    /**
     * @return the number of nanoseconds elapsed since this class was initialized, i.e. the
     * current <code>System.nanoTime()</code> relative to the epoch start of this class
     */
    protected static long now() {
        long rawNanos = System.nanoTime();
        return rawNanos - EPOCH_START;
    }

    /**
     * Creates the record of a batch that is about to be submitted.
     *
     * @param nofElements the number of tasks in the batch
     * @param unit the time unit of the duration and of the initial delay
     * @param duration the duration the batch should execute in
     * @param initialDelay the delay before the first task of the batch may start
     * @return the batch record with the start time of the first element set to the current time
     * plus the initial delay and the finish time set to that start time plus the duration
     */
    protected static BatchRecord createNewBatchRecord(int nofElements, TimeUnit unit, long duration,
            long initialDelay) {
        long startNanos = now() + unit.toNanos(initialDelay);
        long finishNanos = startNanos + unit.toNanos(duration);

        BatchRecord record = new BatchRecord();
        record.nofElements = nofElements;
        record.nextElementStartTime.set(startNanos);
        record.finishTimeNanos = finishNanos;
        return record;
    }

    /**
     * Called during construction. This method ensures that the core threads are initialized
     * which ensures correct behavior when submitting tasks for execution.
     * <p>
     * If you override this method, make sure to call <code>super.init();</code> otherwise
     * the executor won't behave as expected.
     * <p>
     * Note that this method is called from the constructor of this class, i.e. before
     * the constructor of a subclass had a chance to initialize the fields of the subclass.
     */
    protected void init() {
        //this is important so that all of our tasks get queued in the queue rather
        //than submitted directly. We do depend on this because the queue is actually
        //responsible for delaying the tasks until they are ready.
        prestartAllCoreThreads();
    }

    /**
     * If the tasks need some kind of pre-processing before they are submitted for the next repeated
     * execution, the subclasses may override this method to perform such pre-processing.
     * <p>
     * This method is called at the very beginning of
     * {@link #submitWithPreferedDurationAndFixedDelay(Collection, long, long, long, TimeUnit)},
     * before the size of the collection is determined.
     * <p>
     * By default, this method does nothing.
     * 
     * @param tasks the collection of tasks that is about to be submitted
     */
    protected void prepareForNextRepetition(Collection<? extends Runnable> tasks) {
        //default implementation does nothing
    }
}