gobblin.runtime.Task.java Source code

Java tutorial

Introduction

Here is the source code for gobblin.runtime.Task.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package gobblin.runtime;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.lang3.BooleanUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MDC;

import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.io.Closer;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;

import lombok.NoArgsConstructor;

import gobblin.Constructs;
import gobblin.commit.SpeculativeAttemptAwareConstruct;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.configuration.WorkUnitState;
import gobblin.converter.Converter;
import gobblin.fork.CopyHelper;
import gobblin.fork.CopyNotSupportedException;
import gobblin.fork.Copyable;
import gobblin.fork.ForkOperator;
import gobblin.instrumented.extractor.InstrumentedExtractorBase;
import gobblin.instrumented.extractor.InstrumentedExtractorDecorator;
import gobblin.metrics.MetricContext;
import gobblin.metrics.event.EventSubmitter;
import gobblin.metrics.event.TaskEvent;
import gobblin.publisher.DataPublisher;
import gobblin.publisher.SingleTaskDataPublisher;
import gobblin.qualitychecker.row.RowLevelPolicyCheckResults;
import gobblin.qualitychecker.row.RowLevelPolicyChecker;
import gobblin.runtime.fork.AsynchronousFork;
import gobblin.runtime.fork.Fork;
import gobblin.runtime.fork.SynchronousFork;
import gobblin.runtime.task.TaskIFace;
import gobblin.runtime.util.TaskMetrics;
import gobblin.source.extractor.Extractor;
import gobblin.source.extractor.JobCommitPolicy;
import gobblin.source.extractor.RecordEnvelope;
import gobblin.source.extractor.StreamingExtractor;
import gobblin.state.ConstructState;
import gobblin.util.ConfigUtils;
import gobblin.writer.AcknowledgableRecordEnvelope;
import gobblin.writer.AcknowledgableWatermark;
import gobblin.writer.DataWriter;
import gobblin.writer.FineGrainedWatermarkTracker;
import gobblin.writer.MultiWriterWatermarkManager;
import gobblin.writer.TrackerBasedWatermarkManager;
import gobblin.writer.WatermarkAwareWriter;
import gobblin.writer.WatermarkManager;
import gobblin.writer.WatermarkStorage;

/**
 * A physical unit of execution for a Gobblin {@link gobblin.source.workunit.WorkUnit}.
 *
 * <p>
 *     Each task is executed by a single thread in a thread pool managed by the {@link TaskExecutor}
 *     and each {@link Fork} of the task is executed in a separate thread pool also managed by the
 *     {@link TaskExecutor}.
 *
 *     Each {@link Task} consists of the following steps:
 *     <ul>
 *       <li>Extracting, converting, and forking the source schema.</li>
 *       <li>Extracting, converting, doing row-level quality checking, and forking each data record.</li>
 *       <li>Putting each forked record into the record queue managed by each {@link Fork}.</li>
 *       <li>Committing output data of each {@link Fork} once all {@link Fork}s finish.</li>
 *       <li>Cleaning up and exiting.</li>
 *     </ul>
 *
 *     Each {@link Fork} consists of the following steps:
 *     <ul>
 *       <li>Getting the next record off the record queue.</li>
 *       <li>Converting the record and doing row-level quality checking if applicable.</li>
 *       <li>Writing the record out if it passes the quality checking.</li>
 *       <li>Cleaning up and exiting once all the records have been processed.</li>
 *     </ul>
 * </p>
 *
 * @author Yinan Li
 */
@NoArgsConstructor(force = true)
public class Task implements TaskIFace {

    private static final Logger LOG = LoggerFactory.getLogger(Task.class);

    // Identity: job/task IDs and the task key (pushed into the logging MDC in run()).
    private final String jobId;
    private final String taskId;
    private final String taskKey;
    // Context and collaborators supplied at construction time.
    private final TaskContext taskContext;
    private final TaskState taskState;
    private final TaskStateTracker taskStateTracker;
    private final TaskExecutor taskExecutor;
    // Optional latch counted down when this task completes (see markTaskCompletion()).
    private final Optional<CountDownLatch> countDownLatch;
    // Fork -> future of its execution; Optional.absent() keys are placeholders for unforked branches.
    // NOTE(review): multiple Optional.absent() keys collapse to one map entry — verify branch-index
    // alignment when more than one branch is unforked.
    private final Map<Optional<Fork>, Optional<Future<?>>> forks = Maps.newLinkedHashMap();

    // Number of task retries
    private final AtomicInteger retryCount = new AtomicInteger();

    // Processing-chain constructs built in the constructor.
    private final Converter converter;
    private final InstrumentedExtractorBase extractor;
    private final RowLevelPolicyChecker rowChecker;
    // BATCH vs. STREAMING execution mode for this task.
    private final ExecutionModel taskMode;
    // Streaming-only watermarking machinery; all absent for batch tasks.
    private final String watermarkingStrategy;
    private final Optional<WatermarkManager> watermarkManager;
    private final Optional<FineGrainedWatermarkTracker> watermarkTracker;
    private final Optional<WatermarkStorage> watermarkStorage;

    // Releases every closeable construct registered during construction and run().
    private final Closer closer;

    private long startTime;
    // Timestamp of the most recent record pull; volatile so getProgress() sees fresh values.
    private volatile long lastRecordPulledTimestampMillis;
    private final AtomicLong recordsPulled;

    // Shutdown coordination: request flag plus a latch released in completeShutdown().
    private final AtomicBoolean shutdownRequested;
    private final CountDownLatch shutdownLatch;

    /**
     * Instantiate a new {@link Task}.
     *
     * <p>
     *     Builds the task's processing chain (instrumented extractor, converter chain, row-level
     *     quality checker) and, for streaming tasks, the watermarking machinery (storage, tracker,
     *     manager). All closeable constructs are registered with a single {@link Closer} so they
     *     are released together.
     * </p>
     *
     * @param context a {@link TaskContext} containing all necessary information to construct and run a {@link Task}
     * @param taskStateTracker a {@link TaskStateTracker} for tracking task state
     * @param taskExecutor a {@link TaskExecutor} for executing the {@link Task} and its {@link Fork}s
     * @param countDownLatch an optional {@link java.util.concurrent.CountDownLatch} used to signal the task completion
     * @throws RuntimeException if the row-level policy checker cannot be instantiated
     * @throws TaskInstantiationException if a streaming task's extractor is not a {@link StreamingExtractor}
     */
    public Task(TaskContext context, TaskStateTracker taskStateTracker, TaskExecutor taskExecutor,
            Optional<CountDownLatch> countDownLatch) {
        this.taskContext = context;
        this.taskState = context.getTaskState();
        this.jobId = this.taskState.getJobId();
        this.taskId = this.taskState.getTaskId();
        this.taskKey = this.taskState.getTaskKey();
        this.taskStateTracker = taskStateTracker;
        this.taskExecutor = taskExecutor;
        this.countDownLatch = countDownLatch;
        this.closer = Closer.create();
        this.closer.register(this.taskState.getTaskBrokerNullable());
        // Wrap the raw extractor so instrumentation/metrics are collected transparently.
        this.extractor = closer
                .register(new InstrumentedExtractorDecorator<>(this.taskState, this.taskContext.getExtractor()));

        this.converter = closer.register(new MultiConverter(this.taskContext.getConverters()));
        try {
            this.rowChecker = closer.register(this.taskContext.getRowLevelPolicyChecker());
        } catch (Exception e) {
            // Best-effort cleanup of already-registered resources before failing construction.
            try {
                closer.close();
            } catch (Throwable t) {
                LOG.error("Failed to close all open resources", t);
            }
            throw new RuntimeException("Failed to instantiate row checker.", e);
        }

        this.taskMode = getExecutionModel(this.taskState);
        this.recordsPulled = new AtomicLong(0);
        this.lastRecordPulledTimestampMillis = 0;
        this.shutdownRequested = new AtomicBoolean(false);
        this.shutdownLatch = new CountDownLatch(1);

        // Setup Streaming constructs

        this.watermarkingStrategy = "FineGrain"; // TODO: Configure

        if (isStreamingTask()) {
            // Streaming requires an extractor that can be started against a watermark storage.
            Extractor underlyingExtractor = this.taskContext.getRawSourceExtractor();
            if (!(underlyingExtractor instanceof StreamingExtractor)) {
                LOG.error(
                        "Extractor {}  is not an instance of StreamingExtractor but the task is configured to run in continuous mode",
                        underlyingExtractor.getClass().getName());
                throw new TaskInstantiationException("Extraction " + underlyingExtractor.getClass().getName()
                        + " is not an instance of StreamingExtractor but the task is configured to run in continuous mode");
            }

            this.watermarkStorage = Optional.of(taskContext.getWatermarkStorage());
            Config config;
            try {
                config = ConfigUtils.propertiesToConfig(taskState.getProperties());
            } catch (Exception e) {
                // Fall back to an empty config rather than failing the whole task.
                LOG.warn("Failed to deserialize taskState into Config.. continuing with an empty config", e);
                config = ConfigFactory.empty();
            }

            long commitIntervalMillis = ConfigUtils.getLong(config,
                    TaskConfigurationKeys.STREAMING_WATERMARK_COMMIT_INTERVAL_MILLIS,
                    TaskConfigurationKeys.DEFAULT_STREAMING_WATERMARK_COMMIT_INTERVAL_MILLIS);
            if (watermarkingStrategy.equals("FineGrain")) { // TODO: Configure
                // Fine-grained strategy: track per-record watermarks and commit via the tracker.
                this.watermarkTracker = Optional.of(this.closer.register(new FineGrainedWatermarkTracker(config)));
                this.watermarkManager = Optional.of((WatermarkManager) this.closer
                        .register(new TrackerBasedWatermarkManager(this.watermarkStorage.get(),
                                this.watermarkTracker.get(), commitIntervalMillis, Optional.of(this.LOG))));

            } else {
                // writer-based watermarking
                this.watermarkManager = Optional
                        .of((WatermarkManager) this.closer.register(new MultiWriterWatermarkManager(
                                this.watermarkStorage.get(), commitIntervalMillis, Optional.of(this.LOG))));
                this.watermarkTracker = Optional.absent();
            }
        } else {
            // Batch tasks need no watermarking machinery.
            this.watermarkManager = Optional.absent();
            this.watermarkTracker = Optional.absent();
            this.watermarkStorage = Optional.absent();
        }
    }

    /**
     * Resolve the {@link ExecutionModel} (e.g. BATCH vs. STREAMING) configured in the given state.
     *
     * @param state the {@link State} holding the task execution mode property
     * @return the configured {@link ExecutionModel}, or {@link ExecutionModel#BATCH} when the
     *         configured value does not name a valid model
     */
    public static ExecutionModel getExecutionModel(State state) {
        String mode = state.getProp(TaskConfigurationKeys.TASK_EXECUTION_MODE,
                TaskConfigurationKeys.DEFAULT_TASK_EXECUTION_MODE);
        try {
            // Use Locale.ROOT so the upper-casing is locale-independent: with the default locale
            // (e.g. Turkish) "streaming".toUpperCase() would not match the enum constant name.
            return ExecutionModel.valueOf(mode.toUpperCase(Locale.ROOT));
        } catch (Exception e) {
            // Fall back to BATCH rather than failing the task for an unrecognized mode.
            LOG.warn("Could not find an execution model corresponding to {}, returning {}", mode,
                    ExecutionModel.BATCH, e);
            return ExecutionModel.BATCH;
        }
    }

    /** Whether single-branch tasks should run their {@link Fork} synchronously (inline). */
    private boolean areSingleBranchTasksSynchronous(TaskContext taskContext) {
        String configuredValue = taskContext.getTaskState().getProp(
                TaskConfigurationKeys.TASK_IS_SINGLE_BRANCH_SYNCHRONOUS,
                TaskConfigurationKeys.DEFAULT_TASK_IS_SINGLE_BRANCH_SYNCHRONOUS);
        return BooleanUtils.toBoolean(configuredValue);
    }

    /** Whether this task runs in {@link ExecutionModel#STREAMING} (continuous) mode. */
    private boolean isStreamingTask() {
        // Compare enum constants with == rather than equals(): idiomatic and null-safe
        // (taskMode can be null when the Lombok force=true no-args constructor is used).
        return this.taskMode == ExecutionModel.STREAMING;
    }

    /**
     * Block until this task finishes shutting down or the timeout elapses.
     *
     * @param timeoutInMillis maximum time to wait, in milliseconds
     * @return true if the task shut down within the timeout, false otherwise
     * @throws InterruptedException if the waiting thread is interrupted
     */
    public boolean awaitShutdown(long timeoutInMillis) throws InterruptedException {
        return this.shutdownLatch.await(timeoutInMillis, TimeUnit.MILLISECONDS);
    }

    /** Signal shutdown completion, releasing any thread blocked in {@link #awaitShutdown(long)}. */
    private void completeShutdown() {
        this.shutdownLatch.countDown();
    }

    /**
     * Whether a shutdown has been requested, either explicitly via {@link #shutdown()} or
     * implicitly by interrupting the task thread.
     *
     * @return true if this task should stop pulling records
     */
    private boolean shutdownRequested() {
        // Only transition false -> true. The original unconditional get()/set() pair could race
        // with a concurrent shutdown() call and overwrite a pending request with "false" when
        // the thread was not interrupted.
        if (!this.shutdownRequested.get() && Thread.currentThread().isInterrupted()) {
            this.shutdownRequested.compareAndSet(false, true);
        }
        return this.shutdownRequested.get();
    }

    /** Request that this task stop; the streaming extract loop checks this flag between records. */
    public void shutdown() {
        this.shutdownRequested.set(true);
    }

    /**
     * Build a human-readable progress summary: records pulled, time since the last extracted
     * record and, for streaming tasks, watermark commit status.
     */
    public String getProgress() {
        long now = System.currentTimeMillis();
        long sinceLastRecordMillis = now - this.lastRecordPulledTimestampMillis;
        if (!isStreamingTask()) {
            return String.format("recordsPulled:%d, lastRecordExtracted: %d ms ago",
                    this.recordsPulled.get(), sinceLastRecordMillis);
        }
        WatermarkManager.CommitStatus commitStatus = this.watermarkManager.get().getCommitStatus();
        long sinceLastCommitMillis = now - commitStatus.getLastWatermarkCommitSuccessTimestampMillis();
        return String.format(
                "recordsPulled:%d, lastRecordExtracted: %d ms ago, "
                        + "lastWatermarkCommitted: %d ms ago, lastWatermarkCommitted: %s",
                this.recordsPulled.get(), sinceLastRecordMillis, sinceLastCommitMillis,
                commitStatus.getLastCommittedWatermarks());
    }

    /**
     * Run this task: extract, convert, quality-check and fork records, then wait for all
     * {@link Fork}s to finish. Any failure marks the task FAILED; completion is always reported
     * to the {@link TaskStateTracker} and the shutdown latch is released in the finally block.
     */
    @Override
    @SuppressWarnings("unchecked")
    public void run() {
        // Tag all log lines from this thread with the task key.
        MDC.put(ConfigurationKeys.TASK_KEY_KEY, this.taskKey);
        this.startTime = System.currentTimeMillis();
        this.taskState.setStartTime(startTime);
        this.taskState.setWorkingState(WorkUnitState.WorkingState.RUNNING);

        // Clear the map so it starts with a fresh set of forks for each run/retry
        this.forks.clear();
        RowLevelPolicyChecker rowChecker = null;
        try {
            // Get the fork operator. By default IdentityForkOperator is used with a single branch.
            ForkOperator forkOperator = closer.register(this.taskContext.getForkOperator());
            forkOperator.init(this.taskState);
            int branches = forkOperator.getBranches(this.taskState);
            // Set fork.branches explicitly here so the rest task flow can pick it up
            this.taskState.setProp(ConfigurationKeys.FORK_BRANCHES_KEY, branches);

            // Extract, convert, and fork the source schema.
            Object schema = converter.convertSchema(extractor.getSchema(), this.taskState);
            List<Boolean> forkedSchemas = forkOperator.forkSchema(this.taskState, schema);
            if (forkedSchemas.size() != branches) {
                throw new ForkBranchMismatchException(
                        String.format("Number of forked schemas [%d] is not equal to number of branches [%d]",
                                forkedSchemas.size(), branches));
            }

            // A schema going into more than one branch must be copyable so each branch gets its own instance.
            if (inMultipleBranches(forkedSchemas) && !(CopyHelper.isCopyable(schema))) {
                throw new CopyNotSupportedException(schema + " is not copyable");
            }

            rowChecker = closer.register(this.taskContext.getRowLevelPolicyChecker());
            RowLevelPolicyCheckResults rowResults = new RowLevelPolicyCheckResults();

            if (!areSingleBranchTasksSynchronous(this.taskContext) || branches > 1) {
                // Create one fork for each forked branch
                for (int i = 0; i < branches; i++) {
                    if (forkedSchemas.get(i)) {
                        AsynchronousFork fork = closer.register(new AsynchronousFork(this.taskContext,
                                schema instanceof Copyable ? ((Copyable) schema).copy() : schema, branches, i,
                                this.taskMode));
                        configureStreamingFork(fork, watermarkingStrategy);
                        // Run the Fork
                        this.forks.put(Optional.<Fork>of(fork),
                                Optional.<Future<?>>of(this.taskExecutor.submit(fork)));
                    } else {
                        // Branch not selected: record a placeholder entry for it.
                        this.forks.put(Optional.<Fork>absent(), Optional.<Future<?>>absent());
                    }
                }
            } else {
                // Single branch configured as synchronous: use a SynchronousFork instead.
                SynchronousFork fork = closer.register(new SynchronousFork(this.taskContext,
                        schema instanceof Copyable ? ((Copyable) schema).copy() : schema, branches, 0,
                        this.taskMode));
                configureStreamingFork(fork, watermarkingStrategy);
                this.forks.put(Optional.<Fork>of(fork), Optional.<Future<?>>of(this.taskExecutor.submit(fork)));
            }

            if (isStreamingTask()) {

                // Start watermark manager and tracker
                if (this.watermarkTracker.isPresent()) {
                    this.watermarkTracker.get().start();
                }
                this.watermarkManager.get().start();

                ((StreamingExtractor) this.taskContext.getRawSourceExtractor()).start(this.watermarkStorage.get());

                RecordEnvelope recordEnvelope;
                // Extract, convert, and fork one source record at a time.
                while (!shutdownRequested()
                        && (recordEnvelope = (RecordEnvelope) extractor.readRecord(null)) != null) {
                    onRecordExtract();
                    // Each record carries a watermark acked only after all downstream copies are handled.
                    AcknowledgableWatermark ackableWatermark = new AcknowledgableWatermark(
                            recordEnvelope.getWatermark());
                    if (watermarkTracker.isPresent()) {
                        watermarkTracker.get().track(ackableWatermark);
                    }
                    for (Object convertedRecord : converter.convertRecord(schema, recordEnvelope.getRecord(),
                            this.taskState)) {
                        processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches,
                                ackableWatermark.incrementAck());
                    }
                    // Release the initial reference so the watermark can commit once forks ack their copies.
                    ackableWatermark.ack();
                }
            } else {
                Object record;
                // Extract, convert, and fork one source record at a time.
                while ((record = extractor.readRecord(null)) != null) {
                    onRecordExtract();
                    for (Object convertedRecord : converter.convertRecord(schema, record, this.taskState)) {
                        processRecord(convertedRecord, forkOperator, rowChecker, rowResults, branches, null);
                    }
                }
            }

            LOG.info("Extracted " + this.recordsPulled + " data records");
            LOG.info("Row quality checker finished with results: " + rowResults.getResults());

            this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXTRACTED, this.recordsPulled);
            this.taskState.setProp(ConfigurationKeys.EXTRACTOR_ROWS_EXPECTED, extractor.getExpectedRecordCount());

            for (Optional<Fork> fork : this.forks.keySet()) {
                if (fork.isPresent()) {
                    // Tell the fork that the main branch is completed and no new incoming data records should be expected
                    fork.get().markParentTaskDone();
                }
            }

            // Wait for each fork's worker to drain its queue and finish.
            for (Optional<Future<?>> forkFuture : this.forks.values()) {
                if (forkFuture.isPresent()) {
                    try {
                        long forkFutureStartTime = System.nanoTime();
                        forkFuture.get().get();
                        long forkDuration = System.nanoTime() - forkFutureStartTime;
                        LOG.info("Task shutdown: Fork future reaped in {} millis", forkDuration / 1000000);
                    } catch (InterruptedException ie) {
                        // Preserve the interrupt flag; cleanup still happens in the finally block.
                        Thread.currentThread().interrupt();
                    }
                }
            }

            //TODO: Move these to explicit shutdown phase
            if (watermarkManager.isPresent()) {
                watermarkManager.get().close();
            }
            if (watermarkTracker.isPresent()) {
                watermarkTracker.get().close();
            }
        } catch (Throwable t) {
            failTask(t);
        } finally {
            this.taskStateTracker.onTaskRunCompletion(this);
            completeShutdown();
        }
    }

    /**
     * For streaming tasks, wire the fork's writer into the watermarking machinery; a no-op for
     * batch tasks. Fails when the writer cannot participate in watermark tracking.
     *
     * @param fork the fork whose writer should be configured
     * @param watermarkingStrategy the active watermarking strategy name
     * @throws IOException if obtaining the fork's writer fails
     */
    private void configureStreamingFork(Fork fork, String watermarkingStrategy) throws IOException {
        if (!isStreamingTask()) {
            return;
        }
        DataWriter forkWriter = fork.getWriter();
        if (!(forkWriter instanceof WatermarkAwareWriter)) {
            String errorMessage = String.format(
                    "The Task is configured to run in continuous mode, "
                            + "but the writer %s is not a WatermarkAwareWriter",
                    forkWriter.getClass().getName());
            LOG.error(errorMessage);
            throw new RuntimeException(errorMessage);
        }
        if (watermarkingStrategy.equals("WriterBased")) {
            ((MultiWriterWatermarkManager) this.watermarkManager.get())
                    .registerWriter((WatermarkAwareWriter) forkWriter);
        }
    }

    /** Record one extracted source record: bump the pulled count and refresh the pull timestamp. */
    private void onRecordExtract() {
        this.recordsPulled.incrementAndGet();
        this.lastRecordPulledTimestampMillis = System.currentTimeMillis();
    }

    /**
     * Mark this task as FAILED and record the failure cause in the task state.
     *
     * @param t the {@link Throwable} that caused the failure
     */
    private void failTask(Throwable t) {
        // Parameterized SLF4J logging instead of String.format; the throwable stays the last
        // argument so its stack trace is still logged.
        LOG.error("Task {} failed", this.taskId, t);
        this.taskState.setWorkingState(WorkUnitState.WorkingState.FAILED);
        this.taskState.setProp(ConfigurationKeys.TASK_FAILURE_EXCEPTION_KEY, Throwables.getStackTraceAsString(t));
    }

    /**
     * Whether the task should directly publish its output data to the final publisher output directory.
     *
     * <p>
     *   The task should publish its output data directly if {@link ConfigurationKeys#PUBLISH_DATA_AT_JOB_LEVEL}
     *   is set to false AND any of the following conditions is satisfied:
     *
     *   <ul>
     *     <li>The {@link JobCommitPolicy#COMMIT_ON_PARTIAL_SUCCESS} policy is used.</li>
     *     <li>The {@link JobCommitPolicy#COMMIT_SUCCESSFUL_TASKS} policy is used and all {@link Fork}s of this
     *     {@link Task} succeeded.</li>
     *   </ul>
     * </p>
     *
     * @return true if this task should publish its own output data
     */
    private boolean shouldPublishDataInTask() {
        boolean publishDataAtJobLevel = this.taskState.getPropAsBoolean(ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL,
                ConfigurationKeys.DEFAULT_PUBLISH_DATA_AT_JOB_LEVEL);
        if (publishDataAtJobLevel) {
            // Parameterized logging avoids the eager String.format call.
            LOG.info("{} is true. Will publish data at the job level.",
                    ConfigurationKeys.PUBLISH_DATA_AT_JOB_LEVEL);
            return false;
        }

        JobCommitPolicy jobCommitPolicy = JobCommitPolicy.getCommitPolicy(this.taskState);

        if (jobCommitPolicy == JobCommitPolicy.COMMIT_SUCCESSFUL_TASKS) {
            // Publish only if this task reached SUCCESSFUL (i.e. all its forks succeeded).
            return this.taskState.getWorkingState() == WorkUnitState.WorkingState.SUCCESSFUL;
        }

        if (jobCommitPolicy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS) {
            return true;
        }

        LOG.info("Will publish data at the job level with job commit policy: {}", jobCommitPolicy);
        return false;
    }

    /**
     * Publish this task's output data directly (task-level publish) via a
     * {@link SingleTaskDataPublisher}.
     *
     * @throws IOException if publishing fails; the failure cause is also recorded in the task state
     */
    private void publishTaskData() throws IOException {
        Closer closer = Closer.create();
        try {
            Class<? extends DataPublisher> dataPublisherClass = getTaskPublisherClass();
            SingleTaskDataPublisher publisher = closer
                    .register(SingleTaskDataPublisher.getInstance(dataPublisherClass, this.taskState));

            LOG.info("Publishing data from task " + this.taskId);
            publisher.publish(this.taskState);
        } catch (ClassCastException e) {
            // The configured publisher class exists but does not extend SingleTaskDataPublisher.
            LOG.error(String.format("To publish data in task, the publisher class must extend %s",
                    SingleTaskDataPublisher.class.getSimpleName()), e);
            this.taskState.setTaskFailureException(e);
            throw closer.rethrow(e);
        } catch (Throwable t) {
            this.taskState.setTaskFailureException(t);
            throw closer.rethrow(t);
        } finally {
            closer.close();
        }
    }

    /**
     * Resolve the {@link DataPublisher} class for task-level publishing, preferring the
     * task-specific publisher type over the job-wide one.
     *
     * @throws ReflectiveOperationException if the configured class cannot be loaded
     */
    @SuppressWarnings("unchecked")
    private Class<? extends DataPublisher> getTaskPublisherClass() throws ReflectiveOperationException {
        String publisherClassName;
        if (this.taskState.contains(ConfigurationKeys.TASK_DATA_PUBLISHER_TYPE)) {
            publisherClassName = this.taskState.getProp(ConfigurationKeys.TASK_DATA_PUBLISHER_TYPE);
        } else {
            publisherClassName = this.taskState.getProp(ConfigurationKeys.DATA_PUBLISHER_TYPE,
                    ConfigurationKeys.DEFAULT_DATA_PUBLISHER_TYPE);
        }
        return (Class<? extends DataPublisher>) Class.forName(publisherClassName);
    }

    /**
     * Get the ID of the job this {@link Task} belongs to.
     *
     * @return ID of the job this {@link Task} belongs to
     */
    public String getJobId() {
        return this.jobId;
    }

    /**
     * Get the ID of this task.
     *
     * @return ID of this task
     */
    public String getTaskId() {
        return this.taskId;
    }

    /**
     * Get the key of this task (also used to tag log lines via the MDC in {@link #run()}).
     *
     * @return key of this task
     */
    public String getTaskKey() {
        return this.taskKey;
    }

    /**
     * Get the {@link TaskContext} associated with this task.
     *
     * @return {@link TaskContext} associated with this task
     */
    public TaskContext getTaskContext() {
        return this.taskContext;
    }

    /**
     * Get the {@link TaskState} of this task.
     *
     * @return state of this task
     */
    public TaskState getTaskState() {
        return this.taskState;
    }

    /** The task state doubles as this task's persistent state. */
    @Override
    public State getPersistentState() {
        return getTaskState();
    }

    /** The task state doubles as this task's execution metadata. */
    @Override
    public State getExecutionMetadata() {
        return getTaskState();
    }

    /** Get the current {@link WorkUnitState.WorkingState} of this task. */
    @Override
    public WorkUnitState.WorkingState getWorkingState() {
        return getTaskState().getWorkingState();
    }

    /**
     * Get the list of {@link Fork}s created by this {@link Task}.
     *
     * @return an immutable snapshot of the {@link Fork}s created by this {@link Task}
     */
    public List<Optional<Fork>> getForks() {
        ImmutableList.Builder<Optional<Fork>> snapshot = ImmutableList.builder();
        snapshot.addAll(this.forks.keySet());
        return snapshot.build();
    }

    /**
     * Update record-level metrics on every live {@link Fork} of this task.
     */
    public void updateRecordMetrics() {
        for (Map.Entry<Optional<Fork>, Optional<Future<?>>> forkEntry : this.forks.entrySet()) {
            Optional<Fork> maybeFork = forkEntry.getKey();
            if (maybeFork.isPresent()) {
                maybeFork.get().updateRecordMetrics();
            }
        }
    }

    /**
     * Update byte-level metrics on every live {@link Fork} of this task.
     *
     * <p>
     *     Only meaningful after the writer commits; an {@link IOException} is logged, not rethrown.
     * </p>
     */
    public void updateByteMetrics() {
        try {
            for (Optional<Fork> maybeFork : this.forks.keySet()) {
                if (!maybeFork.isPresent()) {
                    continue;
                }
                maybeFork.get().updateByteMetrics();
            }
        } catch (IOException ioe) {
            LOG.error("Failed to update byte-level metrics for task " + this.taskId, ioe);
        }
    }

    /**
     * Increment the retry count of this task by one.
     */
    public void incrementRetryCount() {
        this.retryCount.incrementAndGet();
    }

    /**
     * Get the number of times this task has been retried.
     *
     * @return number of times this task has been retried
     */
    public int getRetryCount() {
        return this.retryCount.get();
    }

    /**
     * Mark the completion of this {@link Task}: count down the optional completion latch and
     * record the final retry count in the task state.
     */
    public void markTaskCompletion() {
        if (this.countDownLatch.isPresent()) {
            this.countDownLatch.get().countDown();
        }

        this.taskState.setProp(ConfigurationKeys.TASK_RETRIES_KEY, this.retryCount.get());
    }

    /** Returns the task ID as this task's string form. */
    @Override
    public String toString() {
        return this.taskId;
    }

    /**
     * Process a (possibly converted) record.
     *
     * <p>
     *     Runs row-level quality checks, forks the record to the selected branches (copying it
     *     when it goes into more than one), and manages watermark acknowledgements for streaming
     *     tasks so a watermark commits only after every branch has handled its copy.
     * </p>
     *
     * @param convertedRecord the record after conversion
     * @param forkOperator decides which branches receive the record
     * @param rowChecker row-level quality checker applied before forking
     * @param rowResults accumulator for quality-check results
     * @param branches total number of branches
     * @param watermark per-record watermark for streaming tasks; null for batch tasks
     */
    @SuppressWarnings("unchecked")
    private void processRecord(Object convertedRecord, ForkOperator forkOperator, RowLevelPolicyChecker rowChecker,
            RowLevelPolicyCheckResults rowResults, int branches, AcknowledgableWatermark watermark)
            throws Exception {
        // Skip the record if quality checking fails
        if (!rowChecker.executePolicies(convertedRecord, rowResults)) {
            if (watermark != null) {
                // Ack immediately: a filtered record must not hold back the watermark.
                watermark.ack();
            }
            return;
        }

        List<Boolean> forkedRecords = forkOperator.forkDataRecord(this.taskState, convertedRecord);
        if (forkedRecords.size() != branches) {
            throw new ForkBranchMismatchException(
                    String.format("Number of forked data records [%d] is not equal to number of branches [%d]",
                            forkedRecords.size(), branches));
        }

        boolean needToCopy = inMultipleBranches(forkedRecords);
        // we only have to copy a record if it needs to go into multiple forks
        if (needToCopy && !(CopyHelper.isCopyable(convertedRecord))) {
            throw new CopyNotSupportedException(convertedRecord.getClass().getName() + " is not copyable");
        }

        int branch = 0;
        int copyInstance = 0;
        for (Optional<Fork> fork : this.forks.keySet()) {
            if (fork.isPresent() && forkedRecords.get(branch)) {
                Object recordForFork = CopyHelper.copy(convertedRecord, copyInstance);
                copyInstance++;
                if (isStreamingTask()) {
                    // Send the record, watermark pair down the fork
                    recordForFork = new AcknowledgableRecordEnvelope<>(recordForFork, watermark.incrementAck());
                }
                // Put the record into the record queue of each fork. A put may timeout and return a false, in which
                // case the put is retried until it is successful.
                boolean succeeded = false;
                while (!succeeded) {
                    succeeded = fork.get().putRecord(recordForFork);
                }
            }
            branch++;
        }
        if (watermark != null) {
            // Release the reference taken by the caller's incrementAck() for this invocation.
            watermark.ack();
        }
    }

    /**
     * Check if a schema or data record is being passed to more than one branch.
     *
     * @param branches per-branch flags indicating whether the item goes into that branch
     * @return true if at least two flags are set
     */
    private static boolean inMultipleBranches(List<Boolean> branches) {
        int selected = 0;
        for (Boolean goesToBranch : branches) {
            if (goesToBranch) {
                selected++;
                if (selected > 1) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Get the total number of records written by every {@link Fork} of this {@link Task}.
     *
     * @return the number of records written across all live forks
     */
    private long getRecordsWritten() {
        long recordsWritten = 0;
        for (Optional<Fork> fork : this.forks.keySet()) {
            // Unforked branches are stored as Optional.absent(); calling get() on them would
            // throw IllegalStateException, so guard like every other fork loop in this class.
            if (fork.isPresent()) {
                recordsWritten += fork.get().getRecordsWritten();
            }
        }
        return recordsWritten;
    }

    /**
     * Get the total number of bytes written by every {@link Fork} of this {@link Task}.
     *
     * @return the number of bytes written across all live forks
     */
    private long getBytesWritten() {
        long bytesWritten = 0;
        for (Optional<Fork> fork : this.forks.keySet()) {
            // Unforked branches are stored as Optional.absent(); calling get() on them would
            // throw IllegalStateException, so guard like every other fork loop in this class.
            if (fork.isPresent()) {
                bytesWritten += fork.get().getBytesWritten();
            }
        }
        return bytesWritten;
    }

    /**
     * Get the final state of each construct used by this task and add it to the {@link gobblin.runtime.TaskState}.
     *
     * @param extractor the {@link gobblin.instrumented.extractor.InstrumentedExtractorBase} used by this task.
     * @param converter the {@link gobblin.converter.Converter} used by this task.
     * @param rowChecker the {@link RowLevelPolicyChecker} used by this task.
     */
    private void addConstructsFinalStateToTaskState(InstrumentedExtractorBase<?, ?> extractor,
            Converter<?, ?, ?, ?> converter, RowLevelPolicyChecker rowChecker) {
        ConstructState constructState = new ConstructState();
        if (extractor != null) {
            constructState.addConstructState(Constructs.EXTRACTOR, new ConstructState(extractor.getFinalState()));
        }
        if (converter != null) {
            constructState.addConstructState(Constructs.CONVERTER, new ConstructState(converter.getFinalState()));
        }
        if (rowChecker != null) {
            constructState.addConstructState(Constructs.ROW_QUALITY_CHECKER,
                    new ConstructState(rowChecker.getFinalState()));
        }
        int forkIdx = 0;
        for (Optional<Fork> fork : this.forks.keySet()) {
            // Unforked branches are stored as Optional.absent(); the unguarded fork.get() would
            // throw IllegalStateException. Skip absent forks but keep incrementing forkIdx so
            // the recorded indices still correspond to map positions.
            if (fork.isPresent()) {
                constructState.addConstructState(Constructs.FORK_OPERATOR,
                        new ConstructState(fork.get().getFinalState()), Integer.toString(forkIdx));
            }
            forkIdx++;
        }

        constructState.mergeIntoWorkUnitState(this.taskState);
    }

    /**
     * Commit this task by doing the following things:
     * 1. Committing each fork by {@link Fork#commit()}.
     * 2. Update final state of construct in {@link #taskState}.
     * 3. Check whether to publish data in task.
     *
     * <p>The finally blocks below are order-sensitive: construct state and counters are
     * merged before resources are closed, forks are only cancelled after {@code closer.close()},
     * and task-level publishing only happens once all forks are closed.
     */
    public void commit() {
        try {
            // Check if all forks succeeded
            List<Integer> failedForkIds = new ArrayList<>();
            for (Optional<Fork> fork : this.forks.keySet()) {
                if (fork.isPresent()) {
                    if (fork.get().isSucceeded()) {
                        // A fork that ran successfully can still fail its commit;
                        // treat that the same as a failed fork.
                        if (!fork.get().commit()) {
                            failedForkIds.add(fork.get().getIndex());
                        }
                    } else {
                        failedForkIds.add(fork.get().getIndex());
                    }
                }
            }

            if (failedForkIds.size() == 0) {
                // Set the task state to SUCCESSFUL. The state is not set to COMMITTED
                // as the data publisher will do that upon successful data publishing.
                this.taskState.setWorkingState(WorkUnitState.WorkingState.SUCCESSFUL);
            } else {
                failTask(new ForkException("Fork branches " + failedForkIds + " failed for task " + this.taskId));
            }
        } catch (Throwable t) {
            failTask(t);
        } finally {
            // Runs whether the commit succeeded or failed: record final construct state
            // and writer counters on the task state before tearing anything down.
            addConstructsFinalStateToTaskState(extractor, converter, rowChecker);

            this.taskState.setProp(ConfigurationKeys.WRITER_RECORDS_WRITTEN, getRecordsWritten());
            this.taskState.setProp(ConfigurationKeys.WRITER_BYTES_WRITTEN, getBytesWritten());

            this.submitTaskCommittedEvent();

            try {
                // Closes all registered resources, including the forks themselves.
                closer.close();
            } catch (Throwable t) {
                LOG.error("Failed to close all open resources", t);
            }

            // Best-effort cancellation of any fork futures still running; each failure
            // is logged individually so one bad fork does not stop the cleanup loop.
            for (Map.Entry<Optional<Fork>, Optional<Future<?>>> forkAndFuture : this.forks.entrySet()) {
                if (forkAndFuture.getKey().isPresent() && forkAndFuture.getValue().isPresent()) {
                    try {
                        forkAndFuture.getValue().get().cancel(true);
                    } catch (Throwable t) {
                        LOG.error(String.format("Failed to cancel Fork \"%s\"", forkAndFuture.getKey().get()), t);
                    }
                }
            }

            try {
                if (shouldPublishDataInTask()) {
                    // If data should be published by the task, publish the data and set the task state to COMMITTED.
                    // Task data can only be published after all forks have been closed by closer.close().
                    publishTaskData();
                    this.taskState.setWorkingState(WorkUnitState.WorkingState.COMMITTED);
                }
            } catch (IOException ioe) {
                failTask(ioe);
            } finally {
                // Always stamp timing and notify the tracker, even if publishing failed.
                long endTime = System.currentTimeMillis();
                this.taskState.setEndTime(endTime);
                this.taskState.setTaskDuration(endTime - startTime);
                this.taskStateTracker.onTaskCommitCompletion(this);
            }
        }
    }

    /**
     * Submit a {@link TaskEvent#TASK_COMMITTED_EVENT_NAME} event tagged with this task's
     * id and attempt id (empty string when no attempt id is set).
     */
    protected void submitTaskCommittedEvent() {
        MetricContext metricContext = TaskMetrics.get(this.taskState).getMetricContext();
        EventSubmitter submitter = new EventSubmitter.Builder(metricContext, "gobblin.runtime.task").build();
        ImmutableMap<String, String> metadata = ImmutableMap.of(
                TaskEvent.METADATA_TASK_ID, this.taskId,
                TaskEvent.METADATA_TASK_ATTEMPT_ID, this.taskState.getTaskAttemptId().or(""));
        submitter.submit(TaskEvent.TASK_COMMITTED_EVENT_NAME, metadata);
    }

    /**
     * @return true if the current {@link Task} is safe to have duplicate attempts; false, otherwise.
     */
    public boolean isSpeculativeExecutionSafe() {
        if (!isConstructSpeculativeAttemptSafe(this.extractor)
                || !isConstructSpeculativeAttemptSafe(this.converter)) {
            return false;
        }
        for (Optional<Fork> fork : this.forks.keySet()) {
            if (fork.isPresent() && !fork.get().isSpeculativeExecutionSafe()) {
                return false;
            }
        }
        return true;
    }

    /**
     * A construct is considered safe unless it is {@link SpeculativeAttemptAwareConstruct}
     * and reports itself unsafe for speculative attempts.
     */
    private static boolean isConstructSpeculativeAttemptSafe(Object construct) {
        return !(construct instanceof SpeculativeAttemptAwareConstruct)
                || ((SpeculativeAttemptAwareConstruct) construct).isSpeculativeAttemptSafe();
    }
}