com.asakusafw.runtime.stage.output.BridgeOutputFormat.java Source code

Introduction

Here is the source code for com.asakusafw.runtime.stage.output.BridgeOutputFormat.java, a bridge implementation of Hadoop's OutputFormat that writes stage outputs through Asakusa Framework's Direct I/O data sources.

Source

/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.runtime.stage.output;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.InterruptedIOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.TreeMap;
import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobStatus.State;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.util.Progressable;

import com.asakusafw.runtime.directio.Counter;
import com.asakusafw.runtime.directio.DirectDataSource;
import com.asakusafw.runtime.directio.DirectDataSourceConstants;
import com.asakusafw.runtime.directio.DirectDataSourceRepository;
import com.asakusafw.runtime.directio.FilePattern;
import com.asakusafw.runtime.directio.OutputAttemptContext;
import com.asakusafw.runtime.directio.OutputTransactionContext;
import com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil;
import com.asakusafw.runtime.directio.hadoop.ProgressableCounter;
import com.asakusafw.runtime.stage.StageConstants;
import com.asakusafw.runtime.stage.StageOutput;
import com.asakusafw.runtime.util.VariableTable;

/**
 * A bridge implementation for Hadoop {@link OutputFormat}.
 * @since 0.2.5
 * @version 0.9.0
 */
public final class BridgeOutputFormat extends OutputFormat<Object, Object> {

    static final Log LOG = LogFactory.getLog(BridgeOutputFormat.class);

    private static final Charset ASCII = StandardCharsets.US_ASCII;

    private static final long SERIAL_VERSION = 1;

    private static final String KEY = "com.asakusafw.output.bridge"; //$NON-NLS-1$

    private final Map<TaskAttemptID, OutputCommitter> committerCache = new WeakHashMap<>();

    /**
     * Returns whether this stage has an output corresponding to this format.
     * @param context current context
     * @return {@code true} if such output exists, otherwise {@code false}
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static boolean hasOutput(JobContext context) {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        return context.getConfiguration().getRaw(KEY) != null;
    }

    /**
     * Sets current output information into the current context.
     * @param context current context
     * @param outputList output information to be set
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static void set(JobContext context, List<StageOutput> outputList) {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        if (outputList == null) {
            throw new IllegalArgumentException("outputList must not be null"); //$NON-NLS-1$
        }
        List<OutputSpec> specs = new ArrayList<>();
        for (StageOutput output : outputList) {
            List<String> deletePatterns = getDeletePatterns(output);
            OutputSpec spec = new OutputSpec(output.getName(), deletePatterns);
            specs.add(spec);
        }
        save(context.getConfiguration(), specs);
    }

    private static List<String> getDeletePatterns(StageOutput output) {
        assert output != null;
        List<String> results = new ArrayList<>();
        for (Map.Entry<String, String> entry : output.getAttributes().entrySet()) {
            if (entry.getKey().startsWith(DirectDataSourceConstants.PREFIX_DELETE_PATTERN)) {
                String rawDeletePattern = entry.getValue();
                results.add(rawDeletePattern);
            }
        }
        return results;
    }

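    /*
     * Serializes the output specs into the job configuration: the list is written
     * with WritableUtils (serial version, spec count, then each base path and its
     * delete patterns), GZIP-compressed, and Base64-encoded so that it can be stored
     * as a plain ASCII string under KEY.
     */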
    private static void save(Configuration conf, List<OutputSpec> specs) {
        assert conf != null;
        assert specs != null;
        for (OutputSpec spec : specs) {
            if (spec.resolved) {
                throw new IllegalStateException();
            }
        }
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (DataOutputStream output = new DataOutputStream(new GZIPOutputStream(new Base64OutputStream(sink)))) {
            WritableUtils.writeVLong(output, SERIAL_VERSION);
            WritableUtils.writeVInt(output, specs.size());
            for (OutputSpec spec : specs) {
                WritableUtils.writeString(output, spec.basePath);
                WritableUtils.writeVInt(output, spec.deletePatterns.size());
                for (String pattern : spec.deletePatterns) {
                    WritableUtils.writeString(output, pattern);
                }
            }
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
        conf.set(KEY, new String(sink.toByteArray(), ASCII));
    }

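    /*
     * Restores the output specs saved by save(Configuration, List): decodes and
     * decompresses the configuration value, verifies the serial version, and resolves
     * variables in base paths and delete patterns against the batch arguments
     * (see getVariableTable(JobContext)).
     */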
    private static List<OutputSpec> getSpecs(JobContext context) {
        assert context != null;
        String encoded = context.getConfiguration().getRaw(KEY);
        if (encoded == null) {
            return Collections.emptyList();
        }
        VariableTable table = getVariableTable(context);
        try (DataInputStream input = new DataInputStream(new GZIPInputStream(
                new Base64InputStream(new ByteArrayInputStream(encoded.getBytes(ASCII)))))) {
            long version = WritableUtils.readVLong(input);
            if (version != SERIAL_VERSION) {
                throw new IOException(MessageFormat.format("Invalid StageOutput version: framework={0}, saw={1}",
                        SERIAL_VERSION, version));
            }
            List<OutputSpec> results = new ArrayList<>();
            int specCount = WritableUtils.readVInt(input);
            for (int specIndex = 0; specIndex < specCount; specIndex++) {
                String basePath = WritableUtils.readString(input);
                try {
                    basePath = table.parse(basePath);
                } catch (IllegalArgumentException e) {
                    throw new IllegalStateException(MessageFormat.format("Invalid basePath: {0}", basePath), e);
                }
                int patternCount = WritableUtils.readVInt(input);
                List<String> patterns = new ArrayList<>();
                for (int patternIndex = 0; patternIndex < patternCount; patternIndex++) {
                    String pattern = WritableUtils.readString(input);
                    try {
                        pattern = table.parse(pattern);
                    } catch (IllegalArgumentException e) {
                        throw new IllegalStateException(
                                MessageFormat.format("Invalid delete pattern: {0}", pattern), e);
                    }
                    patterns.add(pattern);
                }
                results.add(new OutputSpec(basePath, patterns, true));
            }
            return results;
        } catch (IOException e) {
            throw new IllegalStateException(e);
        }
    }

    private static DirectDataSourceRepository getDataSourceRepository(JobContext context) {
        assert context != null;
        return HadoopDataSourceUtil.loadRepository(context.getConfiguration());
    }

    /**
     * Creates an output transaction context from the Hadoop job context.
     * @param context current context in Hadoop
     * @param datasourceId datasource ID
     * @return the created context
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static OutputTransactionContext createContext(JobContext context, String datasourceId) {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        if (datasourceId == null) {
            throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
        }
        String transactionId = getTransactionId(context, datasourceId);
        return new OutputTransactionContext(transactionId, datasourceId, createCounter(context));
    }

    /**
     * Creates an output attempt context from the Hadoop task attempt context.
     * @param context current context in Hadoop
     * @param datasourceId datasource ID
     * @return the created context
     * @throws IllegalArgumentException if some parameters were {@code null}
     */
    public static OutputAttemptContext createContext(TaskAttemptContext context, String datasourceId) {
        if (context == null) {
            throw new IllegalArgumentException("context must not be null"); //$NON-NLS-1$
        }
        if (datasourceId == null) {
            throw new IllegalArgumentException("datasourceId must not be null"); //$NON-NLS-1$
        }
        String transactionId = getTransactionId(context, datasourceId);
        String attemptId = getAttemptId(context, datasourceId);
        return new OutputAttemptContext(transactionId, attemptId, datasourceId, createCounter(context));
    }

    private static String getTransactionId(JobContext jobContext, String datasourceId) {
        assert jobContext != null;
        assert datasourceId != null;
        String executionId = jobContext.getConfiguration().get(StageConstants.PROP_EXECUTION_ID);
        if (executionId == null) {
            executionId = jobContext.getJobID().toString();
        }
        return getTransactionId(executionId);
    }

    private static String getTransactionId(String executionId) {
        return executionId;
    }

    private static String getAttemptId(TaskAttemptContext taskContext, String datasourceId) {
        assert taskContext != null;
        assert datasourceId != null;
        return taskContext.getTaskAttemptID().toString();
    }

    private static Counter createCounter(JobContext context) {
        assert context != null;
        if (context instanceof Progressable) {
            return new ProgressableCounter((Progressable) context);
        } else if (context instanceof org.apache.hadoop.mapred.JobContext) {
            return new ProgressableCounter(((org.apache.hadoop.mapred.JobContext) context).getProgressible());
        } else {
            return new Counter();
        }
    }

    @Override
    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
        DirectDataSourceRepository repo = getDataSourceRepository(context);
        List<OutputSpec> specs = getSpecs(context);
        for (OutputSpec spec : specs) {
            try {
                repo.getContainerPath(spec.basePath);
            } catch (IOException e) {
                throw new IOException(MessageFormat.format(
                        "There is no data source corresponding to the base path: {0}", spec.basePath), e);
            }
            for (String pattern : spec.deletePatterns) {
                try {
                    FilePattern.compile(pattern);
                } catch (IllegalArgumentException e) {
                    throw new IOException(MessageFormat.format("Invalid delete pattern: {0}", pattern), e);
                }
            }
        }
    }

    @Override
    public RecordWriter<Object, Object> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        return new EmptyFileOutputFormat().getRecordWriter(context);
    }

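    /*
     * Output committers are cached per task attempt in a WeakHashMap, so repeated
     * lookups for the same attempt reuse a single committer instance.
     */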
    @Override
    public OutputCommitter getOutputCommitter(TaskAttemptContext context) throws IOException, InterruptedException {
        synchronized (this) {
            TaskAttemptID id = context.getTaskAttemptID();
            OutputCommitter committer = committerCache.get(id);
            if (committer == null) {
                committer = createOutputCommitter(context);
            }
            committerCache.put(id, committer);
            return committer;
        }
    }

    private OutputCommitter createOutputCommitter(JobContext context) throws IOException {
        assert context != null;
        DirectDataSourceRepository repository = getDataSourceRepository(context);
        List<OutputSpec> specs = getSpecs(context);
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Creating output commiter: {0}", //$NON-NLS-1$
                    specs));
        }
        return new BridgeOutputCommitter(repository, specs);
    }

    static VariableTable getVariableTable(JobContext context) {
        assert context != null;
        String arguments = context.getConfiguration().get(StageConstants.PROP_ASAKUSA_BATCH_ARGS, ""); //$NON-NLS-1$
        VariableTable variables = new VariableTable(VariableTable.RedefineStrategy.IGNORE);
        variables.defineVariables(arguments);
        return variables;
    }

    private static final class OutputSpec {

        final String basePath;

        final List<String> deletePatterns;

        final boolean resolved;

        OutputSpec(String basePath, List<String> deletePatterns) {
            this(basePath, deletePatterns, false);
        }

        OutputSpec(String basePath, List<String> deletePatterns, boolean resolved) {
            assert basePath != null;
            this.basePath = basePath;
            this.deletePatterns = deletePatterns;
            this.resolved = resolved;
        }

        @Override
        public String toString() {
            return MessageFormat.format("Output(path={0}, delete={1})", //$NON-NLS-1$
                    basePath, deletePatterns);
        }
    }

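    /*
     * Drives the two-phase Direct I/O output: task attempts stage their results via
     * setup/commit/cleanupAttemptOutput; job setup deletes files matching the delete
     * patterns and records a transaction info file; job commit writes a commit mark
     * and then rolls the staged transaction outputs forward into their final location;
     * abort and cleanup remove the staging data and the marker files.
     */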
    private static final class BridgeOutputCommitter extends OutputCommitter {

        private final DirectDataSourceRepository repository;

        private final Map<String, String> outputMap;

        private final List<OutputSpec> outputSpecs;

        BridgeOutputCommitter(DirectDataSourceRepository repository, List<OutputSpec> outputList)
                throws IOException {
            assert repository != null;
            assert outputList != null;
            this.repository = repository;
            this.outputSpecs = outputList;
            this.outputMap = createMap(repository, outputList);
        }

        private static Map<String, String> createMap(DirectDataSourceRepository repo, List<OutputSpec> specs)
                throws IOException {
            assert repo != null;
            assert specs != null;
            Map<String, String> results = new TreeMap<>();
            for (OutputSpec spec : specs) {
                String containerPath = repo.getContainerPath(spec.basePath);
                String id = repo.getRelatedId(spec.basePath);
                results.put(containerPath, id);
            }
            return results;
        }

        @Override
        public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
            return !outputMap.isEmpty();
        }

        @Override
        public void setupTask(TaskAttemptContext taskContext) throws IOException {
            if (outputMap.isEmpty()) {
                return;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("start Direct I/O output task: {1} ({0})", //$NON-NLS-1$
                        taskContext.getJobName(), taskContext.getTaskAttemptID()));
            }
            long t0 = System.currentTimeMillis();
            for (Map.Entry<String, String> entry : outputMap.entrySet()) {
                String containerPath = entry.getKey();
                String id = entry.getValue();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("start Direct I/O output task setup for datasource: " //$NON-NLS-1$
                            + "datasource={0} ({2} ({1}))", //$NON-NLS-1$
                            id, taskContext.getJobName(), taskContext.getTaskAttemptID()));
                }
                OutputAttemptContext context = createContext(taskContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(containerPath);
                    repo.setupAttemptOutput(context);
                } catch (IOException e) {
                    LOG.error(
                            MessageFormat.format("Failed Direct I/O output task setup: datasource={0} ({2} ({1}))",
                                    id, taskContext.getJobName(), taskContext.getTaskAttemptID()),
                            e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(
                            MessageFormat.format("Interrupted while setup attempt: {0}, {1} (path={2})",
                                    context.getTransactionId(), context.getAttemptId(), containerPath))
                                            .initCause(e);
                }
                context.getCounter().add(1);
            }
            if (LOG.isDebugEnabled()) {
                long t1 = System.currentTimeMillis();
                LOG.debug(MessageFormat.format("finish Direct I/O output task setup: task={1} ({0}), elapsed={2}ms", //$NON-NLS-1$
                        taskContext.getJobName(), taskContext.getTaskAttemptID(), t1 - t0));
            }
        }

        @Override
        public void commitTask(TaskAttemptContext taskContext) throws IOException {
            if (outputMap.isEmpty()) {
                return;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("start Direct I/O output task commit: {1} ({0})", //$NON-NLS-1$
                        taskContext.getJobName(), taskContext.getTaskAttemptID()));
            }
            long t0 = System.currentTimeMillis();
            for (Map.Entry<String, String> entry : outputMap.entrySet()) {
                String containerPath = entry.getKey();
                String id = entry.getValue();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("start Direct I/O output task commit for datasource: " //$NON-NLS-1$
                            + "datasource={0} ({2} ({1}))", //$NON-NLS-1$
                            id, taskContext.getJobName(), taskContext.getTaskAttemptID()));
                }
                OutputAttemptContext context = createContext(taskContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(containerPath);
                    repo.commitAttemptOutput(context);
                } catch (IOException e) {
                    LOG.error(
                            MessageFormat.format("Failed Direct I/O output task commit: datasource={0} ({2} ({1}))",
                                    id, taskContext.getJobName(), taskContext.getTaskAttemptID()),
                            e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(
                            MessageFormat.format("Interrupted while commit task attempt: {0}, {1} (path={2})",
                                    context.getTransactionId(), context.getAttemptId(), containerPath))
                                            .initCause(e);
                } catch (RuntimeException e) {
                    LOG.fatal("TASK COMMIT FAILED", e);
                    throw e;
                }
                context.getCounter().add(1);
            }
            doCleanupTask(taskContext);
            if (LOG.isInfoEnabled()) {
                long t1 = System.currentTimeMillis();
                LOG.info(MessageFormat.format("staged Direct I/O output task: task={1} ({0}), elapsed={2}ms",
                        taskContext.getJobName(), taskContext.getTaskAttemptID(), t1 - t0));
            }
        }

        @Override
        public void abortTask(TaskAttemptContext taskContext) throws IOException {
            if (outputMap.isEmpty()) {
                return;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("Start Direct I/O output task abort: {1} ({0})", //$NON-NLS-1$
                        taskContext.getJobName(), taskContext.getTaskAttemptID()));
            }
            long t0 = System.currentTimeMillis();
            doCleanupTask(taskContext);
            if (LOG.isInfoEnabled()) {
                long t1 = System.currentTimeMillis();
                LOG.info(MessageFormat.format("aborted Direct I/O output task: task={1} ({0}), elapsed={2}ms",
                        taskContext.getJobName(), taskContext.getTaskAttemptID(), t1 - t0));
            }
        }

        private void doCleanupTask(TaskAttemptContext taskContext) throws IOException {
            assert taskContext != null;
            for (Map.Entry<String, String> entry : outputMap.entrySet()) {
                String containerPath = entry.getKey();
                String id = entry.getValue();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Start directio task cleanup for datasource: datasource={0} ({2} ({1}))", //$NON-NLS-1$
                            id, taskContext.getJobName(), taskContext.getTaskAttemptID()));
                }
                OutputAttemptContext context = createContext(taskContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(containerPath);
                    repo.cleanupAttemptOutput(context);
                } catch (IOException e) {
                    LOG.error(MessageFormat.format("Failed directio task cleanup: datasource={0} ({2} ({1}))", id,
                            taskContext.getJobName(), taskContext.getTaskAttemptID()), e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(
                            MessageFormat.format("Interrupted while cleanup attempt: {0}, {1} (path={2})",
                                    context.getTransactionId(), context.getAttemptId(), containerPath))
                                            .initCause(e);
                }
                context.getCounter().add(1);
            }
        }

        @Override
        public void setupJob(JobContext jobContext) throws IOException {
            if (outputMap.isEmpty()) {
                return;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("start Direct I/O output job setup: {0} ({1})", //$NON-NLS-1$
                        jobContext.getJobID(), jobContext.getJobName()));
            }
            long t0 = System.currentTimeMillis();
            cleanOutput(jobContext);
            setTransactionInfo(jobContext, true);
            for (Map.Entry<String, String> entry : outputMap.entrySet()) {
                String containerPath = entry.getKey();
                String id = entry.getValue();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("Start Direct I/O output job setup: datasource={0} ({1} ({2}))", //$NON-NLS-1$
                            id, jobContext.getJobID(), jobContext.getJobName()));
                }
                OutputTransactionContext context = createContext(jobContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(containerPath);
                    repo.setupTransactionOutput(context);
                } catch (IOException e) {
                    LOG.error(MessageFormat.format("Failed Direct I/O output job setup: datasource={0} ({1} ({2}))",
                            id, jobContext.getJobID(), jobContext.getJobName()), e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(
                            MessageFormat.format("Interrupted while setup transaction: {0}, (path={1})",
                                    context.getTransactionId(), containerPath)).initCause(e);
                }
                context.getCounter().add(1);
            }
            if (LOG.isInfoEnabled()) {
                long t1 = System.currentTimeMillis();
                LOG.info(MessageFormat.format("initialized Direct I/O output: job={0} ({1}), elapsed={2}ms",
                        jobContext.getJobID(), jobContext.getJobName(), t1 - t0));
            }
        }

        private void cleanOutput(JobContext jobContext) throws IOException {
            assert jobContext != null;
            for (OutputSpec spec : outputSpecs) {
                if (spec.deletePatterns.isEmpty()) {
                    continue;
                }
                String id = repository.getRelatedId(spec.basePath);
                OutputTransactionContext context = createContext(jobContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(spec.basePath);
                    String basePath = repository.getComponentPath(spec.basePath);
                    for (String pattern : spec.deletePatterns) {
                        FilePattern resources = FilePattern.compile(pattern);
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(MessageFormat.format(
                                    "Deleting output: datasource={0}, basePath={1}, pattern={2}", //$NON-NLS-1$
                                    id, basePath, pattern));
                        }
                        boolean succeed = repo.delete(basePath, resources, true, context.getCounter());
                        if (LOG.isDebugEnabled()) {
                            LOG.debug(MessageFormat.format("Deleted output (succeed={3}): " //$NON-NLS-1$
                                    + "datasource={0}, basePath={1}, pattern={2}", //$NON-NLS-1$
                                    id, basePath, pattern, succeed));
                        }
                    }
                } catch (IOException e) {
                    LOG.error(MessageFormat.format("Failed directio job setup: datasource={0} ({1} ({2}))", id,
                            jobContext.getJobID(), jobContext.getJobName()), e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(MessageFormat.format(
                            "Interrupted while setup cleaning output: datasource={0} ({1} ({2}))", id,
                            jobContext.getJobID(), jobContext.getJobName())).initCause(e);
                }
            }
        }

        @Override
        public void commitJob(JobContext jobContext) throws IOException {
            if (outputMap.isEmpty()) {
                return;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("start Direct I/O output job commit: {0} ({1})", //$NON-NLS-1$
                        jobContext.getJobID(), jobContext.getJobName()));
            }
            long t0 = System.currentTimeMillis();
            setCommitted(jobContext, true);
            doCleanupJob(jobContext);
            if (LOG.isInfoEnabled()) {
                long t1 = System.currentTimeMillis();
                LOG.info(MessageFormat.format("committed Direct I/O output: job={0} ({1}), elapsed={2}ms",
                        jobContext.getJobID(), jobContext.getJobName(), t1 - t0));
            }
        }

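        /*
         * Creates (value == true) or deletes (value == false) the per-execution
         * transaction info file: a small human-readable summary of the user name,
         * batch/flow/execution IDs, batch arguments, and the Hadoop job identity.
         */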
        private void setTransactionInfo(JobContext jobContext, boolean value) throws IOException {
            Configuration conf = jobContext.getConfiguration();
            Path transactionInfo = getTransactionInfoPath(jobContext);
            FileSystem fs = transactionInfo.getFileSystem(conf);
            if (value) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("Creating Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
                            jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(transactionInfo)));
                }
                try (OutputStream output = new SafeOutputStream(fs.create(transactionInfo, false));
                        PrintWriter writer = new PrintWriter(
                                new OutputStreamWriter(output, HadoopDataSourceUtil.COMMENT_CHARSET))) {
                    writer.printf("      User Name: %s%n", //$NON-NLS-1$
                            conf.getRaw(StageConstants.PROP_USER));
                    writer.printf("       Batch ID: %s%n", //$NON-NLS-1$
                            conf.getRaw(StageConstants.PROP_BATCH_ID));
                    writer.printf("        Flow ID: %s%n", //$NON-NLS-1$
                            conf.getRaw(StageConstants.PROP_FLOW_ID));
                    writer.printf("   Execution ID: %s%n", //$NON-NLS-1$
                            conf.getRaw(StageConstants.PROP_EXECUTION_ID));
                    writer.printf("Batch Arguments: %s%n", //$NON-NLS-1$
                            conf.getRaw(StageConstants.PROP_ASAKUSA_BATCH_ARGS));
                    writer.printf("  Hadoop Job ID: %s%n", //$NON-NLS-1$
                            jobContext.getJobID());
                    writer.printf("Hadoop Job Name: %s%n", //$NON-NLS-1$
                            jobContext.getJobName());
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Finish creating Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
                            jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(transactionInfo)));
                }
                if (LOG.isTraceEnabled()) {
                    try (FSDataInputStream input = fs.open(transactionInfo);
                            Scanner scanner = new Scanner(
                                    new InputStreamReader(input, HadoopDataSourceUtil.COMMENT_CHARSET))) {
                        while (scanner.hasNextLine()) {
                            String line = scanner.nextLine();
                            LOG.trace(">> " + line); //$NON-NLS-1$
                        }
                    }
                }
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("Deleting Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
                            jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(transactionInfo)));
                }
                fs.delete(transactionInfo, false);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Finish deleting Direct I/O transaction info: job={0} ({1}), path={2}", //$NON-NLS-1$
                            jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(transactionInfo)));
                }
            }
        }

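        /*
         * Creates or deletes the per-execution commit mark. Its presence is what
         * isCommitted(JobContext) checks, and doCleanupJob(JobContext) rolls the
         * staged outputs forward only when the mark exists.
         */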
        private void setCommitted(JobContext jobContext, boolean value) throws IOException {
            Configuration conf = jobContext.getConfiguration();
            Path commitMark = getCommitMarkPath(jobContext);
            FileSystem fs = commitMark.getFileSystem(conf);
            if (value) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("Creating Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                            jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(commitMark)));
                }
                fs.create(commitMark, false).close();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(
                            MessageFormat.format("Finish creating Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                                    jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(commitMark)));
                }
            } else {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("Deleting Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                            jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(commitMark)));
                }
                fs.delete(commitMark, false);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(
                            MessageFormat.format("Finish deleting Direct I/O commit mark: job={0} ({1}), path={2}", //$NON-NLS-1$
                                    jobContext.getJobID(), jobContext.getJobName(), fs.makeQualified(commitMark)));
                }
            }
        }

        private boolean isCommitted(JobContext jobContext) throws IOException {
            Path commitMark = getCommitMarkPath(jobContext);
            FileSystem fs = commitMark.getFileSystem(jobContext.getConfiguration());
            return fs.exists(commitMark);
        }

        @Override
        public void abortJob(JobContext jobContext, State state) throws IOException {
            if (outputMap.isEmpty()) {
                return;
            }
            if (LOG.isDebugEnabled()) {
                LOG.debug(MessageFormat.format("Start Direct I/O output job abort: job={0} ({1}), state={2}", //$NON-NLS-1$
                        jobContext.getJobID(), jobContext.getJobName(), state));
            }
            long t0 = System.currentTimeMillis();
            if (state == State.FAILED) {
                doCleanupJob(jobContext);
            }
            if (LOG.isInfoEnabled()) {
                long t1 = System.currentTimeMillis();
                LOG.info(MessageFormat.format("aborted Direct I/O output: job={0} ({1}), state={2}, elapsed={3}ms",
                        jobContext.getJobID(), jobContext.getJobName(), state, t1 - t0));
            }
        }

        private void doCleanupJob(JobContext jobContext) throws IOException {
            if (isCommitted(jobContext)) {
                rollforward(jobContext);
            }
            cleanup(jobContext);
            setCommitted(jobContext, false);
            setTransactionInfo(jobContext, false);
        }

        private void rollforward(JobContext jobContext) throws IOException {
            assert jobContext != null;
            for (Map.Entry<String, String> entry : outputMap.entrySet()) {
                String containerPath = entry.getKey();
                String id = entry.getValue();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format(
                            "Start Direct I/O output job rollforward: datasource={0} ({1} ({2}))", //$NON-NLS-1$
                            id, jobContext.getJobID(), jobContext.getJobName()));
                }
                OutputTransactionContext context = createContext(jobContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(containerPath);
                    repo.commitTransactionOutput(context);
                } catch (IOException e) {
                    LOG.error(MessageFormat.format(
                            "Failed Direct I/O output job rollforward: datasource={0} ({1} ({2}))", id,
                            jobContext.getJobID(), jobContext.getJobName()), e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(
                            MessageFormat.format("Interrupted while commit transaction: {0}, (path={1})",
                                    context.getTransactionId(), containerPath)).initCause(e);
                }
                context.getCounter().add(1);
            }
        }

        private void cleanup(JobContext jobContext) throws IOException {
            for (Map.Entry<String, String> entry : outputMap.entrySet()) {
                String containerPath = entry.getKey();
                String id = entry.getValue();
                if (LOG.isDebugEnabled()) {
                    LOG.debug(
                            MessageFormat.format("Start Direct I/O output job cleanup: datasource={0} ({1} ({2}))", //$NON-NLS-1$
                                    id, jobContext.getJobID(), jobContext.getJobName()));
                }
                OutputTransactionContext context = createContext(jobContext, id);
                try {
                    DirectDataSource repo = repository.getRelatedDataSource(containerPath);
                    repo.cleanupTransactionOutput(context);
                } catch (IOException e) {
                    LOG.error(
                            MessageFormat.format("Failed Direct I/O output job cleanup: datasource={0} ({1} ({2}))",
                                    id, jobContext.getJobID(), jobContext.getJobName()),
                            e);
                    throw e;
                } catch (InterruptedException e) {
                    throw (IOException) new InterruptedIOException(
                            MessageFormat.format("Interrupted while cleanup transaction: {0}, (path={1})",
                                    context.getTransactionId(), containerPath)).initCause(e);
                }
                context.getCounter().add(1);
            }
        }

        private static Path getTransactionInfoPath(JobContext context) throws IOException {
            assert context != null;
            Configuration conf = context.getConfiguration();
            String executionId = conf.get(StageConstants.PROP_EXECUTION_ID);
            return HadoopDataSourceUtil.getTransactionInfoPath(conf, executionId);
        }

        private static Path getCommitMarkPath(JobContext context) throws IOException {
            assert context != null;
            Configuration conf = context.getConfiguration();
            String executionId = conf.get(StageConstants.PROP_EXECUTION_ID);
            return HadoopDataSourceUtil.getCommitMarkPath(conf, executionId);
        }
    }

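    /**
     * An output stream wrapper that forwards writes to a delegate and guarantees
     * the delegate is closed at most once, even if {@code close()} is invoked again
     * (for example by the nested try-with-resources in {@code setTransactionInfo}).
     */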
    private static class SafeOutputStream extends OutputStream {

        private final OutputStream delegate;

        private final AtomicBoolean closed = new AtomicBoolean();

        SafeOutputStream(OutputStream delegate) {
            this.delegate = delegate;
        }

        @Override
        public void write(int b) throws IOException {
            delegate.write(b);
        }

        @Override
        public void write(byte[] b, int off, int len) throws IOException {
            delegate.write(b, off, len);
        }

        @Override
        public void close() throws IOException {
            if (closed.compareAndSet(false, true)) {
                delegate.close();
            }
        }
    }
}
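
The configuration value produced by save(Configuration, List) and read back by getSpecs(JobContext) uses only public Hadoop and Commons Codec APIs, so the encoding can be reproduced in isolation. Below is a minimal stand-alone sketch (not part of the original source; the base path and delete pattern are made-up sample values) that writes one spec with the same WritableUtils + GZIP + Base64 layout and reads it back. Note that in the real getSpecs(JobContext) the ${date} placeholder would additionally be resolved through the batch-argument VariableTable; the sketch just reads the raw string back.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import org.apache.commons.codec.binary.Base64InputStream;
import org.apache.commons.codec.binary.Base64OutputStream;
import org.apache.hadoop.io.WritableUtils;

public class OutputSpecEncodingSketch {

    public static void main(String[] args) throws IOException {
        // encode: same stream layout as BridgeOutputFormat.save(...)
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        try (DataOutputStream output = new DataOutputStream(
                new GZIPOutputStream(new Base64OutputStream(sink)))) {
            WritableUtils.writeVLong(output, 1L);                  // SERIAL_VERSION
            WritableUtils.writeVInt(output, 1);                    // number of specs
            WritableUtils.writeString(output, "result/${date}");   // sample base path
            WritableUtils.writeVInt(output, 1);                    // number of delete patterns
            WritableUtils.writeString(output, "**/*.tmp");         // sample delete pattern
        }
        String encoded = new String(sink.toByteArray(), StandardCharsets.US_ASCII);
        System.out.println("configuration value: " + encoded);

        // decode: same stream layout as BridgeOutputFormat.getSpecs(...)
        try (DataInputStream input = new DataInputStream(new GZIPInputStream(
                new Base64InputStream(new ByteArrayInputStream(
                        encoded.getBytes(StandardCharsets.US_ASCII)))))) {
            long version = WritableUtils.readVLong(input);
            int specCount = WritableUtils.readVInt(input);
            String basePath = WritableUtils.readString(input);
            int patternCount = WritableUtils.readVInt(input);
            String pattern = WritableUtils.readString(input);
            System.out.printf("version=%d, specs=%d, basePath=%s, patterns=%d, pattern=%s%n",
                    version, specCount, basePath, patternCount, pattern);
        }
    }
}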