alluxio.hadoop.mapreduce.KeyValueOutputFormat.java Source code

Java tutorial

Introduction

Here is the source code for alluxio.hadoop.mapreduce.KeyValueOutputFormat.java

Source

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.hadoop.mapreduce;

import alluxio.AlluxioURI;
import alluxio.annotation.PublicApi;
import alluxio.client.keyvalue.KeyValueSystem;
import alluxio.exception.AlluxioException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

import javax.annotation.concurrent.ThreadSafe;

/**
 * An {@link OutputFormat} implementation that lets a MapReduce job write to a key-value store
 * under the MapReduce output directory.
 * <p>
 * There are different key-value stores for different tasks under the output directory configured
 * via the MapReduce property {@code mapreduce.output.fileoutputformat.outputdir}. The stores are
 * merged into one key-value store under the MapReduce output directory by
 * {@link KeyValueOutputCommitter}.
 * <p>
 * The only mutable state of this class is the lazily created, cached {@link OutputCommitter};
 * access to it is guarded by this instance's monitor.
 */
// TODO(cc): Consider key distributions in each Reducer.
@PublicApi
@ThreadSafe
public final class KeyValueOutputFormat extends FileOutputFormat<BytesWritable, BytesWritable> {
    /** Committer created on the first call to {@link #getOutputCommitter}; guarded by {@code this}. */
    private OutputCommitter mCommitter;

    /**
     * Constructs a new {@link KeyValueOutputFormat}.
     */
    public KeyValueOutputFormat() {
    }

    /**
     * Builds the URI a task attempt writes to, by joining the job output URI with the pending
     * directory name reported by {@link KeyValueOutputCommitter#getPendingDirName()} and the task
     * attempt ID.
     *
     * @param taskContext MapReduce task context
     * @return the task's temporary output path
     *         ${job output directory}/${pending directory name}/${task attempt id}
     */
    public static AlluxioURI getTaskOutputURI(TaskAttemptContext taskContext) {
        return getJobOutputURI(taskContext)
                .join(KeyValueOutputCommitter.getPendingDirName())
                .join(taskContext.getTaskAttemptID().toString());
    }

    /**
     * Converts the output path configured for the job into an {@link AlluxioURI}.
     *
     * @param jobContext MapReduce job configuration
     * @return the job's output path
     */
    public static AlluxioURI getJobOutputURI(JobContext jobContext) {
        return new AlluxioURI(FileOutputFormat.getOutputPath(jobContext).toString());
    }

    /**
     * Creates a record writer that targets the task attempt's own output URI (see
     * {@link #getTaskOutputURI(TaskAttemptContext)}).
     *
     * @param taskAttemptContext MapReduce task context
     * @return a new {@link KeyValueRecordWriter} for the task attempt
     * @throws IOException when the record writer fails to be created
     */
    @Override
    public RecordWriter<BytesWritable, BytesWritable> getRecordWriter(
            TaskAttemptContext taskAttemptContext) throws IOException {
        return new KeyValueRecordWriter(getTaskOutputURI(taskAttemptContext));
    }

    /**
     * {@inheritDoc}
     * <p>
     * This method is called immediately when the job is submitted. After the checks of the parent
     * class pass, a key-value store is created at the job's output directory and the handle
     * returned by the creation is closed right away. Key-value stores created by MapReduce tasks
     * are merged into this store by {@link KeyValueOutputCommitter}.
     *
     * @param jobContext MapReduce job configuration
     * @throws IOException when the parent's checks fail, or when creating or closing the store
     *         fails (an {@link AlluxioException} is wrapped as the cause)
     */
    @Override
    public void checkOutputSpecs(JobContext jobContext) throws IOException {
        super.checkOutputSpecs(jobContext);
        try {
            KeyValueSystem.Factory.create().createStore(getJobOutputURI(jobContext)).close();
        } catch (AlluxioException e) {
            // Surface Alluxio failures through the IOException contract of OutputFormat.
            throw new IOException(e);
        }
    }

    /**
     * Returns the committer for this output format, creating it on the first call and returning
     * the same cached instance afterwards.
     * <p>
     * The method is {@code synchronized} so that the check-then-create on the cached committer is
     * atomic; without it, concurrent first calls could each build and return a different
     * committer, contradicting the {@link ThreadSafe} contract of this class.
     *
     * @param taskContext MapReduce task configuration
     * @return a {@link KeyValueOutputCommitter}
     * @throws IOException when committer fails to be created
     */
    @Override
    public synchronized OutputCommitter getOutputCommitter(TaskAttemptContext taskContext)
            throws IOException {
        if (mCommitter == null) {
            Path jobOutputPath = new Path(getJobOutputURI(taskContext).toString());
            mCommitter = new KeyValueOutputCommitter(jobOutputPath, taskContext);
        }
        return mCommitter;
    }
}