Usage examples for org.apache.hadoop.mapred.FileOutputFormat.getUniqueName
public static String getUniqueName(JobConf conf, String name)
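getUniqueName builds a per-task file name by appending the task type ("m" for map, "r" for reduce) and the zero-padded task partition to the prefix you pass in; called outside a running task it throws IllegalArgumentException. Below is a minimal sketch of the naming scheme that sets the task properties by hand, assuming the Hadoop 2.x property names (the framework normally sets these for you inside a task):

import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class GetUniqueNameDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Normally set by the MR framework inside a task; set here only to
        // demonstrate the naming scheme. Hadoop 2.x property names assumed.
        conf.setInt("mapreduce.task.partition", 3);
        conf.setBoolean("mapreduce.task.ismap", false); // pretend we are reducer 3

        // prefix + "-" + task type + "-" + zero-padded partition
        System.out.println(FileOutputFormat.getUniqueName(conf, "part"));
        // prints: part-r-00003

        // With no task partition set, the call fails:
        // FileOutputFormat.getUniqueName(new JobConf(), "part")
        //   -> IllegalArgumentException("This method can only be called from within a Job")
    }
}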
From source file: com.facebook.hiveio.output.HiveApiOutputFormat.java
License: Apache License
/**
 * Get the base Hadoop RecordWriter.
 *
 * @param taskAttemptContext TaskAttemptContext
 * @param baseOutputFormat Hadoop OutputFormat
 * @return RecordWriter
 * @throws IOException Hadoop issues
 */
// CHECKSTYLE: stop LineLengthCheck
protected static org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> getBaseRecordWriter(
        TaskAttemptContext taskAttemptContext,
        org.apache.hadoop.mapred.OutputFormat baseOutputFormat) throws IOException {
    // CHECKSTYLE: resume LineLengthCheck
    HadoopUtils.setWorkOutputDir(taskAttemptContext);
    JobConf jobConf = new JobConf(taskAttemptContext.getConfiguration());
    int fileId = CREATED_FILES_COUNTER.incrementAndGet();
    String name = FileOutputFormat.getUniqueName(jobConf, "part-" + fileId);
    Reporter reporter = new ProgressReporter(taskAttemptContext);
    org.apache.hadoop.mapred.RecordWriter<WritableComparable, Writable> baseWriter =
            baseOutputFormat.getRecordWriter(null, jobConf, name, reporter);
    LOG.info("getBaseRecordWriter: Created new {} with file {}", baseWriter, name);
    return baseWriter;
}
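Two levels of uniqueness are at work here: the CREATED_FILES_COUNTER AtomicInteger makes the prefix ("part-1", "part-2", ...) unique among the writers created by this task attempt, and getUniqueName then appends the task-type/partition suffix that keeps names unique across tasks, yielding files such as part-1-m-00002. Note also that the base format's getRecordWriter is called with a null FileSystem argument; file-based mapred OutputFormats typically ignore that parameter (it is literally named "ignored" in TextOutputFormat).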
From source file: org.apache.hcatalog.mapreduce.FileOutputFormatContainer.java
License: Apache License
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // This needs to be set manually; under normal circumstances the MR Task does this.
    setWorkOutputPath(context);

    // Configure the output key and value classes.
    // This is required for writing null as key for file based tables.
    context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HCatStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
    Class<? extends SerDe> serde = storageHandler.getSerDeClass();
    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());

    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context).isDynamicPartitioningUsed()) {
        // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used, so null is fine.
        // (Records can't be written until the values of the dynamic partitions are deduced.
        // By that time, a new local instance of RecordWriter, with the correct output path, will be constructed.)
        rw = new FileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
    } else {
        Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir,
                FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()), "part"));
        rw = new FileRecordWriterContainer(
                getBaseOutputFormat().getRecordWriter(
                        parentDir.getFileSystem(context.getConfiguration()),
                        new JobConf(context.getConfiguration()),
                        childPath.toString(),
                        InternalUtil.createReporter(context)),
                context);
    }
    return rw;
}
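In the static-partition branch, getUniqueName supplies the leaf file name ("part-m-00000", "part-r-00001", ...) under the task's work output directory (mapred.work.output.dir). The dynamic branch defers writer creation entirely, since the output path can only be computed once the dynamic partition values are known from the records themselves.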
From source file: org.apache.hive.hcatalog.mapreduce.FileOutputFormatContainer.java
License: Apache License
@Override
public RecordWriter<WritableComparable<?>, HCatRecord> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    // This needs to be set manually; under normal circumstances the MR Task does this.
    setWorkOutputPath(context);

    // Configure the output key and value classes.
    // This is required for writing null as key for file based tables.
    context.getConfiguration().set("mapred.output.key.class", NullWritable.class.getName());
    String jobInfoString = context.getConfiguration().get(HCatConstants.HCAT_KEY_OUTPUT_INFO);
    OutputJobInfo jobInfo = (OutputJobInfo) HCatUtil.deserialize(jobInfoString);
    StorerInfo storeInfo = jobInfo.getTableInfo().getStorerInfo();
    HiveStorageHandler storageHandler = HCatUtil.getStorageHandler(context.getConfiguration(), storeInfo);
    Class<? extends SerDe> serde = storageHandler.getSerDeClass();
    SerDe sd = (SerDe) ReflectionUtils.newInstance(serde, context.getConfiguration());
    context.getConfiguration().set("mapred.output.value.class", sd.getSerializedClass().getName());

    RecordWriter<WritableComparable<?>, HCatRecord> rw;
    if (HCatBaseOutputFormat.getJobInfo(context.getConfiguration()).isDynamicPartitioningUsed()) {
        // When dynamic partitioning is used, the RecordWriter instance initialized here isn't used, so null is fine.
        // (Records can't be written until the values of the dynamic partitions are deduced.
        // By that time, a new local instance of RecordWriter, with the correct output path, will be constructed.)
        rw = new DynamicPartitionFileRecordWriterContainer((org.apache.hadoop.mapred.RecordWriter) null, context);
    } else {
        Path parentDir = new Path(context.getConfiguration().get("mapred.work.output.dir"));
        Path childPath = new Path(parentDir,
                FileOutputFormat.getUniqueName(new JobConf(context.getConfiguration()),
                        context.getConfiguration().get("mapreduce.output.basename", "part")));
        rw = new StaticPartitionFileRecordWriterContainer(
                getBaseOutputFormat().getRecordWriter(
                        parentDir.getFileSystem(context.getConfiguration()),
                        new JobConf(context.getConfiguration()),
                        childPath.toString(),
                        InternalUtil.createReporter(context)),
                context);
    }
    return rw;
}
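This later org.apache.hive.hcatalog version differs from the org.apache.hcatalog one above in two ways: it honors the standard mapreduce.output.basename property (falling back to "part") as the prefix passed to getUniqueName instead of hard-coding it, and it splits the writer container into DynamicPartitionFileRecordWriterContainer and StaticPartitionFileRecordWriterContainer rather than using a single FileRecordWriterContainer for both cases.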