Example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getOutputName

List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getOutputName

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.lib.output FileOutputFormat getOutputName.

Prototype

protected static String getOutputName(JobContext job) 

Source Link

Document

Get the base output name for the output file.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java

License:Apache License

/**
 * Create a composite record writer that can write key/value data to different output files.
 *
 * @return a composite record writer//from  w w  w  .  j ava 2 s .c o m
 * @throws IOException
 */
@Override
public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext job) throws IOException {
    final String outputName = FileOutputFormat.getOutputName(job);

    Configuration configuration = job.getConfiguration();
    Class<? extends DynamicPartitioner> partitionerClass = configuration.getClass(
            PartitionedFileSetArguments.DYNAMIC_PARTITIONER_CLASS_NAME, null, DynamicPartitioner.class);

    @SuppressWarnings("unchecked")
    final DynamicPartitioner<K, V> dynamicPartitioner = new InstantiatorFactory(false)
            .get(TypeToken.of(partitionerClass)).create();

    MapReduceClassLoader classLoader = MapReduceClassLoader.getFromConfiguration(configuration);
    final BasicMapReduceTaskContext<K, V> taskContext = classLoader.getTaskContextProvider().get(job);

    String outputDatasetName = configuration.get(Constants.Dataset.Partitioned.HCONF_ATTR_OUTPUT_DATASET);
    PartitionedFileSet outputDataset = taskContext.getDataset(outputDatasetName);
    final Partitioning partitioning = outputDataset.getPartitioning();

    dynamicPartitioner.initialize(taskContext);

    return new RecordWriter<K, V>() {

        // a cache storing the record writers for different output files.
        Map<PartitionKey, RecordWriter<K, V>> recordWriters = new HashMap<>();

        public void write(K key, V value) throws IOException, InterruptedException {
            PartitionKey partitionKey = dynamicPartitioner.getPartitionKey(key, value);
            RecordWriter<K, V> rw = this.recordWriters.get(partitionKey);
            if (rw == null) {
                String relativePath = PartitionedFileSetDataset.getOutputPath(partitionKey, partitioning);
                String finalPath = relativePath + "/" + outputName;

                // if we don't have the record writer yet for the final path, create one and add it to the cache
                rw = getBaseRecordWriter(getTaskAttemptContext(job, finalPath));
                this.recordWriters.put(partitionKey, rw);
            }
            rw.write(key, value);
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            try {
                List<RecordWriter<?, ?>> recordWriters = new ArrayList<>();
                recordWriters.addAll(this.recordWriters.values());
                MultipleOutputs.closeRecordWriters(recordWriters, context);

                taskContext.flushOperations();
            } catch (Exception e) {
                throw new IOException(e);
            } finally {
                dynamicPartitioner.destroy();
            }
        }
    };
}

From source file:org.apache.solr.hadoop.SolrOutputFormat.java

License:Apache License

public static String getOutputName(JobContext job) {
    return FileOutputFormat.getOutputName(job);
}

From source file:org.schedoscope.export.ftp.outputformat.FtpUploadOutputFormat.java

License:Apache License

/**
 * A method to provide the fully qualified file path of the current file.
 *
 * @param context The TaskAttemptContext.
 * @return Returns the fully qualified file path of the current file.
 *///  ww w .  j a  va 2 s. c o m
public static String getOutputName(TaskAttemptContext context) {
    return getUniqueFile(context, FileOutputFormat.getOutputName(context), extension);
}