Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getOutputFormatClass

List of usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getOutputFormatClass

Introduction

On this page you can find example usages of the getOutputFormatClass() method of org.apache.hadoop.mapreduce.TaskAttemptContext.

Prototype

public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException;

Source Link

Document

Get the OutputFormat class for the job.

Usage

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.MultipleOutputs.java

License:Apache License

/**
 * Returns the record writer for the given named output, creating and caching it on first use.
 *
 * <p>On a cache miss the OutputFormat class is read from the task context resolved for
 * {@code namedOutput}, instantiated, and asked for a record writer, which is wrapped in a
 * {@code MeteredRecordWriter} before being stored in {@code recordWriters}.
 *
 * @param namedOutput key used both for the cache lookup and for resolving the task context
 * @return the cached writer, or the newly created (and now cached) one
 * @throws IOException if the OutputFormat class cannot be found (the {@link ClassNotFoundException}
 *         is wrapped), or if it is thrown while the writer is being created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(String namedOutput) throws IOException, InterruptedException {
    RecordWriter cached = recordWriters.get(namedOutput);
    if (cached != null) {
        // Cache hit: nothing to build.
        return cached;
    }

    TaskAttemptContext taskContext = getContext(namedOutput);

    Class<? extends OutputFormat<?, ?>> formatClass;
    try {
        formatClass = taskContext.getOutputFormatClass();
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    // Switch the context classloader to the OutputFormat's own loader: it may conflict with the
    // program classloader (for example, TableOutputFormat).
    ClassLoader previousLoader = ClassLoaders.setContextClassLoader(formatClass.getClassLoader());

    RecordWriter created;
    try {
        // ReflectionUtils is used for instantiation because it also calls setConf on the object
        // when it is an org.apache.hadoop.conf.Configurable.
        OutputFormat<?, ?> format = ReflectionUtils.newInstance(formatClass, taskContext.getConfiguration());
        created = new MeteredRecordWriter<>(format.getRecordWriter(taskContext), context);
    } finally {
        // Always restore the previous context classloader, even if creation failed.
        ClassLoaders.setContextClassLoader(previousLoader);
    }

    recordWriters.put(namedOutput, created);
    return created;
}

From source file:com.linkedin.whiteelephant.mapreduce.MyAvroMultipleOutputs.java

License:Apache License

/**
 * Returns the record writer cached under {@code baseFileName}, building and caching it on a miss.
 *
 * <p>Before the writer is created, {@code baseFileName} is stored in the task configuration under
 * {@code "avro.mo.config.namedOutput"}. When {@code countersEnabled} is set, the new writer is
 * wrapped in a {@code RecordWriterWithCounter}.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param baseFileName cache key, also written into the configuration
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {
    RecordWriter cached = recordWriters.get(baseFileName);
    if (cached != null) {
        return cached;
    }

    // (mhayes) Fix so that output can be written to a different baseFileName.
    //   Bug report:    https://issues.apache.org/jira/browse/AVRO-1215
    //   Original code: https://issues.apache.org/jira/browse/AVRO-1106
    taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName);

    Class<? extends OutputFormat<?, ?>> formatClass;
    try {
        formatClass = taskContext.getOutputFormatClass();
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    OutputFormat format = (OutputFormat) ReflectionUtils.newInstance(formatClass,
            taskContext.getConfiguration());
    RecordWriter created = format.getRecordWriter(taskContext);

    // With counters enabled, wrap the writer so that writes increment counters via the context.
    if (countersEnabled) {
        created = new RecordWriterWithCounter(created, baseFileName, context);
    }

    recordWriters.put(baseFileName, created);
    return created;
}

From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java

License:Apache License

/**
 * Creates the output committer for a job, using either the new-API or the old-API lookup.
 *
 * <p>With {@code newApiCommitter} set, a task attempt context is built through
 * {@code HadoopVersionSpecificCode} and the committer is requested from the job's OutputFormat.
 * Otherwise the class configured under {@code "mapred.output.committer.class"} is instantiated,
 * with {@code FileOutputCommitter} as the default.
 *
 * @param newApiCommitter selects the OutputFormat-based lookup when {@code true}
 * @param jobId job id used to create the task attempt id
 * @param conf job configuration
 * @return the committer instance
 * @throws IOException declared by the signature; presumably from the OutputFormat's
 *         {@code getOutputCommitter} call
 * @throws InterruptedException declared by the signature; presumably from the same call
 * @throws ClassNotFoundException if the OutputFormat class cannot be resolved from the task context
 */
private org.apache.hadoop.mapreduce.OutputCommitter createOutputCommitter(boolean newApiCommitter, JobID jobId,
        Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("OutputCommitter set in config " + conf.get("mapred.output.committer.class"));

    org.apache.hadoop.mapreduce.OutputCommitter result;
    if (!newApiCommitter) {
        // Old API: the committer class comes straight from the configuration.
        result = ReflectionUtils.newInstance(conf.getClass("mapred.output.committer.class",
                FileOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class), conf);
    } else {
        // New API: the committer is provided by the job's OutputFormat.
        HadoopVersionSpecificCode versionCode = HadoopVersionSpecificCode
                .getInstance(VersionInfo.getVersion(), conf);
        org.apache.hadoop.mapreduce.TaskAttemptID attemptId = versionCode.createTaskAttemptId(jobId, true, 0);
        org.apache.hadoop.mapreduce.TaskAttemptContext taskContext = versionCode
                .createTaskAttemptContext(conf, attemptId);
        OutputFormat format = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
        result = format.getOutputCommitter(taskContext);
    }

    LOG.info("OutputCommitter is " + result.getClass().getName());
    return result;
}

From source file:edu.arizona.cs.hadoop.fs.irods.output.HirodsMultipleOutputs.java

License:Apache License

/**
 * Returns the record writer cached under {@code baseFileName}, creating it when absent.
 *
 * <p>On a cache miss the output name is set on the task context via
 * {@code HirodsFileOutputFormat.setOutputName}, the context's OutputFormat is instantiated and
 * asked for a writer, and the writer is optionally wrapped in a {@code RecordWriterWithCounter}
 * (when {@code countersEnabled}) before being cached.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param baseFileName cache key and output name
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {
    RecordWriter found = recordWriters.get(baseFileName);
    if (found != null) {
        return found;
    }

    HirodsFileOutputFormat.setOutputName(taskContext, baseFileName);

    RecordWriter fresh;
    try {
        OutputFormat format = (OutputFormat) ReflectionUtils.newInstance(taskContext.getOutputFormatClass(),
                taskContext.getConfiguration());
        fresh = format.getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    // Counter-enabled runs get a wrapper that increments counters through the context.
    RecordWriter toCache = countersEnabled ? new RecordWriterWithCounter(fresh, baseFileName, context) : fresh;
    recordWriters.put(baseFileName, toCache);
    return toCache;
}

From source file:eu.dnetlib.iis.core.javamapreduce.hack.AvroMultipleOutputs.java

License:Apache License

/**
 * Returns the record writer cached under {@code baseFileName}, creating and caching it on a miss.
 *
 * <p>Hacked: visibility was changed from private to public. The method is synchronized so that
 * MultipleOutputTask can be used with a MultithreadedMapper.
 *
 * <p>On a miss, {@code baseFileName} is written to the task configuration under
 * {@code "avro.mo.config.namedOutput"} before the context's OutputFormat is instantiated.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param baseFileName cache key, also written into the configuration
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
public synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {
    RecordWriter cachedWriter = recordWriters.get(baseFileName);
    if (cachedWriter != null) {
        return cachedWriter;
    }

    taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName);

    Class<? extends OutputFormat<?, ?>> outputFormatClass;
    try {
        outputFormatClass = taskContext.getOutputFormatClass();
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    OutputFormat outputFormat = (OutputFormat) ReflectionUtils.newInstance(outputFormatClass,
            taskContext.getConfiguration());
    RecordWriter newWriter = outputFormat.getRecordWriter(taskContext);

    if (countersEnabled) {
        // Wrap so that writes also increment counters via the context.
        newWriter = new RecordWriterWithCounter(newWriter, baseFileName, context);
    }

    recordWriters.put(baseFileName, newWriter);
    return newWriter;
}

From source file:org.apache.avro.mapreduce.AvroMultipleOutputs.java

License:Apache License

/**
 * Fetches the record writer for {@code baseFileName} from the cache, or builds one if none exists.
 *
 * <p>A new writer is built by recording {@code baseFileName} in the task configuration under
 * {@code "avro.mo.config.namedOutput"}, instantiating the context's OutputFormat and requesting a
 * writer from it. If {@code countersEnabled} is set, the writer is wrapped in a
 * {@code RecordWriterWithCounter}. The result is stored in {@code recordWriters}.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param baseFileName cache key, also written into the configuration
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {
    RecordWriter result = recordWriters.get(baseFileName);
    if (result != null) {
        return result;
    }

    taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName);

    try {
        OutputFormat format = (OutputFormat) ReflectionUtils.newInstance(taskContext.getOutputFormatClass(),
                taskContext.getConfiguration());
        result = format.getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    // Optional counter wrapper around the raw writer.
    if (countersEnabled) {
        result = new RecordWriterWithCounter(result, baseFileName, context);
    }

    recordWriters.put(baseFileName, result);
    return result;
}

From source file:org.apache.crunch.hadoop.mapreduce.lib.output.CrunchMultipleOutputs.java

License:Apache License

/**
 * Returns the record writer associated with {@code baseFileName}, creating it on first request.
 *
 * <p>Creation stores {@code baseFileName} in the task configuration under {@code BASE_OUTPUT_NAME},
 * instantiates the context's OutputFormat and obtains a writer from it. When
 * {@code countersEnabled} is set, the writer is wrapped in a {@code RecordWriterWithCounter}.
 * The writer is then cached in {@code recordWriters}.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param baseFileName cache key and base output name
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {
    RecordWriter hit = recordWriters.get(baseFileName);
    if (hit != null) {
        return hit;
    }

    taskContext.getConfiguration().set(BASE_OUTPUT_NAME, baseFileName);

    Class<? extends OutputFormat<?, ?>> formatClass;
    try {
        formatClass = taskContext.getOutputFormatClass();
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    RecordWriter plain = ((OutputFormat) ReflectionUtils.newInstance(formatClass,
            taskContext.getConfiguration())).getRecordWriter(taskContext);

    // Wrap with the counting writer only when counters are switched on.
    RecordWriter stored = plain;
    if (countersEnabled) {
        stored = new RecordWriterWithCounter(plain, baseFileName, context);
    }

    recordWriters.put(baseFileName, stored);
    return stored;
}

From source file:org.apache.crunch.io.CrunchOutputs.java

License:Apache License

/**
 * Returns the record writer for {@code namedOutput}, creating and caching it if it is not cached yet.
 *
 * <p>On a cache miss, {@code namedOutput} is stored in the task configuration under
 * {@code BASE_OUTPUT_NAME}; the context's OutputFormat is then instantiated and asked for a writer.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param namedOutput cache key and base output name
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
private synchronized RecordWriter<K, V> getRecordWriter(TaskAttemptContext taskContext, String namedOutput)
        throws IOException, InterruptedException {
    RecordWriter<K, V> cached = recordWriters.get(namedOutput);
    if (cached != null) {
        return cached;
    }

    taskContext.getConfiguration().set(BASE_OUTPUT_NAME, namedOutput);

    RecordWriter<K, V> created;
    try {
        OutputFormat outputFormat = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(),
                taskContext.getConfiguration());
        created = outputFormat.getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    recordWriters.put(namedOutput, created);
    return created;
}

From source file:org.apache.tez.mapreduce.committer.MROutputCommitter.java

License:Apache License

/**
 * Creates the MapReduce output committer for the given committer context.
 *
 * <p>The {@code newApiCommitter} field is reset and then set to {@code true} when either
 * {@code "mapred.reducer.new-api"} or {@code "mapred.mapper.new-api"} is enabled in
 * {@code jobConf}. In that case a task attempt context is built and the committer is requested
 * from the configured OutputFormat; otherwise the class configured under
 * {@code "mapred.output.committer.class"} is instantiated, defaulting to
 * {@code FileOutputCommitter}.
 *
 * @param context committer context providing the application id, DAG attempt number, output name
 *        and vertex name
 * @return the committer instance
 * @throws TezUncheckedException wrapping any exception raised while the OutputFormat is
 *         instantiated or asked for its committer
 */
@SuppressWarnings("rawtypes")
private org.apache.hadoop.mapreduce.OutputCommitter getOutputCommitter(OutputCommitterContext context) {

    newApiCommitter = false;
    if (jobConf.getBoolean("mapred.reducer.new-api", false)
            || jobConf.getBoolean("mapred.mapper.new-api", false)) {
        newApiCommitter = true;
        LOG.info("Using mapred newApiCommitter.");
    }

    org.apache.hadoop.mapreduce.OutputCommitter result;
    if (!newApiCommitter) {
        // Old API: the committer class is read directly from the job configuration.
        result = ReflectionUtils.newInstance(jobConf.getClass("mapred.output.committer.class",
                FileOutputCommitter.class, org.apache.hadoop.mapred.OutputCommitter.class), jobConf);
    } else {
        // New API: build a task attempt id from the application id, task type and DAG attempt.
        String clusterTimestamp = Long.toString(context.getApplicationId().getClusterTimestamp());
        int applicationNumber = context.getApplicationId().getId();
        TaskType taskType = jobConf.getBoolean(MRConfig.IS_MAP_PROCESSOR, false) ? TaskType.MAP
                : TaskType.REDUCE;
        TaskAttemptID attemptId = new TaskAttemptID(clusterTimestamp, applicationNumber, taskType, 0,
                context.getDAGAttemptNumber());

        TaskAttemptContext taskContext = new TaskAttemptContextImpl(jobConf, attemptId);
        try {
            OutputFormat format = ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), jobConf);
            result = format.getOutputCommitter(taskContext);
        } catch (Exception e) {
            throw new TezUncheckedException(e);
        }
    }

    LOG.info("OutputCommitter for outputName=" + context.getOutputName() + ", vertexName="
            + context.getVertexName() + ", outputCommitterClass=" + result.getClass().getName());
    return result;
}

From source file:org.apache.tez.mapreduce.output.MultiMROutput.java

License:Apache License

/**
 * Returns the record writer cached under {@code baseFileName} in {@code newRecordWriters},
 * creating and caching it when absent.
 *
 * <p>On a cache miss, {@code baseFileName} is stored in the task configuration under
 * {@code MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME}; the context's OutputFormat is then
 * instantiated and asked for a writer.
 *
 * @param taskContext context supplying the configuration and the OutputFormat class
 * @param baseFileName cache key and base output name
 * @return the cached or newly created record writer
 * @throws IOException if the OutputFormat class cannot be found (wrapped
 *         {@link ClassNotFoundException}) or if it is thrown while the writer is created
 * @throws InterruptedException declared by the signature; presumably propagated from the
 *         OutputFormat's {@code getRecordWriter} call
 */
@SuppressWarnings("unchecked")
private synchronized RecordWriter getNewRecordWriter(TaskAttemptContext taskContext, String baseFileName)
        throws IOException, InterruptedException {
    RecordWriter cached = newRecordWriters.get(baseFileName);
    if (cached != null) {
        return cached;
    }

    taskContext.getConfiguration().set(MRJobConfig.FILEOUTPUTFORMAT_BASE_OUTPUT_NAME, baseFileName);

    Class<? extends OutputFormat<?, ?>> formatClass;
    try {
        formatClass = taskContext.getOutputFormatClass();
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }

    OutputFormat format = (OutputFormat) ReflectionUtils.newInstance(formatClass,
            taskContext.getConfiguration());
    RecordWriter created = format.getRecordWriter(taskContext);

    newRecordWriters.put(baseFileName, created);
    return created;
}