Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getJobID

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.TaskAttemptContext.getJobID() from real projects.

Prototype

public JobID getJobID();

Document

Get the unique ID for the job.
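
Before the project examples, here is a minimal, self-contained sketch of the recurring pattern: deriving a JobContext from a TaskAttemptContext via getJobID(). The GetJobIdExample class and the directly constructed contexts are illustrative assumptions, relying on Hadoop 2's org.apache.hadoop.mapreduce.task implementation classes rather than on any of the projects below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetJobIdExample {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // In a running job the framework supplies the TaskAttemptContext;
        // here one is built directly for illustration.
        TaskAttemptContext taskContext = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        // getJobID() returns the ID of the job this task attempt belongs to.
        JobID jobId = taskContext.getJobID();
        // The recurring pattern in the examples below: derive a JobContext
        // from a task context so APIs that expect a JobContext can be called.
        JobContext jobContext = new JobContextImpl(conf, jobId);
        System.out.println("Job ID: " + jobContext.getJobID());
    }
}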

Usage

From source file: com.datasalt.pangool.tuplemr.mapred.lib.output.PangoolMultipleOutputs.java

License: Apache License
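
This method returns the RecordWriter for a named output, creating and caching one per base file name; getJobID() is used to derive the JobContext that the OutputFormat contract (checkOutputSpecs) must be called with.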

public synchronized RecordWriter getRecordWriter(String baseFileName) throws IOException, InterruptedException {

    // Look for record-writer in the cache
    OutputContext context = outputContexts.get(baseFileName);

    // If not in cache, create a new one
    if (context == null) {

        context = new OutputContext();

        OutputFormat mainOutputFormat;

        try {
            mainOutputFormat = ((OutputFormat) ReflectionUtils.newInstance(this.context.getOutputFormatClass(),
                    this.context.getConfiguration()));
        } catch (ClassNotFoundException e1) {
            throw new RuntimeException(e1);
        }

        ProxyOutputCommitter baseOutputCommitter = ((ProxyOutputCommitter) mainOutputFormat
                .getOutputCommitter(this.context));

        // The trick is to create a new Job for each output
        Job job = new Job(this.context.getConfiguration());
        job.setOutputKeyClass(getNamedOutputKeyClass(this.context, baseFileName));
        job.setOutputValueClass(getNamedOutputValueClass(this.context, baseFileName));
        // Check possible specific context for the output
        setSpecificNamedOutputContext(this.context.getConfiguration(), job, baseFileName);
        TaskAttemptContext taskContext;
        try {
            taskContext = TaskAttemptContextFactory.get(job.getConfiguration(),
                    this.context.getTaskAttemptID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        // First we change the output dir for the new OutputFormat that we will
        // create
        // We put it inside the main output work path -> in case the Job fails,
        // everything will be discarded
        taskContext.getConfiguration().set("mapred.output.dir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        // This is for Hadoop 2.0 :
        taskContext.getConfiguration().set("mapreduce.output.fileoutputformat.outputdir",
                baseOutputCommitter.getBaseDir() + "/" + baseFileName);
        context.taskAttemptContext = taskContext;

        // Load the OutputFormat instance
        OutputFormat outputFormat = InstancesDistributor.loadInstance(
                context.taskAttemptContext.getConfiguration(), OutputFormat.class,
                getNamedOutputFormatInstanceFile(this.context, baseFileName), true);
        // We have to create a JobContext for meeting the contract of the
        // OutputFormat
        JobContext jobContext;
        try {
            jobContext = JobContextFactory.get(taskContext.getConfiguration(), taskContext.getJobID());
        } catch (Exception e) {
            throw new IOException(e);
        }

        context.jobContext = jobContext;
        // The contract of the OutputFormat is to check the output specs
        outputFormat.checkOutputSpecs(jobContext);
        // We get the output committer so we can call it later
        context.outputCommitter = outputFormat.getOutputCommitter(taskContext);
        // Save the RecordWriter to cache it
        context.recordWriter = outputFormat.getRecordWriter(taskContext);

        // if counters are enabled, wrap the writer with context
        // to increment counters
        if (countersEnabled) {
            context.recordWriter = new RecordWriterWithCounter(context.recordWriter, baseFileName,
                    this.context);
        }

        outputContexts.put(baseFileName, context);
    }
    return context.recordWriter;
}

From source file: com.facebook.hiveio.output.CheckOutputSpecsTest.java

License: Apache License
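
This test uses getJobID() to build the JobContext passed to checkOutputSpecs(), and expects an IOException because the target Hive table does not exist.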

@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputFormat.checkOutputSpecs(jobContext);
    fail();
}

From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java

License: Apache License
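
Same pattern as above, but against a faulty metastore client: checkOutputSpecs() on the JobContext derived via getJobID() is expected to throw an IOException once all retries are exhausted.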

@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));
    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");
    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);
    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);

    fail();
}

From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java

License: Apache License
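
Here the faulty metastore recovers one attempt before the retry limit, so checkOutputSpecs() succeeds and the number of metastore calls is asserted.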

@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(
            BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");
    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);
    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);

    outputFormat.checkOutputSpecs(jobContext);

    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}

From source file: org.apache.giraph.hive.Helpers.java

License: Apache License
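
This helper commits a Giraph vertex output: it builds a TaskAttemptContext, obtains the OutputCommitter, and uses getJobID() to derive the JobContext that commitJob() requires.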

public static void commitJob(GiraphConfiguration conf) throws IOException, InterruptedException {
    ImmutableClassesGiraphConfiguration iconf = new ImmutableClassesGiraphConfiguration(conf);
    WrappedVertexOutputFormat outputFormat = iconf.createWrappedVertexOutputFormat();
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskContext);
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputCommitter.commitJob(jobContext);
}

From source file: org.apache.giraph.hive.Helpers.java

License: Apache License
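
A small factory method in the same class: getJobID() bridges a freshly created TaskAttemptContext to the HackJobContext constructor.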

public static JobContext makeJobContext(Configuration conf) {
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    return new HackJobContext(jobConf, taskContext.getJobID());
}

From source file: org.seqdoop.hadoop_bam.TestVCFInputFormatStringency.java

License: Open Source License
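
This test builds a JobContextImpl from taskAttemptContext.getJobID() to compute input splits, then reads every record of an invalid VCF file under the given ValidationStringency.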

public void checkReading(ValidationStringency validationStringency) throws Exception {
    String filename = "invalid_info_field.vcf";
    Configuration conf = new Configuration();
    String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
    conf.set("mapred.input.dir", "file://" + input_file);

    if (validationStringency != null) {
        VCFRecordReader.setValidationStringency(conf, validationStringency);
    }

    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
    JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());

    VCFInputFormat inputFormat = new VCFInputFormat(conf);
    List<InputSplit> splits = inputFormat.getSplits(ctx);
    assertEquals(1, splits.size());
    RecordReader<LongWritable, VariantContextWritable> reader = inputFormat.createRecordReader(splits.get(0),
            taskAttemptContext);
    int counter = 0;
    while (reader.nextKeyValue()) {
        VariantContextWritable writable = reader.getCurrentValue();
        assertNotNull(writable);
        VariantContext vc = writable.get();
        assertNotNull(vc);
        String value = vc.toString();
        assertNotNull(value);
        counter++;
    }
    assertEquals(4, counter);
}