Example usage for org.apache.hadoop.mapred JobContextImpl JobContextImpl

Introduction

On this page you can find example usage for the org.apache.hadoop.mapred JobContextImpl constructor.

Prototype

public JobContextImpl(Configuration conf, JobID jobId) 
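
The following is a minimal sketch of calling this constructor directly, assembled from the prototype above and the usage examples below; the configuration key and job name it sets are illustrative only, and the JobConf/JobID classes are the ones the examples below import.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapreduce.JobID;

public class JobContextImplSketch {
    public static void main(String[] args) {
        // JobConf extends Configuration, so it satisfies the prototype above.
        JobConf conf = new JobConf();
        conf.set("mapreduce.job.name", "job-context-demo"); // illustrative setting only

        // Construct the context with a fresh JobID, as several examples below do;
        // others build the JobID from an existing identifier instead.
        JobContextImpl jobContext = new JobContextImpl(conf, new JobID());

        // The context exposes the job id and configuration it was created with.
        System.out.println("Job ID: " + jobContext.getJobID());
    }
}

In the examples below, the resulting context is typically handed to InputFormat.getSplits(...) or to an OutputCommitter's setupJob/commitJob calls.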

Usage

From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java

License: Apache License

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private org.apache.hadoop.mapreduce.InputSplit[] getNewInputSplits(JobConf conf)
        throws ClassNotFoundException, IOException, InterruptedException {
    org.apache.hadoop.mapreduce.InputSplit[] splits = null;
    JobContext context = new JobContextImpl(conf, null);
    org.apache.hadoop.mapreduce.InputFormat inputFormat = ReflectionUtils
            .newInstance(context.getInputFormatClass(), conf);
    List<org.apache.hadoop.mapreduce.InputSplit> inputSplits = inputFormat.getSplits(context);
    return inputSplits.toArray(new org.apache.hadoop.mapreduce.InputSplit[] {});
}

From source file: org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License: Apache License

private void commitJob(String location) throws IOException {
    jobConf.set(OUTDIR, location);
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
}

From source file: org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License: Apache License

private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java

License: Apache License

/**
 * Constructor.
 *
 * @param jobId Job ID.
 * @param jobInfo Job info.
 * @param log Logger.
 * @param libNames Optional additional native library names.
 * @param helper Hadoop helper.
 */
public HadoopV2Job(HadoopJobId jobId, final HadoopDefaultJobInfo jobInfo, IgniteLogger log,
        @Nullable String[] libNames, HadoopHelper helper) {
    assert jobId != null;
    assert jobInfo != null;

    this.jobId = jobId;
    this.jobInfo = jobInfo;
    this.libNames = libNames;
    this.helper = helper;
    this.log = log;

    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        hadoopJobID = new JobID(jobId.globalId().toString(), jobId.localId());

        jobConf = new JobConf();

        HadoopFileSystemsUtils.setupFileSystems(jobConf);

        for (Map.Entry<String, String> e : jobInfo.properties().entrySet())
            jobConf.set(e.getKey(), e.getValue());

        jobCtx = new JobContextImpl(jobConf, hadoopJobID);

        rsrcMgr = new HadoopV2JobResourceManager(jobId, jobCtx, log, this);
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java

License: Apache License

/**
 * @param jobId Job ID.
 * @param jobInfo Job info.
 * @param log Logger.
 */
public GridHadoopV2Job(GridHadoopJobId jobId, final GridHadoopDefaultJobInfo jobInfo, IgniteLogger log) {
    assert jobId != null;
    assert jobInfo != null;

    this.jobId = jobId;
    this.jobInfo = jobInfo;

    hadoopJobID = new JobID(jobId.globalId().toString(), jobId.localId());

    GridHadoopClassLoader clsLdr = (GridHadoopClassLoader) getClass().getClassLoader();

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(clsLdr);

    jobConf = new JobConf();

    GridHadoopFileSystemsUtils.setupFileSystems(jobConf);

    Thread.currentThread().setContextClassLoader(null);

    for (Map.Entry<String, String> e : jobInfo.properties().entrySet())
        jobConf.set(e.getKey(), e.getValue());

    jobCtx = new JobContextImpl(jobConf, hadoopJobID);

    rsrcMgr = new GridHadoopV2JobResourceManager(jobId, jobCtx, log);
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java

License: Apache License

/**
 * @param jobId Job ID.
 * @param jobInfo Job info.
 * @param log Logger.
 */
public HadoopV2Job(HadoopJobId jobId, final HadoopDefaultJobInfo jobInfo, IgniteLogger log) {
    assert jobId != null;
    assert jobInfo != null;

    this.jobId = jobId;
    this.jobInfo = jobInfo;

    hadoopJobID = new JobID(jobId.globalId().toString(), jobId.localId());

    HadoopClassLoader clsLdr = (HadoopClassLoader) getClass().getClassLoader();

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(clsLdr);

    jobConf = new JobConf();

    HadoopFileSystemsUtils.setupFileSystems(jobConf);

    Thread.currentThread().setContextClassLoader(null);

    for (Map.Entry<String, String> e : jobInfo.properties().entrySet())
        jobConf.set(e.getKey(), e.getValue());

    jobCtx = new JobContextImpl(jobConf, hadoopJobID);

    rsrcMgr = new HadoopV2JobResourceManager(jobId, jobCtx, log);
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java

License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput to read the JobConf from.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJob job, HadoopJobId jobId, @Nullable UUID locNodeId,
        DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);
    this.locNodeId = locNodeId;

    // Before creating the JobConf instance we should set a new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}