List of usage examples for the org.apache.hadoop.mapred JobContextImpl constructor
public JobContextImpl(Configuration conf, JobID jobId)
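The examples below share one pattern: build a JobConf (a Configuration subclass), pair it with a JobID, and pass both to the constructor. The sketch here condenses that pattern; the helper name is hypothetical, and the imports assume the org.apache.hadoop.mapred classes named above, so adjust them if your code uses the org.apache.hadoop.mapreduce.task variant of JobContextImpl.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;

// Illustrative helper, not taken from any of the sources below:
// wrap a Configuration in a JobConf and pair it with a fresh JobID.
static JobContextImpl newJobContext(Configuration conf) {
    return new JobContextImpl(new JobConf(conf), new JobID());
}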
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();

    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
private org.apache.hadoop.mapreduce.InputSplit[] getNewInputSplits(JobConf conf)
        throws ClassNotFoundException, IOException, InterruptedException {
    JobContext context = new JobContextImpl(conf, null);
    org.apache.hadoop.mapreduce.InputFormat inputFormat = ReflectionUtils
            .newInstance(context.getInputFormatClass(), conf);
    List<org.apache.hadoop.mapreduce.InputSplit> inputSplits = inputFormat.getSplits(context);
    return inputSplits.toArray(new org.apache.hadoop.mapreduce.InputSplit[] {});
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private void commitJob(String location) throws IOException {
    jobConf.set(OUTDIR, location);
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Job.java
License:Apache License
/**
 * Constructor.
 *
 * @param jobId Job ID.
 * @param jobInfo Job info.
 * @param log Logger.
 * @param libNames Optional additional native library names.
 * @param helper Hadoop helper.
 */
public HadoopV2Job(HadoopJobId jobId, final HadoopDefaultJobInfo jobInfo, IgniteLogger log,
    @Nullable String[] libNames, HadoopHelper helper) {
    assert jobId != null;
    assert jobInfo != null;

    this.jobId = jobId;
    this.jobInfo = jobInfo;
    this.libNames = libNames;
    this.helper = helper;
    this.log = log;

    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        hadoopJobID = new JobID(jobId.globalId().toString(), jobId.localId());

        jobConf = new JobConf();

        HadoopFileSystemsUtils.setupFileSystems(jobConf);

        for (Map.Entry<String, String> e : jobInfo.properties().entrySet())
            jobConf.set(e.getKey(), e.getValue());

        jobCtx = new JobContextImpl(jobConf, hadoopJobID);

        rsrcMgr = new HadoopV2JobResourceManager(jobId, jobCtx, log, this);
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License:Apache License
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput for read JobConf.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before create JobConf instance we should set new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Job.java
License:Apache License
/**
 * @param jobId Job ID.
 * @param jobInfo Job info.
 * @param log Logger.
 */
public GridHadoopV2Job(GridHadoopJobId jobId, final GridHadoopDefaultJobInfo jobInfo, IgniteLogger log) {
    assert jobId != null;
    assert jobInfo != null;

    this.jobId = jobId;
    this.jobInfo = jobInfo;

    hadoopJobID = new JobID(jobId.globalId().toString(), jobId.localId());

    GridHadoopClassLoader clsLdr = (GridHadoopClassLoader) getClass().getClassLoader();

    // Before create JobConf instance we should set new context class loader.
    Thread.currentThread().setContextClassLoader(clsLdr);

    jobConf = new JobConf();

    GridHadoopFileSystemsUtils.setupFileSystems(jobConf);

    Thread.currentThread().setContextClassLoader(null);

    for (Map.Entry<String, String> e : jobInfo.properties().entrySet())
        jobConf.set(e.getKey(), e.getValue());

    jobCtx = new JobContextImpl(jobConf, hadoopJobID);

    rsrcMgr = new GridHadoopV2JobResourceManager(jobId, jobCtx, log);
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License:Apache License
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput for read JobConf.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before create JobConf instance we should set new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2Job.java
License:Apache License
/**
 * @param jobId Job ID.
 * @param jobInfo Job info.
 * @param log Logger.
 */
public HadoopV2Job(HadoopJobId jobId, final HadoopDefaultJobInfo jobInfo, IgniteLogger log) {
    assert jobId != null;
    assert jobInfo != null;

    this.jobId = jobId;
    this.jobInfo = jobInfo;

    hadoopJobID = new JobID(jobId.globalId().toString(), jobId.localId());

    HadoopClassLoader clsLdr = (HadoopClassLoader) getClass().getClassLoader();

    // Before create JobConf instance we should set new context class loader.
    Thread.currentThread().setContextClassLoader(clsLdr);

    jobConf = new JobConf();

    HadoopFileSystemsUtils.setupFileSystems(jobConf);

    Thread.currentThread().setContextClassLoader(null);

    for (Map.Entry<String, String> e : jobInfo.properties().entrySet())
        jobConf.set(e.getKey(), e.getValue());

    jobCtx = new JobContextImpl(jobConf, hadoopJobID);

    rsrcMgr = new HadoopV2JobResourceManager(jobId, jobCtx, log);
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java
License:Apache License
/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput for read JobConf.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJob job, HadoopJobId jobId,
    @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before create JobConf instance we should set new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        }
        catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    }
    finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}