List of usage examples for the org.apache.hadoop.mapred.JobID constructor JobID()
public JobID()
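Before the full examples, a minimal sketch of the no-argument constructor in isolation. The class name JobIDConstructorExample and the parsed id string are illustrative only; the JobContextImpl call mirrors the pattern in the examples below, though that constructor's visibility can vary between Hadoop versions.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;

public class JobIDConstructorExample {
    public static void main(String[] args) {
        // The no-arg constructor creates an "empty" placeholder JobID; the examples
        // below use it when a JobContext is needed outside a running MapReduce job.
        JobID emptyJobId = new JobID();

        // A fully specified id can instead be parsed from its string form
        // (jtIdentifier "201901010000", job number 1 -- illustrative values only).
        JobID parsedJobId = JobID.forName("job_201901010000_0001");

        // Pattern used by the examples below: pair the empty JobID with a JobConf
        // to build a JobContext.
        JobContextImpl jobContext = new JobContextImpl(new JobConf(), emptyJobId);
        System.out.println(jobContext.getJobID() + " / " + parsedJobId);
    }
}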
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    try {
        JobContext jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private void commitJob(String location) throws IOException {
    jobConf.set(OUTDIR, location);
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}