List of usage examples for the org.apache.hadoop.mapred.JobID constructor JobID()
public JobID()
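Before the full examples, a minimal sketch of the no-argument constructor in isolation. The class name JobIDConstructorExample and the parsed id string are illustrative only; the JobContextImpl call mirrors the pattern in the examples below, though that constructor's visibility can vary between Hadoop versions.

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.JobContextImpl;
import org.apache.hadoop.mapred.JobID;

public class JobIDConstructorExample {
    public static void main(String[] args) {
        // The no-arg constructor creates an "empty" placeholder JobID; the examples
        // below use it when a JobContext is needed outside a running MapReduce job.
        JobID emptyJobId = new JobID();

        // A fully specified id can instead be parsed from its string form
        // (jtIdentifier "201901010000", job number 1 -- illustrative values only).
        JobID parsedJobId = JobID.forName("job_201901010000_0001");

        // Pattern used by the examples below: pair the empty JobID with a JobConf
        // to build a JobContext.
        JobContextImpl jobContext = new JobContextImpl(new JobConf(), emptyJobId);
        System.out.println(jobContext.getJobID() + " / " + parsedJobId);
    }
}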
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file:eu.stratosphere.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {

    // enforce sequential open() calls
    synchronized (OPEN_MUTEX) {

        if (Integer.toString(taskNumber + 1).length() > 6) {
            throw new IOException("Task id too large.");
        }

        TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
                + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
                + Integer.toString(taskNumber + 1) + "_0");

        this.jobConf.set("mapred.task.id", taskAttemptID.toString());
        this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
        // for hadoop 2.2
        this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
        this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

        try {
            this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter = this.jobConf.getOutputCommitter();

        JobContext jobContext;
        try {
            jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        this.outputCommitter.setupJob(jobContext);

        this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
                Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
    }
}
From source file:org.apache.flink.api.java.hadoop.mapred.HadoopOutputFormatBase.java
License:Apache License
@Override
public void finalizeGlobal(int parallelism) throws IOException {
    try {
        JobContext jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
        OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();

        // finalize HDFS output format
        outputCommitter.commitJob(jobContext);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private void commitJob(String location) throws IOException {
    jobConf.set(OUTDIR, location);
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    OutputCommitter outputCommitter = this.jobConf.getOutputCommitter();
    // finalize HDFS output format
    outputCommitter.commitJob(jobContext);
}
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private HivePartitionWriter writerForLocation(String location) throws IOException {
    JobConf clonedConf = new JobConf(jobConf);
    clonedConf.set(OUTDIR, location);
    OutputFormat outputFormat;
    try {
        StorageDescriptor sd = hiveTablePartition.getStorageDescriptor();
        Class outputFormatClz = Class.forName(sd.getOutputFormat(), true,
                Thread.currentThread().getContextClassLoader());
        outputFormatClz = HiveFileFormatUtils.getOutputFormatSubstitute(outputFormatClz);
        outputFormat = (OutputFormat) outputFormatClz.newInstance();
    } catch (InstantiationException | IllegalAccessException | ClassNotFoundException e) {
        throw new FlinkRuntimeException("Unable to instantiate the hadoop output format", e);
    }
    ReflectionUtils.setConf(outputFormat, clonedConf);
    OutputCommitter outputCommitter = clonedConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(clonedConf, new JobID());
    outputCommitter.setupJob(jobContext);
    final boolean isCompressed = clonedConf.getBoolean(HiveConf.ConfVars.COMPRESSRESULT.varname, false);
    if (isCompressed) {
        String codecStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATECODEC.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(codecStr)) {
            try {
                Class<? extends CompressionCodec> codec = (Class<? extends CompressionCodec>) Class
                        .forName(codecStr, true, Thread.currentThread().getContextClassLoader());
                FileOutputFormat.setOutputCompressorClass(clonedConf, codec);
            } catch (ClassNotFoundException e) {
                throw new RuntimeException(e);
            }
        }
        String typeStr = clonedConf.get(HiveConf.ConfVars.COMPRESSINTERMEDIATETYPE.varname);
        if (!StringUtils.isNullOrWhitespaceOnly(typeStr)) {
            SequenceFile.CompressionType style = SequenceFile.CompressionType.valueOf(typeStr);
            SequenceFileOutputFormat.setOutputCompressionType(clonedConf, style);
        }
    }
    String taskPartition = String.valueOf(clonedConf.getInt("mapreduce.task.partition", -1));
    Path taskPath = FileOutputFormat.getTaskOutputPath(clonedConf, taskPartition);
    FileSinkOperator.RecordWriter recordWriter;
    try {
        recordWriter = HiveFileFormatUtils.getRecordWriter(clonedConf, outputFormat, outputClass, isCompressed,
                tblProperties, taskPath, Reporter.NULL);
    } catch (HiveException e) {
        throw new IOException(e);
    }
    return new HivePartitionWriter(clonedConf, outputFormat, recordWriter, outputCommitter);
}
From source file:org.apache.flink.hadoopcompatibility.mapred.HadoopOutputFormat.java
License:Apache License
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
    if (Integer.toString(taskNumber + 1).length() > 6) {
        throw new IOException("Task id too large.");
    }

    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
            + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
            + Integer.toString(taskNumber + 1) + "_0");

    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);

    try {
        this.context = HadoopUtils.instantiateTaskAttemptContext(this.jobConf, taskAttemptID);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter = new FileOutputCommitter();

    try {
        this.jobContext = HadoopUtils.instantiateJobContext(this.jobConf, new JobID());
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    this.fileOutputCommitter.setupJob(jobContext);

    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf,
            Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
}