Example usage for the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor

Introduction

This page collects example usages of the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor, taken from the source files listed below.

Prototype

public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId) 
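
A minimal sketch of calling this constructor directly, in the same way the examples below do when driving an InputFormat or OutputFormat outside of a running MapReduce job. The class name TaskAttemptContextExample is illustrative only and does not come from any of the projects listed here:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextExample {
    public static void main(String[] args) {
        // An empty Configuration and a default TaskAttemptID are typically enough
        // for local testing; a real job would supply a fully populated context.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        // The context exposes the attempt id and configuration it was constructed with.
        System.out.println(context.getTaskAttemptID());
    }
}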

Usage

From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java

License: Apache License

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}

From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java

License: Apache License

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);

    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
            Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();

    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
            Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
            Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    StreamInputFormat format = new StreamInputFormat();

    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
                .createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }

    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java

License: Apache License

private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = DynamicPartitioningOutputFormat
            .createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}

From source file: co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java

License: Apache License

private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    FileOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    FileOutputFormat.setOutputPath(job, jobOutputPath);

    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}

From source file: com.asakusafw.lang.compiler.mapreduce.testing.InputFormatTester.java

License: Apache License

/**
 * Collects input contents.
 * @param <T> the data type
 * @param collector the target collector
 * @throws IOException if failed
 * @throws InterruptedException if interrupted
 */
@SuppressWarnings("unchecked")
public <T> void collect(Consumer<T> collector) throws IOException, InterruptedException {
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = format.getSplits(context);
    for (InputSplit split : splits) {
        InputSplit restored = restore(split);
        try (RecordReader<?, ?> reader = format.createRecordReader(restored, context)) {
            reader.initialize(restored, context);
            while (reader.nextKeyValue()) {
                collector.accept((T) reader.getCurrentValue());
            }
        }
    }
}

From source file: com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java

License: Apache License

@Override
public TaskAttemptContext newTaskAttemptContext(Configuration conf, TaskAttemptID id,
        final Progressable progressable) {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    if (id == null) {
        throw new IllegalArgumentException("id must not be null"); //$NON-NLS-1$
    }
    if (progressable == null) {
        return new TaskAttemptContextImpl(conf, id);
    }
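    // Wrap the context so progress notifications are also forwarded to the supplied Progressable.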
    return new TaskAttemptContextImpl(conf, id) {
        @Override
        public void progress() {
            progressable.progress();
            super.progress();
        }
    };
}

From source file: com.asakusafw.testdriver.file.FileDeployer.java

License: Apache License

/**
 * Opens output for the specified {@link OutputFormat}.
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output the output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    assert destination != null;
    assert output != null;
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            deploy(destination, temporaryDir);
        }
    };
    return result;
}

From source file: com.asakusafw.testdriver.file.FileExporterRetriever.java

License: Apache License

@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition, FileExporterDescription description,
        TestContext context) throws IOException {
    LOG.info("Retrieving export results: {}", description);
    VariableTable variables = createVariables(context);
    checkType(definition, description);
    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    String resolved = variables.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(job, new Path(resolved));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileInputFormat<?, V> format = getOpposite(conf, description.getOutputFormat());
    FileInputFormatDriver<V> result = new FileInputFormatDriver<>(definition, taskContext, format);
    return result;
}

From source file: com.baynote.kafka.hadoop.MultipleKafkaInputFormat.java

License: Apache License

/**
 * {@inheritDoc}
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    final TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
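    // Build a fresh context from the tagged split's own configuration so the delegating reader sees the per-input settings.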
    final TaskAttemptContext taskAttemptContextClone = new TaskAttemptContextImpl(taggedInputSplit.getConf(),
            context.getTaskAttemptID());
    taskAttemptContextClone.setStatus(context.getStatus());
    return new DelegatingRecordReader<LongWritable, BytesWritable>(split, taskAttemptContextClone);
}

From source file: com.cloudera.integration.oracle.goldengate.ldv.mapreduce.lib.input.LengthDelimitedInputFormatTest.java

@Test
public void test() throws IOException, InterruptedException {
    Configuration conf = new Configuration(false);
    conf.set("fs.default.name", "file:///");
    conf.setInt(Constants.RECORD_PREFIX_LENGTH, 4);
    conf.setInt(Constants.FIELD_PREFIX_LENGTH, 4);

    Path path = new Path(tempFile.getAbsoluteFile().toURI());

    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LengthDelimitedInputFormat inputFormat = ReflectionUtils.newInstance(LengthDelimitedInputFormat.class,
            conf);
    try (LengthDelimitedRecordReader reader = (LengthDelimitedRecordReader) inputFormat.createRecordReader(null,
            context)) {
        FileSplit split = new FileSplit(path, 0, tempFile.length(), null);
        reader.initialize(split, context);

        while (reader.nextKeyValue()) {
            LengthDelimitedWritable writable = reader.getCurrentValue();
            Assert.assertNotNull(writable);
            Timestamp timestamp = new Timestamp(writable.getTimestamp().get());

            Assert.assertEquals("2014-12-31 23:06:06.255", timestamp.toString());
            FieldValueWritable[] writables = writable.getWritables();
            for (int i = 0; i < chars.length(); i++) {
                String value = chars.substring(0, i);
                FieldValueWritable fieldValueWritable = writables[i];
                Assert.assertEquals(value, fieldValueWritable.getData());
            }

            //          System.out.println(reader.getCurrentValue());
        }
    }

}