List of usage examples for org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
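Before the full examples below, here is a minimal, self-contained sketch of the construction pattern they all share: build a Configuration and a TaskAttemptID, then pass both to the constructor. The wrapping class and main method are illustrative only; an empty TaskAttemptID is sufficient for local testing, while real jobs receive a fully populated id from the framework.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptContextImplExample {
    public static void main(String[] args) {
        // Configuration carries the job settings that the context exposes to readers/writers
        Configuration conf = new Configuration();
        // A default (empty) TaskAttemptID is enough for standalone/testing use
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
        System.out.println(context.getTaskAttemptID());
    }
}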
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
            Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();

    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
            Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
            Collections.<String, String>emptyMap());

    Configuration conf = new Configuration();
    StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    StreamInputFormat format = new StreamInputFormat();

    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
                .createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }

    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitionerWriterWrapper.java
License:Apache License
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    DynamicPartitioningOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than
    // FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = DynamicPartitioningOutputFormat
            .createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    DynamicPartitioningOutputFormat.setOutputPath(job, jobOutputPath);
    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputFormat.java
License:Apache License
private TaskAttemptContext getTaskAttemptContext(TaskAttemptContext context, String newOutputName)
        throws IOException {
    Job job = new Job(context.getConfiguration());
    FileOutputFormat.setOutputName(job, newOutputName);
    // CDAP-4806 We must set this parameter in addition to calling FileOutputFormat#setOutputName, because
    // AvroKeyOutputFormat/AvroKeyValueOutputFormat use a different parameter for the output name than
    // FileOutputFormat.
    if (isAvroOutputFormat(getFileOutputFormat(context))) {
        job.getConfiguration().set("avro.mo.config.namedOutput", newOutputName);
    }

    Path jobOutputPath = createJobSpecificPath(FileOutputFormat.getOutputPath(job), context);
    FileOutputFormat.setOutputPath(job, jobOutputPath);
    return new TaskAttemptContextImpl(job.getConfiguration(), context.getTaskAttemptID());
}
From source file:com.asakusafw.lang.compiler.mapreduce.testing.InputFormatTester.java
License:Apache License
/**
 * Collects input contents.
 * @param <T> the data type
 * @param collector the target collector
 * @throws IOException if failed
 * @throws InterruptedException if interrupted
 */
@SuppressWarnings("unchecked")
public <T> void collect(Consumer<T> collector) throws IOException, InterruptedException {
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    List<InputSplit> splits = format.getSplits(context);
    for (InputSplit split : splits) {
        InputSplit restored = restore(split);
        try (RecordReader<?, ?> reader = format.createRecordReader(restored, context)) {
            reader.initialize(restored, context);
            while (reader.nextKeyValue()) {
                collector.accept((T) reader.getCurrentValue());
            }
        }
    }
}
From source file:com.asakusafw.runtime.compatibility.hadoop2.JobCompatibilityHadoop2.java
License:Apache License
@Override
public TaskAttemptContext newTaskAttemptContext(Configuration conf, TaskAttemptID id,
        final Progressable progressable) {
    if (conf == null) {
        throw new IllegalArgumentException("conf must not be null"); //$NON-NLS-1$
    }
    if (id == null) {
        throw new IllegalArgumentException("id must not be null"); //$NON-NLS-1$
    }
    if (progressable == null) {
        return new TaskAttemptContextImpl(conf, id);
    }
    return new TaskAttemptContextImpl(conf, id) {
        @Override
        public void progress() {
            progressable.progress();
            super.progress();
        }
    };
}
From source file:com.asakusafw.testdriver.file.FileDeployer.java
License:Apache License
/**
 * Opens output for the specified {@link OutputFormat}.
 * @param <V> value type
 * @param definition target model definition
 * @param destination output location
 * @param output format
 * @return the opened {@link ModelOutput}
 * @throws IOException if failed to open the target output
 * @throws IllegalArgumentException if some parameters were {@code null}
 */
public <V> ModelOutput<V> openOutput(DataModelDefinition<V> definition, final String destination,
        FileOutputFormat<? super NullWritable, ? super V> output) throws IOException {
    assert destination != null;
    assert output != null;
    LOG.debug("Opening {} using {}", destination, output.getClass().getName());
    Job job = Job.getInstance(configuration);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(definition.getModelClass());
    final File temporaryDir = File.createTempFile("asakusa", ".tempdir");
    if (temporaryDir.delete() == false || temporaryDir.mkdirs() == false) {
        throw new IOException("Failed to create temporary directory");
    }
    LOG.debug("Using staging deploy target: {}", temporaryDir);
    URI uri = temporaryDir.toURI();
    FileOutputFormat.setOutputPath(job, new Path(uri));
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileOutputFormatDriver<V> result = new FileOutputFormatDriver<V>(context, output, NullWritable.get()) {
        @Override
        public void close() throws IOException {
            super.close();
            deploy(destination, temporaryDir);
        }
    };
    return result;
}
From source file:com.asakusafw.testdriver.file.FileExporterRetriever.java
License:Apache License
@Override
public <V> DataModelSource createSource(DataModelDefinition<V> definition, FileExporterDescription description,
        TestContext context) throws IOException {
    LOG.info("Retrieving export results: {}", description);
    VariableTable variables = createVariables(context);
    checkType(definition, description);
    Configuration conf = configurations.newInstance();
    Job job = Job.getInstance(conf);
    String resolved = variables.parse(description.getPathPrefix(), false);
    FileInputFormat.setInputPaths(job, new Path(resolved));
    TaskAttemptContext taskContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID(new TaskID(new JobID(), TaskType.MAP, 0), 0));
    FileInputFormat<?, V> format = getOpposite(conf, description.getOutputFormat());
    FileInputFormatDriver<V> result = new FileInputFormatDriver<>(definition, taskContext, format);
    return result;
}
From source file:com.baynote.kafka.hadoop.MultipleKafkaInputFormat.java
License:Apache License
/**
 * {@inheritDoc}
 */
@Override
public RecordReader<LongWritable, BytesWritable> createRecordReader(final InputSplit split,
        final TaskAttemptContext context) throws IOException, InterruptedException {
    final TaggedInputSplit taggedInputSplit = (TaggedInputSplit) split;
    final TaskAttemptContext taskAttemptContextClone = new TaskAttemptContextImpl(taggedInputSplit.getConf(),
            context.getTaskAttemptID());
    taskAttemptContextClone.setStatus(context.getStatus());
    return new DelegatingRecordReader<LongWritable, BytesWritable>(split, taskAttemptContextClone);
}
From source file:com.cloudera.integration.oracle.goldengate.ldv.mapreduce.lib.input.LengthDelimitedInputFormatTest.java
@Test
public void test() throws IOException, InterruptedException {
    Configuration conf = new Configuration(false);
    conf.set("fs.default.name", "file:///");
    conf.setInt(Constants.RECORD_PREFIX_LENGTH, 4);
    conf.setInt(Constants.FIELD_PREFIX_LENGTH, 4);
    Path path = new Path(tempFile.getAbsoluteFile().toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    LengthDelimitedInputFormat inputFormat = ReflectionUtils.newInstance(LengthDelimitedInputFormat.class, conf);
    try (LengthDelimitedRecordReader reader = (LengthDelimitedRecordReader) inputFormat.createRecordReader(null,
            context)) {
        FileSplit split = new FileSplit(path, 0, tempFile.length(), null);
        reader.initialize(split, context);
        while (reader.nextKeyValue()) {
            LengthDelimitedWritable writable = reader.getCurrentValue();
            Assert.assertNotNull(writable);
            Timestamp timestamp = new Timestamp(writable.getTimestamp().get());
            Assert.assertEquals("2014-12-31 23:06:06.255", timestamp.toString());
            FieldValueWritable[] writables = writable.getWritables();
            for (int i = 0; i < chars.length(); i++) {
                String value = chars.substring(0, i);
                FieldValueWritable fieldValueWritable = writables[i];
                Assert.assertEquals(value, fieldValueWritable.getData());
            }
            // System.out.println(reader.getCurrentValue());
        }
    }
}