List of usage examples for the org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl constructor
public TaskAttemptContextImpl(Configuration conf, TaskAttemptID taskId)
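Before the project-specific examples below, here is a minimal, self-contained sketch of how this constructor is typically invoked outside a running MapReduce task (for example, in unit tests). The job tracker identifier and task numbers are arbitrary placeholders, not values from any of the source files listed here:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.TaskAttemptContext;
    import org.apache.hadoop.mapreduce.TaskAttemptID;
    import org.apache.hadoop.mapreduce.TaskType;
    import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

    public class TaskAttemptContextExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Arbitrary identifiers; in a real job the framework assigns these.
        TaskAttemptID taskAttemptId = new TaskAttemptID("jt", 1, TaskType.MAP, 0, 0);
        // Wrap the configuration and task attempt ID in a context object,
        // as an InputFormat/OutputFormat would receive it at runtime.
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, taskAttemptId);
        System.out.println(context.getTaskAttemptID());
      }
    }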
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
  final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
  final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));

  mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

  final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
  final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
  final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
  final FileSystem fs = outputDir.getFileSystem(mConf);

  final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
  writer.write(entry1, NW);
  writer.write(entry2, NW);
  writer.close(context);

  final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
  assertTrue(fs.exists(defaultDir));

  final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
  assertFalse(fs.exists(inMemoryDir));

  assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
  assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
  assertFalse(fs.exists(new Path(defaultDir, "00002")));

  mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMultipleLayouts() throws Exception {
  final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
  final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
  final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
  final FileSystem fs = outputDir.getFileSystem(mConf);

  final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

  final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
  writer.write(defaultEntry, NW);
  final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
  writer.write(inMemoryEntry, NW);

  try {
    // Test with an invalid locality group ID:
    final ColumnId invalid = new ColumnId(1234);
    assertTrue(!mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
    writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
    fail("Output format did not fail on unknown locality group IDs.");
  } catch (IllegalArgumentException iae) {
    LOG.info("Expected error: " + iae);
  }

  writer.close(context);

  final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
  assertTrue(fs.exists(defaultDir));

  final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
  assertTrue(fs.exists(inMemoryDir));

  assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
  assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

  mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.platform.CDH4MR1KijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptContext newTaskAttemptContext(Configuration conf, TaskAttemptID id) {
  // In CDH4, TaskAttemptContext and its implementation are separated.
  return new TaskAttemptContextImpl(conf, id);
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetInputFormat.java
License:Apache License
@Override
public IPentahoRecordReader createRecordReader(IPentahoInputSplit split) throws Exception {
  return inClassloader(() -> {
    PentahoInputSplitImpl pentahoInputSplit = (PentahoInputSplitImpl) split;
    InputSplit inputSplit = pentahoInputSplit.getInputSplit();

    ReadSupport<RowMetaAndData> readSupport = new PentahoParquetReadSupport();

    ParquetRecordReader<RowMetaAndData> nativeRecordReader = new ParquetRecordReader<RowMetaAndData>(
        readSupport, ParquetInputFormat.getFilter(job.getConfiguration()));
    TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
    nativeRecordReader.initialize(inputSplit, task);

    return new PentahoParquetRecordReader(nativeRecordReader);
  });
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
  if (outputFile == null) {
    throw new RuntimeException("Output file is not defined");
  }
  if ((outputFields == null) || (outputFields.size() == 0)) {
    throw new RuntimeException("Schema is not defined");
  }

  return inClassloader(() -> {
    FixedParquetOutputFormat nativeParquetOutputFormat =
        new FixedParquetOutputFormat(new PentahoParquetWriteSupport(outputFields));

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
    TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
    try {
      ParquetRecordWriter<RowMetaAndData> recordWriter =
          (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
      return new PentahoParquetRecordWriter(recordWriter, task);
    } catch (IOException e) {
      throw new RuntimeException("Some error accessing parquet files", e);
    } catch (InterruptedException e) {
      // logging here
      e.printStackTrace();
      throw new RuntimeException("This should never happen " + e);
    }
  });
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordReaderTest.java
License:Apache License
private void initializeRecordReader() throws Exception {
  InputSplit inputSplit = nativeParquetInputFormat.getSplits(job).get(0);
  TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
  nativeRecordReader.initialize(inputSplit, task);
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordWriterTest.java
License:Apache License
@Before
public void setUp() throws Exception {
  ConfigurationProxy conf = new ConfigurationProxy();
  conf.set("fs.defaultFS", "file:///");
  Job job = Job.getInstance(conf);

  tempFile = Files.createTempDirectory("parquet");
  org.apache.hadoop.fs.Path outputFile = new org.apache.hadoop.fs.Path(tempFile + PARQUET_FILE_NAME);
  ParquetOutputFormat.setOutputPath(job, outputFile.getParent());

  TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
  task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
}
From source file:org.pentaho.hadoop.shim.common.format.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
  if (outputFile == null) {
    throw new RuntimeException("Output file is not defined");
  }
  if (schema == null) {
    throw new RuntimeException("Schema is not defined");
  }

  return inClassloader(() -> {
    FixedParquetOutputFormat nativeParquetOutputFormat =
        new FixedParquetOutputFormat(new PentahoParquetWriteSupport(schema));

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
    TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
    try {
      ParquetRecordWriter<RowMetaAndData> recordWriter =
          (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
      return new PentahoParquetRecordWriter(recordWriter, task);
    } catch (IOException e) {
      throw new RuntimeException("Some error accessing parquet files", e);
    } catch (InterruptedException e) {
      // logging here
      e.printStackTrace();
      throw new RuntimeException("This should never happen " + e);
    }
  });
}
From source file:org.seqdoop.hadoop_bam.TestVCFInputFormatStringency.java
License:Open Source License
public void checkReading(ValidationStringency validationStringency) throws Exception {
  String filename = "invalid_info_field.vcf";
  Configuration conf = new Configuration();
  String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile();
  conf.set("mapred.input.dir", "file://" + input_file);

  if (validationStringency != null) {
    VCFRecordReader.setValidationStringency(conf, validationStringency);
  }

  TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class));
  JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID());

  VCFInputFormat inputFormat = new VCFInputFormat(conf);
  List<InputSplit> splits = inputFormat.getSplits(ctx);
  assertEquals(1, splits.size());
  RecordReader<LongWritable, VariantContextWritable> reader =
      inputFormat.createRecordReader(splits.get(0), taskAttemptContext);

  int counter = 0;
  while (reader.nextKeyValue()) {
    VariantContextWritable writable = reader.getCurrentValue();
    assertNotNull(writable);
    VariantContext vc = writable.get();
    assertNotNull(vc);
    String value = vc.toString();
    assertNotNull(value);
    counter++;
  }
  assertEquals(4, counter);
}
From source file:org.warcbase.io.GenericArchiveRecordWritableTest.java
License:Apache License
@Test
public void testArcInputFormat() throws Exception {
  String arcFile = Resources.getResource("arc/example.arc.gz").getPath();

  Configuration conf = new Configuration(false);
  conf.set("fs.defaultFS", "file:///");

  File testFile = new File(arcFile);
  Path path = new Path(testFile.getAbsoluteFile().toURI());
  FileSplit split = new FileSplit(path, 0, testFile.length(), null);

  InputFormat<LongWritable, GenericArchiveRecordWritable> inputFormat =
      ReflectionUtils.newInstance(WacGenericInputFormat.class, conf);
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
  RecordReader<LongWritable, GenericArchiveRecordWritable> reader =
      inputFormat.createRecordReader(split, context);
  reader.initialize(split, context);

  int cnt = 0;
  while (reader.nextKeyValue()) {
    GenericArchiveRecordWritable record = reader.getCurrentValue();
    cnt++;

    ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
    DataOutputStream dataOut = new DataOutputStream(bytesOut);
    record.write(dataOut);

    GenericArchiveRecordWritable reconstructed = new GenericArchiveRecordWritable();
    reconstructed.setFormat(ArchiveFormat.ARC);
    reconstructed.readFields(new DataInputStream(new ByteArrayInputStream(bytesOut.toByteArray())));

    boolean isArc = (record.getFormat() == ArchiveFormat.ARC);
    assertEquals(isArc, true);
    if (isArc) {
      assertEquals(((ARCRecord) record.getRecord()).getMetaData().getUrl(),
          ((ARCRecord) reconstructed.getRecord()).getMetaData().getUrl());
    }
  }
  assertEquals(300, cnt);
}