List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptID constructor
@Deprecated public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
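The boolean argument marks the attempt as a map task; newer Hadoop releases replace it with an explicit TaskType, and both forms appear in the examples below. A minimal, self-contained sketch of the two forms (the class name and the placeholder identifiers and numbers are illustrative, not taken from the sources below):

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIDExample {
    public static void main(String[] args) {
        // Deprecated form: the boolean says whether this is a map task.
        TaskAttemptID legacy = new TaskAttemptID("jt", 1, true, 3, 0);

        // Replacement form: pass the task type explicitly.
        TaskAttemptID current = new TaskAttemptID("jt", 1, TaskType.MAP, 3, 0);

        System.out.println(legacy);                 // attempt_jt_0001_m_000003_0
        System.out.println(legacy.equals(current)); // true: both identify the same attempt
    }
}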
From source file:org.kiji.avro.mapreduce.TestAvroKeyOutputFormat.java
License:Apache License
/**
 * Tests that the record writer is constructed and returned correctly from the output format.
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
    // Configure a mock task attempt context.
    Job job = new Job(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID("id", 1, true, 1, 1)).anyTimes();

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
        AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
    expect(recordWriterFactory.create(eq(writerSchema),
        capture(capturedCodecFactory), // Capture for comparison later.
        anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

    replay(context);
    replay(expectedRecordWriter);
    replay(recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);

    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context);
    verify(expectedRecordWriter);
    verify(recordWriterFactory);

    assertNotNull(recordWriter);
    assertTrue(expectedRecordWriter == recordWriter);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));
    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));
    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertFalse(mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.platform.CDH4MR1KijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In CDH4, use all these args directly.
    return new TaskAttemptID(jtIdentifier, jobId, type, taskId, id);
}
From source file:org.kiji.mapreduce.platform.Hadoop1xKijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In Hadoop 1.0, TaskType isn't an arg to TaskAttemptID; instead, there's just a
    // boolean indicating whether it's a map task or not.
    boolean isMap = type == TaskType.MAP;
    return new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, id);
}
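Note how the two bridge implementations above confine the version difference to a single method: callers always pass a TaskType, and only the Hadoop 1.x bridge translates it into the isMap boolean that the older, now-deprecated constructor expects.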
From source file:org.msgpack.hadoop.mapreduce.input.TestMessagePackInputFormat.java
License:Apache License
void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID("123", 0, false, 1, 2));

    MessagePackInputFormat format = new MessagePackInputFormat();
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size());

    for (int j = 0; j < splits.size(); j++) {
        RecordReader<LongWritable, MessagePackWritable> reader =
            format.createRecordReader(splits.get(j), attemptContext);
        reader.initialize(splits.get(j), attemptContext);

        int count = 0;
        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();
                Value val = reader.getCurrentValue().get();
                assertEquals(count, val.asIntegerValue().getLong());
                count++;
            }
        } finally {
            reader.close();
        }
    }
}
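This example targets the Hadoop 1.x API, where TaskAttemptContext is a concrete class that can be instantiated directly; on Hadoop 2.x it became an interface, which is why the other examples on this page construct a TaskAttemptContextImpl instead.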
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if ((outputFields == null) || (outputFields.size() == 0)) {
        throw new RuntimeException("Schema is not defined");
    }
    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat =
            new FixedParquetOutputFormat(new PentahoParquetWriteSupport(outputFields));
        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {
            ParquetRecordWriter<RowMetaAndData> recordWriter =
                (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordWriterTest.java
License:Apache License
@Before
public void setUp() throws Exception {
    ConfigurationProxy conf = new ConfigurationProxy();
    conf.set("fs.defaultFS", "file:///");
    Job job = Job.getInstance(conf);

    tempFile = Files.createTempDirectory("parquet");
    org.apache.hadoop.fs.Path outputFile = new org.apache.hadoop.fs.Path(tempFile + PARQUET_FILE_NAME);
    ParquetOutputFormat.setOutputPath(job, outputFile.getParent());

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
    task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
}
From source file:org.pentaho.hadoop.shim.common.format.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if (schema == null) {
        throw new RuntimeException("Schema is not defined");
    }
    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat =
            new FixedParquetOutputFormat(new PentahoParquetWriteSupport(schema));
        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {
            ParquetRecordWriter<RowMetaAndData> recordWriter =
                (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}