List of usage examples for the org.apache.hadoop.mapreduce.TaskAttemptID constructor
@Deprecated public TaskAttemptID(String jtIdentifier, int jobId, boolean isMap, int taskId, int id)
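The boolean argument marks the attempt as a map task; newer Hadoop releases replace it with an explicit TaskType, and both forms appear in the examples below. A minimal, self-contained sketch of the two forms (the class name and the placeholder identifiers and numbers are illustrative, not taken from the sources below):

import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;

public class TaskAttemptIDExample {
    public static void main(String[] args) {
        // Deprecated form: the boolean says whether this is a map task.
        TaskAttemptID legacy = new TaskAttemptID("jt", 1, true, 3, 0);

        // Replacement form: pass the task type explicitly.
        TaskAttemptID current = new TaskAttemptID("jt", 1, TaskType.MAP, 3, 0);

        System.out.println(legacy);                 // attempt_jt_0001_m_000003_0
        System.out.println(legacy.equals(current)); // true: both identify the same attempt
    }
}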
From source file:org.kiji.avro.mapreduce.TestAvroKeyOutputFormat.java
License:Apache License
/**
 * Tests that the record writer is constructed and returned correctly from the output format.
 */
private void testGetRecordWriter(Configuration conf, CodecFactory expectedCodec) throws IOException {
    // Configure a mock task attempt context.
    Job job = new Job(conf);
    job.getConfiguration().set("mapred.output.dir", mTempDir.getRoot().getPath());
    Schema writerSchema = Schema.create(Schema.Type.INT);
    AvroJob.setOutputKeySchema(job, writerSchema);
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(job.getConfiguration()).anyTimes();
    expect(context.getTaskAttemptID()).andReturn(new TaskAttemptID("id", 1, true, 1, 1)).anyTimes();

    // Create a mock record writer.
    @SuppressWarnings("unchecked")
    RecordWriter<AvroKey<Integer>, NullWritable> expectedRecordWriter = createMock(RecordWriter.class);
    AvroKeyOutputFormat.RecordWriterFactory recordWriterFactory = createMock(
        AvroKeyOutputFormat.RecordWriterFactory.class);

    // Expect the record writer factory to be called with appropriate parameters.
    Capture<CodecFactory> capturedCodecFactory = new Capture<CodecFactory>();
    expect(recordWriterFactory.create(eq(writerSchema),
        capture(capturedCodecFactory), // Capture for comparison later.
        anyObject(OutputStream.class))).andReturn(expectedRecordWriter);

    replay(context);
    replay(expectedRecordWriter);
    replay(recordWriterFactory);

    AvroKeyOutputFormat<Integer> outputFormat = new AvroKeyOutputFormat<Integer>(recordWriterFactory);
    RecordWriter<AvroKey<Integer>, NullWritable> recordWriter = outputFormat.getRecordWriter(context);

    // Make sure the expected codec was used.
    assertTrue(capturedCodecFactory.hasCaptured());
    assertEquals(expectedCodec.toString(), capturedCodecFactory.getValue().toString());

    verify(context);
    verify(expectedRecordWriter);
    verify(recordWriterFactory);

    assertNotNull(recordWriter);
    assertTrue(expectedRecordWriter == recordWriter);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeSameRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key", mDefaultLGId, "b", 1L, makeBytes(0, 1024));
    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue(), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00001")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMaxHFileSizeNewRow() throws Exception {
    final HFileKeyValue entry1 = entry("row-key1", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    final HFileKeyValue entry2 = entry("row-key2", mDefaultLGId, "b", 1L, makeBytes(0, 1024));
    mConf.setInt(KijiHFileOutputFormat.CONF_HREGION_MAX_FILESIZE, entry1.getLength() + 1);

    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);
    writer.write(entry1, NW);
    writer.write(entry2, NW);
    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertFalse(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), entry1.getKeyValue());
    assertHFileContent(new Path(defaultDir, "00001"), entry2.getKeyValue());
    assertFalse(fs.exists(new Path(defaultDir, "00002")));

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.output.TestKijiHFileOutputFormat.java
License:Apache License
@Test
public void testMultipleLayouts() throws Exception {
    final TaskAttemptID taskAttemptId = new TaskAttemptID("jobTracker:jtPort", 314, TaskType.MAP, 159, 2);
    final TaskAttemptContext context = new TaskAttemptContextImpl(mConf, taskAttemptId);
    final Path outputDir = mFormat.getDefaultWorkFile(context, KijiHFileOutputFormat.OUTPUT_EXTENSION);
    final FileSystem fs = outputDir.getFileSystem(mConf);

    final RecordWriter<HFileKeyValue, NullWritable> writer = mFormat.getRecordWriter(context);

    final HFileKeyValue defaultEntry = entry("row-key", mDefaultLGId, "a", 1L, makeBytes(0, 1024));
    writer.write(defaultEntry, NW);
    final HFileKeyValue inMemoryEntry = entry("row-key", mInMemoryLGId, "a", 1L, makeBytes(2, 1024));
    writer.write(inMemoryEntry, NW);

    try {
        // Test with an invalid locality group ID:
        final ColumnId invalid = new ColumnId(1234);
        assertFalse(mLayout.getLocalityGroupIdNameMap().containsKey(invalid));
        writer.write(entry("row-key", invalid, "a", 1L, HConstants.EMPTY_BYTE_ARRAY), NW);
        fail("Output format did not fail on unknown locality group IDs.");
    } catch (IllegalArgumentException iae) {
        LOG.info("Expected error: " + iae);
    }

    writer.close(context);

    final Path defaultDir = new Path(outputDir, mDefaultLGId.toString());
    assertTrue(fs.exists(defaultDir));

    final Path inMemoryDir = new Path(outputDir, mInMemoryLGId.toString());
    assertTrue(fs.exists(inMemoryDir));

    assertHFileContent(new Path(defaultDir, "00000"), defaultEntry.getKeyValue());
    assertHFileContent(new Path(inMemoryDir, "00000"), inMemoryEntry.getKeyValue());

    mFormat.getOutputCommitter(context).commitTask(context);
}
From source file:org.kiji.mapreduce.platform.CDH4MR1KijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In CDH4, use all these args directly.
    return new TaskAttemptID(jtIdentifier, jobId, type, taskId, id);
}
From source file:org.kiji.mapreduce.platform.Hadoop1xKijiMRBridge.java
License:Apache License
/** {@inheritDoc} */
@Override
public TaskAttemptID newTaskAttemptID(String jtIdentifier, int jobId, TaskType type, int taskId, int id) {
    // In Hadoop 1.0, TaskType isn't an arg to TaskAttemptID; instead, there's just a
    // boolean indicating whether it's a map task or not.
    boolean isMap = type == TaskType.MAP;
    return new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, id);
}
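Note how the two bridge implementations above confine the version difference to a single method: callers always pass a TaskType, and only the Hadoop 1.x bridge translates it into the isMap boolean that the older, now-deprecated constructor expects.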
From source file:org.msgpack.hadoop.mapreduce.input.TestMessagePackInputFormat.java
License:Apache License
void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(),
        new TaskAttemptID("123", 0, false, 1, 2));

    MessagePackInputFormat format = new MessagePackInputFormat();
    FileInputFormat.setInputPaths(job, workDir);

    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size());

    for (int j = 0; j < splits.size(); j++) {
        RecordReader<LongWritable, MessagePackWritable> reader =
            format.createRecordReader(splits.get(j), attemptContext);
        reader.initialize(splits.get(j), attemptContext);

        int count = 0;
        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();
                Value val = reader.getCurrentValue().get();
                assertEquals(count, val.asIntegerValue().getLong());
                count++;
            }
        } finally {
            reader.close();
        }
    }
}
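This example targets the Hadoop 1.x API, where TaskAttemptContext is a concrete class that can be instantiated directly; on Hadoop 2.x it became an interface, which is why the other examples on this page construct a TaskAttemptContextImpl instead.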
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if ((outputFields == null) || (outputFields.size() == 0)) {
        throw new RuntimeException("Schema is not defined");
    }
    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat =
            new FixedParquetOutputFormat(new PentahoParquetWriteSupport(outputFields));
        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {
            ParquetRecordWriter<RowMetaAndData> recordWriter =
                (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}
From source file:org.pentaho.hadoop.shim.common.format.parquet.PentahoParquetRecordWriterTest.java
License:Apache License
@Before
public void setUp() throws Exception {
    ConfigurationProxy conf = new ConfigurationProxy();
    conf.set("fs.defaultFS", "file:///");
    Job job = Job.getInstance(conf);

    tempFile = Files.createTempDirectory("parquet");
    org.apache.hadoop.fs.Path outputFile = new org.apache.hadoop.fs.Path(tempFile + PARQUET_FILE_NAME);
    ParquetOutputFormat.setOutputPath(job, outputFile.getParent());

    TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
    task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
}
From source file:org.pentaho.hadoop.shim.common.format.PentahoParquetOutputFormat.java
License:Apache License
@Override
public IPentahoRecordWriter createRecordWriter() throws Exception {
    if (outputFile == null) {
        throw new RuntimeException("Output file is not defined");
    }
    if (schema == null) {
        throw new RuntimeException("Schema is not defined");
    }
    return inClassloader(() -> {
        FixedParquetOutputFormat nativeParquetOutputFormat =
            new FixedParquetOutputFormat(new PentahoParquetWriteSupport(schema));
        TaskAttemptID taskAttemptID = new TaskAttemptID("qq", 111, TaskType.MAP, 11, 11);
        TaskAttemptContextImpl task = new TaskAttemptContextImpl(job.getConfiguration(), taskAttemptID);
        try {
            ParquetRecordWriter<RowMetaAndData> recordWriter =
                (ParquetRecordWriter<RowMetaAndData>) nativeParquetOutputFormat.getRecordWriter(task);
            return new PentahoParquetRecordWriter(recordWriter, task);
        } catch (IOException e) {
            throw new RuntimeException("Some error accessing parquet files", e);
        } catch (InterruptedException e) {
            // logging here
            e.printStackTrace();
            throw new RuntimeException("This should never happen " + e);
        }
    });
}