List of usage examples for org.apache.hadoop.mapred.TaskAttemptID: the TaskAttemptID() constructor
public TaskAttemptID()
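All of the examples below share one pattern: the no-argument constructor produces an empty, synthetic attempt id, which is then wrapped in a TaskAttemptContext so that input formats, record readers, and output committers can be exercised outside a live MapReduce job. A minimal sketch of that pattern follows; the class name TaskAttemptIdExample is illustrative, and TaskAttemptContextImpl here is assumed to be Hadoop 2.x's org.apache.hadoop.mapreduce.task implementation.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptIdExample {
    public static void main(String[] args) {
        // new TaskAttemptID() yields a placeholder id with empty/zero job, task,
        // and attempt parts; it carries no state from any real job.
        TaskAttemptID attemptId = new TaskAttemptID();

        // Wrapping it in a TaskAttemptContextImpl gives test code a context object
        // to hand to RecordReader.initialize(...) and similar APIs.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println("attempt id: " + context.getTaskAttemptID());
    }
}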
From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java
License: Apache License

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java
License: Apache License

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
            Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();

    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
            Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
            Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    StreamInputFormat format = new StreamInputFormat();

    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
                .createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }

    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
// TODO: make this generic. This should be extensible to test all the input
// formats we support. How do we do this?
public void testReadNation() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
            new RecordServiceInputFormat.RecordServiceRecordReader();
    try {
        RecordServiceConfig.setInputTable(config, null, "tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0),
                new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));

        int numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            ++numRows;
            if (numRows == 10) {
                assertEquals("INDONESIA", value.getColumnValue(1).toString());
            }
        }
        assertFalse(reader.nextKeyValue());
        assertFalse(reader.nextRecord());
        assertEquals(25, numRows);

        config.clear();
        RecordServiceConfig.setInputTable(config, "tpch", "nation", "n_comment");
        splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0),
                new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
        numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            if (numRows == 12) {
                assertEquals("ously. final, express gifts cajole a", value.getColumnValue(0).toString());
            }
            ++numRows;
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testReadAllTypes() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
            new RecordServiceInputFormat.RecordServiceRecordReader();

    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                    new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                if (((BooleanWritable) value.getColumnValue(0)).get()) {
                    assertEquals(0, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(1, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(2, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(3, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(4.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(5.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("hello", value.getColumnValue(7).toString());
                    assertEquals("vchar1", value.getColumnValue(8).toString());
                    assertEquals("char1", value.getColumnValue(9).toString());
                    assertEquals("2015-01-01", format.format(
                            ((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("3.1415920000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                } else {
                    assertEquals(6, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(7, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(8, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(9, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(10.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(11.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("world", value.getColumnValue(7).toString());
                    assertEquals("vchar2", value.getColumnValue(8).toString());
                    assertEquals("char2", value.getColumnValue(9).toString());
                    assertEquals("2016-01-01", format.format(
                            ((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("1234.5678900000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                }
                ++numRows;
            }
        }
        assertEquals(2, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testReadAllTypesNull() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
            new RecordServiceInputFormat.RecordServiceRecordReader();
    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes_null");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                    new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                for (int i = 0; i < value.getSchema().getNumColumns(); ++i) {
                    assertTrue(value.getColumnValue(i) == null);
                }
                ++numRows;
            }
        }
        assertEquals(1, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testCountStar() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    TextInputFormat.TextRecordReader reader = new TextInputFormat.TextRecordReader();
    try {
        RecordServiceConfig.setInputQuery(config, "select count(*) from tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                    new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                ++numRows;
            }
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.facebook.hiveio.output.CheckOutputSpecsTest.java
License: Apache License

@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    // point the output profile at a table that does not exist
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    // a synthetic task attempt context is enough to derive the job context
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    // expected to throw IOException because the table is missing
    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License: Apache License

@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    // the faulty metastore fails on every one of the allowed tries,
    // so checkOutputSpecs cannot recover and must propagate the failure
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License: Apache License

@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    // the metastore fails one time fewer than the retry budget,
    // so the final retry succeeds
    FaultyThriftHiveMetastore metastore =
            new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);
    outputFormat.checkOutputSpecs(jobContext);
    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}
From source file: org.apache.giraph.hive.Helpers.java
License: Apache License

public static void commitJob(GiraphConfiguration conf) throws IOException, InterruptedException {
    ImmutableClassesGiraphConfiguration iconf = new ImmutableClassesGiraphConfiguration(conf);
    WrappedVertexOutputFormat outputFormat = iconf.createWrappedVertexOutputFormat();

    // build a task/job context around a synthetic attempt id so the
    // output committer can be driven outside a running MapReduce job
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskContext);

    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputCommitter.commitJob(jobContext);
}