Example usage for org.apache.hadoop.mapred TaskAttemptID TaskAttemptID

Introduction

This page collects example usages of the no-argument constructor TaskAttemptID() of org.apache.hadoop.mapred.TaskAttemptID.

Prototype

public TaskAttemptID() 
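
The no-argument constructor creates an empty TaskAttemptID. In the usage examples below it only serves as a placeholder id for building a TaskAttemptContext in tests, for example so the id can later be populated by readFields(...). A minimal sketch of that pattern, assuming a Hadoop 2.x classpath (the class name TaskAttemptIdSketch is made up for illustration):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptIdSketch {
    public static void main(String[] args) {
        // Empty attempt id from the no-arg constructor.
        TaskAttemptID attemptId = new TaskAttemptID();

        // org.apache.hadoop.mapred.TaskAttemptID extends the new-API id type,
        // so it can be passed straight to the mapreduce TaskAttemptContextImpl,
        // just as the tests below do.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println(context.getTaskAttemptID());
    }
}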

Usage

From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java

License:Apache License
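
This CDAP test writes a stream event file, asks StreamInputFormat for splits, and uses a new TaskAttemptID to build the TaskAttemptContextImpl with which a StreamRecordReader reads the second split.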

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));
    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}

From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java

License:Apache License
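
Here the new TaskAttemptID backs the TaskAttemptContextImpl that is passed to both getSplits and createRecordReader while a stream is read through a TextRecordFormat body format specification.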

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);

    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
            Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();

    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
            Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
            Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    StreamInputFormat format = new StreamInputFormat();

    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
                .createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }

    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License
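
This RecordService test initializes a RecordServiceRecordReader with a TaskAttemptContextImpl built from a JobConf and a new TaskAttemptID, then scans the tpch.nation table twice with different column selections.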

@Test
// TODO: make this generic. This should be extensible to test all the input
// formats we support. How do we do this?
public void testReadNation() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader = new RecordServiceInputFormat.RecordServiceRecordReader();

    try {
        RecordServiceConfig.setInputTable(config, null, "tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0), new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));

        int numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            ++numRows;

            if (numRows == 10) {
                assertEquals("INDONESIA", value.getColumnValue(1).toString());
            }
        }
        assertFalse(reader.nextKeyValue());
        assertFalse(reader.nextRecord());
        assertEquals(25, numRows);

        config.clear();
        RecordServiceConfig.setInputTable(config, "tpch", "nation", "n_comment");
        splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0), new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
        numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            if (numRows == 12) {
                assertEquals("ously. final, express gifts cajole a", value.getColumnValue(0).toString());
            }
            ++numRows;
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License
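
Each split of the rs.alltypes table is read with a TaskAttemptContextImpl constructed around a fresh TaskAttemptID, and every column type is verified.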

@Test
public void testReadAllTypes() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader = new RecordServiceInputFormat.RecordServiceRecordReader();

    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split, new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                if (((BooleanWritable) value.getColumnValue(0)).get()) {
                    assertEquals(0, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(1, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(2, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(3, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(4.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(5.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("hello", value.getColumnValue(7).toString());
                    assertEquals("vchar1", value.getColumnValue(8).toString());
                    assertEquals("char1", value.getColumnValue(9).toString());
                    assertEquals("2015-01-01", format
                            .format(((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("3.1415920000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                } else {
                    assertEquals(6, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(7, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(8, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(9, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(10.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(11.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("world", value.getColumnValue(7).toString());
                    assertEquals("vchar2", value.getColumnValue(8).toString());
                    assertEquals("char2", value.getColumnValue(9).toString());
                    assertEquals("2016-01-01", format
                            .format(((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("1234.5678900000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                }
                ++numRows;
            }
        }
        assertEquals(2, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License
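
The same pattern reads the rs.alltypes_null table split by split and asserts that every column value is null.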

@Test
public void testReadAllTypesNull() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader = new RecordServiceInputFormat.RecordServiceRecordReader();

    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes_null");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split, new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                for (int i = 0; i < value.getSchema().getNumColumns(); ++i) {
                    assertTrue(value.getColumnValue(i) == null);
                }
                ++numRows;
            }
        }
        assertEquals(1, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.cloudera.recordservice.mapreduce.MapReduceTest.java

License:Apache License
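
A query against tpch.nation is planned through RecordService and read with a TextRecordReader, again initializing each split with a TaskAttemptContextImpl built around a new TaskAttemptID.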

@Test
public void testCountStar() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    TextInputFormat.TextRecordReader reader = new TextInputFormat.TextRecordReader();

    try {
        RecordServiceConfig.setInputQuery(config, "select count(*) from tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split, new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                ++numRows;
            }
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}

From source file:com.facebook.hiveio.output.CheckOutputSpecsTest.java

License:Apache License
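
A new TaskAttemptID is wrapped in a HackTaskAttemptContext to produce the JobContext against which HiveApiOutputFormat.checkOutputSpecs is expected to fail for a nonexistent table.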

@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputFormat.checkOutputSpecs(jobContext);
    fail();
}

From source file:com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java

License:Apache License
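
The same HackTaskAttemptContext pattern around a new TaskAttemptID is used to verify that checkOutputSpecs still fails once the faulty metastore has exhausted the retry budget.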

@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));
    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");
    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);
    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);

    fail();
}

From source file:com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java

License:Apache License
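
Here checkOutputSpecs succeeds because the metastore recovers within the retry budget; the JobContext is again derived from a TaskAttemptContext holding a new TaskAttemptID.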

@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    FaultyThriftHiveMetastore metastore = new FaultyThriftHiveMetastore(
            BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");
    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);
    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);

    outputFormat.checkOutputSpecs(jobContext);

    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}

From source file:org.apache.giraph.hive.Helpers.java

License:Apache License
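
This Giraph helper builds a HackTaskAttemptContext from a JobConf and a new TaskAttemptID to obtain the vertex output format's OutputCommitter and commit the job.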

public static void commitJob(GiraphConfiguration conf) throws IOException, InterruptedException {
    ImmutableClassesGiraphConfiguration iconf = new ImmutableClassesGiraphConfiguration(conf);
    WrappedVertexOutputFormat outputFormat = iconf.createWrappedVertexOutputFormat();
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskContext);
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputCommitter.commitJob(jobContext);
}