List of usage examples for org.apache.hadoop.mapred.TaskAttemptID: the TaskAttemptID() constructor
public TaskAttemptID()
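All of the examples below share one pattern: the no-argument constructor produces an empty, synthetic attempt id, which is then wrapped in a TaskAttemptContext so that input formats, record readers, and output committers can be exercised outside a live MapReduce job. A minimal sketch of that pattern follows; the class name TaskAttemptIdExample is illustrative, and TaskAttemptContextImpl here is assumed to be Hadoop 2.x's org.apache.hadoop.mapreduce.task implementation.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class TaskAttemptIdExample {
    public static void main(String[] args) {
        // new TaskAttemptID() yields a placeholder id with empty/zero job, task,
        // and attempt parts; it carries no state from any real job.
        TaskAttemptID attemptId = new TaskAttemptID();

        // Wrapping it in a TaskAttemptContextImpl gives test code a context object
        // to hand to RecordReader.initialize(...) and similar APIs.
        Configuration conf = new Configuration();
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, attemptId);
        System.out.println("attempt id: " + context.getTaskAttemptID());
    }
}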
From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java
License: Apache License

@Test
public void testStreamRecordReader() throws Exception {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    writer.append(StreamFileTestUtils.createEvent(1000, "test"));
    writer.flush();

    // get splits from the input format. Expect to get 2 splits,
    // one from 0 - some offset and one from offset - Long.MAX_VALUE.
    Configuration conf = new Configuration();
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    StreamInputFormat format = new StreamInputFormat();
    List<InputSplit> splits = format.getSplits(new JobContextImpl(new JobConf(conf), new JobID()));
    Assert.assertEquals(2, splits.size());

    // write another event so that the 2nd split has something to read
    writer.append(StreamFileTestUtils.createEvent(1001, "test"));
    writer.close();

    // create a record reader for the 2nd split
    StreamRecordReader<LongWritable, StreamEvent> recordReader = new StreamRecordReader<>(
            new IdentityStreamEventDecoder());
    recordReader.initialize(splits.get(1), context);

    // check that we read the 2nd stream event
    Assert.assertTrue(recordReader.nextKeyValue());
    StreamEvent output = recordReader.getCurrentValue();
    Assert.assertEquals(1001, output.getTimestamp());
    Assert.assertEquals("test", Bytes.toString(output.getBody()));

    // check that there is nothing more to read
    Assert.assertFalse(recordReader.nextKeyValue());
}
From source file: co.cask.cdap.data.stream.StreamInputFormatTest.java
License: Apache License

@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);
    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
            Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();

    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
            Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
            Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    StreamInputFormat format = new StreamInputFormat();

    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
                .createRecordReader(split, context);
        recordReader.initialize(split, context);
        while (recordReader.nextKeyValue()) {
            recordsRead.add(recordReader.getCurrentValue());
        }
    }

    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
// TODO: make this generic. This should be extensible to test all the input
// formats we support. How do we do this?
public void testReadNation() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
            new RecordServiceInputFormat.RecordServiceRecordReader();
    try {
        RecordServiceConfig.setInputTable(config, null, "tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0),
                new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));

        int numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            ++numRows;
            if (numRows == 10) {
                assertEquals("INDONESIA", value.getColumnValue(1).toString());
            }
        }
        assertFalse(reader.nextKeyValue());
        assertFalse(reader.nextRecord());
        assertEquals(25, numRows);

        config.clear();
        RecordServiceConfig.setInputTable(config, "tpch", "nation", "n_comment");
        splits = PlanUtil.getSplits(config, new Credentials()).splits;
        reader.initialize(splits.get(0),
                new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
        numRows = 0;
        while (reader.nextKeyValue()) {
            RecordServiceRecord value = reader.getCurrentValue();
            if (numRows == 12) {
                assertEquals("ously. final, express gifts cajole a", value.getColumnValue(0).toString());
            }
            ++numRows;
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testReadAllTypes() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
            new RecordServiceInputFormat.RecordServiceRecordReader();

    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    format.setTimeZone(TimeZone.getTimeZone("GMT"));

    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                    new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                if (((BooleanWritable) value.getColumnValue(0)).get()) {
                    assertEquals(0, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(1, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(2, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(3, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(4.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(5.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("hello", value.getColumnValue(7).toString());
                    assertEquals("vchar1", value.getColumnValue(8).toString());
                    assertEquals("char1", value.getColumnValue(9).toString());
                    assertEquals("2015-01-01", format.format(
                            ((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("3.1415920000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                } else {
                    assertEquals(6, ((ByteWritable) value.getColumnValue(1)).get());
                    assertEquals(7, ((ShortWritable) value.getColumnValue(2)).get());
                    assertEquals(8, ((IntWritable) value.getColumnValue(3)).get());
                    assertEquals(9, ((LongWritable) value.getColumnValue(4)).get());
                    assertEquals(10.0, ((FloatWritable) value.getColumnValue(5)).get(), 0.1);
                    assertEquals(11.0, ((DoubleWritable) value.getColumnValue(6)).get(), 0.1);
                    assertEquals("world", value.getColumnValue(7).toString());
                    assertEquals("vchar2", value.getColumnValue(8).toString());
                    assertEquals("char2", value.getColumnValue(9).toString());
                    assertEquals("2016-01-01", format.format(
                            ((TimestampNanosWritable) value.getColumnValue(10)).get().toTimeStamp()));
                    assertEquals(new BigDecimal("1234.5678900000"),
                            ((DecimalWritable) value.getColumnValue(11)).get().toBigDecimal());
                }
                ++numRows;
            }
        }
        assertEquals(2, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testReadAllTypesNull() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    RecordServiceInputFormat.RecordServiceRecordReader reader =
            new RecordServiceInputFormat.RecordServiceRecordReader();
    try {
        RecordServiceConfig.setInputTable(config, null, "rs.alltypes_null");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                    new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                RecordServiceRecord value = reader.getCurrentValue();
                for (int i = 0; i < value.getSchema().getNumColumns(); ++i) {
                    assertTrue(value.getColumnValue(i) == null);
                }
                ++numRows;
            }
        }
        assertEquals(1, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.cloudera.recordservice.mapreduce.MapReduceTest.java
License: Apache License

@Test
public void testCountStar() throws IOException, InterruptedException {
    Configuration config = new Configuration();
    TextInputFormat.TextRecordReader reader = new TextInputFormat.TextRecordReader();
    try {
        RecordServiceConfig.setInputQuery(config, "select count(*) from tpch.nation");
        List<InputSplit> splits = PlanUtil.getSplits(config, new Credentials()).splits;

        int numRows = 0;
        for (InputSplit split : splits) {
            reader.initialize(split,
                    new TaskAttemptContextImpl(new JobConf(config), new TaskAttemptID()));
            while (reader.nextKeyValue()) {
                ++numRows;
            }
        }
        assertEquals(25, numRows);
    } finally {
        reader.close();
    }
}
From source file: com.facebook.hiveio.output.CheckOutputSpecsTest.java
License: Apache License

@Test(expectedExceptions = IOException.class)
public void testTableDoesntExist() throws Exception {
    Configuration conf = new Configuration();

    // point the output profile at a table that does not exist
    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    // a synthetic task attempt context is enough to derive the job context
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    // expected to throw IOException because the table is missing
    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License: Apache License

@Test(expectedExceptions = IOException.class)
public void testExceptionAfterTooManyRetriesWhenTableDoesNotExist() throws Exception {
    // the faulty metastore fails on every one of the allowed tries,
    // so checkOutputSpecs cannot recover and must propagate the failure
    HiveMetastores.setTestClient(new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue()));

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("doesnt-exist");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    outputFormat.checkOutputSpecs(jobContext);
    fail();
}
From source file: com.facebook.hiveio.output.FaultyCheckOutputSpecsTest.java
License: Apache License

@Test
public void testRecoveredFromFailuresAfterRetries() throws Exception {
    // the metastore fails one time fewer than the retry budget,
    // so the final retry succeeds
    FaultyThriftHiveMetastore metastore =
            new FaultyThriftHiveMetastore(BackoffRetryTask.NUM_TRIES.getDefaultValue() - 1);

    Configuration conf = new Configuration();
    conf.setLong(BackoffRetryTask.INITIAL_RETRY_DELAY_MSEC.getKey(), 100);

    HiveOutputDescription outputDesc = new HiveOutputDescription();
    outputDesc.getTableDesc().setTableName("foo");

    OutputConf outputConf = new OutputConf(conf, PROFILE_ID);
    outputConf.writeOutputDescription(outputDesc);

    HiveApiOutputFormat outputFormat = new HiveApiOutputFormat();
    outputFormat.setMyProfileId(PROFILE_ID);

    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());

    HiveMetastores.setTestClient(metastore);
    outputFormat.checkOutputSpecs(jobContext);
    assertEquals(BackoffRetryTask.NUM_TRIES.getDefaultValue(), metastore.getNumCalls());
}
From source file: org.apache.giraph.hive.Helpers.java
License: Apache License

public static void commitJob(GiraphConfiguration conf) throws IOException, InterruptedException {
    ImmutableClassesGiraphConfiguration iconf = new ImmutableClassesGiraphConfiguration(conf);
    WrappedVertexOutputFormat outputFormat = iconf.createWrappedVertexOutputFormat();

    // build a task/job context around a synthetic attempt id so the
    // output committer can be driven outside a running MapReduce job
    JobConf jobConf = new JobConf(conf);
    TaskAttemptContext taskContext = new HackTaskAttemptContext(jobConf, new TaskAttemptID());
    OutputCommitter outputCommitter = outputFormat.getOutputCommitter(taskContext);

    JobContext jobContext = new HackJobContext(jobConf, taskContext.getJobID());
    outputCommitter.commitJob(jobContext);
}