List of usage examples for org.apache.hadoop.mapreduce RecordReader getProgress
public abstract float getProgress() throws IOException, InterruptedException;
From source file:co.cask.cdap.template.etl.common.ETLDBInputFormat.java
License:Apache License
@Override protected RecordReader createDBRecordReader(DBInputSplit split, Configuration conf) throws IOException { final RecordReader dbRecordReader = super.createDBRecordReader(split, conf); return new RecordReader() { @Override/*from w w w.j a va2 s . c o m*/ public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { dbRecordReader.initialize(split, context); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { return dbRecordReader.nextKeyValue(); } @Override public Object getCurrentKey() throws IOException, InterruptedException { return dbRecordReader.getCurrentKey(); } @Override public Object getCurrentValue() throws IOException, InterruptedException { return dbRecordReader.getCurrentValue(); } @Override public float getProgress() throws IOException, InterruptedException { return dbRecordReader.getProgress(); } @Override public void close() throws IOException { dbRecordReader.close(); try { DriverManager.deregisterDriver(driverShim); } catch (SQLException e) { throw new IOException(e); } } }; }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License:Apache License
@Test public void testOneFile() throws IOException, InterruptedException { Path dir = new Path(tempFolder.getRoot().getAbsolutePath()); CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>(); Path inputFile = new Path(dir, "file1.txt"); writeSequenceFile(inputFile);//from w w w.ja va2s.co m Job job = new Job(new JobConf()); FileInputFormat.addInputPath(job, inputFile); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size()); TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0); Configuration conf1 = new Configuration(); TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId); RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1); rr.initialize(splits.get(0), context1); assertTrue(rr.nextKeyValue()); assertEquals(key, rr.getCurrentKey()); assertEquals(value, rr.getCurrentValue()); assertFalse(rr.nextKeyValue()); assertEquals(1.0f, rr.getProgress(), 0.1); }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License:Apache License
@Test public void testTwoFiles() throws IOException, InterruptedException { Path dir = new Path(tempFolder.getRoot().getAbsolutePath()); CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>(); Path inputFile1 = new Path(dir, "file1.txt"); Path inputFile2 = new Path(dir, "file2.txt"); writeSequenceFile(inputFile1);//from ww w .ja v a 2 s. c o m writeSequenceFile(inputFile2); Job job = new Job(new JobConf()); FileInputFormat.addInputPath(job, inputFile1); FileInputFormat.addInputPath(job, inputFile2); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size()); TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0); Configuration conf1 = new Configuration(); TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId); RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1); rr.initialize(splits.get(0), context1); assertTrue(rr.nextKeyValue()); assertEquals(key, rr.getCurrentKey()); assertEquals(value, rr.getCurrentValue()); assertEquals(0.5f, rr.getProgress(), 0.1); assertTrue(rr.nextKeyValue()); assertEquals(key, rr.getCurrentKey()); assertEquals(value, rr.getCurrentValue()); assertFalse(rr.nextKeyValue()); assertEquals(1.0f, rr.getProgress(), 0.1); }
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java
License:Apache License
@Override public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext) throws IOException, InterruptedException { HCatInputFormat iF = new HCatInputFormat(); @SuppressWarnings("rawtypes") final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split, taskContext);/*www . j a v a 2 s .c om*/ return new RecordReader<ITuple, NullWritable>() { ITuple tuple = new Tuple(pangoolSchema); @Override public void close() throws IOException { hCatRecordReader.close(); } @Override public ITuple getCurrentKey() throws IOException, InterruptedException { HCatRecord record = hCatRecordReader.getCurrentValue(); // Perform conversion between HCatRecord and Tuple for (int pos = 0; pos < schema.size(); pos++) { tuple.set(pos, record.get(pos)); } return tuple; } @Override public NullWritable getCurrentValue() throws IOException, InterruptedException { return NullWritable.get(); } @Override public float getProgress() throws IOException, InterruptedException { return hCatRecordReader.getProgress(); } @Override public void initialize(InputSplit iS, TaskAttemptContext context) throws IOException, InterruptedException { hCatRecordReader.initialize(iS, context); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { return hCatRecordReader.nextKeyValue(); } }; }
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException { @SuppressWarnings("rawtypes") final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class); Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever) Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working) // Test version {//from w w w .java 2 s. c o m final Configuration config = new Configuration(false); config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10"); final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class); Mockito.when(mock_task.getConfiguration()).thenReturn(config); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); try { reader_under_test.initialize(null, mock_task); } catch (Exception e) { } // (the _delegate init call will fail out, that's fine) int ii = 0; for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) { assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0); } assertEquals("Should have stopped after 10 iterations", 10, ii); } // Normal version { final Configuration config = new Configuration(false); final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class); Mockito.when(mock_task.getConfiguration()).thenReturn(config); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); try { reader_under_test.initialize(null, mock_task); } catch (Exception e) { } // (the _delegate init call will fail out, that's fine) int ii = 0; for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) { assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0); } assertEquals("Should keep going for all 100 iterations", 100, ii); } }
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test public void test_Aleph2EsRecordReader_testCoverage() throws IOException, InterruptedException { @SuppressWarnings("rawtypes") final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() { public Void answer(InvocationOnMock invocation) { //String fn_name = invocation.getMethod().getName(); return null; }// w ww. java 2s . c o m }); Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0); Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); // void Functions we don't care about as long as they don't die reader_under_test.close(); // Functions that return something that we can pass along directly assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001); assertEquals(true, reader_under_test.nextKeyValue()); // Things that throw exceptions try { reader_under_test.createKey(); fail("should have thrown exception"); } catch (Exception e) { } try { reader_under_test.createValue(); fail("should have thrown exception"); } catch (Exception e) { } try { reader_under_test.setCurrentKey("str", "str"); fail("should have thrown exception"); } catch (Exception e) { } try { reader_under_test.setCurrentValue(null, "str"); fail("should have thrown exception"); } catch (Exception e) { } }
From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java
License:Apache License
@Test public void test_V1DocumentDbRecordReader_testCoverage() throws IOException, InterruptedException { @SuppressWarnings("rawtypes") final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() { public Void answer(InvocationOnMock invocation) { //String fn_name = invocation.getMethod().getName(); return null; }/* ww w.ja v a 2s . c o m*/ }); Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0); Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); @SuppressWarnings("unchecked") final V1DocumentDbRecordReader reader_under_test = new V1DocumentDbRecordReader(mock_shard_record_reader); // void Functions we don't care about as long as they don't die reader_under_test.close(); // Functions that return something that we can pass along directly assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001); assertEquals(true, reader_under_test.nextKeyValue()); // (basically just coverage testing) try { reader_under_test.initialize(null, null); //(this one doesn't exception for some reason) } catch (Exception e) { } }
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
@Test public void testGetSplits() throws Exception { DistCpOptions options = getOptions(); Configuration configuration = new Configuration(); configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps())); CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing( new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"), options);/*from ww w . j a v a 2 s . c om*/ JobID jobId = new JobID(); JobContext jobContext = mock(JobContext.class); when(jobContext.getConfiguration()).thenReturn(configuration); when(jobContext.getJobID()).thenReturn(jobId); DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>(); List<InputSplit> splits = inputFormat.getSplits(jobContext); int nFiles = 0; int taskId = 0; for (InputSplit split : splits) { TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0); final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class); when(taskAttemptContext.getConfiguration()).thenReturn(configuration); when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId); RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext); recordReader.initialize(splits.get(0), taskAttemptContext); float previousProgressValue = 0f; while (recordReader.nextKeyValue()) { FileStatus fileStatus = recordReader.getCurrentValue(); String source = fileStatus.getPath().toString(); System.out.println(source); Assert.assertTrue(expectedFilePaths.contains(source)); final float progress = recordReader.getProgress(); Assert.assertTrue(progress >= previousProgressValue); Assert.assertTrue(progress >= 0.0f); Assert.assertTrue(progress <= 1.0f); previousProgressValue = progress; ++nFiles; } Assert.assertTrue(recordReader.getProgress() == 1.0f); ++taskId; } Assert.assertEquals(expectedFilePaths.size(), nFiles); }
From source file:org.apache.avro.mapreduce.TestAvroKeyRecordReader.java
License:Apache License
/** * Verifies that avro records can be read and progress is reported correctly. *///from w ww .jav a2 s . c o m @Test public void testReadRecords() throws IOException, InterruptedException { // Create the test avro file input with two records: // 1. "first" // 2. "second" final SeekableInput avroFileInput = new SeekableFileInput( AvroFiles.createFile(new File(mTempDir.getRoot(), "myStringfile.avro"), Schema.create(Schema.Type.STRING), "first", "second")); // Create the record reader. Schema readerSchema = Schema.create(Schema.Type.STRING); RecordReader<AvroKey<CharSequence>, NullWritable> recordReader = new AvroKeyRecordReader<CharSequence>( readerSchema) { @Override protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException { return avroFileInput; } }; // Set up the job configuration. Configuration conf = new Configuration(); // Create a mock input split for this record reader. FileSplit inputSplit = createMock(FileSplit.class); expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes(); expect(inputSplit.getStart()).andReturn(0L).anyTimes(); expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes(); // Create a mock task attempt context for this record reader. TaskAttemptContext context = createMock(TaskAttemptContext.class); expect(context.getConfiguration()).andReturn(conf).anyTimes(); // Initialize the record reader. replay(inputSplit); replay(context); recordReader.initialize(inputSplit, context); assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f); // Some variables to hold the records. AvroKey<CharSequence> key; NullWritable value; // Read the first record. assertTrue("Expected at least one record", recordReader.nextKeyValue()); key = recordReader.getCurrentKey(); value = recordReader.getCurrentValue(); assertNotNull("First record had null key", key); assertNotNull("First record had null value", value); CharSequence firstString = key.datum(); assertEquals("first", firstString.toString()); assertTrue("getCurrentKey() returned different keys for the same record", key == recordReader.getCurrentKey()); assertTrue("getCurrentValue() returned different values for the same record", value == recordReader.getCurrentValue()); // Read the second record. assertTrue("Expected to read a second record", recordReader.nextKeyValue()); key = recordReader.getCurrentKey(); value = recordReader.getCurrentValue(); assertNotNull("Second record had null key", key); assertNotNull("Second record had null value", value); CharSequence secondString = key.datum(); assertEquals("second", secondString.toString()); assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f, recordReader.getProgress(), 0.0f); // There should be no more records. assertFalse("Expected only 2 records", recordReader.nextKeyValue()); // Close the record reader. recordReader.close(); // Verify the expected calls on the mocks. verify(inputSplit); verify(context); }
From source file:org.apache.avro.mapreduce.TestAvroKeyValueRecordReader.java
License:Apache License
/** * Verifies that avro records can be read and progress is reported correctly. *///from w w w. j av a 2 s .com @Test public void testReadRecords() throws IOException, InterruptedException { // Create the test avro file input with two records: // 1. <"firstkey", 1> // 2. <"second", 2> Schema keyValueSchema = AvroKeyValue.getSchema(Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)); AvroKeyValue<CharSequence, Integer> firstInputRecord = new AvroKeyValue<CharSequence, Integer>( new GenericData.Record(keyValueSchema)); firstInputRecord.setKey("first"); firstInputRecord.setValue(1); AvroKeyValue<CharSequence, Integer> secondInputRecord = new AvroKeyValue<CharSequence, Integer>( new GenericData.Record(keyValueSchema)); secondInputRecord.setKey("second"); secondInputRecord.setValue(2); final SeekableInput avroFileInput = new SeekableFileInput( AvroFiles.createFile(new File(mTempDir.getRoot(), "myInputFile.avro"), keyValueSchema, firstInputRecord.get(), secondInputRecord.get())); // Create the record reader over the avro input file. RecordReader<AvroKey<CharSequence>, AvroValue<Integer>> recordReader = new AvroKeyValueRecordReader<CharSequence, Integer>( Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)) { @Override protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException { return avroFileInput; } }; // Set up the job configuration. Configuration conf = new Configuration(); // Create a mock input split for this record reader. FileSplit inputSplit = createMock(FileSplit.class); expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes(); expect(inputSplit.getStart()).andReturn(0L).anyTimes(); expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes(); // Create a mock task attempt context for this record reader. TaskAttemptContext context = createMock(TaskAttemptContext.class); expect(context.getConfiguration()).andReturn(conf).anyTimes(); // Initialize the record reader. replay(inputSplit); replay(context); recordReader.initialize(inputSplit, context); assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f); // Some variables to hold the records. AvroKey<CharSequence> key; AvroValue<Integer> value; // Read the first record. assertTrue("Expected at least one record", recordReader.nextKeyValue()); key = recordReader.getCurrentKey(); value = recordReader.getCurrentValue(); assertNotNull("First record had null key", key); assertNotNull("First record had null value", value); assertEquals("first", key.datum().toString()); assertEquals(1, value.datum().intValue()); assertTrue("getCurrentKey() returned different keys for the same record", key == recordReader.getCurrentKey()); assertTrue("getCurrentValue() returned different values for the same record", value == recordReader.getCurrentValue()); // Read the second record. assertTrue("Expected to read a second record", recordReader.nextKeyValue()); key = recordReader.getCurrentKey(); value = recordReader.getCurrentValue(); assertNotNull("Second record had null key", key); assertNotNull("Second record had null value", value); assertEquals("second", key.datum().toString()); assertEquals(2, value.datum().intValue()); assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f, recordReader.getProgress(), 0.0f); // There should be no more records. assertFalse("Expected only 2 records", recordReader.nextKeyValue()); // Close the record reader. recordReader.close(); // Verify the expected calls on the mocks. verify(inputSplit); verify(context); }