Example usage for org.apache.hadoop.mapreduce RecordReader getProgress

Introduction

This page collects example usages of the getProgress method of org.apache.hadoop.mapreduce.RecordReader.

Prototype

public abstract float getProgress() throws IOException, InterruptedException;

Document

The current progress of the record reader through its data, returned as a number between 0.0 and 1.0 that is the fraction of the data read.

Usage
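
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the sources below; it assumes the Hadoop 2.x mapreduce API, and the input path is a placeholder) that polls getProgress while draining the first split of a text file:

import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class GetProgressDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path("input/sample.txt")); // placeholder path

        TextInputFormat inputFormat = new TextInputFormat();
        List<InputSplit> splits = inputFormat.getSplits(job);

        TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID());
        RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(splits.get(0), context);
        reader.initialize(splits.get(0), context);

        while (reader.nextKeyValue()) {
            // getProgress() reports the fraction of the split consumed so far, in [0.0, 1.0]
            System.out.printf("progress=%.2f, offset=%d%n", reader.getProgress(),
                    reader.getCurrentKey().get());
        }
        reader.close();
    }
}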

From source file:co.cask.cdap.template.etl.common.ETLDBInputFormat.java

License:Apache License

@Override
protected RecordReader createDBRecordReader(DBInputSplit split, Configuration conf) throws IOException {
    final RecordReader dbRecordReader = super.createDBRecordReader(split, conf);
    return new RecordReader() {
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            dbRecordReader.initialize(split, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return dbRecordReader.nextKeyValue();
        }

        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentKey();
        }

        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return dbRecordReader.getProgress();
        }

        @Override
        public void close() throws IOException {
            dbRecordReader.close();
            try {
                DriverManager.deregisterDriver(driverShim);
            } catch (SQLException e) {
                throw new IOException(e);
            }
        }
    };
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java

License:Apache License

@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Job job = new Job(new JobConf());

    FileInputFormat.addInputPath(job, inputFile);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);

    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);
    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java

License:Apache License

@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Job job = new Job(new JobConf());

    FileInputFormat.addInputPath(job, inputFile1);
    FileInputFormat.addInputPath(job, inputFile2);

    List<InputSplit> splits = inputFormat.getSplits(job);
    assertEquals(1, splits.size());

    TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0);
    Configuration conf1 = new Configuration();
    TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId);

    RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1);
    rr.initialize(splits.get(0), context1);
    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.nextKeyValue());

    assertEquals(key, rr.getCurrentKey());
    assertEquals(value, rr.getCurrentValue());

    assertFalse(rr.nextKeyValue());
    assertEquals(1.0f, rr.getProgress(), 0.1);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java

License:Apache License

@Override
public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {

    HCatInputFormat iF = new HCatInputFormat();

    @SuppressWarnings("rawtypes")
    final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split,
            taskContext);

    return new RecordReader<ITuple, NullWritable>() {

        ITuple tuple = new Tuple(pangoolSchema);

        @Override
        public void close() throws IOException {
            hCatRecordReader.close();
        }

        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            HCatRecord record = hCatRecordReader.getCurrentValue();
            // Perform conversion between HCatRecord and Tuple
            for (int pos = 0; pos < schema.size(); pos++) {
                tuple.set(pos, record.get(pos));
            }
            return tuple;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return hCatRecordReader.getProgress();
        }

        @Override
        public void initialize(InputSplit iS, TaskAttemptContext context)
                throws IOException, InterruptedException {
            hCatRecordReader.initialize(iS, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return hCatRecordReader.nextKeyValue();
        }
    };
}

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever)
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working)

    // Test version
    {
        final Configuration config = new Configuration(false);
        config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10");
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0);
        }
        assertEquals("Should have stopped after 10 iterations", 10, ii);
    }
    // Normal version
    {
        final Configuration config = new Configuration(false);
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0);
        }
        assertEquals("Should keep going for all 100 iterations", 100, ii);
    }
}

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_testCoverage() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

    // void Functions we don't care about as long as they don't die

    reader_under_test.close();

    // Functions that return something that we can pass along directly

    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // Things that throw exceptions

    try {
        reader_under_test.createKey();
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.createValue();
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.setCurrentKey("str", "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.setCurrentValue(null, "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }
}

From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java

License:Apache License

@Test
public void test_V1DocumentDbRecordReader_testCoverage() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    @SuppressWarnings("unchecked")
    final V1DocumentDbRecordReader reader_under_test = new V1DocumentDbRecordReader(mock_shard_record_reader);

    // void Functions we don't care about as long as they don't die

    reader_under_test.close();

    // Functions that return something that we can pass along directly

    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // (basically just coverage testing)
    try {
        reader_under_test.initialize(null, null);
        // (this one does not throw an exception, for some reason)
    } catch (Exception e) {
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java

License:Apache License

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}

From source file:org.apache.avro.mapreduce.TestAvroKeyRecordReader.java

License:Apache License

/**
 * Verifies that avro records can be read and progress is reported correctly.
 */
@Test
public void testReadRecords() throws IOException, InterruptedException {
    // Create the test avro file input with two records:
    //   1. "first"
    //   2. "second"
    final SeekableInput avroFileInput = new SeekableFileInput(
            AvroFiles.createFile(new File(mTempDir.getRoot(), "myStringfile.avro"),
                    Schema.create(Schema.Type.STRING), "first", "second"));

    // Create the record reader.
    Schema readerSchema = Schema.create(Schema.Type.STRING);
    RecordReader<AvroKey<CharSequence>, NullWritable> recordReader = new AvroKeyRecordReader<CharSequence>(
            readerSchema) {
        @Override
        protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException {
            return avroFileInput;
        }
    };

    // Set up the job configuration.
    Configuration conf = new Configuration();

    // Create a mock input split for this record reader.
    FileSplit inputSplit = createMock(FileSplit.class);
    expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
    expect(inputSplit.getStart()).andReturn(0L).anyTimes();
    expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

    // Create a mock task attempt context for this record reader.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    // Initialize the record reader.
    replay(inputSplit);
    replay(context);
    recordReader.initialize(inputSplit, context);

    assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f);

    // Some variables to hold the records.
    AvroKey<CharSequence> key;
    NullWritable value;

    // Read the first record.
    assertTrue("Expected at least one record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("First record had null key", key);
    assertNotNull("First record had null value", value);

    CharSequence firstString = key.datum();
    assertEquals("first", firstString.toString());

    assertTrue("getCurrentKey() returned different keys for the same record",
            key == recordReader.getCurrentKey());
    assertTrue("getCurrentValue() returned different values for the same record",
            value == recordReader.getCurrentValue());

    // Read the second record.
    assertTrue("Expected to read a second record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("Second record had null key", key);
    assertNotNull("Second record had null value", value);

    CharSequence secondString = key.datum();
    assertEquals("second", secondString.toString());

    assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f, recordReader.getProgress(),
            0.0f);

    // There should be no more records.
    assertFalse("Expected only 2 records", recordReader.nextKeyValue());

    // Close the record reader.
    recordReader.close();

    // Verify the expected calls on the mocks.
    verify(inputSplit);
    verify(context);
}

From source file:org.apache.avro.mapreduce.TestAvroKeyValueRecordReader.java

License:Apache License

/**
 * Verifies that avro records can be read and progress is reported correctly.
 */
@Test
public void testReadRecords() throws IOException, InterruptedException {
    // Create the test avro file input with two records:
    //   1. <"firstkey", 1>
    //   2. <"second", 2>
    Schema keyValueSchema = AvroKeyValue.getSchema(Schema.create(Schema.Type.STRING),
            Schema.create(Schema.Type.INT));

    AvroKeyValue<CharSequence, Integer> firstInputRecord = new AvroKeyValue<CharSequence, Integer>(
            new GenericData.Record(keyValueSchema));
    firstInputRecord.setKey("first");
    firstInputRecord.setValue(1);

    AvroKeyValue<CharSequence, Integer> secondInputRecord = new AvroKeyValue<CharSequence, Integer>(
            new GenericData.Record(keyValueSchema));
    secondInputRecord.setKey("second");
    secondInputRecord.setValue(2);

    final SeekableInput avroFileInput = new SeekableFileInput(
            AvroFiles.createFile(new File(mTempDir.getRoot(), "myInputFile.avro"), keyValueSchema,
                    firstInputRecord.get(), secondInputRecord.get()));

    // Create the record reader over the avro input file.
    RecordReader<AvroKey<CharSequence>, AvroValue<Integer>> recordReader = new AvroKeyValueRecordReader<CharSequence, Integer>(
            Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)) {
        @Override
        protected SeekableInput createSeekableInput(Configuration conf, Path path) throws IOException {
            return avroFileInput;
        }
    };

    // Set up the job configuration.
    Configuration conf = new Configuration();

    // Create a mock input split for this record reader.
    FileSplit inputSplit = createMock(FileSplit.class);
    expect(inputSplit.getPath()).andReturn(new Path("/path/to/an/avro/file")).anyTimes();
    expect(inputSplit.getStart()).andReturn(0L).anyTimes();
    expect(inputSplit.getLength()).andReturn(avroFileInput.length()).anyTimes();

    // Create a mock task attempt context for this record reader.
    TaskAttemptContext context = createMock(TaskAttemptContext.class);
    expect(context.getConfiguration()).andReturn(conf).anyTimes();

    // Initialize the record reader.
    replay(inputSplit);
    replay(context);
    recordReader.initialize(inputSplit, context);

    assertEquals("Progress should be zero before any records are read", 0.0f, recordReader.getProgress(), 0.0f);

    // Some variables to hold the records.
    AvroKey<CharSequence> key;
    AvroValue<Integer> value;

    // Read the first record.
    assertTrue("Expected at least one record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("First record had null key", key);
    assertNotNull("First record had null value", value);

    assertEquals("first", key.datum().toString());
    assertEquals(1, value.datum().intValue());

    assertTrue("getCurrentKey() returned different keys for the same record",
            key == recordReader.getCurrentKey());
    assertTrue("getCurrentValue() returned different values for the same record",
            value == recordReader.getCurrentValue());

    // Read the second record.
    assertTrue("Expected to read a second record", recordReader.nextKeyValue());
    key = recordReader.getCurrentKey();
    value = recordReader.getCurrentValue();

    assertNotNull("Second record had null key", key);
    assertNotNull("Second record had null value", value);

    assertEquals("second", key.datum().toString());
    assertEquals(2, value.datum().intValue());

    assertEquals("Progress should be complete (2 out of 2 records processed)", 1.0f, recordReader.getProgress(),
            0.0f);

    // There should be no more records.
    assertFalse("Expected only 2 records", recordReader.nextKeyValue());

    // Close the record reader.
    recordReader.close();

    // Verify the expected calls on the mocks.
    verify(inputSplit);
    verify(context);
}