Example usage for org.apache.hadoop.mapreduce InputFormat createRecordReader

Introduction

This page lists real-world usage examples for org.apache.hadoop.mapreduce.InputFormat#createRecordReader, collected from open-source projects.

Prototype

public abstract RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException;
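
On the implementing side, a subclass typically just wires up a RecordReader for its split type. Below is a minimal sketch, assuming delegation to the stock LineRecordReader in the style of TextInputFormat; the class name MyTextInputFormat is hypothetical.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class MyTextInputFormat extends FileInputFormat<LongWritable, Text> {
    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        // Return an uninitialized reader; the framework (or the caller, in tests)
        // invokes initialize(split, context) before the first nextKeyValue().
        return new LineRecordReader();
    }
}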

Document

Create a record reader for a given split. The framework will call RecordReader#initialize(InputSplit, TaskAttemptContext) before the split is used.
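
Before the project examples below, here is a minimal end-to-end sketch of the call sequence they all follow: get splits, build a TaskAttemptContext, create and initialize the reader, then iterate. It is a sketch only; TextInputFormat and the /tmp/input path are placeholder assumptions, and Hadoop 2 APIs are assumed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class CreateRecordReaderExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path("/tmp/input")); // placeholder input path

        InputFormat<LongWritable, Text> inputFormat = new TextInputFormat();
        for (InputSplit split : inputFormat.getSplits(job)) {
            // Outside a running job, the context must be built by hand, as the tests below do.
            TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
            reader.initialize(split, context); // required before the first nextKeyValue()
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
            reader.close();
        }
    }
}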

Usage

From source file:mvm.rya.accumulo.pig.AccumuloStorageTest.java

License:Apache License

protected List<AccumuloStorage> createAccumuloStorages(String location)
        throws IOException, InterruptedException {
    List<AccumuloStorage> accumuloStorages = new ArrayList<AccumuloStorage>();
    AccumuloStorage storage = new AccumuloStorage();
    InputFormat inputFormat = storage.getInputFormat();
    Job job = new Job(new Configuration());
    storage.setLocation(location, job);
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertNotNull(splits);

    for (InputSplit inputSplit : splits) {
        storage = new AccumuloStorage();
        job = new Job(new Configuration());
        storage.setLocation(location, job);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jtid", 0, false, 0, 0));
        RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
        recordReader.initialize(inputSplit, taskAttemptContext);

        storage.prepareToRead(recordReader, null);
        accumuloStorages.add(storage);
    }
    return accumuloStorages;
}

From source file:mvm.rya.accumulo.pig.StatementPatternStorageTest.java

License:Apache License

protected List<StatementPatternStorage> createStorages(String location)
        throws IOException, InterruptedException {
    List<StatementPatternStorage> storages = new ArrayList<StatementPatternStorage>();
    StatementPatternStorage storage = new StatementPatternStorage();
    InputFormat inputFormat = storage.getInputFormat();
    Job job = new Job(new Configuration());
    storage.setLocation(location, job);
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertNotNull(splits);

    for (InputSplit inputSplit : splits) {
        storage = new StatementPatternStorage();
        job = new Job(new Configuration());
        storage.setLocation(location, job);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jtid", 0, false, 0, 0));
        RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
        recordReader.initialize(inputSplit, taskAttemptContext);

        storage.prepareToRead(recordReader, null);
        storages.add(storage);
    }
    return storages;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object
 * creation fails.
 */
@Test
public void testReadIfCreateRecordReaderFails() throws Exception {
    thrown.expect(Exception.class);
    thrown.expectMessage("Exception in creating RecordReader");
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class)))
            .thenThrow(new IOException("Exception in creating RecordReader"));
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates behavior of HadoopInputFormatSource if
 * {@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} of InputFormat returns
 * null.
 */
@Test
public void testReadWithNullCreateRecordReader() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    thrown.expect(IOException.class);
    thrown.expectMessage(String.format("Null RecordReader object returned by %s", mockInputFormat.getClass()));
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class))).thenReturn(null);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates behavior of the
 * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if the
 * InputFormat's {@link InputFormat#getSplits(JobContext)} returns an InputSplit list with zero
 * records.
 */
@Test
public void testReadersStartWhenZeroRecords() throws Exception {

    InputFormat mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any())).thenReturn(mockReader);
    Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit(mockInputSplit));
    boundedSource.setInputFormatObj(mockInputFormat);
    BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
    assertFalse(reader.start());
    assertEquals(Double.valueOf(1), reader.getFractionConsumed());
    reader.close();
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates getFractionConsumed() when a bad progress value is returned by the
 * InputFormat's record reader.
 */
@Test
public void testGetFractionConsumedForBadProgressValue() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any())).thenReturn(mockReader);
    Mockito.when(mockReader.nextKeyValue()).thenReturn(true);
    // Set to a bad value, outside the range 0 to 1
    Mockito.when(mockReader.getProgress()).thenReturn(2.0F);
    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit(mockInputSplit));
    boundedSource.setInputFormatObj(mockInputFormat);
    BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
    assertEquals(Double.valueOf(0), reader.getFractionConsumed());
    boolean start = reader.start();
    assertTrue(start);
    if (start) {
        boolean advance = reader.advance();
        assertEquals(null, reader.getFractionConsumed());
        assertTrue(advance);
        if (advance) {
            advance = reader.advance();
            assertEquals(null, reader.getFractionConsumed());
        }
    }
    // Validate that getFractionConsumed() returns null after a few reads, since getProgress()
    // returns the invalid value 2.0, which is outside the range 0 to 1.
    assertEquals(null, reader.getFractionConsumed());
    reader.close();
}

From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java

License:Apache License

/**
 * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object
 * creation fails.
 */
@Test
public void testReadIfCreateRecordReaderFails() throws Exception {
    thrown.expect(Exception.class);
    thrown.expectMessage("Exception in creating RecordReader");
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class)))
            .thenThrow(new IOException("Exception in creating RecordReader"));
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java

License:Apache License

/**
 * This test validates behavior of HadoopInputFormatSource if
 * {@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} of InputFormat returns
 * null.
 */
@Test
public void testReadWithNullCreateRecordReader() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    thrown.expect(IOException.class);
    thrown.expectMessage(String.format("Null RecordReader object returned by %s", mockInputFormat.getClass()));
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class))).thenReturn(null);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java

License:Apache License

/**
 * This test validates behavior of the
 * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if the
 * InputFormat's {@link InputFormat#getSplits(JobContext)} returns an InputSplit list with zero
 * records.
 */
@Test
public void testReadersStartWhenZeroRecords() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class))).thenReturn(mockReader);
    Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit(mockInputSplit));
    BoundedReader<KV<Text, Employee>> boundedReader = boundedSource.createReader(p.getOptions());
    assertEquals(false, boundedReader.start());
    assertEquals(Double.valueOf(1), boundedReader.getFractionConsumed());
}

From source file:org.apache.crunch.impl.mr.run.CrunchRecordReader.java

License:Apache License

private boolean initNextRecordReader() throws IOException, InterruptedException {
    if (combineFileSplit != null) {
        if (curReader != null) {
            curReader.close();
            curReader = null;
            if (idx > 0) {
                progress += combineFileSplit.getLength(idx - 1);
            }
        }
        // if all chunks have been processed, nothing more to do.
        if (idx == combineFileSplit.getNumPaths()) {
            return false;
        }
    } else if (idx > 0) {
        return false;
    }

    idx++;
    Configuration conf = crunchSplit.getConf();
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(crunchSplit.getInputFormatClass(), conf);
    this.curReader = inputFormat.createRecordReader(getDelegateSplit(),
            TaskAttemptContextFactory.create(conf, context.getTaskAttemptID()));
    return true;
}