Example usage for org.apache.hadoop.mapreduce InputFormat createRecordReader

Introduction

This page lists real-world usage examples for org.apache.hadoop.mapreduce.InputFormat#createRecordReader, collected from open-source projects.

Prototype

public abstract RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException;
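
On the implementing side, a subclass typically just wires up a RecordReader for its split type. Below is a minimal sketch, assuming delegation to the stock LineRecordReader in the style of TextInputFormat; the class name MyTextInputFormat is hypothetical.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

public class MyTextInputFormat extends FileInputFormat<LongWritable, Text> {
    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        // Return an uninitialized reader; the framework (or the caller, in tests)
        // invokes initialize(split, context) before the first nextKeyValue().
        return new LineRecordReader();
    }
}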

Document

Create a record reader for a given split. The framework will call RecordReader#initialize(InputSplit, TaskAttemptContext) before the split is used.
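
Before the project examples below, here is a minimal end-to-end sketch of the call sequence they all follow: get splits, build a TaskAttemptContext, create and initialize the reader, then iterate. It is a sketch only; TextInputFormat and the /tmp/input path are placeholder assumptions, and Hadoop 2 APIs are assumed.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class CreateRecordReaderExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration());
        FileInputFormat.addInputPath(job, new Path("/tmp/input")); // placeholder input path

        InputFormat<LongWritable, Text> inputFormat = new TextInputFormat();
        for (InputSplit split : inputFormat.getSplits(job)) {
            // Outside a running job, the context must be built by hand, as the tests below do.
            TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                    new TaskAttemptID());
            RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
            reader.initialize(split, context); // required before the first nextKeyValue()
            while (reader.nextKeyValue()) {
                System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
            }
            reader.close();
        }
    }
}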

Usage

From source file:mvm.rya.accumulo.pig.AccumuloStorageTest.java

License:Apache License

protected List<AccumuloStorage> createAccumuloStorages(String location)
        throws IOException, InterruptedException {
    List<AccumuloStorage> accumuloStorages = new ArrayList<AccumuloStorage>();
    AccumuloStorage storage = new AccumuloStorage();
    InputFormat inputFormat = storage.getInputFormat();
    Job job = new Job(new Configuration());
    storage.setLocation(location, job);
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertNotNull(splits);

    for (InputSplit inputSplit : splits) {
        storage = new AccumuloStorage();
        job = new Job(new Configuration());
        storage.setLocation(location, job);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jtid", 0, false, 0, 0));
        RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
        recordReader.initialize(inputSplit, taskAttemptContext);

        storage.prepareToRead(recordReader, null);
        accumuloStorages.add(storage);
    }
    return accumuloStorages;
}

From source file:mvm.rya.accumulo.pig.StatementPatternStorageTest.java

License:Apache License

protected List<StatementPatternStorage> createStorages(String location)
        throws IOException, InterruptedException {
    List<StatementPatternStorage> storages = new ArrayList<StatementPatternStorage>();
    StatementPatternStorage storage = new StatementPatternStorage();
    InputFormat inputFormat = storage.getInputFormat();
    Job job = new Job(new Configuration());
    storage.setLocation(location, job);
    List<InputSplit> splits = inputFormat.getSplits(job);
    assertNotNull(splits);

    for (InputSplit inputSplit : splits) {
        storage = new StatementPatternStorage();
        job = new Job(new Configuration());
        storage.setLocation(location, job);
        TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
                new TaskAttemptID("jtid", 0, false, 0, 0));
        RecordReader recordReader = inputFormat.createRecordReader(inputSplit, taskAttemptContext);
        recordReader.initialize(inputSplit, taskAttemptContext);

        storage.prepareToRead(recordReader, null);
        storages.add(storage);
    }
    return storages;
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object
 * creation fails.
 */
@Test
public void testReadIfCreateRecordReaderFails() throws Exception {
    thrown.expect(Exception.class);
    thrown.expectMessage("Exception in creating RecordReader");
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class)))
            .thenThrow(new IOException("Exception in creating RecordReader"));
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates behavior of HadoopInputFormatSource if
 * {@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} of InputFormat returns
 * null.
 */
@Test
public void testReadWithNullCreateRecordReader() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    thrown.expect(IOException.class);
    thrown.expectMessage(String.format("Null RecordReader object returned by %s", mockInputFormat.getClass()));
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class))).thenReturn(null);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates behavior of the
 * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if the
 * InputFormat's {@link InputFormat#getSplits(JobContext)} returns an InputSplit list with zero
 * records.
 */
@Test
public void testReadersStartWhenZeroRecords() throws Exception {

    InputFormat mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any())).thenReturn(mockReader);
    Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit(mockInputSplit));
    boundedSource.setInputFormatObj(mockInputFormat);
    BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
    assertFalse(reader.start());
    assertEquals(Double.valueOf(1), reader.getFractionConsumed());
    reader.close();
}

From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java

License:Apache License

/**
 * This test validates getFractionConsumed() when a bad progress value is returned by the
 * InputFormat's record reader.
 */
@Test
public void testGetFractionConsumedForBadProgressValue() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any())).thenReturn(mockReader);
    Mockito.when(mockReader.nextKeyValue()).thenReturn(true);
    // Set to a bad value, outside the range 0 to 1
    Mockito.when(mockReader.getProgress()).thenReturn(2.0F);
    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf,
            WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit(mockInputSplit));
    boundedSource.setInputFormatObj(mockInputFormat);
    BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions());
    assertEquals(Double.valueOf(0), reader.getFractionConsumed());
    boolean start = reader.start();
    assertTrue(start);
    if (start) {
        boolean advance = reader.advance();
        assertEquals(null, reader.getFractionConsumed());
        assertTrue(advance);
        if (advance) {
            advance = reader.advance();
            assertEquals(null, reader.getFractionConsumed());
        }
    }
    // Validate that getFractionConsumed() returns null after a few reads, since getProgress()
    // returns the invalid value 2.0, which is outside the range 0 to 1.
    assertEquals(null, reader.getFractionConsumed());
    reader.close();
}

From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java

License:Apache License

/**
 * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object
 * creation fails.
 */
@Test
public void testReadIfCreateRecordReaderFails() throws Exception {
    thrown.expect(Exception.class);
    thrown.expectMessage("Exception in creating RecordReader");
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class)))
            .thenThrow(new IOException("Exception in creating RecordReader"));
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java

License:Apache License

/**
 * This test validates behavior of HadoopInputFormatSource if
 * {@link InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} of InputFormat returns
 * null.
 */
@Test
public void testReadWithNullCreateRecordReader() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    thrown.expect(IOException.class);
    thrown.expectMessage(String.format("Null RecordReader object returned by %s", mockInputFormat.getClass()));
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class))).thenReturn(null);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit());
    boundedSource.setInputFormatObj(mockInputFormat);
    SourceTestUtils.readFromSource(boundedSource, p.getOptions());
}

From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java

License:Apache License

/**
 * This test validates behavior of the
 * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if the
 * InputFormat's {@link InputFormat#getSplits(JobContext)} returns an InputSplit list with zero
 * records.
 */
@Test
public void testReadersStartWhenZeroRecords() throws Exception {
    InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class);
    EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class);
    Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class),
            Mockito.any(TaskAttemptContext.class))).thenReturn(mockReader);
    Mockito.when(mockReader.nextKeyValue()).thenReturn(false);
    InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class);
    HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>(
            serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required.
            null, // No value translation required.
            new SerializableSplit(mockInputSplit));
    BoundedReader<KV<Text, Employee>> boundedReader = boundedSource.createReader(p.getOptions());
    assertEquals(false, boundedReader.start());
    assertEquals(Double.valueOf(1), boundedReader.getFractionConsumed());
}

From source file:org.apache.crunch.impl.mr.run.CrunchRecordReader.java

License:Apache License

private boolean initNextRecordReader() throws IOException, InterruptedException {
    if (combineFileSplit != null) {
        if (curReader != null) {
            curReader.close();
            curReader = null;
            if (idx > 0) {
                progress += combineFileSplit.getLength(idx - 1);
            }
        }
        // if all chunks have been processed, nothing more to do.
        if (idx == combineFileSplit.getNumPaths()) {
            return false;
        }
    } else if (idx > 0) {
        return false;
    }

    idx++;
    Configuration conf = crunchSplit.getConf();
    InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils
            .newInstance(crunchSplit.getInputFormatClass(), conf);
    this.curReader = inputFormat.createRecordReader(getDelegateSplit(),
            TaskAttemptContextFactory.create(conf, context.getTaskAttemptID()));
    return true;
}