List of usage examples for the org.apache.hadoop.mapreduce.InputFormat method createRecordReader
/**
 * Creates a {@link RecordReader} for the given input split.
 *
 * @param split the input split the reader is created for
 * @param context the task attempt context supplied to the reader being created
 * @return a record reader with key type {@code K} and value type {@code V}
 * @throws IOException declared by the signature; the conditions are implementation-specific
 * @throws InterruptedException declared by the signature; the conditions are
 *     implementation-specific
 */
public abstract RecordReader<K, V> createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException;
From source file:mvm.rya.accumulo.pig.AccumuloStorageTest.java
License:Apache License
/**
 * Builds one {@link AccumuloStorage} per input split computed for the given location. Each
 * returned storage has had its location set and has been handed an initialized record reader
 * for its split via {@code prepareToRead}.
 *
 * @param location the location passed to {@code setLocation} on every storage
 * @return the prepared storages, one per split, in split order
 * @throws IOException if any of the underlying storage, input format or reader calls fails
 * @throws InterruptedException if any of the underlying calls is interrupted
 */
protected List<AccumuloStorage> createAccumuloStorages(String location)
        throws IOException, InterruptedException {
    // A first storage instance supplies the input format and configures the job
    // that is used to compute the splits.
    AccumuloStorage planningStorage = new AccumuloStorage();
    InputFormat inputFormat = planningStorage.getInputFormat();
    Job planningJob = new Job(new Configuration());
    planningStorage.setLocation(location, planningJob);

    List<InputSplit> splits = inputFormat.getSplits(planningJob);
    assertNotNull(splits);

    List<AccumuloStorage> prepared = new ArrayList<AccumuloStorage>();
    for (InputSplit split : splits) {
        // Every split gets its own storage and its own freshly configured job.
        AccumuloStorage splitStorage = new AccumuloStorage();
        Job splitJob = new Job(new Configuration());
        splitStorage.setLocation(location, splitJob);

        TaskAttemptID attemptId = new TaskAttemptID("jtid", 0, false, 0, 0);
        TaskAttemptContext attemptContext =
                new TaskAttemptContextImpl(splitJob.getConfiguration(), attemptId);

        // The reader comes from the input format obtained from the first storage.
        RecordReader reader = inputFormat.createRecordReader(split, attemptContext);
        reader.initialize(split, attemptContext);

        splitStorage.prepareToRead(reader, null);
        prepared.add(splitStorage);
    }
    return prepared;
}
From source file:mvm.rya.accumulo.pig.StatementPatternStorageTest.java
License:Apache License
/**
 * Builds one {@link StatementPatternStorage} per input split computed for the given location.
 * Each returned storage has had its location set and has been handed an initialized record
 * reader for its split via {@code prepareToRead}.
 *
 * @param location the location passed to {@code setLocation} on every storage
 * @return the prepared storages, one per split, in split order
 * @throws IOException if any of the underlying storage, input format or reader calls fails
 * @throws InterruptedException if any of the underlying calls is interrupted
 */
protected List<StatementPatternStorage> createStorages(String location)
        throws IOException, InterruptedException {
    // A first storage instance supplies the input format and configures the job
    // that is used to compute the splits.
    StatementPatternStorage planningStorage = new StatementPatternStorage();
    InputFormat inputFormat = planningStorage.getInputFormat();
    Job planningJob = new Job(new Configuration());
    planningStorage.setLocation(location, planningJob);

    List<InputSplit> splits = inputFormat.getSplits(planningJob);
    assertNotNull(splits);

    List<StatementPatternStorage> prepared = new ArrayList<StatementPatternStorage>();
    for (InputSplit split : splits) {
        // Every split gets its own storage and its own freshly configured job.
        StatementPatternStorage splitStorage = new StatementPatternStorage();
        Job splitJob = new Job(new Configuration());
        splitStorage.setLocation(location, splitJob);

        TaskAttemptID attemptId = new TaskAttemptID("jtid", 0, false, 0, 0);
        TaskAttemptContext attemptContext =
                new TaskAttemptContextImpl(splitJob.getConfiguration(), attemptId);

        // The reader comes from the input format obtained from the first storage.
        RecordReader reader = inputFormat.createRecordReader(split, attemptContext);
        reader.initialize(split, attemptContext);

        splitStorage.prepareToRead(reader, null);
        prepared.add(splitStorage);
    }
    return prepared;
}
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java
License:Apache License
/** * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object * creation fails./*w w w . j av a2s . c om*/ */ @Test public void testReadIfCreateRecordReaderFails() throws Exception { thrown.expect(Exception.class); thrown.expectMessage("Exception in creating RecordReader"); InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class))) .thenThrow(new IOException("Exception in creating RecordReader")); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit()); boundedSource.setInputFormatObj(mockInputFormat); SourceTestUtils.readFromSource(boundedSource, p.getOptions()); }
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java
License:Apache License
/** * This test validates behavior of HadoopInputFormatSource if {@link * InputFormat#createRecordReader(InputSplit, TaskAttemptContext)} createRecordReader(InputSplit, * TaskAttemptContext)} of InputFormat returns null. *///from w w w .j av a 2s . co m @Test public void testReadWithNullCreateRecordReader() throws Exception { InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); thrown.expect(IOException.class); thrown.expectMessage(String.format("Null RecordReader object returned by %s", mockInputFormat.getClass())); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class))).thenReturn(null); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit()); boundedSource.setInputFormatObj(mockInputFormat); SourceTestUtils.readFromSource(boundedSource, p.getOptions()); }
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java
License:Apache License
/** * This test validates behavior of {@link * HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if InputFormat's * {@link InputFormat#getSplits(JobContext)} getSplits(JobContext)} returns InputSplitList having * zero records./*from w w w . j a v a 2s . com*/ */ @Test public void testReadersStartWhenZeroRecords() throws Exception { InputFormat mockInputFormat = Mockito.mock(EmployeeInputFormat.class); EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any())).thenReturn(mockReader); Mockito.when(mockReader.nextKeyValue()).thenReturn(false); InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit(mockInputSplit)); boundedSource.setInputFormatObj(mockInputFormat); BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions()); assertFalse(reader.start()); assertEquals(Double.valueOf(1), reader.getFractionConsumed()); reader.close(); }
From source file:org.apache.beam.sdk.io.hadoop.format.HadoopFormatIOReadTest.java
License:Apache License
/** * This test validates the method getFractionConsumed()- when a bad progress value is returned by * the inputformat.//from w w w . j av a 2 s . c o m */ @Test public void testGetFractionConsumedForBadProgressValue() throws Exception { InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(), Mockito.any())).thenReturn(mockReader); Mockito.when(mockReader.nextKeyValue()).thenReturn(true); // Set to a bad value , not in range of 0 to 1 Mockito.when(mockReader.getProgress()).thenReturn(2.0F); InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<>(serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit(mockInputSplit)); boundedSource.setInputFormatObj(mockInputFormat); BoundedReader<KV<Text, Employee>> reader = boundedSource.createReader(p.getOptions()); assertEquals(Double.valueOf(0), reader.getFractionConsumed()); boolean start = reader.start(); assertTrue(start); if (start) { boolean advance = reader.advance(); assertEquals(null, reader.getFractionConsumed()); assertTrue(advance); if (advance) { advance = reader.advance(); assertEquals(null, reader.getFractionConsumed()); } } // Validate if getFractionConsumed() returns null after few number of reads as getProgress // returns invalid value '2' which is not in the range of 0 to 1. assertEquals(null, reader.getFractionConsumed()); reader.close(); }
From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java
License:Apache License
/** * This test validates behavior of {@link HadoopInputFormatBoundedSource} if RecordReader object * creation fails./* w w w . j ava2s . c om*/ */ @Test public void testReadIfCreateRecordReaderFails() throws Exception { thrown.expect(Exception.class); thrown.expectMessage("Exception in creating RecordReader"); InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class))) .thenThrow(new IOException("Exception in creating RecordReader")); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit()); boundedSource.setInputFormatObj(mockInputFormat); SourceTestUtils.readFromSource(boundedSource, p.getOptions()); }
From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java
License:Apache License
/** * This test validates behavior of HadoopInputFormatSource if * {@link InputFormat#createRecordReader() createRecordReader()} of InputFormat returns null. */// ww w .j a v a2s .c o m @Test public void testReadWithNullCreateRecordReader() throws Exception { InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); thrown.expect(IOException.class); thrown.expectMessage(String.format("Null RecordReader object returned by %s", mockInputFormat.getClass())); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class))).thenReturn(null); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit()); boundedSource.setInputFormatObj(mockInputFormat); SourceTestUtils.readFromSource(boundedSource, p.getOptions()); }
From source file:org.apache.beam.sdk.io.hadoop.inputformat.HadoopInputFormatIOTest.java
License:Apache License
/** * This test validates behavior of// w w w. j a va2s . c o m * {@link HadoopInputFormatBoundedSource.HadoopInputFormatReader#start() start()} method if * InputFormat's {@link InputFormat#getSplits() getSplits()} returns InputSplitList having zero * records. */ @Test public void testReadersStartWhenZeroRecords() throws Exception { InputFormat<Text, Employee> mockInputFormat = Mockito.mock(EmployeeInputFormat.class); EmployeeRecordReader mockReader = Mockito.mock(EmployeeRecordReader.class); Mockito.when(mockInputFormat.createRecordReader(Mockito.any(InputSplit.class), Mockito.any(TaskAttemptContext.class))).thenReturn(mockReader); Mockito.when(mockReader.nextKeyValue()).thenReturn(false); InputSplit mockInputSplit = Mockito.mock(NewObjectsEmployeeInputSplit.class); HadoopInputFormatBoundedSource<Text, Employee> boundedSource = new HadoopInputFormatBoundedSource<Text, Employee>( serConf, WritableCoder.of(Text.class), AvroCoder.of(Employee.class), null, // No key translation required. null, // No value translation required. new SerializableSplit(mockInputSplit)); BoundedReader<KV<Text, Employee>> boundedReader = boundedSource.createReader(p.getOptions()); assertEquals(false, boundedReader.start()); assertEquals(Double.valueOf(1), boundedReader.getFractionConsumed()); }
From source file:org.apache.crunch.impl.mr.run.CrunchRecordReader.java
License:Apache License
private boolean initNextRecordReader() throws IOException, InterruptedException { if (combineFileSplit != null) { if (curReader != null) { curReader.close();/*from www . j a va 2 s.com*/ curReader = null; if (idx > 0) { progress += combineFileSplit.getLength(idx - 1); } } // if all chunks have been processed, nothing more to do. if (idx == combineFileSplit.getNumPaths()) { return false; } } else if (idx > 0) { return false; } idx++; Configuration conf = crunchSplit.getConf(); InputFormat<K, V> inputFormat = (InputFormat<K, V>) ReflectionUtils .newInstance(crunchSplit.getInputFormatClass(), conf); this.curReader = inputFormat.createRecordReader(getDelegateSplit(), TaskAttemptContextFactory.create(conf, context.getTaskAttemptID())); return true; }