List of usage examples for org.apache.hadoop.mapreduce RecordReader nextKeyValue
public abstract boolean nextKeyValue() throws IOException, InterruptedException;
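nextKeyValue() advances the reader to the next record, returning true while records remain and false at end of input; the current pair is then read with getCurrentKey() and getCurrentValue(). For orientation before the examples below, here is a minimal sketch of that read loop using the standard Hadoop TextInputFormat against a local file, modelled on the local-test pattern further down this page; the class name and input path are illustrative assumptions, not part of any of the projects listed:

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class NextKeyValueSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration(false);
        conf.set("fs.default.name", "file:///"); // read from the local file system

        File testFile = new File("/tmp/input.txt"); // hypothetical input file
        Path path = new Path(testFile.getAbsoluteFile().toURI());
        FileSplit split = new FileSplit(path, 0, testFile.length(), null);
        TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

        TextInputFormat format = new TextInputFormat();
        RecordReader<LongWritable, Text> reader = format.createRecordReader(split, context);
        reader.initialize(split, context);
        try {
            // the current key/value are only valid after nextKeyValue() returns true
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();  // byte offset of the line
                Text value = reader.getCurrentValue();      // the line contents
                System.out.println(key + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}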
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test
public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever)
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working)

    // Test version
    {
        final Configuration config = new Configuration(false);
        config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10");
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);
        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);
        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)
        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0);
        }
        assertEquals("Should have stopped after 10 iterations", 10, ii);
    }
    // Normal version
    {
        final Configuration config = new Configuration(false);
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);
        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);
        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)
        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0);
        }
        assertEquals("Should keep going for all 100 iterations", 100, ii);
    }
}
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test
public void test_Aleph2EsRecordReader_testCoverage() throws IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

    // void functions we don't care about as long as they don't die
    reader_under_test.close();

    // Functions that return something that we can pass along directly
    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // Things that throw exceptions
    try {
        reader_under_test.createKey();
        fail("should have thrown exception");
    } catch (Exception e) {
    }
    try {
        reader_under_test.createValue();
        fail("should have thrown exception");
    } catch (Exception e) {
    }
    try {
        reader_under_test.setCurrentKey("str", "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }
    try {
        reader_under_test.setCurrentValue(null, "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }
}
From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java
License:Apache License
@Test
public void test_V1DocumentDbRecordReader_testCoverage() throws IOException, InterruptedException {
    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);
    @SuppressWarnings("unchecked")
    final V1DocumentDbRecordReader reader_under_test = new V1DocumentDbRecordReader(mock_shard_record_reader);

    // void functions we don't care about as long as they don't die
    reader_under_test.close();

    // Functions that return something that we can pass along directly
    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // (basically just coverage testing)
    try {
        reader_under_test.initialize(null, null); // (this one doesn't exception for some reason)
    } catch (Exception e) {
    }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);

    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;
    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);
        ++taskId;
    }
    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License:Apache License
public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus[] fileStatus = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);
        doubleCheckedTotalSize += currentSplitSize;
    }
    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java
License:Apache License
/**
 * Read the given split.
 * @return List : list of read messages
 */
private List<Message> readSplit(DatabusInputFormat format, org.apache.hadoop.mapreduce.InputSplit split,
        JobConf job) throws IOException, InterruptedException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format
            .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) split, context);
    ((DatabusRecordReader) reader).initialize(split, context);
    while (reader.nextKeyValue()) {
        result.add(reader.getCurrentValue());
    }
    reader.close();
    return result;
}
From source file:com.marcolotz.lung.debug.InputTester.java
License:Creative Commons License
/***
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a local file system file.
 *
 * @param filePath
 */
public void localTest(String filePath) {
    DICOM image;

    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);
    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);
            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {
    }
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java
License:Apache License
@Test
public void testReadFile() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();
        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java
License:Apache License
@Test
public void testReadFileNoMetadata() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();
    inputFormat.setBuilder(Testing.TestItem.newBuilder());

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();
        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}
From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}