Example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue

List of usage examples for org.apache.hadoop.mapreduce RecordReader nextKeyValue

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue.

Prototype

public abstract boolean nextKeyValue() throws IOException, InterruptedException;

Document

Read the next key, value pair.
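
For orientation, a minimal sketch of the usual call pattern follows: after initialize(), nextKeyValue() is polled in a loop, and getCurrentKey()/getCurrentValue() return the pair it advanced to. The readAll helper and the LongWritable/Text key and value types below are illustrative assumptions, not taken from any example on this page.

// Minimal sketch (not from this page's examples): read every pair from one split.
// The LongWritable/Text types are assumptions; use the InputFormat's actual key/value types.
private void readAll(InputFormat<LongWritable, Text> inputFormat, InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    try {
        while (reader.nextKeyValue()) {
            LongWritable key = reader.getCurrentKey();
            Text value = reader.getCurrentValue();
            // process the (key, value) pair here
        }
    } finally {
        reader.close();
    }
}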

Usage

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever)
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working)

    // Test version
    {
        final Configuration config = new Configuration(false);
        config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10");
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0);
        }
        assertEquals("Should have stopped after 10 iterations", 10, ii);
    }
    // Normal version
    {
        final Configuration config = new Configuration(false);
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0);
        }
        assertEquals("Should keep going for all 100 iterations", 100, ii);
    }
}

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_testCoverage() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

    // void Functions we don't care about as long as they don't die

    reader_under_test.close();

    // Functions that return something that we can pass along directly

    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // Things that throw exceptions

    try {
        reader_under_test.createKey();
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.createValue();
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.setCurrentKey("str", "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.setCurrentValue(null, "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }
}

From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java

License:Apache License

@Test
public void test_V1DocumentDbRecordReader_testCoverage() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    @SuppressWarnings("unchecked")
    final V1DocumentDbRecordReader reader_under_test = new V1DocumentDbRecordReader(mock_shard_record_reader);

    // void Functions we don't care about as long as they don't die

    reader_under_test.close();

    // Functions that return something that we can pass along directly

    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // (basically just coverage testing)
    try {
        reader_under_test.initialize(null, null);
        //(this one doesn't throw an exception for some reason)
    } catch (Exception e) {
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java

License:Apache License

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java

License:Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    //Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}

From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java

License:Apache License

/**
 * Read the given split.
 * @return List : List of read messages
 */
private List<Message> readSplit(DatabusInputFormat format, org.apache.hadoop.mapreduce.InputSplit split,
        JobConf job) throws IOException, InterruptedException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format
            .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) split, context);
    ((DatabusRecordReader) reader).initialize(split, context);
    while (reader.nextKeyValue()) {
        result.add(reader.getCurrentValue());
    }
    reader.close();
    return result;
}

From source file:com.marcolotz.lung.debug.InputTester.java

License:Creative Commons License

/***
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a file on the local file system.
 *
 * @param filePath
 */
public void localTest(String filePath) {
    DICOM image;
    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for Correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {

    }
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFile() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFileNoMetadata() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();
    inputFormat.setBuilder(Testing.TestItem.newBuilder());

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}