Example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue

List of usage examples for org.apache.hadoop.mapreduce RecordReader nextKeyValue

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue.

Prototype

public abstract boolean nextKeyValue() throws IOException, InterruptedException;

Document

Read the next key, value pair.
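
For orientation, a minimal sketch of the usual call pattern follows: after initialize(), nextKeyValue() is polled in a loop, and getCurrentKey()/getCurrentValue() return the pair it advanced to. The readAll helper and the LongWritable/Text key and value types below are illustrative assumptions, not taken from any example on this page.

// Minimal sketch (not from this page's examples): read every pair from one split.
// The LongWritable/Text types are assumptions; use the InputFormat's actual key/value types.
private void readAll(InputFormat<LongWritable, Text> inputFormat, InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    try {
        while (reader.nextKeyValue()) {
            LongWritable key = reader.getCurrentKey();
            Text value = reader.getCurrentValue();
            // process the (key, value) pair here
        }
    } finally {
        reader.close();
    }
}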

Usage

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_maxRecords() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true); // (ie will keep going forever)
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 4.0); // (just return some dummy number so we can check it's working)

    // Test version
    {
        final Configuration config = new Configuration(false);
        config.set(Aleph2EsInputFormat.BE_DEBUG_MAX_SIZE, "10");
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should be overridden", reader_under_test.getProgress() <= 1.0);
        }
        assertEquals("Should have stopped after 10 iterations", 10, ii);
    }
    // Normal version
    {
        final Configuration config = new Configuration(false);
        final TaskAttemptContext mock_task = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(mock_task.getConfiguration()).thenReturn(config);

        final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

        try {
            reader_under_test.initialize(null, mock_task);
        } catch (Exception e) {
        } // (the _delegate init call will fail out, that's fine)

        int ii = 0;
        for (; ii < 100 && reader_under_test.nextKeyValue(); ++ii) {
            assertTrue("getProgress should return the dummy value", reader_under_test.getProgress() == 4.0);
        }
        assertEquals("Should keep going for all 100 iterations", 100, ii);
    }
}

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

@Test
public void test_Aleph2EsRecordReader_testCoverage() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader);

    // void Functions we don't care about as long as they don't die

    reader_under_test.close();

    // Functions that return something that we can pass along directly

    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // Things that throw exceptions

    try {
        reader_under_test.createKey();
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.createValue();
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.setCurrentKey("str", "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }

    try {
        reader_under_test.setCurrentValue(null, "str");
        fail("should have thrown exception");
    } catch (Exception e) {
    }
}

From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java

License:Apache License

@Test
public void test_V1DocumentDbRecordReader_testCoverage() throws IOException, InterruptedException {

    @SuppressWarnings("rawtypes")
    final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class, new Answer<Void>() {
        public Void answer(InvocationOnMock invocation) {
            //String fn_name = invocation.getMethod().getName();
            return null;
        }
    });
    Mockito.when(mock_shard_record_reader.getProgress()).thenReturn((float) 1.0);
    Mockito.when(mock_shard_record_reader.nextKeyValue()).thenReturn(true);

    @SuppressWarnings("unchecked")
    final V1DocumentDbRecordReader reader_under_test = new V1DocumentDbRecordReader(mock_shard_record_reader);

    // void Functions we don't care about as long as they don't die

    reader_under_test.close();

    // Functions that return something that we can pass along directly

    assertEquals((float) 1.0, (float) reader_under_test.getProgress(), 0.00001);
    assertEquals(true, reader_under_test.nextKeyValue());

    // (basically just coverage testing)
    try {
        reader_under_test.initialize(null, null);
        //(this one doesn't throw an exception for some reason)
    } catch (Exception e) {
    }
}

From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java

License:Apache License

@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);
    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);

        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}

From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java

License:Apache License

public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    //Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}

From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java

License:Apache License

/**
 * Read the given split.
 * @return List : List of read messages
 */
private List<Message> readSplit(DatabusInputFormat format, org.apache.hadoop.mapreduce.InputSplit split,
        JobConf job) throws IOException, InterruptedException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format
            .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) split, context);
    ((DatabusRecordReader) reader).initialize(split, context);
    while (reader.nextKeyValue()) {
        result.add(reader.getCurrentValue());
    }
    reader.close();
    return result;
}

From source file:com.marcolotz.lung.debug.InputTester.java

License:Creative Commons License

/***
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a file on the local file system.
 *
 * @param filePath
 */
public void localTest(String filePath) {
    DICOM image;
    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for Correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {

    }
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFile() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFileNoMetadata() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();
    inputFormat.setBuilder(Testing.TestItem.newBuilder());

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}