Example usage for org.apache.hadoop.mapreduce RecordReader nextKeyValue

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce RecordReader#nextKeyValue.

Prototype

public abstract boolean nextKeyValue() throws IOException, InterruptedException;

Document

Read the next key, value pair.
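
For orientation, the usual calling pattern is sketched below. This is a minimal sketch rather than code from the sources on this page: the inputFormat, split, and context variables, and the LongWritable/Text key and value types, are assumed placeholders.

RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
reader.initialize(split, context);
try {
    // nextKeyValue() advances to the next record and returns false at the end
    // of the input; getCurrentKey()/getCurrentValue() are valid only after it
    // has returned true.
    while (reader.nextKeyValue()) {
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();
        // process the (key, value) pair here
    }
} finally {
    reader.close();
}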

Usage

From source file: parquet.scrooge.ParquetScroogeSchemeTest.java

License: Apache License

public <T> void verifyScroogeRead(TBase recordToWrite, Class<T> readClass, String expectedStr,
        String projectionFilter) throws Exception {
    Configuration conf = new Configuration();
    conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
    conf.set(ThriftReadSupport.THRIFT_READ_CLASS_KEY, readClass.getName());
    conf.set(ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY, projectionFilter);

    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    //create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    Class writeClass = recordToWrite.getClass();
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

    // read the file back through the MapReduce input format
    final ParquetScroogeInputFormat<T> parquetScroogeInputFormat = new ParquetScroogeInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetScroogeInputFormat
            .getSplits(new JobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = new TaskAttemptContext(ContextUtil.getConfiguration(job),
                new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetScroogeInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
        }
    }
    assertEquals(expectedStr, readValue.toString());
}
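
Note that this example reads at most one record per split: nextKeyValue() is called once, and getCurrentValue() is fetched only when it returns true. The next example instead loops until nextKeyValue() signals that the input is exhausted.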

From source file: uk.bl.wa.hadoop.mapreduce.lib.DereferencingArchiveToCDXRecordReaderTest.java

License: Open Source License

private void runCDXTest(Configuration conf, String expected) throws Exception {
    File testFile = new File("src/test/resources/rr-test-inputs.txt");
    Path path = new Path(testFile.getAbsoluteFile().toURI().toString());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    ArchiveToCDXFileInputFormat inputFormat = ReflectionUtils.newInstance(ArchiveToCDXFileInputFormat.class,
            conf);
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    RecordReader<Text, Text> reader = inputFormat.createRecordReader(split, context);

    reader.initialize(split, context);

    int position = 0;
    String value = "";
    while (reader.nextKeyValue()) {
        position += 1;
        if (position == 3)
            value = reader.getCurrentValue().toString();
    }
    // Check the third value is as expected
    log.debug(value);
    Assert.assertEquals(expected, value);
}