Example usage for org.apache.hadoop.mapreduce RecordReader getCurrentValue

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce.RecordReader#getCurrentValue method.

Prototype

public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;

Document

Get the current value.
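
getCurrentValue() is meant to be called inside the standard RecordReader loop: initialize the reader for a split, advance with nextKeyValue(), and read the value only after nextKeyValue() has returned true. A minimal sketch of that pattern follows; the TextInputFormat, the LongWritable/Text key and value types, and the input path are illustrative assumptions rather than part of the examples below, and the code assumes Hadoop 2.x, where TaskAttemptContextImpl is the concrete context class (imports are omitted, as in the examples below).

// Minimal sketch of the standard read loop around getCurrentValue().
// TextInputFormat and the input path are hypothetical choices for illustration.
Configuration conf = new Configuration();
Path path = new Path("src/test/resources/sample.txt"); // hypothetical input file
FileSystem fs = path.getFileSystem(conf);
FileSplit split = new FileSplit(path, 0, fs.getFileStatus(path).getLen(), null);
TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());
TextInputFormat inputFormat = new TextInputFormat();
RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
reader.initialize(split, context);
while (reader.nextKeyValue()) {
    // getCurrentValue() returns the value deserialized by the last nextKeyValue() call
    Text value = reader.getCurrentValue();
    System.out.println(value);
}
reader.close();

Note that many RecordReader implementations reuse the same key and value objects across calls to nextKeyValue(), so copy the value if it must outlive the current iteration.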

Usage

From source file: parquet.hadoop.thrift.TestParquetToThriftReadProjection.java

License: Apache License

private <T extends TBase<?, ?>> void shouldDoProjection(Configuration conf, T recordToWrite,
        T expectedReadResult, Class<? extends TBase<?, ?>> thriftClass) throws Exception {
    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    //create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, thriftClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

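    // Read the file back with ParquetThriftInputFormat and fetch the projected record via getCurrentValue()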
    final ParquetThriftInputFormat<T> parquetThriftInputFormat = new ParquetThriftInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetThriftInputFormat
            .getSplits(ContextUtil.newJobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = ContextUtil.newTaskAttemptContext(
                ContextUtil.getConfiguration(job), new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetThriftInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
            LOG.info(readValue);
        }
    }
    assertEquals(expectedReadResult, readValue);

}

From source file: parquet.scrooge.ParquetScroogeSchemeTest.java

License: Apache License

public <T> void verifyScroogeRead(TBase recordToWrite, Class<T> readClass, String expectedStr,
        String projectionFilter) throws Exception {
    Configuration conf = new Configuration();
    conf.set("parquet.thrift.converter.class", ScroogeRecordConverter.class.getName());
    conf.set(ThriftReadSupport.THRIFT_READ_CLASS_KEY, readClass.getName());
    conf.set(ThriftReadSupport.THRIFT_COLUMN_FILTER_KEY, projectionFilter);

    final Path parquetFile = new Path("target/test/TestParquetToThriftReadProjection/file.parquet");
    final FileSystem fs = parquetFile.getFileSystem(conf);
    if (fs.exists(parquetFile)) {
        fs.delete(parquetFile, true);
    }

    //create a test file
    final TProtocolFactory protocolFactory = new TCompactProtocol.Factory();
    final TaskAttemptID taskId = new TaskAttemptID("local", 0, true, 0, 0);
    Class writeClass = recordToWrite.getClass();
    final ThriftToParquetFileWriter w = new ThriftToParquetFileWriter(parquetFile,
            ContextUtil.newTaskAttemptContext(conf, taskId), protocolFactory, writeClass);
    final ByteArrayOutputStream baos = new ByteArrayOutputStream();
    final TProtocol protocol = protocolFactory.getProtocol(new TIOStreamTransport(baos));

    recordToWrite.write(protocol);
    w.write(new BytesWritable(baos.toByteArray()));
    w.close();

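    // Read the file back through the Scrooge input format and capture the value with getCurrentValue()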
    final ParquetScroogeInputFormat<T> parquetScroogeInputFormat = new ParquetScroogeInputFormat<T>();
    final Job job = new Job(conf, "read");
    job.setInputFormatClass(ParquetThriftInputFormat.class);
    ParquetThriftInputFormat.setInputPaths(job, parquetFile);
    final JobID jobID = new JobID("local", 1);
    List<InputSplit> splits = parquetScroogeInputFormat
            .getSplits(new JobContext(ContextUtil.getConfiguration(job), jobID));
    T readValue = null;
    for (InputSplit split : splits) {
        TaskAttemptContext taskAttemptContext = new TaskAttemptContext(ContextUtil.getConfiguration(job),
                new TaskAttemptID(new TaskID(jobID, true, 1), 0));
        final RecordReader<Void, T> reader = parquetScroogeInputFormat.createRecordReader(split,
                taskAttemptContext);
        reader.initialize(split, taskAttemptContext);
        if (reader.nextKeyValue()) {
            readValue = reader.getCurrentValue();
        }
    }
    assertEquals(expectedStr, readValue.toString());
}

From source file: uk.bl.wa.hadoop.mapreduce.lib.DereferencingArchiveToCDXRecordReaderTest.java

License: Open Source License

private void runCDXTest(Configuration conf, String expected) throws Exception {
    File testFile = new File("src/test/resources/rr-test-inputs.txt");
    Path path = new Path(testFile.getAbsoluteFile().toURI().toString());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    ArchiveToCDXFileInputFormat inputFormat = ReflectionUtils.newInstance(ArchiveToCDXFileInputFormat.class,
            conf);
    TaskAttemptContext context = new TaskAttemptContext(conf, new TaskAttemptID());
    RecordReader<Text, Text> reader = inputFormat.createRecordReader(split, context);

    reader.initialize(split, context);

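    // Advance through every record, keeping the value of the third one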
    int position = 0;
    String value = "";
    while (reader.nextKeyValue()) {
        position += 1;
        if (position == 3)
            value = reader.getCurrentValue().toString();
    }
    // Check the third value is as expected
    log.debug(value);
    Assert.assertEquals(expected, value);
}