Example usage for org.apache.hadoop.mapreduce RecordReader getCurrentValue

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.RecordReader.getCurrentValue().

Prototype

public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;

Source Link

Document

Get the current value.

Usage

From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java

License:Apache License

/**
 * Writes a single stream event to an event/index file pair, reads it back through
 * {@code StreamInputFormat} configured with a text body format, and checks that exactly
 * one record is returned with the original headers and body.
 *
 * @throws IOException if writing or reading the stream files fails
 * @throws InterruptedException if reading the records is interrupted
 */
@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
    File inputDir = tmpFolder.newFolder();
    File partition = new File(inputDir, "1.1000");
    partition.mkdirs();
    File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
    File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

    // write 1 event
    StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
            Files.newOutputStreamSupplier(indexFile), 100L);

    StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
            Charsets.UTF_8.encode("hello world"), 1000);
    writer.append(streamEvent);
    writer.close();

    // the body is exposed as a single STRING field named "body"
    FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
            Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
            Collections.<String, String>emptyMap());
    Configuration conf = new Configuration();
    StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
    StreamInputFormat.setStreamPath(conf, inputDir.toURI());
    TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

    StreamInputFormat format = new StreamInputFormat();

    // read all splits and store the results in the list
    List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
    List<InputSplit> inputSplits = format.getSplits(context);
    for (InputSplit split : inputSplits) {
        RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
                .createRecordReader(split, context);
        try {
            recordReader.initialize(split, context);
            while (recordReader.nextKeyValue()) {
                recordsRead.add(recordReader.getCurrentValue());
            }
        } finally {
            // release the reader even when initialization or reading fails
            recordReader.close();
        }
    }

    // should only have read 1 record
    Assert.assertEquals(1, recordsRead.size());
    GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
    Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
    Assert.assertEquals("hello world", eventData.getBody().get("body"));
}

From source file:co.cask.cdap.template.etl.common.ETLDBInputFormat.java

License:Apache License

/**
 * Wraps the record reader created by the superclass in a delegating reader whose
 * {@code close()} additionally deregisters {@code driverShim} from {@link DriverManager}.
 *
 * @param split the database input split to read
 * @param conf the job configuration
 * @return a reader that forwards every call to the superclass-created reader
 * @throws IOException if the superclass fails to create the underlying reader
 */
@Override
protected RecordReader createDBRecordReader(DBInputSplit split, Configuration conf) throws IOException {
    final RecordReader dbRecordReader = super.createDBRecordReader(split, conf);
    return new RecordReader() {
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            dbRecordReader.initialize(split, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return dbRecordReader.nextKeyValue();
        }

        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentKey();
        }

        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return dbRecordReader.getProgress();
        }

        /**
         * Closes the wrapped reader and then deregisters the driver shim. The
         * deregistration runs in a {@code finally} block so it is not skipped when
         * closing the wrapped reader throws.
         */
        @Override
        public void close() throws IOException {
            try {
                dbRecordReader.close();
            } finally {
                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    // surfaced as IOException because close() cannot throw SQLException
                    throw new IOException(e);
                }
            }
        }
    };
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java

License:Apache License

/**
 * Writes one sequence file and expects the combine input format to produce a single
 * split that yields one key/value pair and then reports full progress.
 */
@Test
public void testOneFile() throws IOException, InterruptedException {
    final Path baseDir = new Path(tempFolder.getRoot().getAbsolutePath());
    final CombineSequenceFileInputFormat<Text, Text> format = new CombineSequenceFileInputFormat<Text, Text>();

    final Path sequenceFile = new Path(baseDir, "file1.txt");
    writeSequenceFile(sequenceFile);

    final Job job = new Job(new JobConf());
    FileInputFormat.addInputPath(job, sequenceFile);

    // the single input file must land in a single split
    final List<InputSplit> allSplits = format.getSplits(job);
    assertEquals(1, allSplits.size());
    final InputSplit onlySplit = allSplits.get(0);

    final TaskAttemptID attemptId = new TaskAttemptID("jt", 0, true, 0, 0);
    final TaskAttemptContext attemptContext = new TaskAttemptContext(new Configuration(), attemptId);

    final RecordReader<Text, Text> reader = format.createRecordReader(onlySplit, attemptContext);
    reader.initialize(onlySplit, attemptContext);

    // first and only record
    assertTrue(reader.nextKeyValue());
    assertEquals(key, reader.getCurrentKey());
    assertEquals(value, reader.getCurrentValue());

    // end of input
    assertFalse(reader.nextKeyValue());
    assertEquals(1.0f, reader.getProgress(), 0.1);
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java

License:Apache License

/**
 * Writes two sequence files and expects the combine input format to pack them into a
 * single split that yields two key/value pairs, with progress of roughly one half after
 * the first pair and full progress at the end.
 */
@Test
public void testTwoFiles() throws IOException, InterruptedException {
    final Path baseDir = new Path(tempFolder.getRoot().getAbsolutePath());
    final CombineSequenceFileInputFormat<Text, Text> format = new CombineSequenceFileInputFormat<Text, Text>();

    final Path firstFile = new Path(baseDir, "file1.txt");
    final Path secondFile = new Path(baseDir, "file2.txt");
    writeSequenceFile(firstFile);
    writeSequenceFile(secondFile);

    final Job job = new Job(new JobConf());
    FileInputFormat.addInputPath(job, firstFile);
    FileInputFormat.addInputPath(job, secondFile);

    // both files must be combined into one split
    final List<InputSplit> allSplits = format.getSplits(job);
    assertEquals(1, allSplits.size());
    final InputSplit combinedSplit = allSplits.get(0);

    final TaskAttemptID attemptId = new TaskAttemptID("jt", 0, true, 0, 0);
    final TaskAttemptContext attemptContext = new TaskAttemptContext(new Configuration(), attemptId);

    final RecordReader<Text, Text> reader = format.createRecordReader(combinedSplit, attemptContext);
    reader.initialize(combinedSplit, attemptContext);

    // record from the first file
    assertTrue(reader.nextKeyValue());
    assertEquals(key, reader.getCurrentKey());
    assertEquals(value, reader.getCurrentValue());
    assertEquals(0.5f, reader.getProgress(), 0.1);

    // record from the second file
    assertTrue(reader.nextKeyValue());
    assertEquals(key, reader.getCurrentKey());
    assertEquals(value, reader.getCurrentValue());

    // end of input
    assertFalse(reader.nextKeyValue());
    assertEquals(1.0f, reader.getProgress(), 0.1);
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java

License:Apache License

/**
 * Creates a reader that adapts an {@code HCatInputFormat} record reader: each HCat record
 * is exposed as the key (copied into an {@code ITuple}) and the value is always
 * {@code NullWritable}.
 *
 * @param split the split to read
 * @param taskContext the task attempt context
 * @return an adapting reader backed by the HCat record reader for {@code split}
 */
@Override
public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {

    final HCatInputFormat hCatInputFormat = new HCatInputFormat();

    @SuppressWarnings("rawtypes")
    final RecordReader<WritableComparable, HCatRecord> delegate = hCatInputFormat.createRecordReader(split,
            taskContext);

    return new RecordReader<ITuple, NullWritable>() {

        // A single tuple instance is refilled and returned on every getCurrentKey() call.
        ITuple reusedTuple = new Tuple(pangoolSchema);

        @Override
        public void initialize(InputSplit iS, TaskAttemptContext context)
                throws IOException, InterruptedException {
            delegate.initialize(iS, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return delegate.nextKeyValue();
        }

        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            final HCatRecord current = delegate.getCurrentValue();
            // Copy the HCatRecord into the tuple field by field.
            // NOTE(review): the bound comes from `schema` while the tuple is built from
            // `pangoolSchema` - presumably both have the same field count; confirm.
            int fieldIndex = 0;
            while (fieldIndex < schema.size()) {
                reusedTuple.set(fieldIndex, current.get(fieldIndex));
                fieldIndex++;
            }
            return reusedTuple;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return delegate.getProgress();
        }

        @Override
        public void close() throws IOException {
            delegate.close();
        }
    };
}

From source file:com.facebook.hiveio.benchmark.InputBenchmark.java

License:Apache License

/**
 * Drains a RecordReader, passing every record to {@code parseLongLongDouble}.
 *
 * @param reader RecordReader to consume
 * @return number of rows read
 * @throws IOException I/O errors
 * @throws InterruptedException thread errors
 */
private static long readFully(RecordReader<WritableComparable, HiveReadableRecord> reader)
        throws IOException, InterruptedException {
    long rowCount = 0;
    for (; reader.nextKeyValue(); ++rowCount) {
        parseLongLongDouble(reader.getCurrentValue());
    }
    return rowCount;
}

From source file:com.facebook.hiveio.tailer.TailerCmd.java

License:Apache License

/**
 * Read input split/* ww  w  . ja va  2 s  .c  o  m*/
 *
 * @param split InputSplit
 * @param context Context
 * @throws IOException
 * @throws InterruptedException
 */
private void readSplit(InputSplit split, Context context) throws IOException, InterruptedException {
    TaskAttemptID taskId = new TaskAttemptID();
    TaskAttemptContext taskContext = new TaskAttemptContext(context.hiveConf, taskId);
    RecordReader<WritableComparable, HiveReadableRecord> recordReader;
    recordReader = context.hiveApiInputFormat.createRecordReader(split, taskContext);
    recordReader.initialize(split, taskContext);

    int rowsParsed = 0;
    while (recordReader.nextKeyValue() && !context.limitReached(args.limit)) {
        HiveReadableRecord record = recordReader.getCurrentValue();
        if (args.parser.parseOnly) {
            rowParser.parse(record);
        } else {
            recordPrinter.printRecord(record, context.schema.numColumns(), context, args);
        }
        ++rowsParsed;
        if (context.rowsParsed.incrementAndGet() >= args.limit) {
            break;
        }
        if (rowsParsed % args.metricsOpts.updateRows == 0) {
            context.stats.addRows(args.metricsOpts.updateRows);
            rowsParsed = 0;
        }
    }
    context.stats.addRows(rowsParsed);
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 * /*from   w  ww .j av  a  2 s  .c  o m*/
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java

License:Apache License

/**
 * Feeds a mocked shard reader (Text key, MapWritable value) into
 * {@code Aleph2EsRecordReader} and checks the converted key is a String and the converted
 * value is a JSON object carrying both the original field and the key under "_id".
 */
@Test
public void test_Aleph2EsRecordReader_objectConversion() throws IOException, InterruptedException {

    // stubbed value: a single Text -> Text entry
    final MapWritable stubbedValue = new MapWritable();
    stubbedValue.put(new Text("val_key_text"), new Text("val_val_text"));

    // mock returns Text key, MapWritable value
    @SuppressWarnings("rawtypes")
    final RecordReader shardReaderMock = Mockito.mock(RecordReader.class);
    Mockito.when(shardReaderMock.getCurrentKey()).thenReturn(new Text("text_test"));
    Mockito.when(shardReaderMock.getCurrentValue()).thenReturn(stubbedValue);

    final Aleph2EsRecordReader readerUnderTest = new Aleph2EsRecordReader(shardReaderMock);

    // key conversion
    final String convertedKey = readerUnderTest.getCurrentKey();
    assertEquals(String.class, convertedKey.getClass());
    assertEquals("text_test", convertedKey);

    // value conversion
    final Tuple2<Long, IBatchRecord> convertedValue = readerUnderTest.getCurrentValue();
    assertEquals(0L, convertedValue._1().longValue()); // (so something breaks in here when/if we put some logic in)
    final IBatchRecord batchRecord = convertedValue._2();
    assertEquals(Optional.empty(), batchRecord.getContent());
    final JsonNode json = batchRecord.getJson();
    assertTrue("Is object: " + json, json.isObject());
    assertEquals("val_val_text", json.get("val_key_text").asText());
    assertEquals("text_test", json.get("_id").asText());
}

From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java

License:Apache License

/**
 * Feeds a mocked reader (String key, BSON value) into {@code V1DocumentDbRecordReader}
 * and checks the converted key is a String and the converted value is a JSON object
 * carrying the original field.
 */
@Test
public void test_V1DocumentDbRecordReader_objectConversion() throws IOException, InterruptedException {

    // stubbed value: a one-field document
    final BasicDBObject stubbedDoc = new BasicDBObject();
    stubbedDoc.put("val_key_text", "val_val_text");

    @SuppressWarnings("unchecked")
    final RecordReader<Object, BSONObject> delegateMock = (RecordReader<Object, BSONObject>) Mockito
            .mock(RecordReader.class);
    Mockito.when(delegateMock.getCurrentKey()).thenReturn("text_test");
    Mockito.when(delegateMock.getCurrentValue()).thenReturn(stubbedDoc);

    try (final V1DocumentDbRecordReader readerUnderTest = new V1DocumentDbRecordReader(delegateMock)) {

        // key conversion
        final String convertedKey = readerUnderTest.getCurrentKey();
        assertEquals(String.class, convertedKey.getClass());
        assertEquals("text_test", convertedKey);

        // value conversion
        final Tuple2<Long, IBatchRecord> convertedValue = readerUnderTest.getCurrentValue();
        assertEquals(0L, convertedValue._1().longValue()); // (so something breaks in here when/if we put some logic in)
        final IBatchRecord batchRecord = convertedValue._2();
        assertEquals(Optional.empty(), batchRecord.getContent());
        final JsonNode json = batchRecord.getJson();
        assertTrue("Is object: " + json, json.isObject());
        assertEquals("val_val_text", json.get("val_key_text").asText());
    }
}