Usage examples for org.apache.hadoop.mapreduce.RecordReader#getCurrentValue()
public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;
From source file:co.cask.cdap.data.stream.StreamInputFormatTest.java
License:Apache License
/**
 * Writes a single stream event into a partition directory, then reads it back through
 * {@link StreamInputFormat} configured with a text body format and checks that exactly
 * one record with the expected headers and body is produced.
 *
 * @throws IOException if writing the stream files or reading the splits fails
 * @throws InterruptedException if reading from a record reader is interrupted
 */
@Test
public void testFormatStreamRecordReader() throws IOException, InterruptedException {
  File inputDir = tmpFolder.newFolder();
  File partition = new File(inputDir, "1.1000");
  partition.mkdirs();
  File eventFile = new File(partition, "bucket.1.0." + StreamFileType.EVENT.getSuffix());
  File indexFile = new File(partition, "bucket.1.0." + StreamFileType.INDEX.getSuffix());

  // write 1 event
  StreamDataFileWriter writer = new StreamDataFileWriter(Files.newOutputStreamSupplier(eventFile),
      Files.newOutputStreamSupplier(indexFile), 100L);
  StreamEvent streamEvent = new StreamEvent(ImmutableMap.of("header1", "value1", "header2", "value2"),
      Charsets.UTF_8.encode("hello world"), 1000);
  writer.append(streamEvent);
  writer.close();

  FormatSpecification formatSpec = new FormatSpecification(TextRecordFormat.class.getName(),
      Schema.recordOf("event", Schema.Field.of("body", Schema.of(Schema.Type.STRING))),
      Collections.<String, String>emptyMap());
  Configuration conf = new Configuration();
  StreamInputFormat.setBodyFormatSpecification(conf, formatSpec);
  StreamInputFormat.setStreamPath(conf, inputDir.toURI());
  TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID());

  StreamInputFormat format = new StreamInputFormat();

  // read all splits and store the results in the list
  List<GenericStreamEventData<StructuredRecord>> recordsRead = Lists.newArrayList();
  List<InputSplit> inputSplits = format.getSplits(context);
  for (InputSplit split : inputSplits) {
    RecordReader<LongWritable, GenericStreamEventData<StructuredRecord>> recordReader = format
        .createRecordReader(split, context);
    try {
      recordReader.initialize(split, context);
      while (recordReader.nextKeyValue()) {
        recordsRead.add(recordReader.getCurrentValue());
      }
    } finally {
      // Release the reader even when initialization or reading fails.
      recordReader.close();
    }
  }

  // should only have read 1 record
  Assert.assertEquals(1, recordsRead.size());
  GenericStreamEventData<StructuredRecord> eventData = recordsRead.get(0);
  Assert.assertEquals(streamEvent.getHeaders(), eventData.getHeaders());
  Assert.assertEquals("hello world", eventData.getBody().get("body"));
}
From source file:co.cask.cdap.template.etl.common.ETLDBInputFormat.java
License:Apache License
@Override protected RecordReader createDBRecordReader(DBInputSplit split, Configuration conf) throws IOException { final RecordReader dbRecordReader = super.createDBRecordReader(split, conf); return new RecordReader() { @Override//ww w . j av a 2 s .co m public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { dbRecordReader.initialize(split, context); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { return dbRecordReader.nextKeyValue(); } @Override public Object getCurrentKey() throws IOException, InterruptedException { return dbRecordReader.getCurrentKey(); } @Override public Object getCurrentValue() throws IOException, InterruptedException { return dbRecordReader.getCurrentValue(); } @Override public float getProgress() throws IOException, InterruptedException { return dbRecordReader.getProgress(); } @Override public void close() throws IOException { dbRecordReader.close(); try { DriverManager.deregisterDriver(driverShim); } catch (SQLException e) { throw new IOException(e); } } }; }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License:Apache License
@Test public void testOneFile() throws IOException, InterruptedException { Path dir = new Path(tempFolder.getRoot().getAbsolutePath()); CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>(); Path inputFile = new Path(dir, "file1.txt"); writeSequenceFile(inputFile);//from w w w .ja v a 2 s .c om Job job = new Job(new JobConf()); FileInputFormat.addInputPath(job, inputFile); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size()); TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0); Configuration conf1 = new Configuration(); TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId); RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1); rr.initialize(splits.get(0), context1); assertTrue(rr.nextKeyValue()); assertEquals(key, rr.getCurrentKey()); assertEquals(value, rr.getCurrentValue()); assertFalse(rr.nextKeyValue()); assertEquals(1.0f, rr.getProgress(), 0.1); }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileTest.java
License:Apache License
@Test public void testTwoFiles() throws IOException, InterruptedException { Path dir = new Path(tempFolder.getRoot().getAbsolutePath()); CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>(); Path inputFile1 = new Path(dir, "file1.txt"); Path inputFile2 = new Path(dir, "file2.txt"); writeSequenceFile(inputFile1);//www. ja va 2 s.c om writeSequenceFile(inputFile2); Job job = new Job(new JobConf()); FileInputFormat.addInputPath(job, inputFile1); FileInputFormat.addInputPath(job, inputFile2); List<InputSplit> splits = inputFormat.getSplits(job); assertEquals(1, splits.size()); TaskAttemptID taskId = new TaskAttemptID("jt", 0, true, 0, 0); Configuration conf1 = new Configuration(); TaskAttemptContext context1 = new TaskAttemptContext(conf1, taskId); RecordReader<Text, Text> rr = inputFormat.createRecordReader(splits.get(0), context1); rr.initialize(splits.get(0), context1); assertTrue(rr.nextKeyValue()); assertEquals(key, rr.getCurrentKey()); assertEquals(value, rr.getCurrentValue()); assertEquals(0.5f, rr.getProgress(), 0.1); assertTrue(rr.nextKeyValue()); assertEquals(key, rr.getCurrentKey()); assertEquals(value, rr.getCurrentValue()); assertFalse(rr.nextKeyValue()); assertEquals(1.0f, rr.getProgress(), 0.1); }
From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java
License:Apache License
@Override public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext) throws IOException, InterruptedException { HCatInputFormat iF = new HCatInputFormat(); @SuppressWarnings("rawtypes") final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split, taskContext);/*from w w w . ja v a 2s .c o m*/ return new RecordReader<ITuple, NullWritable>() { ITuple tuple = new Tuple(pangoolSchema); @Override public void close() throws IOException { hCatRecordReader.close(); } @Override public ITuple getCurrentKey() throws IOException, InterruptedException { HCatRecord record = hCatRecordReader.getCurrentValue(); // Perform conversion between HCatRecord and Tuple for (int pos = 0; pos < schema.size(); pos++) { tuple.set(pos, record.get(pos)); } return tuple; } @Override public NullWritable getCurrentValue() throws IOException, InterruptedException { return NullWritable.get(); } @Override public float getProgress() throws IOException, InterruptedException { return hCatRecordReader.getProgress(); } @Override public void initialize(InputSplit iS, TaskAttemptContext context) throws IOException, InterruptedException { hCatRecordReader.initialize(iS, context); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { return hCatRecordReader.nextKeyValue(); } }; }
From source file:com.facebook.hiveio.benchmark.InputBenchmark.java
License:Apache License
/**
 * Drains a RecordReader, parsing every record it yields.
 *
 * @param reader RecordReader to consume
 * @return number of rows read
 * @throws IOException I/O errors
 * @throws InterruptedException thread errors
 */
private static long readFully(RecordReader<WritableComparable, HiveReadableRecord> reader)
    throws IOException, InterruptedException {
  long rowCount = 0;
  // The counter advances only after a record has been parsed successfully.
  for (; reader.nextKeyValue(); ++rowCount) {
    parseLongLongDouble(reader.getCurrentValue());
  }
  return rowCount;
}
From source file:com.facebook.hiveio.tailer.TailerCmd.java
License:Apache License
/** * Read input split/* ww w . ja va 2 s .c o m*/ * * @param split InputSplit * @param context Context * @throws IOException * @throws InterruptedException */ private void readSplit(InputSplit split, Context context) throws IOException, InterruptedException { TaskAttemptID taskId = new TaskAttemptID(); TaskAttemptContext taskContext = new TaskAttemptContext(context.hiveConf, taskId); RecordReader<WritableComparable, HiveReadableRecord> recordReader; recordReader = context.hiveApiInputFormat.createRecordReader(split, taskContext); recordReader.initialize(split, taskContext); int rowsParsed = 0; while (recordReader.nextKeyValue() && !context.limitReached(args.limit)) { HiveReadableRecord record = recordReader.getCurrentValue(); if (args.parser.parseOnly) { rowParser.parse(record); } else { recordPrinter.printRecord(record, context.schema.numColumns(), context, args); } ++rowsParsed; if (context.rowsParsed.incrementAndGet() >= args.limit) { break; } if (rowsParsed % args.metricsOpts.updateRows == 0) { context.stats.addRows(args.metricsOpts.updateRows); rowsParsed = 0; } } context.stats.addRows(rowsParsed); }
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/** * Generate random data, compress it, index and md5 hash the data. * Then read it all back and md5 that too, to verify that it all went ok. * /*from w ww .j av a 2 s .c o m*/ * @param testWithIndex Should we index or not? * @param charsToOutput How many characters of random data should we output. * @throws IOException * @throws NoSuchAlgorithmException * @throws InterruptedException */ private void runTest(boolean testWithIndex, int charsToOutput) throws IOException, NoSuchAlgorithmException, InterruptedException { if (!GPLNativeCodeLoader.isNativeCodeLoaded()) { LOG.warn("Cannot run this test without the native lzo libraries"); return; } Configuration conf = new Configuration(); conf.setLong("fs.local.block.size", charsToOutput / 2); // reducing block size to force a split of the tiny file conf.set("io.compression.codecs", LzopCodec.class.getName()); FileSystem localFs = FileSystem.getLocal(conf); localFs.delete(outputDir, true); localFs.mkdirs(outputDir); Job job = new Job(conf); TextOutputFormat.setCompressOutput(job, true); TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class); TextOutputFormat.setOutputPath(job, outputDir); TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2)); // create some input data byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput); if (testWithIndex) { Path lzoFile = new Path(outputDir, lzoFileName); LzoTextInputFormat.createIndex(localFs, lzoFile); } LzoTextInputFormat inputFormat = new LzoTextInputFormat(); TextInputFormat.setInputPaths(job, outputDir); List<InputSplit> is = inputFormat.getSplits(job); //verify we have the right number of lzo chunks if (testWithIndex && OUTPUT_BIG == charsToOutput) { assertEquals(3, is.size()); } else { assertEquals(1, is.size()); } // let's read it all and calculate the md5 hash for (InputSplit inputSplit : is) { RecordReader<LongWritable, Text> rr = 
inputFormat.createRecordReader(inputSplit, attemptContext); rr.initialize(inputSplit, attemptContext); while (rr.nextKeyValue()) { Text value = rr.getCurrentValue(); md5.update(value.getBytes(), 0, value.getLength()); } rr.close(); } localFs.close(); assertTrue(Arrays.equals(expectedMd5, md5.digest())); }
From source file:com.ikanow.aleph2.search_service.elasticsearch.hadoop.assets.TestAleph2EsInputFormat.java
License:Apache License
@Test public void test_Aleph2EsRecordReader_objectConversion() throws IOException, InterruptedException { @SuppressWarnings("rawtypes") final RecordReader mock_shard_record_reader = Mockito.mock(RecordReader.class); // mock returns Text key, MapWritable value Mockito.when(mock_shard_record_reader.getCurrentKey()).thenReturn(new Text("text_test")); final MapWritable test_out = new MapWritable(); test_out.put(new Text("val_key_text"), new Text("val_val_text")); Mockito.when(mock_shard_record_reader.getCurrentValue()).thenReturn(test_out); final Aleph2EsRecordReader reader_under_test = new Aleph2EsRecordReader(mock_shard_record_reader); final String key = reader_under_test.getCurrentKey(); assertEquals(String.class, key.getClass()); assertEquals("text_test", key); final Tuple2<Long, IBatchRecord> value = reader_under_test.getCurrentValue(); assertEquals(0L, value._1().longValue()); // (so something breaks in here when/if we put some logic in) assertEquals(Optional.empty(), value._2().getContent()); final JsonNode json_val = value._2().getJson(); assertTrue("Is object: " + json_val, json_val.isObject()); assertEquals("val_val_text", json_val.get("val_key_text").asText()); assertEquals("text_test", json_val.get("_id").asText()); }
From source file:com.ikanow.aleph2.v1.document_db.hadoop.assets.TestAleph2V1InputFormat.java
License:Apache License
@Test public void test_V1DocumentDbRecordReader_objectConversion() throws IOException, InterruptedException { @SuppressWarnings("unchecked") final RecordReader<Object, BSONObject> mock_record_reader = (RecordReader<Object, BSONObject>) Mockito .mock(RecordReader.class); Mockito.when(mock_record_reader.getCurrentKey()).thenReturn("text_test"); final BasicDBObject test_ret = new BasicDBObject(); test_ret.put("val_key_text", "val_val_text"); Mockito.when(mock_record_reader.getCurrentValue()).thenReturn(test_ret); try (final V1DocumentDbRecordReader reader_under_test = new V1DocumentDbRecordReader(mock_record_reader)) { final String key = reader_under_test.getCurrentKey(); assertEquals(String.class, key.getClass()); assertEquals("text_test", key); final Tuple2<Long, IBatchRecord> value = reader_under_test.getCurrentValue(); assertEquals(0L, value._1().longValue()); // (so something breaks in here when/if we put some logic in) assertEquals(Optional.empty(), value._2().getContent()); final JsonNode json_val = value._2().getJson(); assertTrue("Is object: " + json_val, json_val.isObject()); assertEquals("val_val_text", json_val.get("val_key_text").asText()); }//from w w w . ja va2 s . c o m }