List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
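NullWritable is a zero-byte Writable placeholder: get() always returns the same immutable singleton, so it is passed to record readers and writers wherever a key or value slot must be filled but carries no data, as in the examples below. A minimal, self-contained sketch (the output path and class name are illustrative, not taken from the sources below) that writes a SequenceFile whose keys are all the NullWritable singleton:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;

public class NullWritableExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // NullWritable.get() always returns the same singleton; it serializes to zero bytes.
    NullWritable nada = NullWritable.get();
    Path path = new Path("/tmp/values-only.seq"); // illustrative path
    try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
        SequenceFile.Writer.file(path),
        SequenceFile.Writer.keyClass(NullWritable.class),
        SequenceFile.Writer.valueClass(Text.class))) {
      // Use the null key so only the values occupy space in the file.
      writer.append(nada, new Text("first record"));
      writer.append(nada, new Text("second record"));
    }
  }
}

Because the singleton serializes to nothing, using it as the key adds no bytes to the file and avoids allocating throwaway Writable objects.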
From source file:org.apache.orc.bench.ColumnProjectionBenchmark.java
License:Apache License
@Benchmark
public void parquet(ExtraCounters counters) throws Exception {
  JobConf conf = new JobConf();
  conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
  conf.set("fs.defaultFS", "track:///");
  if ("taxi".equals(dataset)) {
    conf.set("columns", "vendor_id,pickup_time");
    conf.set("columns.types", "int,timestamp");
  } else if ("sales".equals(dataset)) {
    conf.set("columns", "sales_id,customer_id");
    conf.set("columns.types", "bigint,bigint");
  } else if ("github".equals(dataset)) {
    conf.set("columns", "actor,created_at");
    conf.set("columns.types", "struct<avatar_url:string,gravatar_id:string,"
        + "id:int,login:string,url:string>,timestamp");
  } else {
    throw new IllegalArgumentException("Unknown data set " + dataset);
  }
  Path path = Utilities.getVariant(root, dataset, "parquet", compression);
  FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
  statistics.reset();
  ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);
  NullWritable nada = NullWritable.get();
  FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
  org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader =
      new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
  ArrayWritable value = recordReader.createValue();
  while (recordReader.next(nada, value)) {
    counters.records += 1;
  }
  recordReader.close();
  counters.bytesRead += statistics.getBytesRead();
  counters.reads += statistics.getReadOps();
  counters.invocations += 1;
}
From source file:org.apache.orc.bench.FullReadBenchmark.java
License:Apache License
@Benchmark
public void parquet(ExtraCounters counters) throws Exception {
  JobConf conf = new JobConf();
  conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
  conf.set("fs.defaultFS", "track:///");
  Path path = Utilities.getVariant(root, dataset, "parquet", compression);
  FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
  statistics.reset();
  ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);
  NullWritable nada = NullWritable.get();
  FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
  org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader =
      new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
  ArrayWritable value = recordReader.createValue();
  while (recordReader.next(nada, value)) {
    counters.records += 1;
  }
  recordReader.close();
  counters.bytesRead += statistics.getBytesRead();
  counters.reads += statistics.getReadOps();
  counters.invocations += 1;
}
From source file:org.apache.orc.bench.hive.ColumnProjectionBenchmark.java
License:Apache License
@Benchmark
public void parquet(ReadCounters counters) throws Exception {
  JobConf conf = new JobConf();
  conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
  conf.set("fs.defaultFS", "track:///");
  if ("taxi".equals(dataset)) {
    conf.set("columns", "vendor_id,pickup_time");
    conf.set("columns.types", "int,timestamp");
  } else if ("sales".equals(dataset)) {
    conf.set("columns", "sales_id,customer_id");
    conf.set("columns.types", "bigint,bigint");
  } else if ("github".equals(dataset)) {
    conf.set("columns", "actor,created_at");
    conf.set("columns.types", "struct<avatar_url:string,gravatar_id:string,"
        + "id:int,login:string,url:string>,timestamp");
  } else {
    throw new IllegalArgumentException("Unknown data set " + dataset);
  }
  Path path = Utilities.getVariant(root, dataset, "parquet", compression);
  FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
  statistics.reset();
  ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);
  NullWritable nada = NullWritable.get();
  FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
  org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader =
      new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
  ArrayWritable value = recordReader.createValue();
  while (recordReader.next(nada, value)) {
    counters.addRecords(1);
  }
  recordReader.close();
  counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
  counters.addInvocation();
}
From source file:org.apache.orc.bench.hive.FullReadBenchmark.java
License:Apache License
@Benchmark
public void parquet(ReadCounters counters) throws Exception {
  JobConf conf = new JobConf();
  conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
  conf.set("fs.defaultFS", "track:///");
  Path path = Utilities.getVariant(root, dataset, "parquet", compression);
  FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
  statistics.reset();
  ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);
  NullWritable nada = NullWritable.get();
  FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[]{});
  org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader =
      new ParquetRecordReaderWrapper(inputFormat, split, conf, Reporter.NULL);
  ArrayWritable value = recordReader.createValue();
  while (recordReader.next(nada, value)) {
    counters.addRecords(1);
  }
  recordReader.close();
  counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
  counters.addInvocation();
}
From source file:org.apache.orc.mapred.TestMrUnit.java
License:Apache License
@Test
public void testMapred() throws IOException {
  conf.set("io.serializations",
      OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
  OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
  OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
  MyMapper mapper = new MyMapper();
  mapper.configure(conf);
  MyReducer reducer = new MyReducer();
  reducer.configure(conf);
  MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver =
      new MapReduceDriver<>(mapper, reducer);
  driver.setConfiguration(conf);
  NullWritable nada = NullWritable.get();
  OrcStruct input = (OrcStruct) OrcStruct.createValue(
      TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
  IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
  IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
  Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);
  // generate the input stream
  for (int r = 0; r < 20; ++r) {
    x.set(100 - (r / 4));
    y.set(r * 2);
    z.set(Integer.toHexString(r));
    driver.withInput(nada, input);
  }
  // generate the expected outputs
  for (int g = 4; g >= 0; --g) {
    x.set(100 - g);
    for (int i = 0; i < 4; ++i) {
      int r = g * 4 + i;
      y.set(r * 2);
      z.set(Integer.toHexString(r));
      driver.withOutput(nada, input);
    }
  }
  driver.runTest();
}
From source file:org.apache.orc.mapred.TestOrcFileEvolution.java
License:Apache License
private void checkEvolution(String writerType, String readerType, Object inputRow, Object expectedOutput,
    boolean tolerateSchema) {
  TypeDescription readTypeDescr = TypeDescription.fromString(readerType);
  TypeDescription writerTypeDescr = TypeDescription.fromString(writerType);
  OrcStruct inputStruct = assembleStruct(writerTypeDescr, inputRow);
  OrcStruct expectedStruct = assembleStruct(readTypeDescr, expectedOutput);
  try {
    Writer writer = OrcFile.createWriter(testFilePath,
        OrcFile.writerOptions(conf).setSchema(writerTypeDescr).stripeSize(100000).bufferSize(10000)
            .version(OrcFile.Version.CURRENT));
    OrcMapredRecordWriter<OrcStruct> recordWriter = new OrcMapredRecordWriter<OrcStruct>(writer);
    recordWriter.write(NullWritable.get(), inputStruct);
    recordWriter.close(mock(Reporter.class));
    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
    OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(reader,
        reader.options().schema(readTypeDescr).tolerateMissingSchema(tolerateSchema));
    OrcStruct result = recordReader.createValue();
    recordReader.next(recordReader.createKey(), result);
    assertEquals(expectedStruct, result);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
@Test
public void testAllTypes() throws Exception {
  conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
  conf.setOutputCommitter(NullOutputCommitter.class);
  final String typeStr = "struct<b1:binary,b2:boolean,b3:tinyint,"
      + "c:char(10),d1:date,d2:decimal(20,5),d3:double,fff:float,int:int,"
      + "l:array<bigint>,map:map<smallint,string>,"
      + "str:struct<u:uniontype<timestamp,varchar(100)>>,ts:timestamp>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  FileOutputFormat.setOutputPath(conf, workDir);
  TypeDescription type = TypeDescription.fromString(typeStr);
  // build a row object
  OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
  ((BytesWritable) row.getFieldValue(0)).set(new byte[]{1, 2, 3, 4}, 0, 4);
  ((BooleanWritable) row.getFieldValue(1)).set(true);
  ((ByteWritable) row.getFieldValue(2)).set((byte) 23);
  ((Text) row.getFieldValue(3)).set("aaabbbcccddd");
  SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
  ((DateWritable) row.getFieldValue(4)).set(DateWritable.millisToDays(format.parse("2016-04-01").getTime()));
  ((HiveDecimalWritable) row.getFieldValue(5)).set(new HiveDecimalWritable("1.23"));
  ((DoubleWritable) row.getFieldValue(6)).set(1.5);
  ((FloatWritable) row.getFieldValue(7)).set(4.5f);
  ((IntWritable) row.getFieldValue(8)).set(31415);
  OrcList<LongWritable> longList = (OrcList<LongWritable>) row.getFieldValue(9);
  longList.add(new LongWritable(123));
  longList.add(new LongWritable(456));
  OrcMap<ShortWritable, Text> map = (OrcMap<ShortWritable, Text>) row.getFieldValue(10);
  map.put(new ShortWritable((short) 1000), new Text("aaaa"));
  map.put(new ShortWritable((short) 123), new Text("bbbb"));
  OrcStruct struct = (OrcStruct) row.getFieldValue(11);
  OrcUnion union = (OrcUnion) struct.getFieldValue(0);
  union.set((byte) 1, new Text("abcde"));
  ((OrcTimestamp) row.getFieldValue(12)).set("1996-12-11 15:00:00");
  NullWritable nada = NullWritable.get();
  RecordWriter<NullWritable, OrcStruct> writer =
      new OrcOutputFormat<OrcStruct>().getRecordWriter(fs, conf, "all.orc", Reporter.NULL);
  for (int r = 0; r < 10; ++r) {
    row.setFieldValue(8, new IntWritable(r * 10));
    writer.write(nada, row);
  }
  union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
  for (int r = 0; r < 10; ++r) {
    row.setFieldValue(8, new IntWritable(r * 10 + 100));
    writer.write(nada, row);
  }
  OrcStruct row2 = new OrcStruct(type);
  writer.write(nada, row2);
  row.setFieldValue(8, new IntWritable(210));
  writer.write(nada, row);
  writer.close(Reporter.NULL);
  FileSplit split = new FileSplit(new Path(workDir, "all.orc"), 0, 100000, new String[0]);
  RecordReader<NullWritable, OrcStruct> reader =
      new OrcInputFormat<OrcStruct>().getRecordReader(split, conf, Reporter.NULL);
  nada = reader.createKey();
  row = reader.createValue();
  for (int r = 0; r < 22; ++r) {
    assertEquals(true, reader.next(nada, row));
    if (r == 20) {
      for (int c = 0; c < 12; ++c) {
        assertEquals(null, row.getFieldValue(c));
      }
    } else {
      assertEquals(new BytesWritable(new byte[]{1, 2, 3, 4}), row.getFieldValue(0));
      assertEquals(new BooleanWritable(true), row.getFieldValue(1));
      assertEquals(new ByteWritable((byte) 23), row.getFieldValue(2));
      assertEquals(new Text("aaabbbcccd"), row.getFieldValue(3));
      assertEquals(new DateWritable(DateWritable.millisToDays(format.parse("2016-04-01").getTime())),
          row.getFieldValue(4));
      assertEquals(new HiveDecimalWritable("1.23"), row.getFieldValue(5));
      assertEquals(new DoubleWritable(1.5), row.getFieldValue(6));
      assertEquals(new FloatWritable(4.5f), row.getFieldValue(7));
      assertEquals(new IntWritable(r * 10), row.getFieldValue(8));
      assertEquals(longList, row.getFieldValue(9));
      assertEquals(map, row.getFieldValue(10));
      if (r < 10) {
        union.set((byte) 1, new Text("abcde"));
      } else {
        union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
      }
      assertEquals("row " + r, struct, row.getFieldValue(11));
      assertEquals("row " + r, new OrcTimestamp("1996-12-11 15:00:00"), row.getFieldValue(12));
    }
  }
  assertEquals(false, reader.next(nada, row));
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Test the case where the top level isn't a struct, but a long.
 */
@Test
public void testLongRoot() throws Exception {
  conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
  conf.setOutputCommitter(NullOutputCommitter.class);
  conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
  conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
  conf.setInt(OrcConf.BUFFER_SIZE.getAttribute(), 64 * 1024);
  conf.set(OrcConf.WRITE_FORMAT.getAttribute(), "0.11");
  final String typeStr = "bigint";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
  FileOutputFormat.setOutputPath(conf, workDir);
  TypeDescription type = TypeDescription.fromString(typeStr);
  LongWritable value = new LongWritable();
  NullWritable nada = NullWritable.get();
  RecordWriter<NullWritable, LongWritable> writer =
      new OrcOutputFormat<LongWritable>().getRecordWriter(fs, conf, "long.orc", Reporter.NULL);
  for (long lo = 0; lo < 2000; ++lo) {
    value.set(lo);
    writer.write(nada, value);
  }
  writer.close(Reporter.NULL);
  Path path = new Path(workDir, "long.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(CompressionKind.SNAPPY, file.getCompressionKind());
  assertEquals(2000, file.getNumberOfRows());
  assertEquals(1000, file.getRowIndexStride());
  assertEquals(64 * 1024, file.getCompressionSize());
  assertEquals(OrcFile.Version.V_0_11, file.getFileVersion());
  FileSplit split = new FileSplit(path, 0, 100000, new String[0]);
  RecordReader<NullWritable, LongWritable> reader =
      new OrcInputFormat<LongWritable>().getRecordReader(split, conf, Reporter.NULL);
  nada = reader.createKey();
  value = reader.createValue();
  for (long lo = 0; lo < 2000; ++lo) {
    assertEquals(true, reader.next(nada, value));
    assertEquals(lo, value.get());
  }
  assertEquals(false, reader.next(nada, value));
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
  String TYPE_STRING = "struct<i:int,s:string>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
  conf.setOutputCommitter(NullOutputCommitter.class);
  TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
  OrcKey key = new OrcKey(new OrcStruct(schema));
  RecordWriter<NullWritable, Writable> writer =
      new OrcOutputFormat<>().getRecordWriter(fs, conf, "key.orc", Reporter.NULL);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 2000; ++r) {
    ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
    writer.write(nada, key);
  }
  writer.close(Reporter.NULL);
  Path path = new Path(workDir, "key.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(2000, file.getNumberOfRows());
  assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.orc.mapred.TestOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
  conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
  conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
  String TYPE_STRING = "struct<i:int>";
  OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
  conf.setOutputCommitter(NullOutputCommitter.class);
  TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
  OrcValue value = new OrcValue(new OrcStruct(schema));
  RecordWriter<NullWritable, Writable> writer =
      new OrcOutputFormat<>().getRecordWriter(fs, conf, "value.orc", Reporter.NULL);
  NullWritable nada = NullWritable.get();
  for (int r = 0; r < 3000; ++r) {
    ((OrcStruct) value.value).setAllFields(new IntWritable(r));
    writer.write(nada, value);
  }
  writer.close(Reporter.NULL);
  Path path = new Path(workDir, "value.orc");
  Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
  assertEquals(3000, file.getNumberOfRows());
  assertEquals(TYPE_STRING, file.getSchema().toString());
}