Example usage for org.apache.hadoop.io NullWritable get

List of usage examples for org.apache.hadoop.io NullWritable get

Introduction

On this page you can find example usages of org.apache.hadoop.io NullWritable get.

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
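
Before the individual examples, here is a minimal, self-contained sketch of the typical pattern (the NullKeyMapper class below is a hypothetical illustration, not taken from the sources that follow): because NullWritable carries no data, the single shared instance returned by get() can be reused freely, for example as a placeholder output key so that only the values are written.

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class NullKeyMapper extends Mapper<Object, Text, NullWritable, Text> {
    // get() always returns the same shared singleton; NullWritable serializes to zero bytes.
    private final NullWritable nada = NullWritable.get();

    @Override
    protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Emit only the value; the NullWritable key acts as a "no key" placeholder.
        context.write(nada, value);
    }
}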

Usage

From source file:org.apache.orc.bench.ColumnProjectionBenchmark.java

License:Apache License

@Benchmark
public void parquet(ExtraCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    if ("taxi".equals(dataset)) {
        conf.set("columns", "vendor_id,pickup_time");
        conf.set("columns.types", "int,timestamp");
    } else if ("sales".equals(dataset)) {
        conf.set("columns", "sales_id,customer_id");
        conf.set("columns.types", "bigint,bigint");
    } else if ("github".equals(dataset)) {
        conf.set("columns", "actor,created_at");
        conf.set("columns.types",
                "struct<avatar_url:string,gravatar_id:string," + "id:int,login:string,url:string>,timestamp");
    } else {
        throw new IllegalArgumentException("Unknown data set " + dataset);
    }
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.records += 1;
    }
    recordReader.close();
    counters.bytesRead += statistics.getBytesRead();
    counters.reads += statistics.getReadOps();
    counters.invocations += 1;
}

From source file:org.apache.orc.bench.FullReadBenchmark.java

License:Apache License

@Benchmark
public void parquet(ExtraCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.records += 1;
    }
    recordReader.close();
    counters.bytesRead += statistics.getBytesRead();
    counters.reads += statistics.getReadOps();
    counters.invocations += 1;
}

From source file:org.apache.orc.bench.hive.ColumnProjectionBenchmark.java

License:Apache License

@Benchmark
public void parquet(ReadCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    if ("taxi".equals(dataset)) {
        conf.set("columns", "vendor_id,pickup_time");
        conf.set("columns.types", "int,timestamp");
    } else if ("sales".equals(dataset)) {
        conf.set("columns", "sales_id,customer_id");
        conf.set("columns.types", "bigint,bigint");
    } else if ("github".equals(dataset)) {
        conf.set("columns", "actor,created_at");
        conf.set("columns.types",
                "struct<avatar_url:string,gravatar_id:string," + "id:int,login:string,url:string>,timestamp");
    } else {
        throw new IllegalArgumentException("Unknown data set " + dataset);
    }
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.addRecords(1);
    }
    recordReader.close();
    counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
    counters.addInvocation();
}

From source file:org.apache.orc.bench.hive.FullReadBenchmark.java

License:Apache License

@Benchmark
public void parquet(ReadCounters counters) throws Exception {
    JobConf conf = new JobConf();
    conf.set("fs.track.impl", TrackingLocalFileSystem.class.getName());
    conf.set("fs.defaultFS", "track:///");
    Path path = Utilities.getVariant(root, dataset, "parquet", compression);
    FileSystem.Statistics statistics = FileSystem.getStatistics("track:///", TrackingLocalFileSystem.class);
    statistics.reset();
    ParquetInputFormat<ArrayWritable> inputFormat = new ParquetInputFormat<>(DataWritableReadSupport.class);

    NullWritable nada = NullWritable.get();
    FileSplit split = new FileSplit(path, 0, Long.MAX_VALUE, new String[] {});
    org.apache.hadoop.mapred.RecordReader<NullWritable, ArrayWritable> recordReader = new ParquetRecordReaderWrapper(
            inputFormat, split, conf, Reporter.NULL);
    ArrayWritable value = recordReader.createValue();
    while (recordReader.next(nada, value)) {
        counters.addRecords(1);
    }
    recordReader.close();
    counters.addBytes(statistics.getReadOps(), statistics.getBytesRead());
    counters.addInvocation();
}

From source file:org.apache.orc.mapred.TestMrUnit.java

License:Apache License

@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
            OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    mapper.configure(conf);
    MyReducer reducer = new MyReducer();
    reducer.configure(conf);
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver = new MapReduceDriver<>(
            mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct
            .createValue(TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}

From source file:org.apache.orc.mapred.TestOrcFileEvolution.java

License:Apache License

private void checkEvolution(String writerType, String readerType, Object inputRow, Object expectedOutput,
        boolean tolerateSchema) {
    TypeDescription readTypeDescr = TypeDescription.fromString(readerType);
    TypeDescription writerTypeDescr = TypeDescription.fromString(writerType);

    OrcStruct inputStruct = assembleStruct(writerTypeDescr, inputRow);
    OrcStruct expectedStruct = assembleStruct(readTypeDescr, expectedOutput);
    try {
        Writer writer = OrcFile.createWriter(testFilePath,
                OrcFile.writerOptions(conf).setSchema(writerTypeDescr).stripeSize(100000).bufferSize(10000)
                        .version(OrcFile.Version.CURRENT));

        OrcMapredRecordWriter<OrcStruct> recordWriter = new OrcMapredRecordWriter<OrcStruct>(writer);
        recordWriter.write(NullWritable.get(), inputStruct);
        recordWriter.close(mock(Reporter.class));
        Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
        OrcMapredRecordReader<OrcStruct> recordReader = new OrcMapredRecordReader<>(reader,
                reader.options().schema(readTypeDescr).tolerateMissingSchema(tolerateSchema));
        OrcStruct result = recordReader.createValue();
        recordReader.next(recordReader.createKey(), result);
        assertEquals(expectedStruct, result);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:org.apache.orc.mapred.TestOrcOutputFormat.java

License:Apache License

@Test
public void testAllTypes() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    final String typeStr = "struct<b1:binary,b2:boolean,b3:tinyint,"
            + "c:char(10),d1:date,d2:decimal(20,5),d3:double,fff:float,int:int,"
            + "l:array<bigint>,map:map<smallint,string>,"
            + "str:struct<u:uniontype<timestamp,varchar(100)>>,ts:timestamp>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);

    // build a row object
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    ((BytesWritable) row.getFieldValue(0)).set(new byte[] { 1, 2, 3, 4 }, 0, 4);
    ((BooleanWritable) row.getFieldValue(1)).set(true);
    ((ByteWritable) row.getFieldValue(2)).set((byte) 23);
    ((Text) row.getFieldValue(3)).set("aaabbbcccddd");
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
    ((DateWritable) row.getFieldValue(4)).set(DateWritable.millisToDays(format.parse("2016-04-01").getTime()));
    ((HiveDecimalWritable) row.getFieldValue(5)).set(new HiveDecimalWritable("1.23"));
    ((DoubleWritable) row.getFieldValue(6)).set(1.5);
    ((FloatWritable) row.getFieldValue(7)).set(4.5f);
    ((IntWritable) row.getFieldValue(8)).set(31415);
    OrcList<LongWritable> longList = (OrcList<LongWritable>) row.getFieldValue(9);
    longList.add(new LongWritable(123));
    longList.add(new LongWritable(456));
    OrcMap<ShortWritable, Text> map = (OrcMap<ShortWritable, Text>) row.getFieldValue(10);
    map.put(new ShortWritable((short) 1000), new Text("aaaa"));
    map.put(new ShortWritable((short) 123), new Text("bbbb"));
    OrcStruct struct = (OrcStruct) row.getFieldValue(11);
    OrcUnion union = (OrcUnion) struct.getFieldValue(0);
    union.set((byte) 1, new Text("abcde"));
    ((OrcTimestamp) row.getFieldValue(12)).set("1996-12-11 15:00:00");
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, OrcStruct> writer = new OrcOutputFormat<OrcStruct>().getRecordWriter(fs, conf,
            "all.orc", Reporter.NULL);
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10));
        writer.write(nada, row);
    }
    union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
    for (int r = 0; r < 10; ++r) {
        row.setFieldValue(8, new IntWritable(r * 10 + 100));
        writer.write(nada, row);
    }
    OrcStruct row2 = new OrcStruct(type);
    writer.write(nada, row2);
    row.setFieldValue(8, new IntWritable(210));
    writer.write(nada, row);
    writer.close(Reporter.NULL);

    FileSplit split = new FileSplit(new Path(workDir, "all.orc"), 0, 100000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().getRecordReader(split, conf,
            Reporter.NULL);
    nada = reader.createKey();
    row = reader.createValue();
    for (int r = 0; r < 22; ++r) {
        assertEquals(true, reader.next(nada, row));
        if (r == 20) {
            for (int c = 0; c < 12; ++c) {
                assertEquals(null, row.getFieldValue(c));
            }
        } else {
            assertEquals(new BytesWritable(new byte[] { 1, 2, 3, 4 }), row.getFieldValue(0));
            assertEquals(new BooleanWritable(true), row.getFieldValue(1));
            assertEquals(new ByteWritable((byte) 23), row.getFieldValue(2));
            assertEquals(new Text("aaabbbcccd"), row.getFieldValue(3));
            assertEquals(new DateWritable(DateWritable.millisToDays(format.parse("2016-04-01").getTime())),
                    row.getFieldValue(4));
            assertEquals(new HiveDecimalWritable("1.23"), row.getFieldValue(5));
            assertEquals(new DoubleWritable(1.5), row.getFieldValue(6));
            assertEquals(new FloatWritable(4.5f), row.getFieldValue(7));
            assertEquals(new IntWritable(r * 10), row.getFieldValue(8));
            assertEquals(longList, row.getFieldValue(9));
            assertEquals(map, row.getFieldValue(10));
            if (r < 10) {
                union.set((byte) 1, new Text("abcde"));
            } else {
                union.set((byte) 0, new OrcTimestamp("2011-12-25 12:34:56"));
            }
            assertEquals("row " + r, struct, row.getFieldValue(11));
            assertEquals("row " + r, new OrcTimestamp("1996-12-11 15:00:00"), row.getFieldValue(12));
        }
    }
    assertEquals(false, reader.next(nada, row));
}

From source file:org.apache.orc.mapred.TestOrcOutputFormat.java

License:Apache License

/**
 * Test the case where the top level isn't a struct, but a long.
 */
@Test
public void testLongRoot() throws Exception {
    conf.set("mapreduce.task.attempt.id", "attempt_20160101_0001_m_000001_0");
    conf.setOutputCommitter(NullOutputCommitter.class);
    conf.set(OrcConf.COMPRESS.getAttribute(), "SNAPPY");
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setInt(OrcConf.BUFFER_SIZE.getAttribute(), 64 * 1024);
    conf.set(OrcConf.WRITE_FORMAT.getAttribute(), "0.11");
    final String typeStr = "bigint";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    FileOutputFormat.setOutputPath(conf, workDir);
    TypeDescription type = TypeDescription.fromString(typeStr);
    LongWritable value = new LongWritable();
    NullWritable nada = NullWritable.get();
    RecordWriter<NullWritable, LongWritable> writer = new OrcOutputFormat<LongWritable>().getRecordWriter(fs,
            conf, "long.orc", Reporter.NULL);
    for (long lo = 0; lo < 2000; ++lo) {
        value.set(lo);
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);

    Path path = new Path(workDir, "long.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(CompressionKind.SNAPPY, file.getCompressionKind());
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(1000, file.getRowIndexStride());
    assertEquals(64 * 1024, file.getCompressionSize());
    assertEquals(OrcFile.Version.V_0_11, file.getFileVersion());
    FileSplit split = new FileSplit(path, 0, 100000, new String[0]);
    RecordReader<NullWritable, LongWritable> reader = new OrcInputFormat<LongWritable>().getRecordReader(split,
            conf, Reporter.NULL);
    nada = reader.createKey();
    value = reader.createValue();
    for (long lo = 0; lo < 2000; ++lo) {
        assertEquals(true, reader.next(nada, value));
        assertEquals(lo, value.get());
    }
    assertEquals(false, reader.next(nada, value));
}

From source file:org.apache.orc.mapred.TestOrcOutputFormat.java

License:Apache License

/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setOutputCommitter(NullOutputCommitter.class);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(fs, conf, "key.orc",
            Reporter.NULL);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(Reporter.NULL);
    Path path = new Path(workDir, "key.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file:org.apache.orc.mapred.TestOrcOutputFormat.java

License:Apache License

/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.set("mapreduce.task.attempt.id", "attempt_jt0_0_m_0_0");
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setOutputCommitter(NullOutputCommitter.class);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(fs, conf, "value.orc",
            Reporter.NULL);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(Reporter.NULL);
    Path path = new Path(workDir, "value.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}