Example usage for org.apache.hadoop.io NullWritable get

Introduction

On this page you can find example usage for org.apache.hadoop.io.NullWritable.get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.
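
As a minimal sketch of how the singleton is typically used, the snippet below writes a small SequenceFile whose values are all the shared NullWritable instance. The class name, output path, and local Configuration are placeholder assumptions for illustration; they are not taken from the usage examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;

public class NullWritableGetExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/nullwritable-demo.seq"); // placeholder output path

        // NullWritable.get() always returns the same immutable instance,
        // so it can be reused for every record that carries no value.
        NullWritable nothing = NullWritable.get();

        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(path),
                SequenceFile.Writer.keyClass(IntWritable.class),
                SequenceFile.Writer.valueClass(NullWritable.class))) {
            for (int i = 0; i < 10; ++i) {
                writer.append(new IntWritable(i), nothing);
            }
        }
    }
}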

Usage

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

@Test
public void testPredicatePushdown() throws Exception {
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    final String typeStr = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 4000 rows with the integer and the binary string
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 4000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new Text(Integer.toBinaryString(r)));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    OrcInputFormat.setSearchArgument(conf,
            SearchArgumentFactory.newBuilder()
                    .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999)).build(),
            new String[] { null, "i", "s" });
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split,
            attemptContext);
    // the sarg should cause the reader to skip all rows except 1000 to 1999
    for (int r = 1000; r < 2000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString());
    }
    assertEquals(false, reader.nextKeyValue());
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

@Test
public void testColumnSelection() throws Exception {
    String typeStr = "struct<i:int,j:int,k:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 3000 rows with three integer columns
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new IntWritable(r * 2));
        row.setFieldValue(2, new IntWritable(r * 3));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2");
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader = new OrcInputFormat<OrcStruct>().createRecordReader(split,
            attemptContext);
    // only columns 0 and 2 were included, so field 1 should be read back as null
    for (int r = 0; r < 3000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(null, row.getFieldValue(1));
        assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get());
    }
    assertEquals(false, reader.nextKeyValue());
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file: org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java

License: Apache License

/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);

    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}

From source file: org.apache.orc.mapreduce.TestMrUnit.java

License: Apache License

@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
            OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    MyReducer reducer = new MyReducer();
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver = new MapReduceDriver<>(
            mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct
            .createValue(TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}

From source file: org.apache.phoenix.hive.HivePhoenixRecordReader.java

License: Apache License

public boolean next(NullWritable key, T val) throws IOException {
    if (key == null) {
        key = NullWritable.get();
    }
    if (this.value == null) {
        this.value = (T) ((DBWritable) ReflectionUtils.newInstance(this.inputClass, this.configuration));
    }
    Preconditions.checkNotNull(this.resultSet);
    try {
        if (!this.resultSet.next()) {
            return false;
        }
        this.value.readFields(this.resultSet);
        LOG.debug("PhoenixRecordReader resultset size " + this.resultSet.getFetchSize());
        return true;
    } catch (SQLException e) {
        LOG.error(String.format(" Error [%s] occurred while iterating over the resultset. ",
                new Object[] { e.getMessage() }));
        Throwables.propagate(e);
    }
    return false;
}

From source file: org.apache.phoenix.hive.mapreduce.PhoenixRecordReader.java

License: Apache License

@Override
public WritableComparable createKey() {
    if (isTransactional) {
        key = new PhoenixRowKey();
    } else {
        key = NullWritable.get();
    }

    return key;
}

From source file: org.apache.phoenix.hive.mapreduce.PhoenixRecordWriter.java

License: Apache License

@SuppressWarnings("unchecked")
@Override
public void write(Writable w) throws IOException {
    PhoenixResultWritable row = (PhoenixResultWritable) w;

    write(NullWritable.get(), (T) row);
}

From source file: org.apache.phoenix.mapreduce.MultiHfileOutputFormat.java

License: Apache License

private static void writePartitions(Configuration conf, Path partitionsPath,
        Set<TableRowkeyPair> tablesStartKeys) throws IOException {

    LOG.info("Writing partition information to " + partitionsPath);
    if (tablesStartKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<TableRowkeyPair> sorted = new TreeSet<TableRowkeyPair>(tablesStartKeys);

    TableRowkeyPair first = sorted.first();
    if (!first.getRowkey().equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.getRowkey().get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath, TableRowkeyPair.class,
            NullWritable.class);

    try {
        for (TableRowkeyPair startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }

}

From source file: org.apache.phoenix.mapreduce.PhoenixRecordReader.java

License: Apache License

@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (key == null) {
        key = NullWritable.get();
    }
    if (value == null) {
        value = ReflectionUtils.newInstance(inputClass, this.configuration);
    }
    Preconditions.checkNotNull(this.resultSet);
    try {
        if (!resultSet.next()) {
            return false;
        }
        value.readFields(resultSet);
        return true;
    } catch (SQLException e) {
        LOG.error(String.format(" Error [%s] occurred while iterating over the resultset. ", e.getMessage()));
        throw new RuntimeException(e);
    }
}