Usage examples for org.apache.hadoop.io.NullWritable.get()

public static NullWritable get() — returns the single shared NullWritable instance. NullWritable carries no data and serializes to zero bytes, which makes it the standard placeholder when a MapReduce stage only needs one side of the key/value pair.
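Before the collected examples below, here is a minimal sketch (not taken from any of the listed source files; the class name ValueOnlyMapper is illustrative) of the most common pattern: using NullWritable.get() as the output key when a job only cares about the values.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ValueOnlyMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
    // NullWritable.get() returns the shared singleton; it holds no state and
    // writes nothing to the wire, so it is the idiomatic "empty" key.
    private static final NullWritable NADA = NullWritable.get();

    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // Every record shares the same NullWritable key; only the value matters.
        context.write(NADA, line);
    }
}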
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test
public void testPredicatePushdown() throws Exception {
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    final String typeStr = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 4000 rows with the integer and the binary string
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 4000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new Text(Integer.toBinaryString(r)));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    OrcInputFormat.setSearchArgument(conf,
        SearchArgumentFactory.newBuilder()
            .between("i", PredicateLeaf.Type.LONG, new Long(1500), new Long(1999))
            .build(),
        new String[] { null, "i", "s" });
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader =
        new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

    // the sarg should cause it to skip over the rows except 1000 to 2000
    for (int r = 1000; r < 2000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(Integer.toBinaryString(r), row.getFieldValue(1).toString());
    }
    assertEquals(false, reader.nextKeyValue());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
@Test
public void testColumnSelection() throws Exception {
    String typeStr = "struct<i:int,j:int,k:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, typeStr);
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    conf.setInt(OrcConf.ROW_INDEX_STRIDE.getAttribute(), 1000);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    OutputFormat<NullWritable, OrcStruct> outputFormat = new OrcOutputFormat<OrcStruct>();
    RecordWriter<NullWritable, OrcStruct> writer = outputFormat.getRecordWriter(attemptContext);

    // write 3000 rows with three integer columns
    TypeDescription type = TypeDescription.fromString(typeStr);
    OrcStruct row = (OrcStruct) OrcStruct.createValue(type);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        row.setFieldValue(0, new IntWritable(r));
        row.setFieldValue(1, new IntWritable(r * 2));
        row.setFieldValue(2, new IntWritable(r * 3));
        writer.write(nada, row);
    }
    writer.close(attemptContext);

    conf.set(OrcConf.INCLUDE_COLUMNS.getAttribute(), "0,2");
    FileSplit split = new FileSplit(new Path(workDir, "part-m-00000.orc"), 0, 1000000, new String[0]);
    RecordReader<NullWritable, OrcStruct> reader =
        new OrcInputFormat<OrcStruct>().createRecordReader(split, attemptContext);

    // only columns 0 and 2 were selected, so column 1 (j) should come back null
    for (int r = 0; r < 3000; ++r) {
        assertEquals(true, reader.nextKeyValue());
        row = reader.getCurrentValue();
        assertEquals(r, ((IntWritable) row.getFieldValue(0)).get());
        assertEquals(null, row.getFieldValue(1));
        assertEquals(r * 3, ((IntWritable) row.getFieldValue(2)).get());
    }
    assertEquals(false, reader.nextKeyValue());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcKey
 * @throws Exception
 */
@Test
public void testOrcKey() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int,s:string>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcKey key = new OrcKey(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 2000; ++r) {
        ((OrcStruct) key.key).setAllFields(new IntWritable(r), new Text(Integer.toString(r)));
        writer.write(nada, key);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(2000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.orc.mapreduce.TestMapreduceOrcOutputFormat.java
License:Apache License
/**
 * Make sure that the writer ignores the OrcValue
 * @throws Exception
 */
@Test
public void testOrcValue() throws Exception {
    conf.set("mapreduce.output.fileoutputformat.outputdir", workDir.toString());
    String TYPE_STRING = "struct<i:int>";
    OrcConf.MAPRED_OUTPUT_SCHEMA.setString(conf, TYPE_STRING);
    conf.setBoolean(OrcOutputFormat.SKIP_TEMP_DIRECTORY, true);
    TaskAttemptID id = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 1);
    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(conf, id);
    TypeDescription schema = TypeDescription.fromString(TYPE_STRING);
    OrcValue value = new OrcValue(new OrcStruct(schema));
    RecordWriter<NullWritable, Writable> writer = new OrcOutputFormat<>().getRecordWriter(attemptContext);
    NullWritable nada = NullWritable.get();
    for (int r = 0; r < 3000; ++r) {
        ((OrcStruct) value.value).setAllFields(new IntWritable(r));
        writer.write(nada, value);
    }
    writer.close(attemptContext);
    Path path = new Path(workDir, "part-m-00000.orc");
    Reader file = OrcFile.createReader(path, OrcFile.readerOptions(conf));
    assertEquals(3000, file.getNumberOfRows());
    assertEquals(TYPE_STRING, file.getSchema().toString());
}
From source file:org.apache.orc.mapreduce.TestMrUnit.java
License:Apache License
@Test
public void testMapred() throws IOException {
    conf.set("io.serializations",
        OrcStructSerialization.class.getName() + "," + WritableSerialization.class.getName());
    OrcConf.MAPRED_SHUFFLE_KEY_SCHEMA.setString(conf, "struct<x:int,y:int>");
    OrcConf.MAPRED_SHUFFLE_VALUE_SCHEMA.setString(conf, "struct<z:string>");
    MyMapper mapper = new MyMapper();
    MyReducer reducer = new MyReducer();
    MapReduceDriver<NullWritable, OrcStruct, OrcKey, OrcValue, NullWritable, OrcStruct> driver =
        new MapReduceDriver<>(mapper, reducer);
    driver.setConfiguration(conf);
    NullWritable nada = NullWritable.get();
    OrcStruct input = (OrcStruct) OrcStruct.createValue(
        TypeDescription.fromString("struct<one:struct<x:int,y:int>,two:struct<z:string>>"));
    IntWritable x = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(0);
    IntWritable y = (IntWritable) ((OrcStruct) input.getFieldValue(0)).getFieldValue(1);
    Text z = (Text) ((OrcStruct) input.getFieldValue(1)).getFieldValue(0);

    // generate the input stream
    for (int r = 0; r < 20; ++r) {
        x.set(100 - (r / 4));
        y.set(r * 2);
        z.set(Integer.toHexString(r));
        driver.withInput(nada, input);
    }

    // generate the expected outputs
    for (int g = 4; g >= 0; --g) {
        x.set(100 - g);
        for (int i = 0; i < 4; ++i) {
            int r = g * 4 + i;
            y.set(r * 2);
            z.set(Integer.toHexString(r));
            driver.withOutput(nada, input);
        }
    }
    driver.runTest();
}
From source file:org.apache.phoenix.hive.HivePhoenixRecordReader.java
License:Apache License
public boolean next(NullWritable key, T val) throws IOException {
    if (key == null) {
        key = NullWritable.get();
    }
    if (this.value == null) {
        this.value = (T) ((DBWritable) ReflectionUtils.newInstance(this.inputClass, this.configuration));
    }
    Preconditions.checkNotNull(this.resultSet);
    try {
        if (!this.resultSet.next()) {
            return false;
        }
        this.value.readFields(this.resultSet);
        LOG.debug("PhoenixRecordReader resultset size " + this.resultSet.getFetchSize());
        return true;
    } catch (SQLException e) {
        LOG.error(String.format(" Error [%s] occurred while iterating over the resultset. ",
            new Object[] { e.getMessage() }));
        Throwables.propagate(e);
    }
    return false;
}
From source file:org.apache.phoenix.hive.mapreduce.PhoenixRecordReader.java
License:Apache License
@Override
public WritableComparable createKey() {
    if (isTransactional) {
        key = new PhoenixRowKey();
    } else {
        key = NullWritable.get();
    }
    return key;
}
From source file:org.apache.phoenix.hive.mapreduce.PhoenixRecordWriter.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from ww w.ja va 2s .c om*/ public void write(Writable w) throws IOException { PhoenixResultWritable row = (PhoenixResultWritable) w; write(NullWritable.get(), (T) row); }
From source file:org.apache.phoenix.mapreduce.MultiHfileOutputFormat.java
License:Apache License
private static void writePartitions(Configuration conf, Path partitionsPath,
        Set<TableRowkeyPair> tablesStartKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (tablesStartKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<TableRowkeyPair> sorted = new TreeSet<TableRowkeyPair>(tablesStartKeys);
    TableRowkeyPair first = sorted.first();
    if (!first.getRowkey().equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
            + Bytes.toStringBinary(first.getRowkey().get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
        TableRowkeyPair.class, NullWritable.class);
    try {
        for (TableRowkeyPair startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
From source file:org.apache.phoenix.mapreduce.PhoenixRecordReader.java
License:Apache License
@Override
public boolean nextKeyValue() throws IOException, InterruptedException {
    if (key == null) {
        key = NullWritable.get();
    }
    if (value == null) {
        value = ReflectionUtils.newInstance(inputClass, this.configuration);
    }
    Preconditions.checkNotNull(this.resultSet);
    try {
        if (!resultSet.next()) {
            return false;
        }
        value.readFields(resultSet);
        return true;
    } catch (SQLException e) {
        LOG.error(String.format(" Error [%s] occurred while iterating over the resultset. ", e.getMessage()));
        throw new RuntimeException(e);
    }
}