List of usage examples for org.apache.hadoop.io.NullWritable.get()
public static NullWritable get()
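NullWritable.get() returns the shared singleton instance of NullWritable; it is typically passed as a placeholder key or value when only one side of a key/value pair carries data. A minimal sketch of that pattern (the class and field names here are illustrative, not taken from the examples below):

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Emits each input line as the key with a NullWritable placeholder value,
// so the shuffle carries no payload beyond the line itself.
public class LineKeyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // NullWritable.get() always returns the same immutable singleton,
        // so no per-record allocation is needed.
        context.write(line, NullWritable.get());
    }
}

The examples that follow use the same call in sinks, gatherers, reducers, record readers, SerDes, and partition-file writers.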
From source file:com.maxpoint.cascading.avro.AvroScheme.java
License:Open Source License
@Override
public void sink(FlowProcess<JobConf> process,
        SinkCall<Object, OutputCollector<AvroWrapper<Record>, Writable>> call) throws IOException {
    Record record = write(call);
    call.getOutput().collect(new AvroWrapper<Record>(record), NullWritable.get());
}
From source file:com.moz.fiji.mapreduce.lib.gather.MapTypeDelimitedFileGatherer.java
License:Apache License
/**
 * Outputs flattened data without schema definitions.
 * A single line of data contains one key-value record from a fiji family, formatted as:
 * [entityid]|[timestamp]|[key]|[value]
 *
 * @param input The row data to export.
 * @param context The context to write the export to.
 * @throws IOException if there's an error.
 */
@Override
public void gather(FijiRowData input, GathererContext context) throws IOException {
    for (String key : input.getQualifiers(mFamily)) {
        NavigableMap<Long, Object> values = input.getMostRecentValue(mFamily, key);
        for (Map.Entry<Long, Object> e : values.entrySet()) {
            // Write this entry out on a single line.
            mLine.set(makeLine(input.getEntityId(), e.getKey(), key, e.getValue()));
            context.write(mLine, NullWritable.get());
        }
    }
}
From source file:com.moz.fiji.mapreduce.lib.reduce.AvroReducer.java
License:Apache License
/**
 * Subclasses can use this instead of context.write() to output Avro
 * messages directly instead of having to wrap them in AvroKey
 * container objects.
 *
 * @param value The Avro value to write.
 * @param context The reducer context.
 * @throws IOException If there is an error.
 * @throws InterruptedException If the thread is interrupted.
 */
protected void write(T value, Context context) throws IOException, InterruptedException {
    mKey.datum(value);
    context.write(mKey, NullWritable.get());
}
From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java
License:Apache License
/**
 * <p>Write out a SequenceFile that can be read by TotalOrderPartitioner
 * that contains the split points in startKeys.</p>
 *
 * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0. I had to
 * copy it because it's private.</p>
 *
 * @param conf The job configuration.
 * @param partitionsPath Output path for the SequenceFile.
 * @param startKeys The region start keys to use as the partitions.
 * @throws IOException If there is an error.
 */
public static void writePartitionFile(Configuration conf, Path partitionsPath,
        List<HFileKeyValue> startKeys) throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key),
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0.
    TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();
    sorted.addAll(startKeys);

    HFileKeyValue first = sorted.first();
    if (0 != first.getRowKey().length) {
        throw new IllegalArgumentException(
            "First region of table should have empty start row key. Instead has: "
            + Bytes.toStringBinary(first.getRowKey()));
    }
    sorted.remove(first);

    // Write the actual file.
    final SequenceFile.Writer writer = FijiMRPlatformBridge.get().newSeqFileWriter(conf, partitionsPath,
        HFileKeyValue.class, NullWritable.class);
    try {
        for (HFileKeyValue startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java
License:Apache License
/**
 * Write out a {@link org.apache.hadoop.io.SequenceFile} that can be read by
 * {@link org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner} that contains
 * the split points in startKeys.
 */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key),
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0.
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
            + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);

    // Write the actual file.
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
        ImmutableBytesWritable.class, NullWritable.class);
    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}
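For context, a partition file like the one written above is normally handed to TotalOrderPartitioner through the job configuration before the job runs. A minimal sketch of that wiring, assuming the partitionsPath argument points at the SequenceFile written by a method such as writePartitions (the helper class name is hypothetical):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

// Wires a previously written partition SequenceFile into a job so that
// reducer i only receives keys that fall into the i-th split range.
public final class PartitionSetup {
    public static void useTotalOrder(Job job, Path partitionsPath) {
        // Record the partition file location in the job configuration.
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
        // Route map output keys by range instead of by hash.
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }
}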
From source file:com.redgate.hadoop.hive.azuretables.AzureTablesRecordReader.java
License:Apache License
/**
 * Grabs the next result and processes the DynamicTableEntity into a Hive-friendly
 * MapWritable.
 *
 * @param key
 *            The RowID for the entity. Note that this is not really an Azure
 *            key, since the partition is implicit in the key.
 * @param value
 *            A MapWritable which will be populated with values from the
 *            DynamicTableEntity returned by the Azure query.
 */
public boolean next(Text key, MapWritable value) throws IOException {
    if (!results.hasNext()) {
        return false;
    }
    DynamicTableEntity entity = results.next();
    key.set(entity.getRowKey());
    for (Entry<String, EntityProperty> entry : entity.getProperties().entrySet()) {
        final EntityProperty property = entry.getValue();
        // Note that Azure table entity keys are forced to lower case for
        // matching with Hive column names.
        final String propertyKey = entry.getKey().toLowerCase();
        final String propertyValue = property.getValueAsString();
        final Writable writableValue = SERIALIZED_NULL.equals(propertyValue) ? NullWritable.get()
                : new Text(propertyValue);
        value.put(new Text(propertyKey), writableValue);
    }
    pos++;
    return true;
}
From source file:com.redgate.hadoop.hive.azuretables.AzureTablesSerDe.java
License:Apache License
@Override
public Writable serialize(final Object obj, final ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != columnNames.size()) {
        throw new SerDeException(
            String.format("Required %d columns, received %d.", columnNames.size(), fields.size()));
    }

    cachedWritable.clear();
    for (int c = 0; c < fieldCount; c++) {
        StructField structField = fields.get(c);
        if (structField != null) {
            final Object field = structInspector.getStructFieldData(obj, fields.get(c));
            final ObjectInspector fieldOI = fields.get(c).getFieldObjectInspector();
            final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
            Writable value = fieldStringOI.getPrimitiveWritableObject(field);
            if (value == null) {
                value = NullWritable.get();
            }
            cachedWritable.put(new Text(columnNames.get(c)), value);
        }
    }
    return cachedWritable;
}
From source file:com.redgate.hadoop.hive.azuretables.AzureTablesSerDe.java
License:Apache License
@Override
public Object deserialize(final Writable wr) throws SerDeException {
    if (!(wr instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable, received " + wr.getClass().getName());
    }

    final MapWritable input = (MapWritable) wr;
    final Text t = new Text();
    row.clear();
    for (int i = 0; i < fieldCount; i++) {
        t.set(columnNames.get(i));
        final Writable value = input.get(t);
        if (value != null && !NullWritable.get().equals(value)) {
            row.add(value.toString());
        } else {
            row.add(null);
        }
    }
    return row;
}
From source file:com.ricemap.spateDB.operations.Repartition.java
License:Apache License
/**
 * Repartitions a file on the local machine without MapReduce jobs.
 *
 * @param in The input file to repartition.
 * @param out The output path to write to.
 * @param stockShape The shape to use for parsing records.
 * @param blockSize The block size to use, or 0 to copy it from the input.
 * @param cells The cells to partition into.
 * @param sindex The spatial index type ("grid", "rtree", or "r+tree").
 * @param overwrite Whether to overwrite an existing output file.
 * @throws IOException
 */
public static <S extends Shape> void repartitionLocal(Path in, Path out, S stockShape, long blockSize,
        CellInfo[] cells, String sindex, boolean overwrite) throws IOException {
    FileSystem inFs = in.getFileSystem(new Configuration());
    FileSystem outFs = out.getFileSystem(new Configuration());

    // Overwrite the output file if requested
    if (outFs.exists(out)) {
        if (overwrite)
            outFs.delete(out, true);
        else
            throw new RuntimeException(
                "Output file '" + out + "' already exists and overwrite flag is not set");
    }
    outFs.mkdirs(out);

    ShapeRecordWriter<Shape> writer;
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    if (sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(out, null, null, cells, pack, expand);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        writer = new RTreeGridRecordWriter<Shape>(out, null, null, cells, pack, expand);
        writer.setStockObject(stockShape);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    FileStatus inFileStatus = inFs.getFileStatus(in);
    // Copy the block size from the source file if it's globally indexed
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, in);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(in, globalIndex.iterator().next().filename))
                .getBlockSize();
        }
    }
    if (blockSize != 0)
        ((GridRecordWriter<Shape>) writer).setBlockSize(blockSize);

    long length = inFileStatus.getLen();
    FSDataInputStream datain = inFs.open(in);
    ShapeRecordReader<S> reader = new ShapeRecordReader<S>(datain, 0, length);
    Prism c = reader.createKey();

    NullWritable dummy = NullWritable.get();

    while (reader.next(c, stockShape)) {
        writer.write(dummy, stockShape);
    }
    writer.close(null);
}
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
/** * Generates random rectangles and write the result to a file. * @param outFS - The file system that contains the output file * @param outputFile - The file name to write to. If either outFS or * outputFile is null, data is generated to the standard output * @param mbr - The whole MBR to generate in * @param shape /*from www . j a va 2s. c o m*/ * @param totalSize - The total size of the generated file * @param blocksize * @throws IOException */ public static void generateFileLocal(Path outFile, Shape shape, String sindex, long totalSize, Prism mbr, DistributionType type, int rectSize, long seed, long blocksize, boolean overwrite) throws IOException { FileSystem outFS = outFile.getFileSystem(new Configuration()); if (blocksize == 0) blocksize = outFS.getDefaultBlockSize(outFile); // Calculate the dimensions of each partition based on gindex type CellInfo[] cells; if (sindex == null) { cells = new CellInfo[] { new CellInfo(1, mbr) }; } else if (sindex.equals("grid")) { int num_partitions = Repartition.calculateNumberOfPartitions(new Configuration(), totalSize, outFS, outFile, blocksize); GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2); gridInfo.calculateCellDimensions(num_partitions); cells = gridInfo.getAllCells(); } else { throw new RuntimeException("Unsupported spatial index: " + sindex); } // Overwrite output file if (outFS.exists(outFile)) { if (overwrite) outFS.delete(outFile, true); else throw new RuntimeException( "Output file '" + outFile + "' already exists and overwrite flag is not set"); } outFS.mkdirs(outFile); ShapeRecordWriter<Shape> writer; if (sindex == null || sindex.equals("grid")) { writer = new GridRecordWriter<Shape>(outFile, null, null, cells, false, false); } else { throw new RuntimeException("Unupoorted spatial idnex: " + sindex); } if (rectSize == 0) rectSize = 100; long t1 = System.currentTimeMillis(); RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize, seed); Prism key = generator.createKey(); while (generator.next(key, shape)) { // Serialize it to text writer.write(NullWritable.get(), shape); } writer.close(null); long t2 = System.currentTimeMillis(); System.out.println("Generation time: " + (t2 - t1) + " millis"); }