Example usage for org.apache.hadoop.io.NullWritable.get()

List of usage examples for org.apache.hadoop.io.NullWritable.get()

Introduction

On this page you can find usage examples for org.apache.hadoop.io.NullWritable.get().

Prototype

public static NullWritable get() 

Document

Returns the single instance of this class.

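NullWritable.get() returns a shared singleton, so no new object is allocated per record; it is typically passed wherever the API requires a key or value that carries no data. A minimal sketch of that pattern (the LineMapper class below is illustrative and not taken from the examples that follow):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Illustrative mapper: only the line text matters, so the value side of each
// output pair is filled with the NullWritable singleton.
public class LineMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(value, NullWritable.get());
    }
}
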
Usage

From source file:com.maxpoint.cascading.avro.AvroScheme.java

License:Open Source License

@Override
public void sink(FlowProcess<JobConf> process,
        SinkCall<Object, OutputCollector<AvroWrapper<Record>, Writable>> call) throws IOException {
    Record record = write(call);
    call.getOutput().collect(new AvroWrapper<Record>(record), NullWritable.get());
}

From source file:com.moz.fiji.mapreduce.lib.gather.MapTypeDelimitedFileGatherer.java

License:Apache License

/**
 * Outputs flattened data without schema definitions.
 * A single line of data contains one key-value record from a fiji family, formatted as:
 * [entityid]|[timestamp]|[key]|[value]
 *
 * @param input The row data to export.
 * @param context The context to write export to.
 * @throws IOException if there's an error.
 */
@Override
public void gather(FijiRowData input, GathererContext context) throws IOException {
    for (String key : input.getQualifiers(mFamily)) {
        NavigableMap<Long, Object> values = input.getMostRecentValue(mFamily, key);
        for (Map.Entry<Long, Object> e : values.entrySet()) {
            // Write this entry out on a single line.
            mLine.set(makeLine(input.getEntityId(), e.getKey(), key, e.getValue()));
            context.write(mLine, NullWritable.get());
        }
    }
}

From source file:com.moz.fiji.mapreduce.lib.reduce.AvroReducer.java

License:Apache License

/**
 * Subclasses can use this instead of context.write() to output Avro
 * messages directly instead of having to wrap them in AvroKey
 * container objects.
 *
 * @param value The avro value to write.
 * @param context The reducer context.
 * @throws IOException If there is an error.
 * @throws InterruptedException If the thread is interrupted.
 */
protected void write(T value, Context context) throws IOException, InterruptedException {
    mKey.datum(value);
    context.write(mKey, NullWritable.get());
}

From source file:com.moz.fiji.mapreduce.output.HFileMapReduceJobOutput.java

License:Apache License

/**
 * <p>Write out a SequenceFile that can be read by TotalOrderPartitioner
 * that contains the split points in startKeys.</p>
 *
 * <p>This method was copied from HFileOutputFormat in hbase-0.90.1-cdh3u0.  I had to
 * copy it because it's private.</p>
 *
 * @param conf The job configuration.
 * @param partitionsPath output path for SequenceFile.
 * @param startKeys the region start keys to use as the partitions.
 * @throws IOException If there is an error.
 */
public static void writePartitionFile(Configuration conf, Path partitionsPath, List<HFileKeyValue> startKeys)
        throws IOException {
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0.
    TreeSet<HFileKeyValue> sorted = new TreeSet<HFileKeyValue>();
    sorted.addAll(startKeys);

    HFileKeyValue first = sorted.first();
    if (0 != first.getRowKey().length) {
        throw new IllegalArgumentException(
                "First region of table should have empty start row key. Instead has: "
                        + Bytes.toStringBinary(first.getRowKey()));
    }
    sorted.remove(first);

    // Write the actual file
    final SequenceFile.Writer writer = FijiMRPlatformBridge.get().newSeqFileWriter(conf, partitionsPath,
            HFileKeyValue.class, NullWritable.class);

    try {
        for (HFileKeyValue startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}

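The partition file written above is consumed only for its keys; NullWritable.get() supplies a zero-byte placeholder value for each entry. A minimal sketch of pointing a job at such a file, assuming the standard org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner (the helper class and method are illustrative, not part of the source above):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner;

// Illustrative helper: wire a job to a partition file like the one written
// above. TotalOrderPartitioner reads only the keys of that SequenceFile; the
// NullWritable.get() values are zero-byte placeholders.
public final class PartitionFileSetup {
    public static void useTotalOrderPartitioner(Job job, Path partitionsPath) {
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionsPath);
        job.setPartitionerClass(TotalOrderPartitioner.class);
    }
}
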
From source file:com.neusoft.hbase.test.hadoop.dataload.HFileOutputFormatBase.java

License:Apache License

/**
 * Write out a {@link org.apache.hadoop.io.SequenceFile} that can be read by
 * {@link org.apache.hadoop.mapreduce.lib.partition.TotalOrderPartitioner} that contains the split points in
 * startKeys.
 */
@SuppressWarnings("deprecation")
private static void writePartitions(Configuration conf, Path partitionsPath,
        List<ImmutableBytesWritable> startKeys) throws IOException {
    LOG.info("Writing partition information to " + partitionsPath);
    if (startKeys.isEmpty()) {
        throw new IllegalArgumentException("No regions passed");
    }

    // We're generating a list of split points, and we don't ever
    // have keys < the first region (which has an empty start key)
    // so we need to remove it. Otherwise we would end up with an
    // empty reducer with index 0
    TreeSet<ImmutableBytesWritable> sorted = new TreeSet<ImmutableBytesWritable>(startKeys);

    ImmutableBytesWritable first = sorted.first();
    if (!first.equals(HConstants.EMPTY_BYTE_ARRAY)) {
        throw new IllegalArgumentException("First region of table should have empty start key. Instead has: "
                + Bytes.toStringBinary(first.get()));
    }
    sorted.remove(first);

    // Write the actual file
    FileSystem fs = partitionsPath.getFileSystem(conf);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, partitionsPath,
            ImmutableBytesWritable.class, NullWritable.class);

    try {
        for (ImmutableBytesWritable startKey : sorted) {
            writer.append(startKey, NullWritable.get());
        }
    } finally {
        writer.close();
    }
}

From source file:com.redgate.hadoop.hive.azuretables.AzureTablesRecordReader.java

License:Apache License

/**
 * Grabs the next result and processes the DynamicTableEntity into a
 * Hive-friendly MapWritable.
 * 
 * @param key
 *            The RowID for the entity. Note that this is not really an Azure
 *            key, since the partition is implicit in the key
 * @param value
 *            A MapWriteable which will be populated with values from the
 *            DynamicTableEntity returned by the Azure query.
 */
public boolean next(Text key, MapWritable value) throws IOException {
    if (!results.hasNext())
        return false;
    DynamicTableEntity entity = results.next();
    key.set(entity.getRowKey());
    for (Entry<String, EntityProperty> entry : entity.getProperties().entrySet()) {

        final EntityProperty property = entry.getValue();
        // Note that azure table entity keys are forced to lower case for
        // matching with hive column names
        final String propertyKey = entry.getKey().toLowerCase();
        final String propertyValue = property.getValueAsString();
        final Writable writableValue = SERIALIZED_NULL.equals(propertyValue) ? NullWritable.get()
                : new Text(propertyValue);
        value.put(new Text(propertyKey), writableValue);
    }
    pos++;
    return true;
}

From source file:com.redgate.hadoop.hive.azuretables.AzureTablesSerDe.java

License:Apache License

@Override
public Writable serialize(final Object obj, final ObjectInspector inspector) throws SerDeException {
    final StructObjectInspector structInspector = (StructObjectInspector) inspector;
    final List<? extends StructField> fields = structInspector.getAllStructFieldRefs();
    if (fields.size() != columnNames.size()) {
        throw new SerDeException(
                String.format("Required %d columns, received %d.", columnNames.size(), fields.size()));
    }

    cachedWritable.clear();
    for (int c = 0; c < fieldCount; c++) {
        StructField structField = fields.get(c);
        if (structField != null) {
            final Object field = structInspector.getStructFieldData(obj, fields.get(c));
            final ObjectInspector fieldOI = fields.get(c).getFieldObjectInspector();
            final StringObjectInspector fieldStringOI = (StringObjectInspector) fieldOI;
            Writable value = fieldStringOI.getPrimitiveWritableObject(field);
            if (value == null) {
                value = NullWritable.get();
            }
            cachedWritable.put(new Text(columnNames.get(c)), value);
        }
    }
    return cachedWritable;
}

From source file:com.redgate.hadoop.hive.azuretables.AzureTablesSerDe.java

License:Apache License

@Override
public Object deserialize(final Writable wr) throws SerDeException {
    if (!(wr instanceof MapWritable)) {
        throw new SerDeException("Expected MapWritable, received " + wr.getClass().getName());
    }

    final MapWritable input = (MapWritable) wr;
    final Text t = new Text();
    row.clear();

    for (int i = 0; i < fieldCount; i++) {
        t.set(columnNames.get(i));
        final Writable value = input.get(t);
        if (value != null && !NullWritable.get().equals(value)) {
            row.add(value.toString());
        } else {
            row.add(null);
        }
    }

    return row;
}

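In the two methods above, NullWritable.get() doubles as an explicit "null" marker inside a MapWritable: next() stores it for serialized nulls and deserialize() tests for it with equals(). Because get() always returns the same instance, the check is cheap and reliable. A small standalone sketch of the same pattern (the column names are made up):

import org.apache.hadoop.io.MapWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

public final class NullSentinelDemo {
    public static void main(String[] args) {
        MapWritable row = new MapWritable();
        row.put(new Text("name"), new Text("alice"));
        // The column exists but has no value: store the NullWritable singleton.
        row.put(new Text("nickname"), NullWritable.get());

        // The same sentinel check that deserialize() above performs.
        Writable nickname = row.get(new Text("nickname"));
        System.out.println(NullWritable.get().equals(nickname)); // true
    }
}
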
From source file:com.ricemap.spateDB.operations.Repartition.java

License:Apache License

/**
 * Repartitions a file on local machine without MapReduce jobs.
 * @param in the file to repartition
 * @param out the output path for the repartitioned data
 * @param stockShape an instance of the shape stored in the input file
 * @param blockSize the block size to use for the output file, or 0 to copy
 *   it from the source file
 * @param cells the cells to partition the data into
 * @param sindex the spatial index to build ("grid", "rtree" or "r+tree")
 * @param overwrite whether to overwrite the output if it already exists
 * @throws IOException
 */
public static <S extends Shape> void repartitionLocal(Path in, Path out, S stockShape, long blockSize,
        CellInfo[] cells, String sindex, boolean overwrite) throws IOException {
    FileSystem inFs = in.getFileSystem(new Configuration());
    FileSystem outFs = out.getFileSystem(new Configuration());
    // Overwrite output file
    if (outFs.exists(out)) {
        if (overwrite)
            outFs.delete(out, true);
        else
            throw new RuntimeException(
                    "Output file '" + out + "' already exists and overwrite flag is not set");
    }
    outFs.mkdirs(out);

    ShapeRecordWriter<Shape> writer;
    boolean pack = sindex.equals("r+tree");
    boolean expand = sindex.equals("rtree");
    if (sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(out, null, null, cells, pack, expand);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        writer = new RTreeGridRecordWriter<Shape>(out, null, null, cells, pack, expand);
        writer.setStockObject(stockShape);
    } else {
        throw new RuntimeException("Unupoorted spatial idnex: " + sindex);
    }

    FileStatus inFileStatus = inFs.getFileStatus(in);
    // Copy blocksize from source file if it's globally indexed
    if (blockSize == 0) {
        GlobalIndex<Partition> globalIndex = SpatialSite.getGlobalIndex(inFs, in);
        if (globalIndex != null) {
            blockSize = inFs.getFileStatus(new Path(in, globalIndex.iterator().next().filename)).getBlockSize();
        }
    }
    if (blockSize != 0)
        ((GridRecordWriter<Shape>) writer).setBlockSize(blockSize);

    long length = inFileStatus.getLen();
    FSDataInputStream datain = inFs.open(in);
    ShapeRecordReader<S> reader = new ShapeRecordReader<S>(datain, 0, length);
    Prism c = reader.createKey();

    NullWritable dummy = NullWritable.get();

    while (reader.next(c, stockShape)) {
        writer.write(dummy, stockShape);
    }
    writer.close(null);
}

From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java

License:Apache License

/**
 * Generates random rectangles and writes the result to a file.
 * @param outFile - The file to write to
 * @param shape - The shape to generate
 * @param sindex - The spatial index to build over the generated data
 * @param totalSize - The total size of the generated file
 * @param mbr - The whole MBR to generate in
 * @param type - The distribution type of the generated data
 * @param rectSize - The size of each generated rectangle
 * @param seed - The seed of the random number generator
 * @param blocksize - The block size of the output file
 * @param overwrite - Whether to overwrite the output file if it already exists
 * @throws IOException
 */
public static void generateFileLocal(Path outFile, Shape shape, String sindex, long totalSize, Prism mbr,
        DistributionType type, int rectSize, long seed, long blocksize, boolean overwrite) throws IOException {
    FileSystem outFS = outFile.getFileSystem(new Configuration());
    if (blocksize == 0)
        blocksize = outFS.getDefaultBlockSize(outFile);

    // Calculate the dimensions of each partition based on gindex type
    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        int num_partitions = Repartition.calculateNumberOfPartitions(new Configuration(), totalSize, outFS,
                outFile, blocksize);

        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        gridInfo.calculateCellDimensions(num_partitions);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    // Overwrite output file
    if (outFS.exists(outFile)) {
        if (overwrite)
            outFS.delete(outFile, true);
        else
            throw new RuntimeException(
                    "Output file '" + outFile + "' already exists and overwrite flag is not set");
    }
    outFS.mkdirs(outFile);

    ShapeRecordWriter<Shape> writer;
    if (sindex == null || sindex.equals("grid")) {
        writer = new GridRecordWriter<Shape>(outFile, null, null, cells, false, false);
    } else {
        throw new RuntimeException("Unupoorted spatial idnex: " + sindex);
    }

    if (rectSize == 0)
        rectSize = 100;
    long t1 = System.currentTimeMillis();

    RandomShapeGenerator<Shape> generator = new RandomShapeGenerator<Shape>(totalSize, mbr, type, rectSize,
            seed);

    Prism key = generator.createKey();

    while (generator.next(key, shape)) {
        // Serialize it to text
        writer.write(NullWritable.get(), shape);
    }
    writer.close(null);
    long t2 = System.currentTimeMillis();

    System.out.println("Generation time: " + (t2 - t1) + " millis");
}