Example usage for org.apache.hadoop.io BytesWritable getBytes

List of usage examples for org.apache.hadoop.io BytesWritable getBytes

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable.getBytes().

Prototype

@Override
public byte[] getBytes() 

Source Link

Document

Get the data backing the BytesWritable.
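
The array returned by getBytes() is the writable's internal backing buffer, so it may be longer than the valid data; only the first getLength() bytes are the real payload (recent Hadoop versions also offer copyBytes() for an exact-length copy). A minimal sketch of that contract, with an illustrative class name:

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

public class GetBytesContract {
    public static void main(String[] args) {
        BytesWritable w = new BytesWritable(new byte[] { 1, 2, 3 });

        // Grow the backing buffer past the valid data to make the padding visible.
        w.setCapacity(16);

        byte[] backing = w.getBytes(); // backing array, possibly padded
        int valid = w.getLength();     // number of valid bytes (3 here)

        // Only the first getLength() bytes are meaningful.
        byte[] exact = Arrays.copyOf(backing, valid);
        System.out.println("capacity=" + backing.length + ", length=" + valid + ", data=" + Arrays.toString(exact));
    }
}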

Usage

From source file:smile.wide.counter.ReduceCounter.java

License:Apache License

public void reduce(BytesWritable key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double total = 0;
    for (DoubleWritable p : values) {
        total += p.get();
    }

    byte[] bits = key.getBytes();
    BitBuffer buf = new BitBuffer(bits, bits.length * 8);

    Network net = params.getNet();

    StringBuilder out = new StringBuilder();

    int[][] families = params.getFamilies();
    int pos = Parameters.bitCount(families.length);
    int familyIndex = buf.getBits(0, pos);
    out.append(familyIndex);
    int[] keyFamily = families[familyIndex];
    for (int i = 0; i < keyFamily.length; i++) {
        out.append(' ');
        int handle = keyFamily[i];
        int bitsPerNode = Parameters.bitCount(net.getOutcomeCount(handle));
        int outcomeIndex = buf.getBits(pos, bitsPerNode);
        out.append(net.getOutcomeId(handle, outcomeIndex));
        pos += bitsPerNode;
    }

    context.write(new Text(out.toString()), new DoubleWritable(total));
}

From source file:tachyon.client.keyvalue.hadoop.KeyValueRecordWriter.java

License:Apache License

@Override
public synchronized void write(BytesWritable key, BytesWritable value) throws IOException {
    try {
        mWriter.put(key.getBytes(), value.getBytes());
        // Send a progress to the job manager to inform it that the task is still running.
        mProgress.progress();
    } catch (TachyonException e) {
        throw new IOException(e);
    }
}

From source file:uk.bl.wa.hadoop.mapreduce.hash.MessageDigestMapper.java

License:Open Source License

@Override
protected void map(Path key, BytesWritable value, Mapper<Path, BytesWritable, Text, Text>.Context context)
        throws IOException, InterruptedException {
    if (!key.equals(current)) {
        // Extract and emit:
        this.emit(context);
        // Set up a new one:
        current = key;
        bytes_seen = 0;
        md.reset();
        log.info("Hashing " + current);
    }
    md.update(value.getBytes(), 0, value.getLength());
    bytes_seen += value.getLength();
}

From source file:voldemort.store.readonly.mapreduce.HadoopStoreBuilderPartitioner.java

License:Apache License

@Override
public int getPartition(BytesWritable key, BytesWritable value, int numPartitions) {
    int partitionId = ByteUtils.readInt(value.getBytes(), 4);
    int chunkId = ReadOnlyUtils.chunk(key.getBytes(), numChunks);
    return (partitionId * numChunks + chunkId) % numPartitions;
}

From source file:voldemort.store.readonly.mapreduce.HadoopStoreBuilderReducer.java

License:Apache License

/**
 * Reduce should get sorted MD5 of Voldemort key ( either 16 bytes if saving
 * keys is disabled, else 4 bytes ) as key and for value (a) node-id,
 * partition-id, value - if saving keys is disabled (b) node-id,
 * partition-id, [key-size, key, value-size, value]* if saving keys is
 * enabled
 */
@Override
public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
        throws IOException, InterruptedException {
    Iterator<BytesWritable> iterator = values.iterator();

    // Write key and position
    this.indexFileStream.write(key.getBytes(), 0, key.getLength());
    this.indexFileStream.writeInt(this.position);

    // Run key through checksum digest
    if (this.checkSumDigestIndex != null) {
        this.checkSumDigestIndex.update(key.getBytes(), 0, key.getLength());
        this.checkSumDigestIndex.update(this.position);
    }

    int numKeyValues = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.getBytes();

        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, 0);
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, 4);
        if (this.chunkId == -1)
            this.chunkId = ReadOnlyUtils.chunk(key.getBytes(), this.numChunks);

        int valueLength = writable.getLength() - 8;
        if (saveKeys) {
            // Write (key_length + key + value_length + value)
            valueStream.write(valueBytes, 8, valueLength);
        } else {
            // Write (value_length + value)
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, 8, valueLength);
        }

        numKeyValues++;

        // if we have multiple values for this md5 that is a collision,
        // throw an exception--either the data itself has duplicates, there
        // are trillions of keys, or someone is attempting something
        // malicious ( We don't expect collisions when saveKeys = false )
        if (!saveKeys && numKeyValues > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.getBytes(), 0, key.getLength())));

    }

    if (saveKeys) {
        // Write the number of k/vs as a single byte
        byte[] numBuf = new byte[1];
        numBuf[0] = (byte) numKeyValues;

        this.valueFileStream.write(numBuf);
        this.position += 1;

        if (this.checkSumDigestValue != null) {
            this.checkSumDigestValue.update(numBuf);
        }
    }

    // Write the value out
    valueStream.flush();

    byte[] value = stream.toByteArray();
    this.valueFileStream.write(value);
    this.position += value.length;

    if (this.checkSumDigestValue != null) {
        this.checkSumDigestValue.update(value);
    }

    if (this.position < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded "
                + Integer.MAX_VALUE + " bytes.");

}
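
As the javadoc above notes, every value handed to this reducer starts with two 4-byte integers, the node id and the partition id, ahead of the payload. A minimal sketch of reading those header fields back out of the bytes returned by getBytes(), using java.nio.ByteBuffer in place of Voldemort's ByteUtils (class and method names here are illustrative; big-endian ints are assumed, as a conventional readInt would produce):

import java.nio.ByteBuffer;

import org.apache.hadoop.io.BytesWritable;

// Illustrative helper mirroring ByteUtils.readInt(valueBytes, 0) and
// ByteUtils.readInt(valueBytes, 4) in the reducer above.
public class ValueHeader {
    public static int[] readHeader(BytesWritable value) {
        // Respect getLength(): getBytes() may return a padded backing array.
        ByteBuffer buf = ByteBuffer.wrap(value.getBytes(), 0, value.getLength());
        int nodeId = buf.getInt();      // bytes 0..3
        int partitionId = buf.getInt(); // bytes 4..7
        return new int[] { nodeId, partitionId };
    }
}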

From source file:voldemort.store.readonly.mr.AvroStoreBuilderMapper.java

License:Apache License

/**
 * Create the voldemort key and value from the input Avro record by
 * extracting the key and value and map it out for each of the responsible
 * voldemort nodes
 * 
 * 
 * The output value is the node_id & partition_id of the responsible node
 * followed by serialized value
 */
@Override
public void map(GenericData.Record record, AvroCollector<Pair<ByteBuffer, ByteBuffer>> collector,
        Reporter reporter) throws IOException {

    byte[] keyBytes = keySerializer.toBytes(record.get(keyField));
    byte[] valBytes = valueSerializer.toBytes(record.get(valField));

    // Compress key and values if required
    if (keySerializerDefinition.hasCompression()) {
        keyBytes = keyCompressor.deflate(keyBytes);
    }

    if (valueSerializerDefinition.hasCompression()) {
        valBytes = valueCompressor.deflate(valBytes);
    }

    // Get the output byte arrays ready to populate
    byte[] outputValue;
    BytesWritable outputKey;

    // Leave initial offset for (a) node id (b) partition id
    // since they are written later
    int offsetTillNow = 2 * ByteUtils.SIZE_OF_INT;

    if (getSaveKeys()) {

        // In order - 4 ( for node id ) + 4 ( partition id ) + 1 ( replica
        // type - primary | secondary | tertiary... ) + 4 ( key size )
        // + 4 ( value size ) + key + value
        outputValue = new byte[valBytes.length + keyBytes.length + ByteUtils.SIZE_OF_BYTE
                + 4 * ByteUtils.SIZE_OF_INT];

        // Write key length - leave byte for replica type
        offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        ByteUtils.writeInt(outputValue, keyBytes.length, offsetTillNow);

        // Write value length
        offsetTillNow += ByteUtils.SIZE_OF_INT;
        ByteUtils.writeInt(outputValue, valBytes.length, offsetTillNow);

        // Write key
        offsetTillNow += ByteUtils.SIZE_OF_INT;
        System.arraycopy(keyBytes, 0, outputValue, offsetTillNow, keyBytes.length);

        // Write value
        offsetTillNow += keyBytes.length;
        System.arraycopy(valBytes, 0, outputValue, offsetTillNow, valBytes.length);

        // Generate MR key - upper 8 bytes of 16 byte md5
        outputKey = new BytesWritable(ByteUtils.copy(md5er.digest(keyBytes), 0, 2 * ByteUtils.SIZE_OF_INT));

    } else {

        // In order - 4 ( for node id ) + 4 ( partition id ) + value
        outputValue = new byte[valBytes.length + 2 * ByteUtils.SIZE_OF_INT];

        // Write value
        System.arraycopy(valBytes, 0, outputValue, offsetTillNow, valBytes.length);

        // Generate MR key - 16 byte md5
        outputKey = new BytesWritable(md5er.digest(keyBytes));

    }

    // Generate partition and node list this key is destined for
    List<Integer> partitionList = routingStrategy.getPartitionList(keyBytes);
    Node[] partitionToNode = routingStrategy.getPartitionToNode();

    for (int replicaType = 0; replicaType < partitionList.size(); replicaType++) {

        // Node id
        ByteUtils.writeInt(outputValue, partitionToNode[partitionList.get(replicaType)].getId(), 0);

        if (getSaveKeys()) {
            // Primary partition id
            ByteUtils.writeInt(outputValue, partitionList.get(0), ByteUtils.SIZE_OF_INT);

            // Replica type
            ByteUtils.writeBytes(outputValue, replicaType, 2 * ByteUtils.SIZE_OF_INT, ByteUtils.SIZE_OF_BYTE);
        } else {
            // Partition id
            ByteUtils.writeInt(outputValue, partitionList.get(replicaType), ByteUtils.SIZE_OF_INT);
        }
        BytesWritable outputVal = new BytesWritable(outputValue);

        ByteBuffer keyBuffer = null, valueBuffer = null;

        byte[] md5KeyBytes = outputKey.getBytes();
        keyBuffer = ByteBuffer.allocate(md5KeyBytes.length);
        keyBuffer.put(md5KeyBytes);
        keyBuffer.rewind();

        valueBuffer = ByteBuffer.allocate(outputValue.length);
        valueBuffer.put(outputValue);
        valueBuffer.rewind();

        Pair<ByteBuffer, ByteBuffer> p = new Pair<ByteBuffer, ByteBuffer>(keyBuffer, valueBuffer);

        collector.collect(p);
    }
    md5er.reset();
}

From source file:voldemort.store.readonly.mr.serialization.JsonDeserializerComparator.java

License:Apache License

public int compare(BytesWritable o1, BytesWritable o2) {
    return this.compareBytes(o1.getBytes(), 0, o1.getLength(), o2.getBytes(), 0, o2.getLength());
}

From source file:voldemort.store.readonly.mr.utils.HadoopUtils.java

License:Apache License

/**
 * Tag the BytesWritable with an integer at the END
 */
public static void appendTag(BytesWritable writable, int tag) {
    int size = writable.getLength();

    if (writable.getCapacity() < size + 4) {
        // BytesWritable preserves old values
        writable.setCapacity(size + 4);
    }

    ByteUtils.writeInt(writable.getBytes(), tag, size);
    writable.setSize(size + 4);
}

From source file:voldemort.store.readonly.mr.utils.HadoopUtils.java

License:Apache License

/**
 * Read and return the integer from the END of the BytesWritable. The tag
 * bytes are NOT removed.
 */
public static int readTag(BytesWritable readable) {
    return ByteUtils.readInt(readable.getBytes(), readable.getLength() - 4);
}
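
Together, the two helpers above let a job stash an integer tag behind a record's payload: appendTag grows the buffer if needed and writes the int just past the current length, while readTag reads the trailing four bytes back without removing them. A minimal round-trip sketch (the wrapper class name is illustrative):

import org.apache.hadoop.io.BytesWritable;

import voldemort.store.readonly.mr.utils.HadoopUtils;

public class TagRoundTrip {
    public static void main(String[] args) {
        BytesWritable record = new BytesWritable(new byte[] { 10, 20, 30 });

        // Append a 4-byte tag after the 3 payload bytes, growing the buffer if necessary.
        HadoopUtils.appendTag(record, 42);

        // Read the tag from the end; the tag bytes stay in the writable.
        int tag = HadoopUtils.readTag(record);

        System.out.println("tag=" + tag + ", length=" + record.getLength()); // tag=42, length=7
    }
}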

From source file:weka.distributed.hadoop.CorrelationMatrixRowHadoopReducer.java

License:Open Source License

@Override
public void reduce(Text key, Iterable<BytesWritable> values, Context context) throws IOException {
    List<MatrixRowHolder> rowsToAgg = new ArrayList<MatrixRowHolder>();

    try {
        for (BytesWritable b : values) {
            byte[] bytes = b.getBytes();

            rowsToAgg.add(deserialize(bytes));
        }
    } catch (ClassNotFoundException ex) {
        throw new IOException(ex);
    }

    if (rowsToAgg.size() > 0) {

        int rowNum = rowsToAgg.get(0).getRowNumber();

        List<double[]> rows = new ArrayList<double[]>();
        List<int[]> coOcc = null;
        if (!m_missingsWereReplacedWithMeans) {
            coOcc = new ArrayList<int[]>();
        }

        for (MatrixRowHolder r : rowsToAgg) {
            if (r.getRowNumber() != rowNum) {
                throw new IOException("Matrix row numbers for this key appear to differ!");
            }
            rows.add(r.getRow());
            if (!m_missingsWereReplacedWithMeans) {
                coOcc.add(r.getCoOccurrencesCounts());
            }
        }
        try {
            double[] aggregated = m_task.aggregate(rowsToAgg.get(0).getRowNumber(), rows, coOcc,
                    m_headerWithSummaryAtts, m_missingsWereReplacedWithMeans, m_covariance, m_deleteClassIfSet);

            // assemble Text key (row num) and Text row (space separated
            // values)

            Text outKey = new Text();
            outKey.set("" + rowNum);

            StringBuilder b = new StringBuilder();
            for (int i = 0; i < aggregated.length; i++) {
                if (i < aggregated.length - 1) {
                    b.append("" + aggregated[i]).append(" ");
                } else {
                    b.append("" + aggregated[i]);
                }
            }

            Text outVal = new Text();
            outVal.set(b.toString());
            context.write(outKey, outVal);
        } catch (DistributedWekaException e) {
            throw new IOException(e);
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }
}