Example usage for org.apache.hadoop.io BytesWritable getLength

List of usage examples for org.apache.hadoop.io BytesWritable getLength

Introduction

On this page you can find example usage of org.apache.hadoop.io BytesWritable getLength.

Prototype

@Override
public int getLength() 

Document

Get the current size of the buffer.
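
Note that getBytes() returns the writable's backing array, which can be larger than the valid data, so reads should always be bounded by getLength(). A minimal sketch illustrating this (not taken from any of the source files below):

import org.apache.hadoop.io.BytesWritable;

public class GetLengthExample {
    public static void main(String[] args) {
        BytesWritable writable = new BytesWritable("hello".getBytes());

        // Number of valid bytes in the buffer.
        System.out.println(writable.getLength()); // 5

        // getBytes() may return a padded backing array, so copy only the
        // first getLength() bytes when extracting the payload.
        byte[] valid = new byte[writable.getLength()];
        System.arraycopy(writable.getBytes(), 0, valid, 0, writable.getLength());
        System.out.println(new String(valid)); // hello
    }
}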

Usage

From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinFormatHadoopTest.java

License:Apache License

@Test
public void readBitcoinRawBlockInputFormatGzipCompressed() throws IOException {
    JobConf job = new JobConf(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, job);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "version4comp.blk.gz";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputFormat.setInputPaths(job, file);
    BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
    format.configure(job);
    InputSplit[] inputSplits = format.getSplits(job, 1);
    assertEquals("Only one split generated for compressed block", 1, inputSplits.length);
    RecordReader<BytesWritable, BytesWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull("Format returned  null RecordReader", reader);
    BytesWritable key = new BytesWritable();
    BytesWritable block = new BytesWritable();
    assertTrue("Input Split for block version contains at least one block", reader.next(key, block));
    assertEquals("Compressed block must have a size of 998.039 bytes", 998039, block.getLength());
    BytesWritable emptyKey = new BytesWritable();
    BytesWritable emptyBlock = new BytesWritable();
    assertFalse("No further blocks in compressed block", reader.next(emptyKey, emptyBlock));
}

From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinFormatHadoopTest.java

License:Apache License

@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException {
    JobConf job = new JobConf(defaultConf);
    CompressionCodec bzip2 = new BZip2Codec();
    ReflectionUtils.setConf(bzip2, job);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "version4comp.blk.bz2";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputFormat.setInputPaths(job, file);
    BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
    format.configure(job);
    InputSplit[] inputSplits = format.getSplits(job, 1);
    assertEquals("Only one split generated for compressed block", 1, inputSplits.length);
    RecordReader<BytesWritable, BytesWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull("Format returned  null RecordReader", reader);
    BytesWritable key = new BytesWritable();
    BytesWritable block = new BytesWritable();
    assertTrue("Input Split for block version contains at least one block", reader.next(key, block));
    assertEquals("Compressed block must have a size of 998.039 bytes", 998039, block.getLength());
    BytesWritable emptyKey = new BytesWritable();
    BytesWritable emptyBlock = new BytesWritable();
    assertFalse("No further blocks in compressed block", reader.next(emptyKey, emptyBlock));

}

From source file:parquet.hadoop.thrift.ThriftBytesWriteSupport.java

License:Apache License

private TProtocol protocol(BytesWritable record) {
    TProtocol protocol = protocolFactory
            .getProtocol(new TIOStreamTransport(new ByteArrayInputStream(record.getBytes())));

    /* Reduce the chance of OOM when data is corrupted. When readBinary is called on TBinaryProtocol, it reads the length of the binary first,
     so if the data is corrupted, it could read a big integer as the length of the binary and therefore causes OOM to happen.
     Currently this fix only applies to TBinaryProtocol which has the setReadLength defined.
     */
    if (protocol instanceof TBinaryProtocol) {
        ((TBinaryProtocol) protocol).setReadLength(record.getLength());
    }
    return protocol;
}

From source file:protobuf.examples.ProtobufMapper.java

License:Open Source License

public void map(LongWritable key, BytesWritable value, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {

    LOG.info("In Mapper Get Data: " + value.toString());

    int bufferSize = value.getLength();
    byte[] buffer = new byte[bufferSize];
    System.arraycopy(value.getBytes(), 0, buffer, 0, bufferSize);

    output.collect(new Text("msg.getEmail()"), new IntWritable(1));
}

From source file:shark.io.MutableBytesWritable.java

License:Apache License

/**
 * Set the BytesWritable to the contents of the given newData.
 * @param newData the value to set this BytesWritable to.
 */
public void set(BytesWritable newData) {
    set(newData.getBytes(), 0, newData.getLength());
}

From source file:tachyon.client.keyvalue.hadoop.KeyValueRecordReader.java

License:Apache License

@Override
public synchronized boolean next(BytesWritable keyWritable, BytesWritable valueWritable) throws IOException {
    if (!mKeyValuePairIterator.hasNext()) {
        return false;
    }

    KeyValuePair pair;
    try {
        pair = mKeyValuePairIterator.next();
    } catch (TachyonException e) {
        throw new IOException(e);
    }

    // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could
    // benefit from zero-copy.
    DataInputStream key = new DataInputStream(
            new ByteArrayInputStream(BufferUtils.newByteArrayFromByteBuffer(pair.getKey())));
    try {
        keyWritable.readFields(key);
    } finally {
        key.close();
    }

    DataInputStream value = new DataInputStream(
            new ByteArrayInputStream(BufferUtils.newByteArrayFromByteBuffer(pair.getValue())));
    try {
        valueWritable.readFields(value);
    } finally {
        value.close();
    }

    mKeyValuePairsBytesRead += keyWritable.getLength() + valueWritable.getLength();
    mNumVisitedKeyValuePairs++;
    return true;
}

From source file:uk.bl.wa.hadoop.mapreduce.hash.MessageDigestMapper.java

License:Open Source License

@Override
protected void map(Path key, BytesWritable value, Mapper<Path, BytesWritable, Text, Text>.Context context)
        throws IOException, InterruptedException {
    if (!key.equals(current)) {
        // Extract and emit:
        this.emit(context);
        // Set up a new one:
        current = key;
        bytes_seen = 0;
        md.reset();
        log.info("Hashing " + current);
    }
    md.update(value.getBytes(), 0, value.getLength());
    bytes_seen += value.getLength();
}

From source file:voldemort.store.readonly.mapreduce.HadoopStoreBuilderReducer.java

License:Apache License

/**
 * Reduce should get sorted MD5 of Voldemort key ( either 16 bytes if saving
 * keys is disabled, else 4 bytes ) as key and for value (a) node-id,
 * partition-id, value - if saving keys is disabled (b) node-id,
 * partition-id, [key-size, key, value-size, value]* if saving keys is
 * enabled
 */
@Override
public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
        throws IOException, InterruptedException {
    Iterator<BytesWritable> iterator = values.iterator();

    // Write key and position
    this.indexFileStream.write(key.getBytes(), 0, key.getLength());
    this.indexFileStream.writeInt(this.position);

    // Run key through checksum digest
    if (this.checkSumDigestIndex != null) {
        this.checkSumDigestIndex.update(key.getBytes(), 0, key.getLength());
        this.checkSumDigestIndex.update(this.position);
    }

    int numKeyValues = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.getBytes();

        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, 0);
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, 4);
        if (this.chunkId == -1)
            this.chunkId = ReadOnlyUtils.chunk(key.getBytes(), this.numChunks);

        int valueLength = writable.getLength() - 8;
        if (saveKeys) {
            // Write (key_length + key + value_length + value)
            valueStream.write(valueBytes, 8, valueLength);
        } else {
            // Write (value_length + value)
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, 8, valueLength);
        }

        numKeyValues++;

        // if we have multiple values for this md5 that is a collision,
        // throw an exception--either the data itself has duplicates, there
        // are trillions of keys, or someone is attempting something
        // malicious ( We don't expect collisions when saveKeys = false )
        if (!saveKeys && numKeyValues > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.getBytes(), 0, key.getLength())));

    }

    if (saveKeys) {
        // Write the number of k/vs as a single byte
        byte[] numBuf = new byte[1];
        numBuf[0] = (byte) numKeyValues;

        this.valueFileStream.write(numBuf);
        this.position += 1;

        if (this.checkSumDigestValue != null) {
            this.checkSumDigestValue.update(numBuf);
        }
    }

    // Write the value out
    valueStream.flush();

    byte[] value = stream.toByteArray();
    this.valueFileStream.write(value);
    this.position += value.length;

    if (this.checkSumDigestValue != null) {
        this.checkSumDigestValue.update(value);
    }

    if (this.position < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded "
                + Integer.MAX_VALUE + " bytes.");

}

From source file:voldemort.store.readonly.mr.serialization.JsonDeserializerComparator.java

License:Apache License

public int compare(BytesWritable o1, BytesWritable o2) {
    return this.compareBytes(o1.getBytes(), 0, o1.getLength(), o2.getBytes(), 0, o2.getLength());
}
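
For comparison, Hadoop's own WritableComparator exposes a static compareBytes with the same bounds-based semantics; a minimal sketch of an equivalent raw-byte comparison (the helper name is hypothetical), where getLength() restricts the comparison to the valid region of each backing array:

import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical helper: compares only the valid bytes of two BytesWritable values.
public static int compareValidBytes(BytesWritable o1, BytesWritable o2) {
    return WritableComparator.compareBytes(o1.getBytes(), 0, o1.getLength(), o2.getBytes(), 0,
            o2.getLength());
}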

From source file:voldemort.store.readonly.mr.utils.HadoopUtils.java

License:Apache License

/**
 * Tag the BytesWritable with an integer at the END
 */
public static void appendTag(BytesWritable writable, int tag) {
    int size = writable.getLength();

    if (writable.getCapacity() < size + 4) {
        // BytesWritable preserves old values
        writable.setCapacity(size + 4);
    }

    ByteUtils.writeInt(writable.getBytes(), tag, size);
    writable.setSize(size + 4);
}
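
A possible counterpart for reading such a tag back, assuming ByteUtils.writeInt stores the integer big-endian (the helper below is a hypothetical sketch, not part of HadoopUtils):

import org.apache.hadoop.io.BytesWritable;

// Hypothetical helper: reads the integer that appendTag placed in the last
// four valid bytes of the writable.
public static int readTag(BytesWritable writable) {
    byte[] bytes = writable.getBytes();    // backing array, possibly padded
    int offset = writable.getLength() - 4; // tag sits at the end of the valid region
    return ((bytes[offset] & 0xFF) << 24) | ((bytes[offset + 1] & 0xFF) << 16)
            | ((bytes[offset + 2] & 0xFF) << 8) | (bytes[offset + 3] & 0xFF);
}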