Example usage for org.apache.hadoop.io BytesWritable get

List of usage examples for org.apache.hadoop.io BytesWritable get

Introduction

On this page you can find example usage for org.apache.hadoop.io.BytesWritable.get().

Prototype

@Deprecated
public byte[] get() 

Document

Get the data from the BytesWritable. The method is deprecated in favor of getBytes(); because the returned array is the internal buffer and may be longer than the stored data, read it together with getSize() (also deprecated) or getLength().
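
Below is a minimal, self-contained sketch (not drawn from the projects listed under Usage; the class name BytesWritableGetDemo is made up for illustration) showing the usual pattern: take the backing array from get() but only trust the first getSize() bytes of it.

import java.util.Arrays;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableGetDemo {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable("hello".getBytes());

        // Deprecated accessors: get() returns the (possibly padded) backing array,
        // getSize() returns how many of those bytes are valid.
        byte[] backing = bw.get();
        int valid = bw.getSize();

        // Copy out only the valid region before handing the bytes elsewhere.
        byte[] data = Arrays.copyOfRange(backing, 0, valid);

        // Non-deprecated equivalents are getBytes() and getLength().
        System.out.println(new String(data) + " (" + valid + " of " + backing.length
                + " bytes valid, getLength() = " + bw.getLength() + ")");
    }
}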

Usage

From source file:test.KeyValueTextOutputFormat.java

License:Apache License

/**
 * Create the final output file and write it row by row. After each row is
 * appended, a configured row separator is appended.
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for creation
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(Constants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }

    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    final byte[] key = "key".getBytes();
    final byte[] split = "\t".getBytes();
    return new RecordWriter() {
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                outStream.write(key);
                outStream.write(split);
                outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}

From source file:voldemort.contrib.batchindexer.performance.BdbBuildPerformanceTest.java

License:Apache License

public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length != 3)
        Utils.croak("USAGE: java " + BdbBuildPerformanceTest.class.getName()
                + "serverPropsFile storeName jsonSequenceDataFile");

    String serverPropsFile = args[0];
    String storeName = args[1];
    String jsonDataFile = args[2];

    final Store<ByteArray, byte[], byte[]> store = new BdbStorageConfiguration(
            new VoldemortConfig(new Props(new File(serverPropsFile)))).getStore(
                    TestUtils.makeStoreDefinition(storeName), TestUtils.makeSingleNodeRoutingStrategy());

    final AtomicInteger obsoletes = new AtomicInteger(0);

    Path jsonFilePath = new Path(jsonDataFile);
    FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration()).listStatus(jsonFilePath)[0];
    final SequenceFileRecordReader<BytesWritable, BytesWritable> reader = new SequenceFileRecordReader<BytesWritable, BytesWritable>(
            new Configuration(), new FileSplit(jsonFilePath, 0, jsonFileStatus.getLen(), (String[]) null));

    PerformanceTest readWriteTest = new PerformanceTest() {

        @Override
        public void doOperation(int index) throws Exception {
            try {

                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();

                reader.next(key, value);
                store.put(new ByteArray(ByteUtils.copy(key.get(), 0, key.getSize())),
                        Versioned.value(ByteUtils.copy(value.get(), 0, value.getSize())), null);
            } catch (ObsoleteVersionException e) {
                obsoletes.incrementAndGet();
            }
        }
    };
    readWriteTest.run(30 * 1000 * 1000, 1);
    System.out.println("Bdb write throuhput with one thread:");
    readWriteTest.printStats();
}

From source file:voldemort.contrib.batchindexer.performance.MysqlBuildPerformanceTest.java

License:Apache License

public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length != 3)
        Utils.croak("USAGE: java " + MysqlBuildPerformanceTest.class.getName()
                + "serverPropsFile storeName jsonSequenceDataFile");

    String serverPropsFile = args[0];
    String storeName = args[1];
    String jsonDataFile = args[2];

    final Store<ByteArray, byte[], byte[]> store = new MysqlStorageConfiguration(
            new VoldemortConfig(new Props(new File(serverPropsFile)))).getStore(
                    TestUtils.makeStoreDefinition(storeName), TestUtils.makeSingleNodeRoutingStrategy());

    final AtomicInteger obsoletes = new AtomicInteger(0);

    Path jsonFilePath = new Path(jsonDataFile);
    FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration()).listStatus(jsonFilePath)[0];
    final SequenceFileRecordReader<BytesWritable, BytesWritable> reader = new SequenceFileRecordReader<BytesWritable, BytesWritable>(
            new Configuration(), new FileSplit(jsonFilePath, 0, jsonFileStatus.getLen(), (String[]) null));

    PerformanceTest readWriteTest = new PerformanceTest() {

        @Override
        public void doOperation(int index) throws Exception {
            try {

                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();

                reader.next(key, value);
                store.put(new ByteArray(ByteUtils.copy(key.get(), 0, key.getSize())),
                        Versioned.value(ByteUtils.copy(value.get(), 0, value.getSize())), null);
            } catch (ObsoleteVersionException e) {
                obsoletes.incrementAndGet();
            }
        }
    };
    readWriteTest.run(1000, 1);
    System.out.println("MySQl write throuhput with one thread:");
    readWriteTest.printStats();
}

From source file:voldemort.store.readonly.disk.HadoopStoreWriter.java

License:Apache License

@Override
public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter) throws IOException {

    // Write key and position
    this.indexFileStream.write(key.get(), 0, key.getSize());
    this.indexFileStream.writeInt(this.position);

    // Run key through checksum digest
    if (this.checkSumDigestIndex != null) {
        this.checkSumDigestIndex.update(key.get(), 0, key.getSize());
        this.checkSumDigestIndex.update(this.position);
    }

    short numTuples = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.get();
        int offsetTillNow = 0;

        // Read node Id
        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read partition id
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read chunk id
        if (this.chunkId == -1)
            this.chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks());

        // Read replica type
        if (getSaveKeys()) {
            if (this.replicaType == -1)
                this.replicaType = (int) ByteUtils.readBytes(valueBytes, offsetTillNow, ByteUtils.SIZE_OF_BYTE);
            offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        }

        int valueLength = writable.getSize() - offsetTillNow;
        if (getSaveKeys()) {
            // Write ( key_length, value_length, key,
            // value )
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        } else {
            // Write (value_length + value)
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        }

        numTuples++;

        // If we have multiple values for this md5 that is a collision,
        // throw an exception--either the data itself has duplicates, there
        // are trillions of keys, or someone is attempting something
        // malicious ( We obviously expect collisions when we save keys )
        if (!getSaveKeys() && numTuples > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.get(), 0, key.getSize())));

    }

    if (numTuples < 0) {
        // Overflow
        throw new VoldemortException("Found too many collisions: chunk " + chunkId + " has exceeded "
                + Short.MAX_VALUE + " collisions.");
    } else if (numTuples > 1) {
        // Update number of collisions + max keys per collision
        reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1);

        long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter();
        if (numTuples > numCollisions) {
            reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions);
        }
    }

    // Flush the value
    valueStream.flush();
    byte[] value = stream.toByteArray();

    // Start writing to file now
    // First, if save keys flag set the number of keys
    if (getSaveKeys()) {

        this.valueFileStream.writeShort(numTuples);
        this.position += ByteUtils.SIZE_OF_SHORT;

        if (this.checkSumDigestValue != null) {
            this.checkSumDigestValue.update(numTuples);
        }
    }

    this.valueFileStream.write(value);
    this.position += value.length;

    if (this.checkSumDigestValue != null) {
        this.checkSumDigestValue.update(value);
    }

    if (this.position < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded "
                + Integer.MAX_VALUE + " bytes.");
}
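
The reads at the top of the while-loop above imply a fixed value-record layout: a 4-byte node id, a 4-byte partition id, an optional replica-type byte when keys are saved, and then the payload. As an illustration only (this helper is hypothetical, not part of Voldemort, and it assumes the ints are stored big-endian as ByteUtils.readInt expects), such a record could be assembled like this:

import java.nio.ByteBuffer;

import org.apache.hadoop.io.BytesWritable;

public class ReducerValueSketch {

    // Builds [ nodeId:int ][ partitionId:int ][ replicaType:byte if saveKeys ][ payload ],
    // i.e. the layout the reducer above peels apart with ByteUtils.readInt/readBytes.
    public static BytesWritable makeValue(int nodeId, int partitionId, byte replicaType,
            boolean saveKeys, byte[] payload) {
        int headerSize = 2 * 4 + (saveKeys ? 1 : 0);
        ByteBuffer buffer = ByteBuffer.allocate(headerSize + payload.length); // big-endian by default
        buffer.putInt(nodeId);
        buffer.putInt(partitionId);
        if (saveKeys) {
            buffer.put(replicaType);
        }
        buffer.put(payload);
        return new BytesWritable(buffer.array());
    }
}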

From source file:voldemort.store.readonly.disk.HadoopStoreWriterPerBucket.java

License:Apache License

@Override
public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter) throws IOException {

    // Read chunk id
    int chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks());

    // Write key and position
    this.indexFileStream[chunkId].write(key.get(), 0, key.getSize());
    this.indexFileStream[chunkId].writeInt(this.position[chunkId]);

    // Run key through checksum digest
    if (this.checkSumDigestIndex[chunkId] != null) {
        this.checkSumDigestIndex[chunkId].update(key.get(), 0, key.getSize());
        this.checkSumDigestIndex[chunkId].update(this.position[chunkId]);
    }

    short numTuples = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.get();
        int offsetTillNow = 0;

        // Read node Id
        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read partition id
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read replica type
        if (getSaveKeys()) {
            if (this.replicaType == -1)
                this.replicaType = (int) ByteUtils.readBytes(valueBytes, offsetTillNow, ByteUtils.SIZE_OF_BYTE);
            offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        }

        int valueLength = writable.getSize() - offsetTillNow;
        if (getSaveKeys()) {
            // Write ( key_length, value_length, key,
            // value )
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        } else {
            // Write (value_length + value)
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        }

        numTuples++;

        // If we have multiple values for this md5 that is a collision,
        // throw an exception--either the data itself has duplicates, there
        // are trillions of keys, or someone is attempting something
        // malicious ( We obviously expect collisions when we save keys )
        if (!getSaveKeys() && numTuples > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.get(), 0, key.getSize())));

    }

    if (numTuples < 0) {
        // Overflow
        throw new VoldemortException("Found too many collisions: chunk " + chunkId + " has exceeded "
                + Short.MAX_VALUE + " collisions.");
    } else if (numTuples > 1) {
        // Update number of collisions + max keys per collision
        reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1);

        long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter();
        if (numTuples > numCollisions) {
            reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions);
        }
    }

    // Flush the value
    valueStream.flush();
    byte[] value = stream.toByteArray();

    // Start writing to file now
    // First, if save keys flag set the number of keys
    if (getSaveKeys()) {

        this.valueFileStream[chunkId].writeShort(numTuples);
        this.position[chunkId] += ByteUtils.SIZE_OF_SHORT;

        if (this.checkSumDigestValue[chunkId] != null) {
            this.checkSumDigestValue[chunkId].update(numTuples);
        }
    }

    this.valueFileStream[chunkId].write(value);
    this.position[chunkId] += value.length;

    if (this.checkSumDigestValue[chunkId] != null) {
        this.checkSumDigestValue[chunkId].update(value);
    }

    if (this.position[chunkId] < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded "
                + Integer.MAX_VALUE + " bytes.");

}

From source file:voldemort.store.readonly.mr.HadoopStoreBuilderPartitioner.java

License:Apache License

public int getPartition(BytesWritable key, BytesWritable value, int numReduceTasks) {
    int partitionId = ByteUtils.readInt(value.get(), ByteUtils.SIZE_OF_INT);
    int chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks());
    if (getSaveKeys()) {
        int replicaType = (int) ByteUtils.readBytes(value.get(), 2 * ByteUtils.SIZE_OF_INT,
                ByteUtils.SIZE_OF_BYTE);
        if (getReducerPerBucket()) {
            return (partitionId * getStoreDef().getReplicationFactor() + replicaType) % numReduceTasks;
        } else {
            return ((partitionId * getStoreDef().getReplicationFactor() * getNumChunks())
                    + (replicaType * getNumChunks()) + chunkId) % numReduceTasks;
        }
    } else {
        if (getReducerPerBucket()) {
            return partitionId % numReduceTasks;
        } else {
            return (partitionId * getNumChunks() + chunkId) % numReduceTasks;
        }

    }
}
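
To make the arithmetic concrete, assume (purely for illustration) a replication factor of 2, 4 chunks, 10 reduce tasks, and a key in the saveKeys case with partitionId = 3, replicaType = 1 and chunkId = 2. With getReducerPerBucket() false the key lands on reducer (3 * 2 * 4 + 1 * 4 + 2) % 10 = 30 % 10 = 0; with getReducerPerBucket() true it lands on reducer (3 * 2 + 1) % 10 = 7.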

From source file:voldemort.store.readonly.mr.JsonStoreBuilderMapper.java

License:Apache License

private Object makeResult(BytesWritable writable, JsonTypeSerializer serializer, String selection,
        StoreBuilderTransformation trans) {
    Object obj = serializer.toObject(writable.get());
    if (selection != null) {
        Map m = (Map) obj;
        obj = m.get(selection);
    }

    if (trans != null)
        obj = trans.transform(obj);

    return obj;
}

From source file:voldemort.store.readonly.mr.serialization.JsonMapper.java

License:Apache License

@SuppressWarnings("unchecked")
public void map(BytesWritable key, BytesWritable value, OutputCollector<BytesWritable, BytesWritable> output,
        Reporter reporter) throws IOException {
    if (!isConfigured())
        throw new IllegalStateException(
                "JsonMapper's configure method wasn't called.  Please make sure that super.configure() is called.");

    mapObjects(getInputKeySerializer().toObject(key.get()), getInputValueSerializer().toObject(value.get()),
            getOutputCollector(output), reporter);
}

From source file:voldemort.store.readonly.mr.serialization.JsonReducer.java

License:Apache License

public void reduce(BytesWritable key, Iterator<BytesWritable> values,
        OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter) throws IOException {
    reduceObjects(getInputKeySerializer().toObject(key.get()),
            new TranslatingIterator(getInputValueSerializer(), values), getOutputCollector(output), reporter);
}

From source file:voldemort.store.readwrite.mr.HadoopRWStoreBuilderPartitioner.java

License:Apache License

public int getPartition(BytesWritable key, BytesWritable value, int numReduceTasks) {
    int nodeId = ByteUtils.readInt(key.get(), 0);
    int chunkId = ByteUtils.readInt(key.get(), 4);
    return (nodeId * getNumChunks() + chunkId) % numReduceTasks;
}