List of usage examples for org.apache.hadoop.io.BytesWritable.get()
@Deprecated public byte[] get()
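Note on the accessors used throughout this page: in the Hadoop API, get() is deprecated in favor of getBytes(), and getSize() in favor of getLength(). A minimal, self-contained sketch of both forms (the class name BytesWritableGetExample is illustrative, not taken from any project below):

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableGetExample {
    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable("hello".getBytes());

        // Deprecated accessors, as used in the examples below:
        byte[] backing = bw.get();       // backing array, may be longer than the valid data
        int validLength = bw.getSize();  // number of valid bytes

        // Non-deprecated equivalents:
        byte[] backing2 = bw.getBytes();
        int validLength2 = bw.getLength();

        // Copy out only the valid bytes, since the backing array can be padded.
        byte[] exact = Arrays.copyOf(backing, validLength);
        System.out.println(new String(exact)); // prints "hello"
    }
}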
From source file:test.KeyValueTextOutputFormat.java
License:Apache License
/**
 * Create the final out file and output row by row. After one row is
 * appended, a configured row separator is appended.
 *
 * @param jc the job configuration file
 * @param outPath the final output file to be created
 * @param valueClass the value class used for create
 * @param isCompressed whether the content is compressed or not
 * @param tableProperties the tableProperties of this file's corresponding table
 * @param progress progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(Constants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }
    final int finalRowSeparator = rowSeparator;

    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    final byte[] key = "key".getBytes();
    final byte[] split = "\t".getBytes();

    return new RecordWriter() {
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                outStream.write(key);
                outStream.write(split);
                outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}
From source file:voldemort.contrib.batchindexer.performance.BdbBuildPerformanceTest.java
License:Apache License
public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length != 3)
        Utils.croak("USAGE: java " + BdbBuildPerformanceTest.class.getName()
                + " serverPropsFile storeName jsonSequenceDataFile");
    String serverPropsFile = args[0];
    String storeName = args[1];
    String jsonDataFile = args[2];

    final Store<ByteArray, byte[], byte[]> store = new BdbStorageConfiguration(
            new VoldemortConfig(new Props(new File(serverPropsFile))))
                    .getStore(TestUtils.makeStoreDefinition(storeName),
                              TestUtils.makeSingleNodeRoutingStrategy());

    final AtomicInteger obsoletes = new AtomicInteger(0);

    Path jsonFilePath = new Path(jsonDataFile);
    FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration()).listStatus(jsonFilePath)[0];
    final SequenceFileRecordReader<BytesWritable, BytesWritable> reader =
            new SequenceFileRecordReader<BytesWritable, BytesWritable>(new Configuration(),
                    new FileSplit(jsonFilePath, 0, jsonFileStatus.getLen(), (String[]) null));

    PerformanceTest readWriteTest = new PerformanceTest() {

        @Override
        public void doOperation(int index) throws Exception {
            try {
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                reader.next(key, value);
                store.put(new ByteArray(ByteUtils.copy(key.get(), 0, key.getSize())),
                          Versioned.value(ByteUtils.copy(value.get(), 0, value.getSize())),
                          null);
            } catch (ObsoleteVersionException e) {
                obsoletes.incrementAndGet();
            }
        }
    };
    readWriteTest.run(30 * 1000 * 1000, 1);
    System.out.println("Bdb write throughput with one thread:");
    readWriteTest.printStats();
}
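The ByteUtils.copy(key.get(), 0, key.getSize()) calls above are needed because get() returns the writable's backing buffer, which can be longer than the valid data, especially when a record reader reuses the same BytesWritable across records. A minimal standalone sketch of the same idea using only the JDK and Hadoop (the CopyValidBytes class and toExactBytes helper are illustrative names, not part of Voldemort):

import java.util.Arrays;
import org.apache.hadoop.io.BytesWritable;

public class CopyValidBytes {

    // Copy out only the valid portion of the writable's backing array.
    static byte[] toExactBytes(BytesWritable writable) {
        // get()/getSize() are the deprecated aliases of getBytes()/getLength()
        return Arrays.copyOfRange(writable.get(), 0, writable.getSize());
    }

    public static void main(String[] args) {
        BytesWritable bw = new BytesWritable();
        bw.set(new byte[] { 1, 2, 3, 4, 5, 6, 7, 8 }, 0, 8); // grows the backing array
        bw.set(new byte[] { 9, 9 }, 0, 2);                   // reuse, as a record reader would

        System.out.println(bw.get().length);          // typically larger than 2 (padded buffer)
        System.out.println(toExactBytes(bw).length);  // 2
    }
}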
From source file:voldemort.contrib.batchindexer.performance.MysqlBuildPerformanceTest.java
License:Apache License
public static void main(String[] args) throws FileNotFoundException, IOException {
    if (args.length != 3)
        Utils.croak("USAGE: java " + MysqlBuildPerformanceTest.class.getName()
                + " serverPropsFile storeName jsonSequenceDataFile");
    String serverPropsFile = args[0];
    String storeName = args[1];
    String jsonDataFile = args[2];

    final Store<ByteArray, byte[], byte[]> store = new MysqlStorageConfiguration(
            new VoldemortConfig(new Props(new File(serverPropsFile))))
                    .getStore(TestUtils.makeStoreDefinition(storeName),
                              TestUtils.makeSingleNodeRoutingStrategy());

    final AtomicInteger obsoletes = new AtomicInteger(0);

    Path jsonFilePath = new Path(jsonDataFile);
    FileStatus jsonFileStatus = jsonFilePath.getFileSystem(new Configuration()).listStatus(jsonFilePath)[0];
    final SequenceFileRecordReader<BytesWritable, BytesWritable> reader =
            new SequenceFileRecordReader<BytesWritable, BytesWritable>(new Configuration(),
                    new FileSplit(jsonFilePath, 0, jsonFileStatus.getLen(), (String[]) null));

    PerformanceTest readWriteTest = new PerformanceTest() {

        @Override
        public void doOperation(int index) throws Exception {
            try {
                BytesWritable key = new BytesWritable();
                BytesWritable value = new BytesWritable();
                reader.next(key, value);
                store.put(new ByteArray(ByteUtils.copy(key.get(), 0, key.getSize())),
                          Versioned.value(ByteUtils.copy(value.get(), 0, value.getSize())),
                          null);
            } catch (ObsoleteVersionException e) {
                obsoletes.incrementAndGet();
            }
        }
    };
    readWriteTest.run(1000, 1);
    System.out.println("MySQL write throughput with one thread:");
    readWriteTest.printStats();
}
From source file:voldemort.store.readonly.disk.HadoopStoreWriter.java
License:Apache License
@Override
public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter) throws IOException {

    // Write key and position
    this.indexFileStream.write(key.get(), 0, key.getSize());
    this.indexFileStream.writeInt(this.position);

    // Run key through checksum digest
    if (this.checkSumDigestIndex != null) {
        this.checkSumDigestIndex.update(key.get(), 0, key.getSize());
        this.checkSumDigestIndex.update(this.position);
    }

    short numTuples = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.get();
        int offsetTillNow = 0;

        // Read node id
        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read partition id
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read chunk id
        if (this.chunkId == -1)
            this.chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks());

        // Read replica type
        if (getSaveKeys()) {
            if (this.replicaType == -1)
                this.replicaType = (int) ByteUtils.readBytes(valueBytes, offsetTillNow, ByteUtils.SIZE_OF_BYTE);
            offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        }

        int valueLength = writable.getSize() - offsetTillNow;
        if (getSaveKeys()) {
            // Write ( key_length, value_length, key, value )
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        } else {
            // Write ( value_length, value )
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        }

        numTuples++;

        // If we have multiple values for this md5 that is a collision,
        // throw an exception--either the data itself has duplicates, there
        // are trillions of keys, or someone is attempting something
        // malicious ( We obviously expect collisions when we save keys )
        if (!getSaveKeys() && numTuples > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.get(), 0, key.getSize())));
    }

    if (numTuples < 0) {
        // Overflow
        throw new VoldemortException("Found too many collisions: chunk " + chunkId + " has exceeded "
                + Short.MAX_VALUE + " collisions.");
    } else if (numTuples > 1) {
        // Update number of collisions + max keys per collision
        reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1);

        long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter();
        if (numTuples > numCollisions) {
            reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions);
        }
    }

    // Flush the value
    valueStream.flush();
    byte[] value = stream.toByteArray();

    // Start writing to file now
    // First, if the save-keys flag is set, write the number of keys
    if (getSaveKeys()) {
        this.valueFileStream.writeShort(numTuples);
        this.position += ByteUtils.SIZE_OF_SHORT;

        if (this.checkSumDigestValue != null) {
            this.checkSumDigestValue.update(numTuples);
        }
    }

    this.valueFileStream.write(value);
    this.position += value.length;

    if (this.checkSumDigestValue != null) {
        this.checkSumDigestValue.update(value);
    }

    if (this.position < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded "
                + Integer.MAX_VALUE + " bytes.");
}
From source file:voldemort.store.readonly.disk.HadoopStoreWriterPerBucket.java
License:Apache License
@Override
public void write(BytesWritable key, Iterator<BytesWritable> iterator, Reporter reporter) throws IOException {

    // Read chunk id
    int chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks());

    // Write key and position
    this.indexFileStream[chunkId].write(key.get(), 0, key.getSize());
    this.indexFileStream[chunkId].writeInt(this.position[chunkId]);

    // Run key through checksum digest
    if (this.checkSumDigestIndex[chunkId] != null) {
        this.checkSumDigestIndex[chunkId].update(key.get(), 0, key.getSize());
        this.checkSumDigestIndex[chunkId].update(this.position[chunkId]);
    }

    short numTuples = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.get();
        int offsetTillNow = 0;

        // Read node id
        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read partition id
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, offsetTillNow);
        offsetTillNow += ByteUtils.SIZE_OF_INT;

        // Read replica type
        if (getSaveKeys()) {
            if (this.replicaType == -1)
                this.replicaType = (int) ByteUtils.readBytes(valueBytes, offsetTillNow, ByteUtils.SIZE_OF_BYTE);
            offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        }

        int valueLength = writable.getSize() - offsetTillNow;
        if (getSaveKeys()) {
            // Write ( key_length, value_length, key, value )
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        } else {
            // Write ( value_length, value )
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, offsetTillNow, valueLength);
        }

        numTuples++;

        // If we have multiple values for this md5 that is a collision,
        // throw an exception--either the data itself has duplicates, there
        // are trillions of keys, or someone is attempting something
        // malicious ( We obviously expect collisions when we save keys )
        if (!getSaveKeys() && numTuples > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.get(), 0, key.getSize())));
    }

    if (numTuples < 0) {
        // Overflow
        throw new VoldemortException("Found too many collisions: chunk " + chunkId + " has exceeded "
                + Short.MAX_VALUE + " collisions.");
    } else if (numTuples > 1) {
        // Update number of collisions + max keys per collision
        reporter.incrCounter(CollisionCounter.NUM_COLLISIONS, 1);

        long numCollisions = reporter.getCounter(CollisionCounter.MAX_COLLISIONS).getCounter();
        if (numTuples > numCollisions) {
            reporter.incrCounter(CollisionCounter.MAX_COLLISIONS, numTuples - numCollisions);
        }
    }

    // Flush the value
    valueStream.flush();
    byte[] value = stream.toByteArray();

    // Start writing to file now
    // First, if the save-keys flag is set, write the number of keys
    if (getSaveKeys()) {
        this.valueFileStream[chunkId].writeShort(numTuples);
        this.position[chunkId] += ByteUtils.SIZE_OF_SHORT;

        if (this.checkSumDigestValue[chunkId] != null) {
            this.checkSumDigestValue[chunkId].update(numTuples);
        }
    }

    this.valueFileStream[chunkId].write(value);
    this.position[chunkId] += value.length;

    if (this.checkSumDigestValue[chunkId] != null) {
        this.checkSumDigestValue[chunkId].update(value);
    }

    if (this.position[chunkId] < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded "
                + Integer.MAX_VALUE + " bytes.");
}
From source file:voldemort.store.readonly.mr.HadoopStoreBuilderPartitioner.java
License:Apache License
public int getPartition(BytesWritable key, BytesWritable value, int numReduceTasks) {
    int partitionId = ByteUtils.readInt(value.get(), ByteUtils.SIZE_OF_INT);
    int chunkId = ReadOnlyUtils.chunk(key.get(), getNumChunks());
    if (getSaveKeys()) {
        int replicaType = (int) ByteUtils.readBytes(value.get(), 2 * ByteUtils.SIZE_OF_INT,
                ByteUtils.SIZE_OF_BYTE);
        if (getReducerPerBucket()) {
            return (partitionId * getStoreDef().getReplicationFactor() + replicaType) % numReduceTasks;
        } else {
            return ((partitionId * getStoreDef().getReplicationFactor() * getNumChunks())
                    + (replicaType * getNumChunks()) + chunkId) % numReduceTasks;
        }
    } else {
        if (getReducerPerBucket()) {
            return partitionId % numReduceTasks;
        } else {
            return (partitionId * getNumChunks() + chunkId) % numReduceTasks;
        }
    }
}
From source file:voldemort.store.readonly.mr.JsonStoreBuilderMapper.java
License:Apache License
private Object makeResult(BytesWritable writable, JsonTypeSerializer serializer, String selection,
        StoreBuilderTransformation trans) {
    Object obj = serializer.toObject(writable.get());
    if (selection != null) {
        Map m = (Map) obj;
        obj = m.get(selection);
    }

    if (trans != null)
        obj = trans.transform(obj);

    return obj;
}
From source file:voldemort.store.readonly.mr.serialization.JsonMapper.java
License:Apache License
@SuppressWarnings("unchecked")
public void map(BytesWritable key, BytesWritable value,
        OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter) throws IOException {
    if (!isConfigured())
        throw new IllegalStateException("JsonMapper's configure method wasn't called. "
                + "Please make sure that super.configure() is called.");

    mapObjects(getInputKeySerializer().toObject(key.get()),
               getInputValueSerializer().toObject(value.get()),
               getOutputCollector(output),
               reporter);
}
From source file:voldemort.store.readonly.mr.serialization.JsonReducer.java
License:Apache License
public void reduce(BytesWritable key, Iterator<BytesWritable> values,
        OutputCollector<BytesWritable, BytesWritable> output, Reporter reporter) throws IOException {
    reduceObjects(getInputKeySerializer().toObject(key.get()),
                  new TranslatingIterator(getInputValueSerializer(), values),
                  getOutputCollector(output),
                  reporter);
}
From source file:voldemort.store.readwrite.mr.HadoopRWStoreBuilderPartitioner.java
License:Apache License
public int getPartition(BytesWritable key, BytesWritable value, int numReduceTasks) {
    int nodeId = ByteUtils.readInt(key.get(), 0);
    int chunkId = ByteUtils.readInt(key.get(), 4);
    return (nodeId * getNumChunks() + chunkId) % numReduceTasks;
}