List of usage examples for org.apache.hadoop.io.BytesWritable#getBytes()
@Override public byte[] getBytes()
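getBytes() returns the writable's backing array without copying. The array's length reflects the writable's capacity, not its logical size, so only the first getLength() bytes are valid; anything beyond that may be stale padding from buffer reuse. A minimal sketch of the safe access pattern (GetBytesDemo and the sample bytes are illustrative, not taken from the sources below):

    import org.apache.hadoop.io.BytesWritable;

    public class GetBytesDemo {
        public static void main(String[] args) {
            BytesWritable w = new BytesWritable(new byte[] { 1, 2, 3, 4, 5 });
            w.setSize(3); // the valid region shrinks to 3 bytes; the backing array keeps 5

            byte[] backing = w.getBytes(); // zero-copy view; backing.length >= w.getLength()
            int valid = w.getLength();     // 3

            // Always pair the buffer with its length:
            for (int i = 0; i < valid; i++) {
                System.out.println(backing[i]); // prints 1, 2, 3 only
            }
        }
    }

The examples below split along exactly this line: some pass (getBytes(), 0, getLength()) triples to length-aware APIs, while others hand the raw getBytes() array to APIs that cannot know where the valid data ends.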
From source file:smile.wide.counter.ReduceCounter.java
License:Apache License
public void reduce(BytesWritable key, Iterable<DoubleWritable> values, Context context)
        throws IOException, InterruptedException {
    double total = 0;
    for (DoubleWritable p : values) {
        total += p.get();
    }
    byte[] bits = key.getBytes();
    // Note: getBytes() exposes the full backing array, so bits.length can exceed
    // key.getLength(); key.getLength() * 8 would be the tighter bit bound here.
    BitBuffer buf = new BitBuffer(bits, bits.length * 8);
    Network net = params.getNet();
    StringBuilder out = new StringBuilder();
    int[][] families = params.getFamilies();
    int pos = Parameters.bitCount(families.length);
    int familyIndex = buf.getBits(0, pos);
    out.append(familyIndex);
    int[] keyFamily = families[familyIndex];
    for (int i = 0; i < keyFamily.length; i++) {
        out.append(' ');
        int handle = keyFamily[i];
        int bitsPerNode = Parameters.bitCount(net.getOutcomeCount(handle));
        int outcomeIndex = buf.getBits(pos, bitsPerNode);
        out.append(net.getOutcomeId(handle, outcomeIndex));
        pos += bitsPerNode;
    }
    context.write(new Text(out.toString()), new DoubleWritable(total));
}
From source file:tachyon.client.keyvalue.hadoop.KeyValueRecordWriter.java
License:Apache License
@Override
public synchronized void write(BytesWritable key, BytesWritable value) throws IOException {
    try {
        mWriter.put(key.getBytes(), value.getBytes());
        // Signal progress to the job manager to inform it that the task is still running.
        mProgress.progress();
    } catch (TachyonException e) {
        throw new IOException(e);
    }
}
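Note that getBytes() can hand the store a buffer longer than the actual payload, since the backing array is only guaranteed to be valid up to getLength(). A minimal defensive sketch (BytesWritableUtil and toTrimmedArray are names invented here, not part of the project above); on Hadoop 0.23/2.x and later, BytesWritable.copyBytes() performs the same trimmed copy:

    import java.util.Arrays;
    import org.apache.hadoop.io.BytesWritable;

    public final class BytesWritableUtil {
        // Copy only the valid region [0, getLength()) of the backing array.
        public static byte[] toTrimmedArray(BytesWritable w) {
            return Arrays.copyOf(w.getBytes(), w.getLength());
        }
    }

With such a helper, the call above would become mWriter.put(toTrimmedArray(key), toTrimmedArray(value)), guaranteeing the store sees exactly the payload bytes.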
From source file:uk.bl.wa.hadoop.mapreduce.hash.MessageDigestMapper.java
License:Open Source License
@Override
protected void map(Path key, BytesWritable value, Mapper<Path, BytesWritable, Text, Text>.Context context)
        throws IOException, InterruptedException {
    if (!key.equals(current)) {
        // Extract and emit:
        this.emit(context);
        // Set up a new one:
        current = key;
        bytes_seen = 0;
        md.reset();
        log.info("Hashing " + current);
    }
    md.update(value.getBytes(), 0, value.getLength());
    bytes_seen += value.getLength();
}
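This mapper shows the idiomatic pairing: because MessageDigest.update takes an explicit offset and length, the padded tail of the backing array never enters the hash. A self-contained sketch of the same idiom (DigestIdiom and the sample payload are illustrative, not from the source above):

    import java.security.MessageDigest;
    import java.security.NoSuchAlgorithmException;
    import org.apache.hadoop.io.BytesWritable;

    public class DigestIdiom {
        public static void main(String[] args) throws NoSuchAlgorithmException {
            MessageDigest md = MessageDigest.getInstance("SHA-256");
            BytesWritable chunk = new BytesWritable("payload".getBytes());
            // Bound the update by getLength() so padding bytes are excluded.
            md.update(chunk.getBytes(), 0, chunk.getLength());
            byte[] hash = md.digest();
            System.out.println(hash.length); // 32 bytes for SHA-256
        }
    }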
From source file:voldemort.store.readonly.mapreduce.HadoopStoreBuilderPartitioner.java
License:Apache License
@Override
public int getPartition(BytesWritable key, BytesWritable value, int numPartitions) {
    int partitionId = ByteUtils.readInt(value.getBytes(), 4);
    int chunkId = ReadOnlyUtils.chunk(key.getBytes(), numChunks);
    return (partitionId * numChunks + chunkId) % numPartitions;
}
From source file:voldemort.store.readonly.mapreduce.HadoopStoreBuilderReducer.java
License:Apache License
/**
 * Reduce should get the sorted MD5 of the Voldemort key (either 16 bytes if
 * saving keys is disabled, else 4 bytes) as key, and for the value either
 * (a) node-id, partition-id, value, if saving keys is disabled, or
 * (b) node-id, partition-id, [key-size, key, value-size, value]*, if saving
 * keys is enabled.
 */
@Override
public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context)
        throws IOException, InterruptedException {
    Iterator<BytesWritable> iterator = values.iterator();

    // Write key and position
    this.indexFileStream.write(key.getBytes(), 0, key.getLength());
    this.indexFileStream.writeInt(this.position);

    // Run key through checksum digest
    if (this.checkSumDigestIndex != null) {
        this.checkSumDigestIndex.update(key.getBytes(), 0, key.getLength());
        this.checkSumDigestIndex.update(this.position);
    }

    int numKeyValues = 0;
    ByteArrayOutputStream stream = new ByteArrayOutputStream();
    DataOutputStream valueStream = new DataOutputStream(stream);

    while (iterator.hasNext()) {
        BytesWritable writable = iterator.next();
        byte[] valueBytes = writable.getBytes();

        if (this.nodeId == -1)
            this.nodeId = ByteUtils.readInt(valueBytes, 0);
        if (this.partitionId == -1)
            this.partitionId = ByteUtils.readInt(valueBytes, 4);
        if (this.chunkId == -1)
            this.chunkId = ReadOnlyUtils.chunk(key.getBytes(), this.numChunks);

        int valueLength = writable.getLength() - 8;
        if (saveKeys) {
            // Write (key_length + key + value_length + value)
            valueStream.write(valueBytes, 8, valueLength);
        } else {
            // Write (value_length + value)
            valueStream.writeInt(valueLength);
            valueStream.write(valueBytes, 8, valueLength);
        }
        numKeyValues++;

        // If we have multiple values for this md5, that is a collision:
        // either the data itself has duplicates, there are trillions of
        // keys, or someone is attempting something malicious. (We don't
        // expect collisions when saveKeys = false.)
        if (!saveKeys && numKeyValues > 1)
            throw new VoldemortException("Duplicate keys detected for md5 sum "
                    + ByteUtils.toHexString(ByteUtils.copy(key.getBytes(), 0, key.getLength())));
    }

    if (saveKeys) {
        // Write the number of k/vs as a single byte
        byte[] numBuf = new byte[1];
        numBuf[0] = (byte) numKeyValues;
        this.valueFileStream.write(numBuf);
        this.position += 1;
        if (this.checkSumDigestValue != null) {
            this.checkSumDigestValue.update(numBuf);
        }
    }

    // Write the value out
    valueStream.flush();
    byte[] value = stream.toByteArray();
    this.valueFileStream.write(value);
    this.position += value.length;
    if (this.checkSumDigestValue != null) {
        this.checkSumDigestValue.update(value);
    }

    if (this.position < 0)
        throw new VoldemortException("Chunk overflow exception: chunk " + chunkId
                + " has exceeded " + Integer.MAX_VALUE + " bytes.");
}
From source file:voldemort.store.readonly.mr.AvroStoreBuilderMapper.java
License:Apache License
/**
 * Create the voldemort key and value from the input Avro record by
 * extracting the key and value and mapping them out to each of the
 * responsible voldemort nodes.
 *
 * The output value is the node_id and partition_id of the responsible
 * node, followed by the serialized value.
 */
@Override
public void map(GenericData.Record record, AvroCollector<Pair<ByteBuffer, ByteBuffer>> collector,
        Reporter reporter) throws IOException {

    byte[] keyBytes = keySerializer.toBytes(record.get(keyField));
    byte[] valBytes = valueSerializer.toBytes(record.get(valField));

    // Compress key and values if required
    if (keySerializerDefinition.hasCompression()) {
        keyBytes = keyCompressor.deflate(keyBytes);
    }
    if (valueSerializerDefinition.hasCompression()) {
        valBytes = valueCompressor.deflate(valBytes);
    }

    // Get the output byte arrays ready to populate
    byte[] outputValue;
    BytesWritable outputKey;

    // Leave initial offset for (a) node id and (b) partition id,
    // since they are written later
    int offsetTillNow = 2 * ByteUtils.SIZE_OF_INT;

    if (getSaveKeys()) {
        // Layout: 4 (node id) + 4 (partition id) + 1 (replica type -
        // primary | secondary | tertiary...) + 4 (key size) +
        // 4 (value size) + key + value
        outputValue = new byte[valBytes.length + keyBytes.length + ByteUtils.SIZE_OF_BYTE
                + 4 * ByteUtils.SIZE_OF_INT];

        // Write key length - leave byte for replica type
        offsetTillNow += ByteUtils.SIZE_OF_BYTE;
        ByteUtils.writeInt(outputValue, keyBytes.length, offsetTillNow);

        // Write value length
        offsetTillNow += ByteUtils.SIZE_OF_INT;
        ByteUtils.writeInt(outputValue, valBytes.length, offsetTillNow);

        // Write key
        offsetTillNow += ByteUtils.SIZE_OF_INT;
        System.arraycopy(keyBytes, 0, outputValue, offsetTillNow, keyBytes.length);

        // Write value
        offsetTillNow += keyBytes.length;
        System.arraycopy(valBytes, 0, outputValue, offsetTillNow, valBytes.length);

        // Generate MR key - upper 8 bytes of 16 byte md5
        outputKey = new BytesWritable(ByteUtils.copy(md5er.digest(keyBytes), 0,
                2 * ByteUtils.SIZE_OF_INT));
    } else {
        // Layout: 4 (node id) + 4 (partition id) + value
        outputValue = new byte[valBytes.length + 2 * ByteUtils.SIZE_OF_INT];

        // Write value
        System.arraycopy(valBytes, 0, outputValue, offsetTillNow, valBytes.length);

        // Generate MR key - 16 byte md5
        outputKey = new BytesWritable(md5er.digest(keyBytes));
    }

    // Generate partition and node list this key is destined for
    List<Integer> partitionList = routingStrategy.getPartitionList(keyBytes);
    Node[] partitionToNode = routingStrategy.getPartitionToNode();

    for (int replicaType = 0; replicaType < partitionList.size(); replicaType++) {
        // Node id
        ByteUtils.writeInt(outputValue, partitionToNode[partitionList.get(replicaType)].getId(), 0);
        if (getSaveKeys()) {
            // Primary partition id
            ByteUtils.writeInt(outputValue, partitionList.get(0), ByteUtils.SIZE_OF_INT);
            // Replica type
            ByteUtils.writeBytes(outputValue, replicaType, 2 * ByteUtils.SIZE_OF_INT,
                    ByteUtils.SIZE_OF_BYTE);
        } else {
            // Partition id
            ByteUtils.writeInt(outputValue, partitionList.get(replicaType), ByteUtils.SIZE_OF_INT);
        }

        BytesWritable outputVal = new BytesWritable(outputValue);

        byte[] md5KeyBytes = outputKey.getBytes();
        ByteBuffer keyBuffer = ByteBuffer.allocate(md5KeyBytes.length);
        keyBuffer.put(md5KeyBytes);
        keyBuffer.rewind();

        ByteBuffer valueBuffer = ByteBuffer.allocate(outputValue.length);
        valueBuffer.put(outputValue);
        valueBuffer.rewind();

        Pair<ByteBuffer, ByteBuffer> p = new Pair<ByteBuffer, ByteBuffer>(keyBuffer, valueBuffer);
        collector.collect(p);
    }
    md5er.reset();
}
From source file:voldemort.store.readonly.mr.serialization.JsonDeserializerComparator.java
License:Apache License
public int compare(BytesWritable o1, BytesWritable o2) {
    return this.compareBytes(o1.getBytes(), 0, o1.getLength(), o2.getBytes(), 0, o2.getLength());
}
From source file:voldemort.store.readonly.mr.utils.HadoopUtils.java
License:Apache License
/**
 * Tag the BytesWritable with an integer at the END.
 */
public static void appendTag(BytesWritable writable, int tag) {
    int size = writable.getLength();
    if (writable.getCapacity() < size + 4) {
        // BytesWritable preserves old values
        writable.setCapacity(size + 4);
    }
    ByteUtils.writeInt(writable.getBytes(), tag, size);
    writable.setSize(size + 4);
}
From source file:voldemort.store.readonly.mr.utils.HadoopUtils.java
License:Apache License
/**
 * Read and return the integer from the END of the BytesWritable. The tag
 * bytes are NOT removed.
 */
public static int readTag(BytesWritable readable) {
    return ByteUtils.readInt(readable.getBytes(), readable.getLength() - 4);
}
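The two helpers round-trip: appendTag grows the writable by four bytes and readTag reads them back without shrinking it. A self-contained sketch of the same scheme, substituting java.nio.ByteBuffer for Voldemort's ByteUtils (this assumes ByteUtils.writeInt/readInt use big-endian order, which is ByteBuffer's default; TagDemo is an illustrative name):

    import java.nio.ByteBuffer;
    import org.apache.hadoop.io.BytesWritable;

    public class TagDemo {
        // Equivalent of appendTag above, under the big-endian assumption.
        static void appendTag(BytesWritable writable, int tag) {
            int size = writable.getLength();
            if (writable.getCapacity() < size + 4) {
                writable.setCapacity(size + 4); // preserves the existing bytes
            }
            // Re-fetch getBytes() after setCapacity(), which may reallocate.
            ByteBuffer.wrap(writable.getBytes(), size, 4).putInt(tag);
            writable.setSize(size + 4);
        }

        static int readTag(BytesWritable readable) {
            return ByteBuffer.wrap(readable.getBytes(), readable.getLength() - 4, 4).getInt();
        }

        public static void main(String[] args) {
            BytesWritable w = new BytesWritable(new byte[] { 10, 20, 30 });
            appendTag(w, 42);
            System.out.println(readTag(w)); // 42; the tag bytes remain appended
        }
    }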
From source file:weka.distributed.hadoop.CorrelationMatrixRowHadoopReducer.java
License:Open Source License
@Override
public void reduce(Text key, Iterable<BytesWritable> values, Context context) throws IOException {
    List<MatrixRowHolder> rowsToAgg = new ArrayList<MatrixRowHolder>();
    try {
        for (BytesWritable b : values) {
            byte[] bytes = b.getBytes();
            rowsToAgg.add(deserialize(bytes));
        }
    } catch (ClassNotFoundException ex) {
        throw new IOException(ex);
    }

    if (rowsToAgg.size() > 0) {
        int rowNum = rowsToAgg.get(0).getRowNumber();
        List<double[]> rows = new ArrayList<double[]>();
        List<int[]> coOcc = null;
        if (!m_missingsWereReplacedWithMeans) {
            coOcc = new ArrayList<int[]>();
        }
        for (MatrixRowHolder r : rowsToAgg) {
            if (r.getRowNumber() != rowNum) {
                throw new IOException("Matrix row numbers for this key appear to differ!");
            }
            rows.add(r.getRow());
            if (!m_missingsWereReplacedWithMeans) {
                coOcc.add(r.getCoOccurrencesCounts());
            }
        }
        try {
            double[] aggregated = m_task.aggregate(rowsToAgg.get(0).getRowNumber(), rows, coOcc,
                    m_headerWithSummaryAtts, m_missingsWereReplacedWithMeans, m_covariance,
                    m_deleteClassIfSet);

            // Assemble the Text key (row number) and the Text row
            // (space-separated values)
            Text outKey = new Text();
            outKey.set("" + rowNum);
            StringBuilder b = new StringBuilder();
            for (int i = 0; i < aggregated.length; i++) {
                if (i < aggregated.length - 1) {
                    b.append("" + aggregated[i]).append(" ");
                } else {
                    b.append("" + aggregated[i]);
                }
            }
            Text outVal = new Text();
            outVal.set(b.toString());
            context.write(outKey, outVal);
        } catch (DistributedWekaException e) {
            throw new IOException(e);
        } catch (InterruptedException e) {
            throw new IOException(e);
        }
    }
}