List of usage examples for org.apache.hadoop.io.BytesWritable.getLength()
@Override public int getLength()
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinFormatHadoopTest.java
License:Apache License
/**
 * Reads the gzip-compressed test file {@code testdata/version4comp.blk.gz} through
 * {@code BitcoinRawBlockFileInputFormat} and checks that it yields exactly one split
 * containing exactly one block of 998039 bytes.
 *
 * @throws IOException if creating the splits, reading a record or closing the reader fails
 */
@Test
public void readBitcoinRawBlockInputFormatGzipCompressed() throws IOException {
    JobConf job = new JobConf(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, job);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "version4comp.blk.gz";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputFormat.setInputPaths(job, file);
    BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
    format.configure(job);
    InputSplit[] inputSplits = format.getSplits(job, 1);
    assertEquals("Only one split generated for compressed block", 1, inputSplits.length);
    RecordReader<BytesWritable, BytesWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull("Format returned null RecordReader", reader);
    // Release the reader even when an assertion below fails, so the underlying
    // (decompressing) stream is not leaked.
    try {
        BytesWritable key = new BytesWritable();
        BytesWritable block = new BytesWritable();
        assertTrue("Input Split for block version contains at least one block", reader.next(key, block));
        assertEquals("Compressed block must have a size of 998.039 bytes", 998039, block.getLength());
        BytesWritable emptyKey = new BytesWritable();
        BytesWritable emptyBlock = new BytesWritable();
        assertFalse("No further blocks in compressed block", reader.next(emptyKey, emptyBlock));
    } finally {
        reader.close();
    }
}
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinFormatHadoopTest.java
License:Apache License
/**
 * Reads the bzip2-compressed test file {@code testdata/version4comp.blk.bz2} through
 * {@code BitcoinRawBlockFileInputFormat} and checks that it yields exactly one split
 * containing exactly one block of 998039 bytes.
 *
 * @throws IOException if creating the splits, reading a record or closing the reader fails
 */
@Test
public void readBitcoinRawBlockInputFormatBzip2Compressed() throws IOException {
    JobConf job = new JobConf(defaultConf);
    CompressionCodec bzip2 = new BZip2Codec();
    ReflectionUtils.setConf(bzip2, job);
    ClassLoader classLoader = getClass().getClassLoader();
    String fileName = "version4comp.blk.bz2";
    String fileNameBlock = classLoader.getResource("testdata/" + fileName).getFile();
    Path file = new Path(fileNameBlock);
    FileInputFormat.setInputPaths(job, file);
    BitcoinRawBlockFileInputFormat format = new BitcoinRawBlockFileInputFormat();
    format.configure(job);
    InputSplit[] inputSplits = format.getSplits(job, 1);
    assertEquals("Only one split generated for compressed block", 1, inputSplits.length);
    RecordReader<BytesWritable, BytesWritable> reader = format.getRecordReader(inputSplits[0], job, reporter);
    assertNotNull("Format returned null RecordReader", reader);
    // Release the reader even when an assertion below fails, so the underlying
    // (decompressing) stream is not leaked.
    try {
        BytesWritable key = new BytesWritable();
        BytesWritable block = new BytesWritable();
        assertTrue("Input Split for block version contains at least one block", reader.next(key, block));
        assertEquals("Compressed block must have a size of 998.039 bytes", 998039, block.getLength());
        BytesWritable emptyKey = new BytesWritable();
        BytesWritable emptyBlock = new BytesWritable();
        assertFalse("No further blocks in compressed block", reader.next(emptyKey, emptyBlock));
    } finally {
        reader.close();
    }
}
From source file:parquet.hadoop.thrift.ThriftBytesWriteSupport.java
License:Apache License
private TProtocol protocol(BytesWritable record) { TProtocol protocol = protocolFactory .getProtocol(new TIOStreamTransport(new ByteArrayInputStream(record.getBytes()))); /* Reduce the chance of OOM when data is corrupted. When readBinary is called on TBinaryProtocol, it reads the length of the binary first, so if the data is corrupted, it could read a big integer as the length of the binary and therefore causes OOM to happen. Currently this fix only applies to TBinaryProtocol which has the setReadLength defined. *///from ww w . j a v a2 s.co m if (protocol instanceof TBinaryProtocol) { ((TBinaryProtocol) protocol).setReadLength(record.getLength()); } return protocol; }
From source file:protobuf.examples.ProtobufMapper.java
License:Open Source License
/**
 * Logs the incoming record, copies its first {@code getLength()} bytes into a fresh array
 * and emits a count of one under a fixed key.
 *
 * @param key      the input key (not used by this method)
 * @param value    the raw record bytes
 * @param output   collector that receives the emitted pair
 * @param reporter the task reporter (not used by this method)
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, BytesWritable value, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {
    LOG.info("In Mapper Get Data: " + value.toString());

    // Copy only the valid prefix of the backing array.
    final int payloadLength = value.getLength();
    final byte[] payload = new byte[payloadLength];
    System.arraycopy(value.getBytes(), 0, payload, 0, payloadLength);

    // NOTE(review): the emitted key is the literal text "msg.getEmail()", and the copied
    // payload is not used afterwards -- confirm whether message parsing was meant to happen here.
    final Text outputKey = new Text("msg.getEmail()");
    final IntWritable one = new IntWritable(1);
    output.collect(outputKey, one);
}
From source file:shark.io.MutableBytesWritable.java
License:Apache License
/** * Set the BytesWritable to the contents of the given newData. * @param newData the value to set this BytesWritable to. *///from ww w . j a v a2s. co m public void set(BytesWritable newData) { set(newData.getBytes(), 0, newData.getLength()); }
From source file:tachyon.client.keyvalue.hadoop.KeyValueRecordReader.java
License:Apache License
@Override public synchronized boolean next(BytesWritable keyWritable, BytesWritable valueWritable) throws IOException { if (!mKeyValuePairIterator.hasNext()) { return false; }//from w ww. ja v a 2 s . c o m KeyValuePair pair; try { pair = mKeyValuePairIterator.next(); } catch (TachyonException e) { throw new IOException(e); } // TODO(cc): Implement a ByteBufferInputStream which is backed by a ByteBuffer so we could // benefit from zero-copy. DataInputStream key = new DataInputStream( new ByteArrayInputStream(BufferUtils.newByteArrayFromByteBuffer(pair.getKey()))); try { keyWritable.readFields(key); } finally { key.close(); } DataInputStream value = new DataInputStream( new ByteArrayInputStream(BufferUtils.newByteArrayFromByteBuffer(pair.getValue()))); try { valueWritable.readFields(value); } finally { value.close(); } mKeyValuePairsBytesRead += keyWritable.getLength() + valueWritable.getLength(); mNumVisitedKeyValuePairs++; return true; }
From source file:uk.bl.wa.hadoop.mapreduce.hash.MessageDigestMapper.java
License:Open Source License
@Override protected void map(Path key, BytesWritable value, Mapper<Path, BytesWritable, Text, Text>.Context context) throws IOException, InterruptedException { if (!key.equals(current)) { // Extract and emit: this.emit(context); // Set up a new one: current = key;//w w w .jav a 2 s. c o m bytes_seen = 0; md.reset(); log.info("Hashing " + current); } md.update(value.getBytes(), 0, value.getLength()); bytes_seen += value.getLength(); }
From source file:voldemort.store.readonly.mapreduce.HadoopStoreBuilderReducer.java
License:Apache License
/** * Reduce should get sorted MD5 of Voldemort key ( either 16 bytes if saving * keys is disabled, else 4 bytes ) as key and for value (a) node-id, * partition-id, value - if saving keys is disabled (b) node-id, * partition-id, [key-size, key, value-size, value]* if saving keys is * enabled// www. j a v a 2s. c om */ @Override public void reduce(BytesWritable key, Iterable<BytesWritable> values, Context context) throws IOException, InterruptedException { Iterator<BytesWritable> iterator = values.iterator(); // Write key and position this.indexFileStream.write(key.getBytes(), 0, key.getLength()); this.indexFileStream.writeInt(this.position); // Run key through checksum digest if (this.checkSumDigestIndex != null) { this.checkSumDigestIndex.update(key.getBytes(), 0, key.getLength()); this.checkSumDigestIndex.update(this.position); } int numKeyValues = 0; ByteArrayOutputStream stream = new ByteArrayOutputStream(); DataOutputStream valueStream = new DataOutputStream(stream); while (iterator.hasNext()) { BytesWritable writable = iterator.next(); byte[] valueBytes = writable.getBytes(); if (this.nodeId == -1) this.nodeId = ByteUtils.readInt(valueBytes, 0); if (this.partitionId == -1) this.partitionId = ByteUtils.readInt(valueBytes, 4); if (this.chunkId == -1) this.chunkId = ReadOnlyUtils.chunk(key.getBytes(), this.numChunks); int valueLength = writable.getLength() - 8; if (saveKeys) { // Write (key_length + key + value_length + value) valueStream.write(valueBytes, 8, valueLength); } else { // Write (value_length + value) valueStream.writeInt(valueLength); valueStream.write(valueBytes, 8, valueLength); } numKeyValues++; // if we have multiple values for this md5 that is a collision, // throw an exception--either the data itself has duplicates, there // are trillions of keys, or someone is attempting something // malicious ( We don't expect collisions when saveKeys = false ) if (!saveKeys && numKeyValues > 1) throw new VoldemortException("Duplicate keys detected for md5 
sum " + ByteUtils.toHexString(ByteUtils.copy(key.getBytes(), 0, key.getLength()))); } if (saveKeys) { // Write the number of k/vs as a single byte byte[] numBuf = new byte[1]; numBuf[0] = (byte) numKeyValues; this.valueFileStream.write(numBuf); this.position += 1; if (this.checkSumDigestValue != null) { this.checkSumDigestValue.update(numBuf); } } // Write the value out valueStream.flush(); byte[] value = stream.toByteArray(); this.valueFileStream.write(value); this.position += value.length; if (this.checkSumDigestValue != null) { this.checkSumDigestValue.update(value); } if (this.position < 0) throw new VoldemortException("Chunk overflow exception: chunk " + chunkId + " has exceeded " + Integer.MAX_VALUE + " bytes."); }
From source file:voldemort.store.readonly.mr.serialization.JsonDeserializerComparator.java
License:Apache License
/**
 * Compares two writables by delegating to {@code compareBytes} over the first
 * {@code getLength()} bytes of each backing array.
 *
 * @param o1 the left-hand operand
 * @param o2 the right-hand operand
 * @return the result of {@code compareBytes}
 */
public int compare(BytesWritable o1, BytesWritable o2) {
    final byte[] leftBytes = o1.getBytes();
    final int leftLength = o1.getLength();
    final byte[] rightBytes = o2.getBytes();
    final int rightLength = o2.getLength();
    return this.compareBytes(leftBytes, 0, leftLength, rightBytes, 0, rightLength);
}
From source file:voldemort.store.readonly.mr.utils.HadoopUtils.java
License:Apache License
/** * Tag the BytesWritable with an integer at the END *///from w w w. ja v a2 s . c o m public static void appendTag(BytesWritable writable, int tag) { int size = writable.getLength(); if (writable.getCapacity() < size + 4) { // BytesWritable preserves old values writable.setCapacity(size + 4); } ByteUtils.writeInt(writable.getBytes(), tag, size); writable.setSize(size + 4); }