List of usage examples for org.apache.hadoop.io BytesWritable set
public void set(byte[] newData, int offset, int length)
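A minimal self-contained sketch of the call itself (the class and variable names here are illustrative, not taken from any source file below): set(byte[], int, int) copies the given range into the writable's own backing buffer, so the caller's array can be modified or reused afterwards without affecting the BytesWritable.

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
    public static void main(String[] args) {
        byte[] data = "hello".getBytes(StandardCharsets.UTF_8);
        BytesWritable bw = new BytesWritable();
        // copies data[0..data.length) into the writable's internal buffer
        bw.set(data, 0, data.length);
        // getLength() reports the number of valid bytes, here 5
        System.out.println(bw.getLength());
    }
}

Because set copies, the same writable can be reused for every record; this is why the record readers below call set on the value object handed in by the framework instead of allocating a new BytesWritable per record.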
From source file:org.apache.tez.runtime.library.common.comparator.TestProxyComparator.java
License:Apache License
private static final void set(BytesWritable bw, String s) {
    byte[] b = s.getBytes(Charset.forName("utf-8"));
    bw.set(b, 0, b.length);
}
From source file:org.commoncrawl.nutch.tools.arc.ArcRecordReader.java
License:Apache License
/**
 * <p>Returns true if the next record in the split is read into the key and
 * value pair. The key will be the arc record header and the value will be
 * the raw content bytes of the arc record.</p>
 *
 * @param key The record key
 * @param value The record value
 *
 * @return True if the next record is read.
 *
 * @throws IOException If an error occurs while reading the record value.
 */
public boolean next(Text key, BytesWritable value) throws IOException {
    try {
        // get the starting position on the input stream
        long startRead = in.getPos();
        byte[] magicBuffer = null;
        // we need this loop to handle false positives in reading of gzip records
        while (true) {
            // stop once we have passed the end of the split
            if (startRead >= splitEnd) {
                return false;
            }
            // scan for the gzip header
            boolean foundStart = false;
            while (!foundStart) {
                // start at the current file position and scan 1K at a time;
                // break if there is no more to read
                startRead = in.getPos();
                magicBuffer = new byte[1024];
                int read = in.read(magicBuffer);
                if (read < 0) {
                    break;
                }
                // scan the byte array for the gzip magic number, byte by byte
                for (int i = 0; i < read - 1; i++) {
                    byte[] testMagic = new byte[2];
                    System.arraycopy(magicBuffer, i, testMagic, 0, 2);
                    if (isMagic(testMagic)) {
                        // set the next start to the current gzip header
                        startRead += i;
                        foundStart = true;
                        break;
                    }
                }
            }
            // seek to the start of the gzip header
            in.seek(startRead);
            ByteArrayOutputStream baos = null;
            int totalRead = 0;
            try {
                // read the gzip record 4K at a time into a byte array
                byte[] buffer = new byte[4096];
                GZIPInputStream zin = new GZIPInputStream(in);
                int gzipRead = -1;
                baos = new ByteArrayOutputStream();
                while ((gzipRead = zin.read(buffer, 0, buffer.length)) != -1) {
                    baos.write(buffer, 0, gzipRead);
                    totalRead += gzipRead;
                }
            } catch (Exception e) {
                // sometimes we get false positives where the gzip magic number
                // exists but it is not an actual gzip record, so we ignore it
                // and start seeking over again
                // LOG.debug("Ignoring position: " + (startRead));
                if (startRead + 1 < fileLen) {
                    in.seek(startRead + 1);
                }
                continue;
            }
            // convert the output stream to a byte array
            byte[] content = baos.toByteArray();
            // the first line of the raw content in arc files is the header
            int eol = 0;
            for (int i = 0; i < content.length; i++) {
                if (i > 0 && content[i] == '\n') {
                    eol = i;
                    break;
                }
            }
            // create the header and the raw content minus the header
            String header = new String(content, 0, eol).trim();
            byte[] raw = new byte[(content.length - eol) - 1];
            System.arraycopy(content, eol + 1, raw, 0, raw.length);
            // populate key and value with the header and raw content
            Text keyText = (Text) key;
            keyText.set(header);
            BytesWritable valueBytes = (BytesWritable) value;
            valueBytes.set(raw, 0, raw.length);
            // TODO: It would be best to start at the end of the gzip read, but
            // the bytes read by gzip don't match the raw bytes in the file, so
            // we would overshoot the next header. With the current method you
            // get some false positives but don't miss records.
            if (startRead + 1 < fileLen) {
                in.seek(startRead + 1);
            }
            // populated the record, now return
            return true;
        }
    } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
    }
    // couldn't populate the record or there is no next record to read
    return false;
}
From source file:org.geotools.WholeFile.WholeFileRecordReader.java
License:Apache License
@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    if (!processed) {
        byte[] contents = new byte[(int) fileSplit.getLength()];
        Path file = fileSplit.getPath();
        String fileName = file.getName();
        key.set(fileName);
        FileSystem fs = file.getFileSystem(conf);
        FSDataInputStream in = null;
        try {
            in = fs.open(file);
            IOUtils.readFully(in, contents, 0, contents.length);
            value.set(contents, 0, contents.length);
        } finally {
            IOUtils.closeStream(in);
        }
        processed = true;
        return true;
    }
    return false;
}
From source file:org.pentaho.hadoop.mapreduce.converter.converters.KettleTypeToBytesWritableConverter.java
License:Apache License
@Override
public BytesWritable convert(ValueMetaInterface meta, Object obj) throws TypeConversionException {
    try {
        BytesWritable result = new BytesWritable();
        byte[] binary = meta.getBinary(obj);
        result.set(binary, 0, binary.length);
        return result;
    } catch (Exception ex) {
        throw new TypeConversionException(BaseMessages.getString(TypeConverterFactory.class,
                "ErrorConverting", BytesWritable.class.getSimpleName(), obj), ex);
    }
}
From source file:org.pooledtimeseries.cartesian.CartesianRecordReader.java
License:Apache License
@Override
public boolean next(Text key, BytesWritable value) throws IOException {
    do {
        // If we are to go to the next left key/value pair
        if (goToNextLeft) {
            // Read the next key/value pair; false means no more pairs
            if (!leftRR.next(lkey, lvalue)) {
                // If no more, then this task is nearly finished
                alldone = true;
                break;
            } else {
                // If we aren't done, clear our flags
                goToNextLeft = alldone = false;
                // Reset the right record reader
                this.rightRR = this.rightFIF.getRecordReader(this.rightIS, this.rightConf,
                        this.rightReporter);
            }
            if (this.pairWithItself) {
                // shift the right data set to avoid repeated pairs;
                // we consider a,b == b,a
                for (int i = 0; i < rightShiftCount; i++) {
                    rightRR.next(rkey, rvalue);
                }
                rightShiftCount++;
            }
        }
        // Read the next key/value pair from the right data set
        if (rightRR.next(rkey, rvalue)) {
            // If successful, build the key from the left and right splits
            key.set(lkey.toString() + "~" + rkey.toString());
            // Merge the FeatureVector lists of both videos.
            // Order is important and should match the order of the key.
            List<FeatureVector> featureList =
                    (List<FeatureVector>) PoTSerialiser.getObject(lvalue.getBytes());
            featureList.addAll((List<FeatureVector>) PoTSerialiser.getObject(rvalue.getBytes()));
            byte[] featureListBytes = PoTSerialiser.getBytes(featureList);
            value.set(featureListBytes, 0, featureListBytes.length);
            // This assumes that the key will always be unique among all splits
            if (lkey.toString().equals(rkey.toString())) {
                this.pairWithItself = true;
            }
        } else {
            // Otherwise, this right data set is complete and we should
            // go to the next left pair
            goToNextLeft = true;
        }
        // This loop continues whenever we have finished reading key/value
        // pairs from the right data set
    } while (goToNextLeft);
    if (alldone) {
        // reset the shift counter
        rightShiftCount = 1;
        this.pairWithItself = false;
    }
    // Return true if a key/value pair was read, false otherwise
    return !alldone;
}
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinBlockRecordReader.java
License:Apache License
/**
 * Read the next block.
 *
 * @param key is a 64 byte array (hashMerkleRoot and prevHashBlock)
 * @param value is a deserialized Java object of class BitcoinBlock
 *
 * @return true if the next block is available, false if not
 */
public boolean next(BytesWritable key, BitcoinBlock value) throws IOException {
    // read all the blocks, if necessary a block overlapping a split
    while (getFilePosition() <= getEnd()) {
        // have we gone beyond the split (remote) or is there no further data left?
        BitcoinBlock dataBlock = null;
        try {
            dataBlock = getBbr().readBlock();
        } catch (BitcoinBlockReadException e) {
            // log
            LOG.error(e);
        }
        if (dataBlock == null) {
            return false;
        }
        // the key is hashMerkleRoot concatenated with hashPrevBlock
        byte[] hashMerkleRoot = dataBlock.getHashMerkleRoot();
        byte[] hashPrevBlock = dataBlock.getHashPrevBlock();
        byte[] newKey = new byte[hashMerkleRoot.length + hashPrevBlock.length];
        System.arraycopy(hashMerkleRoot, 0, newKey, 0, hashMerkleRoot.length);
        System.arraycopy(hashPrevBlock, 0, newKey, hashMerkleRoot.length, hashPrevBlock.length);
        key.set(newKey, 0, newKey.length);
        value.set(dataBlock);
        return true;
    }
    return false;
}
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinRawBlockRecordReader.java
License:Apache License
/**
 * Read the next block.
 *
 * @param key is a 64 byte array (hashMerkleRoot and prevHashBlock)
 * @param value is a BytesWritable containing the raw bytes of the block
 *
 * @return true if the next block is available, false if not
 */
public boolean next(BytesWritable key, BytesWritable value) throws IOException {
    // read all the blocks, if necessary a block overlapping a split
    while (getFilePosition() <= getEnd()) {
        // have we gone beyond the split (remote) or is there no further data left?
        ByteBuffer dataBlock = null;
        try {
            dataBlock = getBbr().readRawBlock();
        } catch (BitcoinBlockReadException e) {
            // log
            LOG.error(e);
        }
        if (dataBlock == null) {
            return false;
        }
        byte[] newKey = getBbr().getKeyFromRawBlock(dataBlock);
        key.set(newKey, 0, newKey.length);
        // copy the block bytes out of the ByteBuffer
        byte[] dataBlockArray = null;
        if (dataBlock.hasArray()) {
            dataBlockArray = dataBlock.array();
        } else {
            dataBlockArray = new byte[dataBlock.capacity()];
            dataBlock.get(dataBlockArray);
        }
        value.set(dataBlockArray, 0, dataBlockArray.length);
        return true;
    }
    return false;
}
From source file:org.zuinnote.hadoop.bitcoin.format.BitcoinTransactionRecordReader.java
License:Apache License
/**
 * Read the next transaction.
 *
 * @param key is set to the hash of the transaction, its unique identifier
 * @param value is a deserialized Java object of class BitcoinTransaction
 *
 * @return true if the next transaction is available, false if not
 */
public boolean next(BytesWritable key, BitcoinTransaction value) throws IOException {
    // read all the blocks, if necessary a block overlapping a split
    while (getFilePosition() <= getEnd()) {
        // have we gone beyond the split (remote) or is there no further data left?
        if ((currentBitcoinBlock == null)
                || (currentBitcoinBlock.getTransactions().size() == currentTransactionCounterInBlock)) {
            try {
                currentBitcoinBlock = getBbr().readBlock();
                currentTransactionCounterInBlock = 0;
            } catch (BitcoinBlockReadException e) {
                // log
                LOG.error(e);
            }
        }
        if (currentBitcoinBlock == null) {
            return false;
        }
        BitcoinTransaction currentTransaction =
                currentBitcoinBlock.getTransactions().get(currentTransactionCounterInBlock);
        // the unique identifier that other transactions link to is usually its hash
        byte[] newKey = new byte[0];
        try {
            newKey = BitcoinUtil.getTransactionHash(currentTransaction);
        } catch (NoSuchAlgorithmException nsae) {
            LOG.error("Cannot calculate transaction hash. Algorithm not available. Exception: "
                    + nsae.toString());
        }
        key.set(newKey, 0, newKey.length);
        value.set(currentTransaction);
        currentTransactionCounterInBlock++;
        return true;
    }
    return false;
}
From source file:phoenix.datatorrent.operator.input.kafka.PartitionableKafkaInputOperator.java
License:Open Source License
private BytesWritable getBytesFromKafka(Message msg) {
    ByteBuffer buf = msg.payload();
    BytesWritable payload = new BytesWritable();
    int origSize = buf.remaining();
    byte[] bytes = new byte[origSize];
    // the offset argument of ByteBuffer.get() is an offset into the
    // destination array, so it must be 0 here, not buf.position()
    buf.get(bytes, 0, origSize);
    payload.set(bytes, 0, origSize);
    return payload;
}
From source file:protobuf.mapred.ProtobufRecordReader.java
License:Open Source License
public synchronized boolean next(LongWritable key, BytesWritable value) throws IOException {
    if (recordReader.isBlockConsumed(splitLength)) {
        LOG.info("Consumed all the split");
        return false;
    }
    recordio.RecordReader.Buffer data = recordReader.read();
    if (data == null) {
        LOG.info("got EOF, consumed all the file");
        return false;
    }
    pos = recordReader.getConsumedBytes() + start;
    key.set(pos);
    value.set(data.buffer, data.offset, data.length);
    return true;
}