List of usage examples for org.apache.hadoop.io.BytesWritable.set
public void set(byte[] newData, int offset, int length)
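set(byte[], int, int) copies length bytes starting at offset from newData into the writable's own buffer, so the caller's array can be reused afterwards. Note that getBytes() may return a backing array longer than the stored data, so it is normally paired with getLength(), as most of the examples below do. A minimal standalone sketch (the payload and class name are illustrative, not taken from the examples below):

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
  public static void main(String[] args) {
    byte[] payload = "hello bytes".getBytes();  // sample data, illustrative only
    BytesWritable writable = new BytesWritable();
    // Copy only the first five bytes ("hello") into the writable's internal buffer.
    writable.set(payload, 0, 5);
    // getBytes() may return a larger backing array; getLength() gives the valid size.
    System.out.println(writable.getLength());  // 5
    System.out.println(new String(writable.getBytes(), 0, writable.getLength()));  // hello
  }
}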
From source file:org.apache.hama.bsp.TestBSPMessageBundle.java
License:Apache License
public void testSerializationDeserialization() throws IOException {
  BSPMessageBundle<BytesWritable> bundle = new BSPMessageBundle<BytesWritable>();
  BytesWritable[] testMessages = new BytesWritable[16];
  for (int i = 0; i < testMessages.length; ++i) {
    // Create a one byte tag containing the number of the message.
    byte[] tag = new byte[1];
    tag[0] = (byte) i;
    // Create a data part containing the serialized number of the message.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    baos.write(i);
    baos.close();
    byte[] data = baos.toByteArray();
    BytesWritable msg = new BytesWritable();
    msg.set(data, 0, data.length);
    testMessages[i] = msg;
    bundle.addMessage(testMessages[i]);
  }
  // Serialize it.
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  bundle.write(new DataOutputStream(baos));
  baos.close();
  // Deserialize it.
  BSPMessageBundle<BytesWritable> readBundle = new BSPMessageBundle<BytesWritable>();
  readBundle.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
  // Check contents.
  int messageNumber = 0;
  Iterator<BytesWritable> it = readBundle.iterator();
  while (it.hasNext()) {
    BytesWritable byteMessage = it.next();
    assertTrue(Arrays.equals(testMessages[messageNumber].getBytes(), byteMessage.getBytes()));
    ++messageNumber;
  }
  assertEquals(testMessages.length, messageNumber);
}
From source file:org.apache.hive.hcatalog.streaming.DelimitedInputWriter.java
License:Apache License
@Override
public Object encode(byte[] record) throws SerializationError {
  try {
    BytesWritable blob = new BytesWritable();
    blob.set(record, 0, record.length);
    return serde.deserialize(blob);
  } catch (SerDeException e) {
    throw new SerializationError("Unable to convert byte[] record into Object", e);
  }
}
From source file:org.apache.metron.spout.pcap.deserializer.FromKeyDeserializer.java
License:Apache License
@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
  Long ts = converter.toNanoseconds(fromBytes(key));
  outKey.set(ts);
  byte[] packetHeaderized = PcapHelper.addPacketHeader(ts, value, endianness);
  byte[] globalHeaderized = PcapHelper.addGlobalHeader(packetHeaderized, endianness);
  outValue.set(globalHeaderized, 0, globalHeaderized.length);
  return true;
}
From source file:org.apache.metron.spout.pcap.deserializer.FromPacketDeserializer.java
License:Apache License
@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
  Long ts = PcapHelper.getTimestamp(value);
  if (ts != null) {
    outKey.set(ts);
    outValue.set(value, 0, value.length);
    return true;
  } else {
    return false;
  }
}
From source file:org.apache.nutch.tools.arc.ArcRecordReader.java
License:Apache License
/**
 * <p>
 * Returns true if the next record in the split is read into the key and value
 * pair. The key will be the arc record header and the value will be the raw
 * content bytes of the arc record.
 * </p>
 *
 * @param key
 *          The record key
 * @param value
 *          The record value
 *
 * @return True if the next record is read.
 *
 * @throws IOException
 *           If an error occurs while reading the record value.
 */
public boolean next(Text key, BytesWritable value) throws IOException {
  try {
    // get the starting position on the input stream
    long startRead = in.getPos();
    byte[] magicBuffer = null;
    // we need this loop to handle false positives in reading of gzip records
    while (true) {
      // while we haven't passed the end of the split
      if (startRead >= splitEnd) {
        return false;
      }
      // scanning for the gzip header
      boolean foundStart = false;
      while (!foundStart) {
        // start at the current file position and scan 1K at a time, break
        // if there is no more to read
        startRead = in.getPos();
        magicBuffer = new byte[1024];
        int read = in.read(magicBuffer);
        if (read < 0) {
          break;
        }
        // scan the byte array for the gzip header magic number, byte by byte
        for (int i = 0; i < read - 1; i++) {
          byte[] testMagic = new byte[2];
          System.arraycopy(magicBuffer, i, testMagic, 0, 2);
          if (isMagic(testMagic)) {
            // set the next start to the current gzip header
            startRead += i;
            foundStart = true;
            break;
          }
        }
      }
      // seek to the start of the gzip header
      in.seek(startRead);
      ByteArrayOutputStream baos = null;
      int totalRead = 0;
      try {
        // read 4K of the gzip at a time into a byte array
        byte[] buffer = new byte[4096];
        GZIPInputStream zin = new GZIPInputStream(in);
        int gzipRead = -1;
        baos = new ByteArrayOutputStream();
        while ((gzipRead = zin.read(buffer, 0, buffer.length)) != -1) {
          baos.write(buffer, 0, gzipRead);
          totalRead += gzipRead;
        }
      } catch (Exception e) {
        // there are times we get false positives where the gzip header exists
        // but it is not an actual gzip record, so we ignore it and start
        // over seeking
        System.out.println("Ignoring position: " + (startRead));
        if (startRead + 1 < fileLen) {
          in.seek(startRead + 1);
        }
        continue;
      }
      // change the output stream to a byte array
      byte[] content = baos.toByteArray();
      // the first line of the raw content in arc files is the header
      int eol = 0;
      for (int i = 0; i < content.length; i++) {
        if (i > 0 && content[i] == '\n') {
          eol = i;
          break;
        }
      }
      // create the header and the raw content minus the header
      String header = new String(content, 0, eol).trim();
      byte[] raw = new byte[(content.length - eol) - 1];
      System.arraycopy(content, eol + 1, raw, 0, raw.length);
      // populate key and value with the header and raw content
      Text keyText = key;
      keyText.set(header);
      BytesWritable valueBytes = value;
      valueBytes.set(raw, 0, raw.length);
      // TODO: It would be best to start at the end of the gzip read, but
      // the bytes read in gzip don't match raw bytes in the file, so we
      // overshoot the next header. With this current method you get
      // some false positives but don't miss records.
      if (startRead + 1 < fileLen) {
        in.seek(startRead + 1);
      }
      // populated the record, now return
      return true;
    }
  } catch (Exception e) {
    LOG.error(StringUtils.stringifyException(e));
  }
  // couldn't populate the record or there is no next record to read
  return false;
}
From source file:org.apache.orc.mapred.OrcMapredRecordReader.java
License:Apache License
static BytesWritable nextBinary(ColumnVector vector, int row, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    BytesWritable result;
    if (previous == null || previous.getClass() != BytesWritable.class) {
      result = new BytesWritable();
    } else {
      result = (BytesWritable) previous;
    }
    BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.vector[row], bytes.start[row], bytes.length[row]);
    return result;
  } else {
    return null;
  }
}
From source file:org.apache.sqoop.avro.AvroUtil.java
License:Apache License
/**
 * Convert from Avro type to Sqoop's Java representation of the SQL type,
 * see SqlManager#toJavaType.
 */
public static Object fromAvro(Object avroObject, Schema schema, String type) {
  if (avroObject == null) {
    return null;
  }
  switch (schema.getType()) {
  case NULL:
    return null;
  case BOOLEAN:
  case INT:
  case FLOAT:
  case DOUBLE:
    return avroObject;
  case LONG:
    if (type.equals(DATE_TYPE)) {
      return new Date((Long) avroObject);
    } else if (type.equals(TIME_TYPE)) {
      return new Time((Long) avroObject);
    } else if (type.equals(TIMESTAMP_TYPE)) {
      return new Timestamp((Long) avroObject);
    }
    return avroObject;
  case BYTES:
    ByteBuffer bb = (ByteBuffer) avroObject;
    BytesWritable bw = new BytesWritable();
    bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
    if (type.equals(BLOB_REF_TYPE)) {
      // TODO: Should convert BytesWritable to BlobRef properly. (SQOOP-991)
      throw new UnsupportedOperationException("BlobRef not supported");
    }
    return bw;
  case STRING:
    if (type.equals(BIG_DECIMAL_TYPE)) {
      return new BigDecimal(avroObject.toString());
    } else if (type.equals(DATE_TYPE)) {
      return Date.valueOf(avroObject.toString());
    } else if (type.equals(TIME_TYPE)) {
      return Time.valueOf(avroObject.toString());
    } else if (type.equals(TIMESTAMP_TYPE)) {
      return Timestamp.valueOf(avroObject.toString());
    }
    return avroObject.toString();
  case ENUM:
    return avroObject.toString();
  case UNION:
    List<Schema> types = schema.getTypes();
    if (types.size() != 2) {
      throw new IllegalArgumentException("Only support union with null");
    }
    Schema s1 = types.get(0);
    Schema s2 = types.get(1);
    if (s1.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s2, type);
    } else if (s2.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s1, type);
    } else {
      throw new IllegalArgumentException("Only support union with null");
    }
  case FIXED:
    if (isDecimal(schema)) {
      // Should automatically be a BigDecimal object.
      return avroObject;
    } else {
      return new BytesWritable(((GenericFixed) avroObject).bytes());
    }
  case RECORD:
  case ARRAY:
  case MAP:
  default:
    throw new IllegalArgumentException("Cannot convert Avro type " + schema.getType());
  }
}
From source file:org.apache.sqoop.mapreduce.AvroExportMapper.java
License:Apache License
private Object fromAvro(Object avroObject, Schema fieldSchema, String columnType) {
  // map from Avro type to Sqoop's Java representation of the SQL type,
  // see SqlManager#toJavaType
  if (avroObject == null) {
    return null;
  }
  switch (fieldSchema.getType()) {
  case NULL:
    return null;
  case BOOLEAN:
  case INT:
  case FLOAT:
  case DOUBLE:
    return avroObject;
  case LONG:
    if (columnType.equals(DATE_TYPE)) {
      return new Date((Long) avroObject);
    } else if (columnType.equals(TIME_TYPE)) {
      return new Time((Long) avroObject);
    } else if (columnType.equals(TIMESTAMP_TYPE)) {
      return new Timestamp((Long) avroObject);
    }
    return avroObject;
  case BYTES:
    ByteBuffer bb = (ByteBuffer) avroObject;
    BytesWritable bw = new BytesWritable();
    bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
    return bw;
  case STRING:
    if (columnType.equals(BIG_DECIMAL_TYPE)) {
      return new BigDecimal(avroObject.toString());
    } else if (columnType.equals(DATE_TYPE)) {
      return Date.valueOf(avroObject.toString());
    } else if (columnType.equals(TIME_TYPE)) {
      return Time.valueOf(avroObject.toString());
    } else if (columnType.equals(TIMESTAMP_TYPE)) {
      return Timestamp.valueOf(avroObject.toString());
    }
    return avroObject.toString();
  case ENUM:
    return ((GenericEnumSymbol) avroObject).toString();
  case UNION:
    List<Schema> types = fieldSchema.getTypes();
    if (types.size() != 2) {
      throw new IllegalArgumentException("Only support union with null");
    }
    Schema s1 = types.get(0);
    Schema s2 = types.get(1);
    if (s1.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s2, columnType);
    } else if (s2.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s1, columnType);
    } else {
      throw new IllegalArgumentException("Only support union with null");
    }
  case FIXED:
    return new BytesWritable(((GenericFixed) avroObject).bytes());
  case RECORD:
  case ARRAY:
  case MAP:
  default:
    throw new IllegalArgumentException("Cannot convert Avro type " + fieldSchema.getType());
  }
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java
License:Apache License
private Object convertBinaryTypes(Object val, String javaColType) {
  byte[] bb = (byte[]) val;
  if (javaColType.equals(BYTESWRITABLE)) {
    BytesWritable bw = new BytesWritable();
    bw.set(bb, 0, bb.length);
    return bw;
  }
  return null;
}
From source file:org.apache.tajo.storage.sequencefile.SequenceFileAppender.java
License:Apache License
@Override
public void addTuple(Tuple tuple) throws IOException {
  Datum datum;
  if (serde instanceof BinarySerializerDeserializer) {
    byte nullByte = 0;
    int lasti = 0;
    for (int i = 0; i < columnNum; i++) {
      datum = tuple.get(i);

      // set bit to 1 if a field is not null
      if (null != datum) {
        nullByte |= 1 << (i % 8);
      }

      // write the null byte every eight elements or
      // if this is the last element, and serialize the
      // corresponding 8 struct fields at the same time
      if (7 == i % 8 || i == columnNum - 1) {
        os.write(nullByte);

        for (int j = lasti; j <= i; j++) {
          datum = tuple.get(j);

          switch (schema.getColumn(j).getDataType().getType()) {
          case TEXT:
            BytesUtils.writeVLong(os, datum.asTextBytes().length);
            break;
          case PROTOBUF:
            ProtobufDatum protobufDatum = (ProtobufDatum) datum;
            BytesUtils.writeVLong(os, protobufDatum.asByteArray().length);
            break;
          case CHAR:
          case INET4:
          case BLOB:
            BytesUtils.writeVLong(os, datum.asByteArray().length);
            break;
          default:
          }

          serde.serialize(schema.getColumn(j), datum, os, nullChars);

          if (isShuffle) {
            // it is to calculate min/max values, and it is only used for the intermediate file.
            stats.analyzeField(j, datum);
          }
        }
        lasti = i + 1;
        nullByte = 0;
      }
    }

    BytesWritable b = new BytesWritable();
    b.set(os.getData(), 0, os.getLength());
    writer.append(EMPTY_KEY, b);
  } else {
    for (int i = 0; i < columnNum; i++) {
      datum = tuple.get(i);
      serde.serialize(schema.getColumn(i), datum, os, nullChars);

      if (columnNum - 1 > i) {
        os.write((byte) delimiter);
      }

      if (isShuffle) {
        // it is to calculate min/max values, and it is only used for the intermediate file.
        stats.analyzeField(i, datum);
      }
    }
    writer.append(EMPTY_KEY, new Text(os.toByteArray()));
  }

  os.reset();
  pos += writer.getLength();
  rowCount++;

  if (enabledStats) {
    stats.incrementRow();
  }
}