List of usage examples for org.apache.hadoop.io.BytesWritable.set
public void set(byte[] newData, int offset, int length)
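set(byte[], int, int) copies length bytes starting at offset from newData into the writable's own buffer, so the caller's array can be reused afterwards. Note that getBytes() may return a backing array longer than the stored data, so it is normally paired with getLength(), as most of the examples below do. A minimal standalone sketch (the payload and class name are illustrative, not taken from the examples below):

import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
  public static void main(String[] args) {
    byte[] payload = "hello bytes".getBytes();  // sample data, illustrative only
    BytesWritable writable = new BytesWritable();
    // Copy only the first five bytes ("hello") into the writable's internal buffer.
    writable.set(payload, 0, 5);
    // getBytes() may return a larger backing array; getLength() gives the valid size.
    System.out.println(writable.getLength());  // 5
    System.out.println(new String(writable.getBytes(), 0, writable.getLength()));  // hello
  }
}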
From source file:org.apache.hama.bsp.TestBSPMessageBundle.java
License:Apache License
public void testSerializationDeserialization() throws IOException {
  BSPMessageBundle<BytesWritable> bundle = new BSPMessageBundle<BytesWritable>();
  BytesWritable[] testMessages = new BytesWritable[16];
  for (int i = 0; i < testMessages.length; ++i) {
    // Create a one byte tag containing the number of the message.
    byte[] tag = new byte[1];
    tag[0] = (byte) i;
    // Create a data part containing the serialized number of the message.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    baos.write(i);
    baos.close();
    byte[] data = baos.toByteArray();
    BytesWritable msg = new BytesWritable();
    msg.set(data, 0, data.length);
    testMessages[i] = msg;
    bundle.addMessage(testMessages[i]);
  }
  // Serialize it.
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  bundle.write(new DataOutputStream(baos));
  baos.close();
  // Deserialize it.
  BSPMessageBundle<BytesWritable> readBundle = new BSPMessageBundle<BytesWritable>();
  readBundle.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
  // Check contents.
  int messageNumber = 0;
  Iterator<BytesWritable> it = readBundle.iterator();
  while (it.hasNext()) {
    BytesWritable byteMessage = it.next();
    assertTrue(Arrays.equals(testMessages[messageNumber].getBytes(), byteMessage.getBytes()));
    ++messageNumber;
  }
  assertEquals(testMessages.length, messageNumber);
}
From source file:org.apache.hive.hcatalog.streaming.DelimitedInputWriter.java
License:Apache License
@Override
public Object encode(byte[] record) throws SerializationError {
  try {
    BytesWritable blob = new BytesWritable();
    blob.set(record, 0, record.length);
    return serde.deserialize(blob);
  } catch (SerDeException e) {
    throw new SerializationError("Unable to convert byte[] record into Object", e);
  }
}
From source file:org.apache.metron.spout.pcap.deserializer.FromKeyDeserializer.java
License:Apache License
@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
  Long ts = converter.toNanoseconds(fromBytes(key));
  outKey.set(ts);
  byte[] packetHeaderized = PcapHelper.addPacketHeader(ts, value, endianness);
  byte[] globalHeaderized = PcapHelper.addGlobalHeader(packetHeaderized, endianness);
  outValue.set(globalHeaderized, 0, globalHeaderized.length);
  return true;
}
From source file:org.apache.metron.spout.pcap.deserializer.FromPacketDeserializer.java
License:Apache License
@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
  Long ts = PcapHelper.getTimestamp(value);
  if (ts != null) {
    outKey.set(ts);
    outValue.set(value, 0, value.length);
    return true;
  } else {
    return false;
  }
}
From source file:org.apache.nutch.tools.arc.ArcRecordReader.java
License:Apache License
/**
 * <p>
 * Returns true if the next record in the split is read into the key and value
 * pair. The key will be the arc record header and the value will be the raw
 * content bytes of the arc record.
 * </p>
 *
 * @param key
 *          The record key
 * @param value
 *          The record value
 *
 * @return True if the next record is read.
 *
 * @throws IOException
 *           If an error occurs while reading the record value.
 */
public boolean next(Text key, BytesWritable value) throws IOException {
  try {
    // get the starting position on the input stream
    long startRead = in.getPos();
    byte[] magicBuffer = null;
    // we need this loop to handle false positives in reading of gzip records
    while (true) {
      // while we haven't passed the end of the split
      if (startRead >= splitEnd) {
        return false;
      }
      // scanning for the gzip header
      boolean foundStart = false;
      while (!foundStart) {
        // start at the current file position and scan 1K at a time, break
        // if there is no more to read
        startRead = in.getPos();
        magicBuffer = new byte[1024];
        int read = in.read(magicBuffer);
        if (read < 0) {
          break;
        }
        // scan the byte array for the gzip header magic number, byte by byte
        for (int i = 0; i < read - 1; i++) {
          byte[] testMagic = new byte[2];
          System.arraycopy(magicBuffer, i, testMagic, 0, 2);
          if (isMagic(testMagic)) {
            // set the next start to the current gzip header
            startRead += i;
            foundStart = true;
            break;
          }
        }
      }
      // seek to the start of the gzip header
      in.seek(startRead);
      ByteArrayOutputStream baos = null;
      int totalRead = 0;
      try {
        // read 4K of the gzip at a time into a byte array
        byte[] buffer = new byte[4096];
        GZIPInputStream zin = new GZIPInputStream(in);
        int gzipRead = -1;
        baos = new ByteArrayOutputStream();
        while ((gzipRead = zin.read(buffer, 0, buffer.length)) != -1) {
          baos.write(buffer, 0, gzipRead);
          totalRead += gzipRead;
        }
      } catch (Exception e) {
        // there are times we get false positives where the gzip header exists
        // but it is not an actual gzip record, so we ignore it and start
        // over seeking
        System.out.println("Ignoring position: " + (startRead));
        if (startRead + 1 < fileLen) {
          in.seek(startRead + 1);
        }
        continue;
      }
      // change the output stream to a byte array
      byte[] content = baos.toByteArray();
      // the first line of the raw content in arc files is the header
      int eol = 0;
      for (int i = 0; i < content.length; i++) {
        if (i > 0 && content[i] == '\n') {
          eol = i;
          break;
        }
      }
      // create the header and the raw content minus the header
      String header = new String(content, 0, eol).trim();
      byte[] raw = new byte[(content.length - eol) - 1];
      System.arraycopy(content, eol + 1, raw, 0, raw.length);
      // populate key and value with the header and raw content
      Text keyText = key;
      keyText.set(header);
      BytesWritable valueBytes = value;
      valueBytes.set(raw, 0, raw.length);
      // TODO: It would be best to start at the end of the gzip read, but
      // the bytes read in gzip don't match raw bytes in the file, so we
      // overshoot the next header. With this current method you get
      // some false positives but don't miss records.
      if (startRead + 1 < fileLen) {
        in.seek(startRead + 1);
      }
      // populated the record, now return
      return true;
    }
  } catch (Exception e) {
    LOG.error(StringUtils.stringifyException(e));
  }
  // couldn't populate the record or there is no next record to read
  return false;
}
From source file:org.apache.orc.mapred.OrcMapredRecordReader.java
License:Apache License
static BytesWritable nextBinary(ColumnVector vector, int row, Object previous) {
  if (vector.isRepeating) {
    row = 0;
  }
  if (vector.noNulls || !vector.isNull[row]) {
    BytesWritable result;
    if (previous == null || previous.getClass() != BytesWritable.class) {
      result = new BytesWritable();
    } else {
      result = (BytesWritable) previous;
    }
    BytesColumnVector bytes = (BytesColumnVector) vector;
    result.set(bytes.vector[row], bytes.start[row], bytes.length[row]);
    return result;
  } else {
    return null;
  }
}
From source file:org.apache.sqoop.avro.AvroUtil.java
License:Apache License
/**
 * Convert from Avro type to Sqoop's Java representation of the SQL type,
 * see SqlManager#toJavaType.
 */
public static Object fromAvro(Object avroObject, Schema schema, String type) {
  if (avroObject == null) {
    return null;
  }
  switch (schema.getType()) {
  case NULL:
    return null;
  case BOOLEAN:
  case INT:
  case FLOAT:
  case DOUBLE:
    return avroObject;
  case LONG:
    if (type.equals(DATE_TYPE)) {
      return new Date((Long) avroObject);
    } else if (type.equals(TIME_TYPE)) {
      return new Time((Long) avroObject);
    } else if (type.equals(TIMESTAMP_TYPE)) {
      return new Timestamp((Long) avroObject);
    }
    return avroObject;
  case BYTES:
    ByteBuffer bb = (ByteBuffer) avroObject;
    BytesWritable bw = new BytesWritable();
    bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
    if (type.equals(BLOB_REF_TYPE)) {
      // TODO: Should convert BytesWritable to BlobRef properly. (SQOOP-991)
      throw new UnsupportedOperationException("BlobRef not supported");
    }
    return bw;
  case STRING:
    if (type.equals(BIG_DECIMAL_TYPE)) {
      return new BigDecimal(avroObject.toString());
    } else if (type.equals(DATE_TYPE)) {
      return Date.valueOf(avroObject.toString());
    } else if (type.equals(TIME_TYPE)) {
      return Time.valueOf(avroObject.toString());
    } else if (type.equals(TIMESTAMP_TYPE)) {
      return Timestamp.valueOf(avroObject.toString());
    }
    return avroObject.toString();
  case ENUM:
    return avroObject.toString();
  case UNION:
    List<Schema> types = schema.getTypes();
    if (types.size() != 2) {
      throw new IllegalArgumentException("Only support union with null");
    }
    Schema s1 = types.get(0);
    Schema s2 = types.get(1);
    if (s1.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s2, type);
    } else if (s2.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s1, type);
    } else {
      throw new IllegalArgumentException("Only support union with null");
    }
  case FIXED:
    if (isDecimal(schema)) {
      // Should automatically be a BigDecimal object.
      return avroObject;
    } else {
      return new BytesWritable(((GenericFixed) avroObject).bytes());
    }
  case RECORD:
  case ARRAY:
  case MAP:
  default:
    throw new IllegalArgumentException("Cannot convert Avro type " + schema.getType());
  }
}
From source file:org.apache.sqoop.mapreduce.AvroExportMapper.java
License:Apache License
private Object fromAvro(Object avroObject, Schema fieldSchema, String columnType) {
  // map from Avro type to Sqoop's Java representation of the SQL type,
  // see SqlManager#toJavaType
  if (avroObject == null) {
    return null;
  }
  switch (fieldSchema.getType()) {
  case NULL:
    return null;
  case BOOLEAN:
  case INT:
  case FLOAT:
  case DOUBLE:
    return avroObject;
  case LONG:
    if (columnType.equals(DATE_TYPE)) {
      return new Date((Long) avroObject);
    } else if (columnType.equals(TIME_TYPE)) {
      return new Time((Long) avroObject);
    } else if (columnType.equals(TIMESTAMP_TYPE)) {
      return new Timestamp((Long) avroObject);
    }
    return avroObject;
  case BYTES:
    ByteBuffer bb = (ByteBuffer) avroObject;
    BytesWritable bw = new BytesWritable();
    bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
    return bw;
  case STRING:
    if (columnType.equals(BIG_DECIMAL_TYPE)) {
      return new BigDecimal(avroObject.toString());
    } else if (columnType.equals(DATE_TYPE)) {
      return Date.valueOf(avroObject.toString());
    } else if (columnType.equals(TIME_TYPE)) {
      return Time.valueOf(avroObject.toString());
    } else if (columnType.equals(TIMESTAMP_TYPE)) {
      return Timestamp.valueOf(avroObject.toString());
    }
    return avroObject.toString();
  case ENUM:
    return ((GenericEnumSymbol) avroObject).toString();
  case UNION:
    List<Schema> types = fieldSchema.getTypes();
    if (types.size() != 2) {
      throw new IllegalArgumentException("Only support union with null");
    }
    Schema s1 = types.get(0);
    Schema s2 = types.get(1);
    if (s1.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s2, columnType);
    } else if (s2.getType() == Schema.Type.NULL) {
      return fromAvro(avroObject, s1, columnType);
    } else {
      throw new IllegalArgumentException("Only support union with null");
    }
  case FIXED:
    return new BytesWritable(((GenericFixed) avroObject).bytes());
  case RECORD:
  case ARRAY:
  case MAP:
  default:
    throw new IllegalArgumentException("Cannot convert Avro type " + fieldSchema.getType());
  }
}
From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java
License:Apache License
private Object convertBinaryTypes(Object val, String javaColType) {
  byte[] bb = (byte[]) val;
  if (javaColType.equals(BYTESWRITABLE)) {
    BytesWritable bw = new BytesWritable();
    bw.set(bb, 0, bb.length);
    return bw;
  }
  return null;
}
From source file:org.apache.tajo.storage.sequencefile.SequenceFileAppender.java
License:Apache License
@Override
public void addTuple(Tuple tuple) throws IOException {
  Datum datum;
  if (serde instanceof BinarySerializerDeserializer) {
    byte nullByte = 0;
    int lasti = 0;
    for (int i = 0; i < columnNum; i++) {
      datum = tuple.get(i);

      // set bit to 1 if a field is not null
      if (null != datum) {
        nullByte |= 1 << (i % 8);
      }

      // write the null byte every eight elements or
      // if this is the last element, and serialize the
      // corresponding 8 struct fields at the same time
      if (7 == i % 8 || i == columnNum - 1) {
        os.write(nullByte);

        for (int j = lasti; j <= i; j++) {
          datum = tuple.get(j);

          switch (schema.getColumn(j).getDataType().getType()) {
          case TEXT:
            BytesUtils.writeVLong(os, datum.asTextBytes().length);
            break;
          case PROTOBUF:
            ProtobufDatum protobufDatum = (ProtobufDatum) datum;
            BytesUtils.writeVLong(os, protobufDatum.asByteArray().length);
            break;
          case CHAR:
          case INET4:
          case BLOB:
            BytesUtils.writeVLong(os, datum.asByteArray().length);
            break;
          default:
          }

          serde.serialize(schema.getColumn(j), datum, os, nullChars);

          if (isShuffle) {
            // it is to calculate min/max values, and it is only used for the intermediate file.
            stats.analyzeField(j, datum);
          }
        }
        lasti = i + 1;
        nullByte = 0;
      }
    }

    BytesWritable b = new BytesWritable();
    b.set(os.getData(), 0, os.getLength());
    writer.append(EMPTY_KEY, b);
  } else {
    for (int i = 0; i < columnNum; i++) {
      datum = tuple.get(i);
      serde.serialize(schema.getColumn(i), datum, os, nullChars);

      if (columnNum - 1 > i) {
        os.write((byte) delimiter);
      }

      if (isShuffle) {
        // it is to calculate min/max values, and it is only used for the intermediate file.
        stats.analyzeField(i, datum);
      }
    }
    writer.append(EMPTY_KEY, new Text(os.toByteArray()));
  }

  os.reset();
  pos += writer.getLength();
  rowCount++;

  if (enabledStats) {
    stats.incrementRow();
  }
}