Example usage for org.apache.hadoop.io BytesWritable set

List of usage examples for org.apache.hadoop.io BytesWritable set

Introduction

On this page you can find example usages of org.apache.hadoop.io BytesWritable set.

Prototype

public void set(byte[] newData, int offset, int length) 

Document

Set the value to a copy of the given byte range.
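
Before the full examples, here is a minimal, self-contained sketch of the call (the class name and main method are illustrative, not taken from any of the projects below):

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.io.BytesWritable;

public class BytesWritableSetExample {
    public static void main(String[] args) {
        byte[] payload = "hello".getBytes(StandardCharsets.UTF_8);
        BytesWritable writable = new BytesWritable();
        // set() copies the given range into the writable's own buffer,
        // so later changes to 'payload' do not affect 'writable'.
        writable.set(payload, 0, payload.length);
        // getLength() reports the valid size; getBytes() may return a larger backing array.
        System.out.println(writable.getLength()); // prints 5
    }
}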

Usage

From source file:org.apache.hama.bsp.TestBSPMessageBundle.java

License:Apache License

public void testSerializationDeserialization() throws IOException {
    BSPMessageBundle<BytesWritable> bundle = new BSPMessageBundle<BytesWritable>();
    BytesWritable[] testMessages = new BytesWritable[16];
    for (int i = 0; i < testMessages.length; ++i) {
        // Create a one byte tag containing the number of the message.
        byte[] tag = new byte[1];
        tag[0] = (byte) i;
        // Create a one-byte data part containing the number of the message
        // (write(int) writes only the low-order byte).
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos.write(i);
        baos.close();
        byte[] data = baos.toByteArray();
        BytesWritable msg = new BytesWritable();
        msg.set(data, 0, data.length);
        testMessages[i] = msg;
        bundle.addMessage(testMessages[i]);
    }

    // Serialize it.
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    bundle.write(new DataOutputStream(baos));
    baos.close();
    // Deserialize it.
    BSPMessageBundle<BytesWritable> readBundle = new BSPMessageBundle<BytesWritable>();
    readBundle.readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray())));
    // Check contents.
    int messageNumber = 0;

    Iterator<BytesWritable> it = readBundle.iterator();
    while (it.hasNext()) {
        BytesWritable byteMessage = it.next();

        assertTrue(Arrays.equals(testMessages[messageNumber].getBytes(), byteMessage.getBytes()));
        ++messageNumber;
    }

    assertEquals(testMessages.length, messageNumber);
}
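
A caveat on the assertion above: BytesWritable.getBytes() exposes the whole backing array, which can be longer than the valid data, so comparing raw getBytes() arrays only works when both buffers happen to have the same capacity. A more defensive comparison, assuming nothing beyond the standard getLength() and copyBytes() methods (the helper name is illustrative):

// Compare only the valid byte ranges of two BytesWritable values.
static boolean sameContents(BytesWritable a, BytesWritable b) {
    // copyBytes() returns exactly getLength() bytes, independent of buffer capacity.
    return Arrays.equals(a.copyBytes(), b.copyBytes());
}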

From source file:org.apache.hive.hcatalog.streaming.DelimitedInputWriter.java

License:Apache License

@Override
public Object encode(byte[] record) throws SerializationError {
    try {
        BytesWritable blob = new BytesWritable();
        blob.set(record, 0, record.length);
        return serde.deserialize(blob);
    } catch (SerDeException e) {
        throw new SerializationError("Unable to convert byte[] record into Object", e);
    }
}

From source file:org.apache.metron.spout.pcap.deserializer.FromKeyDeserializer.java

License:Apache License

@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
    Long ts = converter.toNanoseconds(fromBytes(key));
    outKey.set(ts);
    byte[] packetHeaderized = PcapHelper.addPacketHeader(ts, value, endianness);
    byte[] globalHeaderized = PcapHelper.addGlobalHeader(packetHeaderized, endianness);
    outValue.set(globalHeaderized, 0, globalHeaderized.length);
    return true;
}

From source file:org.apache.metron.spout.pcap.deserializer.FromPacketDeserializer.java

License:Apache License

@Override
public boolean deserializeKeyValue(byte[] key, byte[] value, LongWritable outKey, BytesWritable outValue) {
    Long ts = PcapHelper.getTimestamp(value);
    if (ts != null) {
        outKey.set(ts);
        outValue.set(value, 0, value.length);
        return true;
    } else {
        return false;
    }
}

From source file:org.apache.nutch.tools.arc.ArcRecordReader.java

License:Apache License

/**
 * <p>
 * Returns true if the next record in the split is read into the key and value
 * pair. The key will be the arc record header and the values will be the raw
 * content bytes of the arc record.
 * </p>
 * 
 * @param key
 *          The record key
 * @param value
 *          The record value
 * 
 * @return True if the next record is read.
 * 
 * @throws IOException
 *           If an error occurs while reading the record value.
 */
public boolean next(Text key, BytesWritable value) throws IOException {

    try {

        // get the starting position on the input stream
        long startRead = in.getPos();
        byte[] magicBuffer = null;

        // we need this loop to handle false positives in reading of gzip records
        while (true) {

            // while we haven't passed the end of the split
            if (startRead >= splitEnd) {
                return false;
            }

            // scanning for the gzip header
            boolean foundStart = false;
            while (!foundStart) {

                // start at the current file position and scan for 1K at time, break
                // if there is no more to read
                startRead = in.getPos();
                magicBuffer = new byte[1024];
                int read = in.read(magicBuffer);
                if (read < 0) {
                    break;
                }

                // scan the byte array for the gzip header magic number. This happens
                // byte by byte
                for (int i = 0; i < read - 1; i++) {
                    byte[] testMagic = new byte[2];
                    System.arraycopy(magicBuffer, i, testMagic, 0, 2);
                    if (isMagic(testMagic)) {
                        // set the next start to the current gzip header
                        startRead += i;
                        foundStart = true;
                        break;
                    }
                }
            }

            // seek to the start of the gzip header
            in.seek(startRead);
            ByteArrayOutputStream baos = null;
            int totalRead = 0;

            try {

                // read 4K of the gzip at a time putting into a byte array
                byte[] buffer = new byte[4096];
                GZIPInputStream zin = new GZIPInputStream(in);
                int gzipRead = -1;
                baos = new ByteArrayOutputStream();
                while ((gzipRead = zin.read(buffer, 0, buffer.length)) != -1) {
                    baos.write(buffer, 0, gzipRead);
                    totalRead += gzipRead;
                }
            } catch (Exception e) {

                // there are times we get false positives where the gzip header exists
                // but it is not an actual gzip record, so we ignore it and start
                // over seeking
                System.out.println("Ignoring position: " + (startRead));
                if (startRead + 1 < fileLen) {
                    in.seek(startRead + 1);
                }
                continue;
            }

            // change the output stream to a byte array
            byte[] content = baos.toByteArray();

            // the first line of the raw content in arc files is the header
            int eol = 0;
            for (int i = 0; i < content.length; i++) {
                if (i > 0 && content[i] == '\n') {
                    eol = i;
                    break;
                }
            }

            // create the header and the raw content minus the header
            String header = new String(content, 0, eol).trim();
            byte[] raw = new byte[(content.length - eol) - 1];
            System.arraycopy(content, eol + 1, raw, 0, raw.length);

            // populate key and values with the header and raw content.
            Text keyText = key;
            keyText.set(header);
            BytesWritable valueBytes = value;
            valueBytes.set(raw, 0, raw.length);

            // TODO: It would be best to start at the end of the gzip read but
            // the bytes read in gzip don't match raw bytes in the file so we
            // overshoot the next header. With this current method you get
            // some false positives but don't miss records.
            if (startRead + 1 < fileLen) {
                in.seek(startRead + 1);
            }

            // populated the record, now return
            return true;
        }
    } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
    }

    // couldn't populate the record or there is no next record to read
    return false;
}

From source file:org.apache.orc.mapred.OrcMapredRecordReader.java

License:Apache License

static BytesWritable nextBinary(ColumnVector vector, int row, Object previous) {
    if (vector.isRepeating) {
        row = 0;
    }
    if (vector.noNulls || !vector.isNull[row]) {
        BytesWritable result;
        if (previous == null || previous.getClass() != BytesWritable.class) {
            result = new BytesWritable();
        } else {
            result = (BytesWritable) previous;
        }
        BytesColumnVector bytes = (BytesColumnVector) vector;
        result.set(bytes.vector[row], bytes.start[row], bytes.length[row]);
        return result;
    } else {
        return null;
    }
}
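
The previous-object check above is the usual Hadoop reuse idiom: set() overwrites the writable's internal buffer in place, so a record reader can hand out the same BytesWritable for every row instead of allocating a new one. A minimal sketch of the same idiom outside ORC (rows and consume() are assumed placeholders):

// Reuse one BytesWritable across an entire scan.
BytesWritable reusable = new BytesWritable();
for (byte[] row : rows) {              // 'rows' stands in for any Iterable<byte[]>
    // set() copies the row into (and, if needed, grows) the internal buffer;
    // the same object instance is passed to the consumer each time.
    reusable.set(row, 0, row.length);
    consume(reusable);                 // hypothetical downstream consumer
}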

From source file:org.apache.sqoop.avro.AvroUtil.java

License:Apache License

/**
 * Convert from Avro type to Sqoop's java representation of the SQL type
 * see SqlManager#toJavaType
 */
public static Object fromAvro(Object avroObject, Schema schema, String type) {
    if (avroObject == null) {
        return null;
    }

    switch (schema.getType()) {
    case NULL:
        return null;
    case BOOLEAN:
    case INT:
    case FLOAT:
    case DOUBLE:
        return avroObject;
    case LONG:
        if (type.equals(DATE_TYPE)) {
            return new Date((Long) avroObject);
        } else if (type.equals(TIME_TYPE)) {
            return new Time((Long) avroObject);
        } else if (type.equals(TIMESTAMP_TYPE)) {
            return new Timestamp((Long) avroObject);
        }
        return avroObject;
    case BYTES:
        ByteBuffer bb = (ByteBuffer) avroObject;
        BytesWritable bw = new BytesWritable();
        bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
        if (type.equals(BLOB_REF_TYPE)) {
            // TODO: Should convert BytesWritable to BlobRef properly. (SQOOP-991)
            throw new UnsupportedOperationException("BlobRef not supported");
        }
        return bw;
    case STRING:
        if (type.equals(BIG_DECIMAL_TYPE)) {
            return new BigDecimal(avroObject.toString());
        } else if (type.equals(DATE_TYPE)) {
            return Date.valueOf(avroObject.toString());
        } else if (type.equals(TIME_TYPE)) {
            return Time.valueOf(avroObject.toString());
        } else if (type.equals(TIMESTAMP_TYPE)) {
            return Timestamp.valueOf(avroObject.toString());
        }
        return avroObject.toString();
    case ENUM:
        return avroObject.toString();
    case UNION:
        List<Schema> types = schema.getTypes();
        if (types.size() != 2) {
            throw new IllegalArgumentException("Only support union with null");
        }
        Schema s1 = types.get(0);
        Schema s2 = types.get(1);
        if (s1.getType() == Schema.Type.NULL) {
            return fromAvro(avroObject, s2, type);
        } else if (s2.getType() == Schema.Type.NULL) {
            return fromAvro(avroObject, s1, type);
        } else {
            throw new IllegalArgumentException("Only support union with null");
        }
    case FIXED:
        if (isDecimal(schema)) {
            // Should automatically be a BigDecimal object.
            return avroObject;
        } else {
            return new BytesWritable(((GenericFixed) avroObject).bytes());
        }
    case RECORD:
    case ARRAY:
    case MAP:
    default:
        throw new IllegalArgumentException("Cannot convert Avro type " + schema.getType());
    }
}
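
One assumption in the BYTES branch above: ByteBuffer.array() only works for heap buffers with an accessible backing array, while a direct or read-only buffer would throw. A defensive variant, sketched independently of Sqoop's own handling:

static BytesWritable toBytesWritable(ByteBuffer bb) {
    BytesWritable bw = new BytesWritable();
    if (bb.hasArray()) {
        // Read straight from the backing array; set() still copies into bw.
        bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
    } else {
        // Direct or read-only buffer: copy the remaining bytes out first.
        byte[] copy = new byte[bb.remaining()];
        bb.duplicate().get(copy);
        bw.set(copy, 0, copy.length);
    }
    return bw;
}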

From source file:org.apache.sqoop.mapreduce.AvroExportMapper.java

License:Apache License

private Object fromAvro(Object avroObject, Schema fieldSchema, String columnType) {
    // map from Avro type to Sqoop's Java representation of the SQL type
    // see SqlManager#toJavaType

    if (avroObject == null) {
        return null;
    }

    switch (fieldSchema.getType()) {
    case NULL:
        return null;
    case BOOLEAN:
    case INT:
    case FLOAT:
    case DOUBLE:
        return avroObject;
    case LONG:
        if (columnType.equals(DATE_TYPE)) {
            return new Date((Long) avroObject);
        } else if (columnType.equals(TIME_TYPE)) {
            return new Time((Long) avroObject);
        } else if (columnType.equals(TIMESTAMP_TYPE)) {
            return new Timestamp((Long) avroObject);
        }
        return avroObject;
    case BYTES:
        ByteBuffer bb = (ByteBuffer) avroObject;
        BytesWritable bw = new BytesWritable();
        bw.set(bb.array(), bb.arrayOffset() + bb.position(), bb.remaining());
        return bw;
    case STRING:
        if (columnType.equals(BIG_DECIMAL_TYPE)) {
            return new BigDecimal(avroObject.toString());
        } else if (columnType.equals(DATE_TYPE)) {
            return Date.valueOf(avroObject.toString());
        } else if (columnType.equals(TIME_TYPE)) {
            return Time.valueOf(avroObject.toString());
        } else if (columnType.equals(TIMESTAMP_TYPE)) {
            return Timestamp.valueOf(avroObject.toString());
        }
        return avroObject.toString();
    case ENUM:
        return ((GenericEnumSymbol) avroObject).toString();
    case UNION:
        List<Schema> types = fieldSchema.getTypes();
        if (types.size() != 2) {
            throw new IllegalArgumentException("Only support union with null");
        }
        Schema s1 = types.get(0);
        Schema s2 = types.get(1);
        if (s1.getType() == Schema.Type.NULL) {
            return fromAvro(avroObject, s2, columnType);
        } else if (s2.getType() == Schema.Type.NULL) {
            return fromAvro(avroObject, s1, columnType);
        } else {
            throw new IllegalArgumentException("Only support union with null");
        }
    case FIXED:
        return new BytesWritable(((GenericFixed) avroObject).bytes());
    case RECORD:
    case ARRAY:
    case MAP:
    default:
        throw new IllegalArgumentException("Cannot convert Avro type " + fieldSchema.getType());
    }
}

From source file:org.apache.sqoop.mapreduce.hcat.SqoopHCatExportHelper.java

License:Apache License

private Object convertBinaryTypes(Object val, String javaColType) {
    byte[] bb = (byte[]) val;
    if (javaColType.equals(BYTESWRITABLE)) {
        BytesWritable bw = new BytesWritable();
        bw.set(bb, 0, bb.length);
        return bw;
    }
    return null;
}

From source file:org.apache.tajo.storage.sequencefile.SequenceFileAppender.java

License:Apache License

@Override
public void addTuple(Tuple tuple) throws IOException {
    Datum datum;

    if (serde instanceof BinarySerializerDeserializer) {
        byte nullByte = 0;
        int lasti = 0;
        for (int i = 0; i < columnNum; i++) {
            datum = tuple.get(i);

            // set bit to 1 if a field is not null
            if (null != datum) {
                nullByte |= 1 << (i % 8);
            }

            // write the null byte every eight elements or
            // if this is the last element and serialize the
            // corresponding 8 struct fields at the same time
            if (7 == i % 8 || i == columnNum - 1) {
                os.write(nullByte);

                for (int j = lasti; j <= i; j++) {
                    datum = tuple.get(j);

                    switch (schema.getColumn(j).getDataType().getType()) {
                    case TEXT:
                        BytesUtils.writeVLong(os, datum.asTextBytes().length);
                        break;
                    case PROTOBUF:
                        ProtobufDatum protobufDatum = (ProtobufDatum) datum;
                        BytesUtils.writeVLong(os, protobufDatum.asByteArray().length);
                        break;
                    case CHAR:
                    case INET4:
                    case BLOB:
                        BytesUtils.writeVLong(os, datum.asByteArray().length);
                        break;
                    default:
                    }

                    serde.serialize(schema.getColumn(j), datum, os, nullChars);

                    if (isShuffle) {
                        // it is to calculate min/max values, and it is only used for the intermediate file.
                        stats.analyzeField(j, datum);
                    }
                }
                lasti = i + 1;
                nullByte = 0;
            }
        }

        BytesWritable b = new BytesWritable();
        b.set(os.getData(), 0, os.getLength());
        writer.append(EMPTY_KEY, b);

    } else {
        for (int i = 0; i < columnNum; i++) {
            datum = tuple.get(i);
            serde.serialize(schema.getColumn(i), datum, os, nullChars);

            if (columnNum - 1 > i) {
                os.write((byte) delimiter);
            }

            if (isShuffle) {
                // it is to calculate min/max values, and it is only used for the intermediate file.
                stats.analyzeField(i, datum);
            }

        }
        writer.append(EMPTY_KEY, new Text(os.toByteArray()));
    }

    os.reset();
    pos += writer.getLength();
    rowCount++;

    if (enabledStats) {
        stats.incrementRow();
    }
}
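
The b.set(os.getData(), 0, os.getLength()) call above relies on an output stream that exposes its internal array and valid length, which avoids the extra copy a toByteArray() call would make. The same pattern works with Hadoop's own DataOutputBuffer (a sketch, not taken from Tajo):

// A sketch using org.apache.hadoop.io.DataOutputBuffer, which exposes
// getData()/getLength() much like the Tajo output stream above.
static BytesWritable toValue(DataOutputBuffer buffer) {
    BytesWritable value = new BytesWritable();
    // set() copies only the valid range, so no intermediate toByteArray() copy is made.
    value.set(buffer.getData(), 0, buffer.getLength());
    return value;
}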