Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getLength().

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of bytes in the byte array

Usage

From source file:com.datasalt.utils.mapred.joiner.MultiJoinChanneledMapper.java

License:Apache License

/**
 * Emits {@code datum} under the given grouping key and secondary-sort key.
 *
 * <p>Delegates to {@code emitBytes}, passing the byte array behind {@code grouping}, a start
 * offset of 0 and {@code grouping.getLength()} as the number of bytes to use.
 *
 * @param grouping the grouping key; must not be null
 * @param secondarySort the secondary-sort key handed through to {@code emitBytes}
 * @param datum the value to emit
 * @throws IOException propagated from {@code emitBytes}
 * @throws InterruptedException propagated from {@code emitBytes}
 */
protected void emit(Text grouping, WritableComparable secondarySort, OUTPUT_VALUE datum)
        throws IOException, InterruptedException {
    final byte[] groupBytes = grouping.getBytes();
    final int groupLength = grouping.getLength();
    emitBytes(groupBytes, 0, groupLength, secondarySort, datum);
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoinChanneledMapper.java

License:Apache License

/**
 * Emits {@code datum} under the given grouping key without a secondary-sort key.
 *
 * <p>Delegates to {@code emitBytes}, passing the byte array behind {@code grouping}, a start
 * offset of 0, {@code grouping.getLength()} as the number of bytes to use, and {@code null} in
 * the secondary-sort position.
 *
 * @param grouping the grouping key; must not be null
 * @param datum the value to emit
 * @throws IOException propagated from {@code emitBytes}
 * @throws InterruptedException propagated from {@code emitBytes}
 */
protected void emit(Text grouping, OUTPUT_VALUE datum) throws IOException, InterruptedException {
    final byte[] groupBytes = grouping.getBytes();
    final int groupLength = grouping.getLength();
    emitBytes(groupBytes, 0, groupLength, null, datum);
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoinMultiChannelMapper.java

License:Apache License

/**
 * Emits {@code datum} on the given channel under a grouping key and secondary-sort key.
 *
 * <p>Delegates to {@code emitBytes}, passing the byte array behind {@code grouping}, a start
 * offset of 0 and {@code grouping.getLength()} as the number of bytes to use.
 *
 * @param grouping the grouping key; must not be null
 * @param secondarySort the secondary-sort key handed through to {@code emitBytes}
 * @param datum the value to emit
 * @param channel the channel number handed through to {@code emitBytes}
 * @throws IOException propagated from {@code emitBytes}
 * @throws InterruptedException propagated from {@code emitBytes}
 */
protected void emit(Text grouping, WritableComparable secondarySort, Object datum, int channel)
        throws IOException, InterruptedException {
    final byte[] groupBytes = grouping.getBytes();
    final int groupLength = grouping.getLength();
    emitBytes(groupBytes, 0, groupLength, secondarySort, datum, channel);
}

From source file:com.datasalt.utils.mapred.joiner.MultiJoinMultiChannelMapper.java

License:Apache License

/**
 * Emits {@code datum} on the given channel under a grouping key, without a secondary-sort key.
 *
 * <p>Delegates to {@code emitBytes}, passing the byte array behind {@code grouping}, a start
 * offset of 0, {@code grouping.getLength()} as the number of bytes to use, and {@code null} in
 * the secondary-sort position.
 *
 * @param grouping the grouping key; must not be null
 * @param datum the value to emit
 * @param channel the channel number handed through to {@code emitBytes}
 * @throws IOException propagated from {@code emitBytes}
 * @throws InterruptedException propagated from {@code emitBytes}
 */
protected void emit(Text grouping, Object datum, int channel) throws IOException, InterruptedException {
    final byte[] groupBytes = grouping.getBytes();
    final int groupLength = grouping.getLength();
    emitBytes(groupBytes, 0, groupLength, null, datum, channel);
}

From source file:com.ebay.nest.io.sede.binarysortable.BinarySortableSerDe.java

License:Apache License

/**
 * Recursively writes {@code o} into {@code buffer}, dispatching on the category reported by
 * {@code oi}.
 *
 * <p>A single marker byte is written first: 0 when {@code o} is null (nothing else follows), 1
 * otherwise. The type-specific encoding follows the marker. The {@code invert} flag is forwarded
 * to every write and to every helper/recursive call; the only place it is altered is the DECIMAL
 * digit string, where it is negated for negative values.
 *
 * @param buffer output buffer receiving the encoded bytes
 * @param o the object to serialize; may be null
 * @param oi inspector describing the type of {@code o}
 * @param invert forwarded to each {@code buffer.write}; presumably requests bit-inverted output
 *          for descending sort order -- TODO confirm against OutputByteBuffer
 * @throws SerDeException declared by the signature; no statement in this method throws it
 *           directly
 * @throws RuntimeException if the category or primitive category is not handled here
 */
static void serialize(OutputByteBuffer buffer, Object o, ObjectInspector oi, boolean invert)
        throws SerDeException {
    // Is this field a null?
    if (o == null) {
        buffer.write((byte) 0, invert);
        return;
    }
    // This field is not a null.
    buffer.write((byte) 1, invert);

    switch (oi.getCategory()) {
    case PRIMITIVE: {
        // Every inner case either returns or throws, so control never falls through to LIST.
        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi;
        switch (poi.getPrimitiveCategory()) {
        case VOID: {
            // Nothing beyond the not-null marker is written.
            return;
        }
        case BOOLEAN: {
            // false -> 1, true -> 2
            boolean v = ((BooleanObjectInspector) poi).get(o);
            buffer.write((byte) (v ? 2 : 1), invert);
            return;
        }
        case BYTE: {
            ByteObjectInspector boi = (ByteObjectInspector) poi;
            byte v = boi.get(o);
            // Flip the sign bit (presumably so negative values order before positive ones
            // when the output is compared as unsigned bytes).
            buffer.write((byte) (v ^ 0x80), invert);
            return;
        }
        case SHORT: {
            // Two bytes, most significant first, with the sign bit of the high byte flipped.
            ShortObjectInspector spoi = (ShortObjectInspector) poi;
            short v = spoi.get(o);
            buffer.write((byte) ((v >> 8) ^ 0x80), invert);
            buffer.write((byte) v, invert);
            return;
        }
        case INT: {
            IntObjectInspector ioi = (IntObjectInspector) poi;
            int v = ioi.get(o);
            serializeInt(buffer, v, invert);
            return;
        }
        case LONG: {
            // Eight bytes, most significant first, with the sign bit of the high byte flipped.
            LongObjectInspector loi = (LongObjectInspector) poi;
            long v = loi.get(o);
            buffer.write((byte) ((v >> 56) ^ 0x80), invert);
            buffer.write((byte) (v >> 48), invert);
            buffer.write((byte) (v >> 40), invert);
            buffer.write((byte) (v >> 32), invert);
            buffer.write((byte) (v >> 24), invert);
            buffer.write((byte) (v >> 16), invert);
            buffer.write((byte) (v >> 8), invert);
            buffer.write((byte) v, invert);
            return;
        }
        case FLOAT: {
            // Transform the raw IEEE bits, then write four bytes, most significant first.
            FloatObjectInspector foi = (FloatObjectInspector) poi;
            int v = Float.floatToIntBits(foi.get(o));
            if ((v & (1 << 31)) != 0) {
                // negative number, flip all bits
                v = ~v;
            } else {
                // positive number, flip the first bit
                v = v ^ (1 << 31);
            }
            buffer.write((byte) (v >> 24), invert);
            buffer.write((byte) (v >> 16), invert);
            buffer.write((byte) (v >> 8), invert);
            buffer.write((byte) v, invert);
            return;
        }
        case DOUBLE: {
            // Same scheme as FLOAT, on the 64-bit representation.
            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
            long v = Double.doubleToLongBits(doi.get(o));
            if ((v & (1L << 63)) != 0) {
                // negative number, flip all bits
                v = ~v;
            } else {
                // positive number, flip the first bit
                v = v ^ (1L << 63);
            }
            buffer.write((byte) (v >> 56), invert);
            buffer.write((byte) (v >> 48), invert);
            buffer.write((byte) (v >> 40), invert);
            buffer.write((byte) (v >> 32), invert);
            buffer.write((byte) (v >> 24), invert);
            buffer.write((byte) (v >> 16), invert);
            buffer.write((byte) (v >> 8), invert);
            buffer.write((byte) v, invert);
            return;
        }
        case STRING: {
            StringObjectInspector soi = (StringObjectInspector) poi;
            Text t = soi.getPrimitiveWritableObject(o);
            // Pass getLength() explicitly rather than relying on the array's own length.
            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
            return;
        }

        case VARCHAR: {
            HiveVarcharObjectInspector hcoi = (HiveVarcharObjectInspector) poi;
            HiveVarcharWritable hc = hcoi.getPrimitiveWritableObject(o);
            // use varchar's text field directly
            Text t = hc.getTextValue();
            serializeBytes(buffer, t.getBytes(), t.getLength(), invert);
            return;
        }

        case BINARY: {
            BinaryObjectInspector baoi = (BinaryObjectInspector) poi;
            BytesWritable ba = baoi.getPrimitiveWritableObject(o);
            // Copy the first getLength() bytes into an exactly-sized array before serializing.
            byte[] toSer = new byte[ba.getLength()];
            System.arraycopy(ba.getBytes(), 0, toSer, 0, ba.getLength());
            serializeBytes(buffer, toSer, ba.getLength(), invert);
            return;
        }
        case DATE: {
            // Encoded like an INT, using the day count from the writable.
            DateObjectInspector doi = (DateObjectInspector) poi;
            int v = doi.getPrimitiveWritableObject(o).getDays();
            serializeInt(buffer, v, invert);
            return;
        }
        case TIMESTAMP: {
            // The writable supplies its own byte form; it is copied out byte by byte.
            TimestampObjectInspector toi = (TimestampObjectInspector) poi;
            TimestampWritable t = toi.getPrimitiveWritableObject(o);
            byte[] data = t.getBinarySortable();
            for (int i = 0; i < data.length; i++) {
                buffer.write(data[i], invert);
            }
            return;
        }
        case DECIMAL: {
            // decimals are encoded in three pieces:
            // sign: 1, 2 or 3 for smaller, equal or larger than 0 respectively
            // factor: Number that indicates the amount of digits you have to move
            // the decimal point left or right until the resulting number is smaller
            // than one but has something other than 0 as the first digit.
            // digits: which is a string of all the digits in the decimal. If the number
            // is negative the binary string will be inverted to get the correct ordering.
            // Example: 0.00123
            // Sign is 3 (bigger than 0)
            // Factor is -2 (move decimal point 2 positions right)
            // Digits are: 123

            HiveDecimalObjectInspector boi = (HiveDecimalObjectInspector) poi;
            HiveDecimal dec = boi.getPrimitiveJavaObject(o);

            // get the sign of the big decimal
            int sign = dec.compareTo(HiveDecimal.ZERO);

            // we'll encode the absolute value (sign is separate)
            dec = dec.abs();

            // get the scale factor to turn big decimal into a decimal < 1
            int factor = dec.precision() - dec.scale();
            // the factor is negated unless the value is strictly positive
            factor = sign == 1 ? factor : -factor;

            // convert the absolute big decimal to string
            // NOTE(review): the return value of scaleByPowerOfTen is discarded; if HiveDecimal
            // is immutable this statement has no effect -- confirm before touching it.
            dec.scaleByPowerOfTen(Math.abs(dec.scale()));
            String digits = dec.unscaledValue().toString();

            // finally write out the pieces (sign, scale, digits)
            buffer.write((byte) (sign + 1), invert);
            buffer.write((byte) ((factor >> 24) ^ 0x80), invert);
            buffer.write((byte) (factor >> 16), invert);
            buffer.write((byte) (factor >> 8), invert);
            buffer.write((byte) factor, invert);
            // NOTE(review): digits.length() is a character count used as the byte length; this
            // assumes decimalCharSet encodes each digit as exactly one byte -- confirm.
            serializeBytes(buffer, digits.getBytes(decimalCharSet), digits.length(),
                    sign == -1 ? !invert : invert);
            return;
        }

        default: {
            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
        }
        }
    }
    case LIST: {
        ListObjectInspector loi = (ListObjectInspector) oi;
        ObjectInspector eoi = loi.getListElementObjectInspector();

        // \1 followed by each element
        int size = loi.getListLength(o);
        for (int eid = 0; eid < size; eid++) {
            buffer.write((byte) 1, invert);
            serialize(buffer, loi.getListElement(o, eid), eoi, invert);
        }
        // and \0 to terminate
        buffer.write((byte) 0, invert);
        return;
    }
    case MAP: {
        MapObjectInspector moi = (MapObjectInspector) oi;
        ObjectInspector koi = moi.getMapKeyObjectInspector();
        ObjectInspector voi = moi.getMapValueObjectInspector();

        // \1 followed by each key and then each value
        // Entries are written in the iteration order of the Map returned by getMap(o).
        Map<?, ?> map = moi.getMap(o);
        for (Map.Entry<?, ?> entry : map.entrySet()) {
            buffer.write((byte) 1, invert);
            serialize(buffer, entry.getKey(), koi, invert);
            serialize(buffer, entry.getValue(), voi, invert);
        }
        // and \0 to terminate
        buffer.write((byte) 0, invert);
        return;
    }
    case STRUCT: {
        StructObjectInspector soi = (StructObjectInspector) oi;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();

        // Fields are serialized back to back in declaration order, with no terminator byte.
        for (int i = 0; i < fields.size(); i++) {
            serialize(buffer, soi.getStructFieldData(o, fields.get(i)), fields.get(i).getFieldObjectInspector(),
                    invert);
        }
        return;
    }
    case UNION: {
        // The tag byte is written first, then the field using the inspector at index tag.
        UnionObjectInspector uoi = (UnionObjectInspector) oi;
        byte tag = uoi.getTag(o);
        buffer.write(tag, invert);
        serialize(buffer, uoi.getField(o), uoi.getObjectInspectors().get(tag), invert);
        return;
    }
    default: {
        throw new RuntimeException("Unrecognized type: " + oi.getCategory());
    }
    }

}

From source file:com.ebay.nest.io.sede.columnar.ColumnarStruct.java

License:Apache License

/**
 * Constructs a ColumnarStruct for the given type and, optionally, a custom null sequence.
 *
 * <p>The inspector and column-id list are passed straight to the superclass constructor. When a
 * non-null {@code nullSequence} is supplied, it is stored together with its length in bytes;
 * when it is null, neither field is assigned here.
 *
 * @param oi
 *          the ObjectInspector representing the type of this LazyStruct.
 * @param notSkippedColumnIDs
 *          the column ids that should not be skipped
 * @param nullSequence
 *          the byte sequence to record as this struct's null sequence, or null to leave the
 *          existing null-sequence fields untouched
 */
public ColumnarStruct(ObjectInspector oi, ArrayList<Integer> notSkippedColumnIDs, Text nullSequence) {
    super(oi, notSkippedColumnIDs);
    if (nullSequence == null) {
        return;
    }
    this.lengthNullSequence = nullSequence.getLength();
    this.nullSequence = nullSequence;
}

From source file:com.ebay.nest.io.sede.lazy.LazyArray.java

License:Apache License

/**
 * Returns the element at {@code index} without any bounds checking, materializing it on first
 * access.
 *
 * <p>On the first call for an index the element is marked as initialized. If its bytes have the
 * same length and content as the inspector's null sequence, {@code null} is cached and returned;
 * otherwise a lazy object is created, initialized over the element's byte range and cached.
 * Later calls answer from the cache.
 *
 * @param index position of the element; the caller is responsible for range checking
 * @return the element's object, or null when the element is the null sequence
 */
private Object uncheckedGetElement(int index) {
    if (elementInited[index]) {
        if (arrayElements[index] == null) {
            return null;
        }
        return arrayElements[index].getObject();
    }
    elementInited[index] = true;

    final Text nullMarker = oi.getNullSequence();

    // The element ends one byte before the next element's start position.
    final int elementLength = startPosition[index + 1] - startPosition[index] - 1;
    final int elementBegin = startPosition[index];

    // Compare contents only when the lengths already agree.
    final boolean isNullElement = elementLength == nullMarker.getLength()
            && LazyUtils.compare(bytes.getData(), elementBegin, elementLength,
                    nullMarker.getBytes(), 0, nullMarker.getLength()) == 0;
    if (isNullElement) {
        arrayElements[index] = null;
        return null;
    }

    arrayElements[index] = LazyFactory.createLazyObject(oi.getListElementObjectInspector());
    arrayElements[index].init(bytes, elementBegin, elementLength);
    return arrayElements[index].getObject();
}

From source file:com.ebay.nest.io.sede.lazy.LazyMap.java

License:Apache License

/**
 * Get the value object with the index without checking parsed.
 *
 * @param index/*from w w  w . j a va  2s. c  o  m*/
 *          The index into the array starting from 0
 */
private LazyObject uncheckedGetValue(int index) {
    if (valueInited[index]) {
        return valueObjects[index];
    }
    valueInited[index] = true;
    Text nullSequence = oi.getNullSequence();
    int valueIBegin = keyEnd[index] + 1;
    int valueILength = keyStart[index + 1] - 1 - valueIBegin;
    if (valueILength < 0
            || ((valueILength == nullSequence.getLength()) && 0 == LazyUtils.compare(bytes.getData(),
                    valueIBegin, valueILength, nullSequence.getBytes(), 0, nullSequence.getLength()))) {
        return valueObjects[index] = null;
    }
    valueObjects[index] = LazyFactory.createLazyObject(oi.getMapValueObjectInspector());
    valueObjects[index].init(bytes, valueIBegin, valueILength);
    return valueObjects[index];
}

From source file:com.ebay.nest.io.sede.lazy.LazyMap.java

License:Apache License

/**
 * Returns the lazy key object at {@code index} without checking whether the map has been
 * parsed, materializing it on first access.
 *
 * <p>The key occupies the bytes from {@code keyStart[index]} up to {@code keyEnd[index]}. A
 * negative length, or bytes equal to the inspector's null sequence, yields a cached
 * {@code null}.
 *
 * @param index
 *          The index into the array starting from 0
 * @return the cached or newly initialized key object, or null for a missing/null key
 */
private LazyPrimitive<?, ?> uncheckedGetKey(int index) {
    if (keyInited[index]) {
        return keyObjects[index];
    }
    keyInited[index] = true;

    final Text nullMarker = oi.getNullSequence();
    final int keyBegin = keyStart[index];
    final int keyLength = keyEnd[index] - keyBegin;

    boolean absent = keyLength < 0;
    if (!absent && keyLength == nullMarker.getLength()) {
        // Same length as the null sequence: compare contents to decide.
        absent = LazyUtils.compare(bytes.getData(), keyBegin, keyLength,
                nullMarker.getBytes(), 0, nullMarker.getLength()) == 0;
    }
    if (absent) {
        keyObjects[index] = null;
        return null;
    }

    // Keys are always primitive
    final PrimitiveObjectInspector keyInspector = (PrimitiveObjectInspector) oi.getMapKeyObjectInspector();
    keyObjects[index] = LazyFactory.createLazyPrimitiveClass(keyInspector);
    keyObjects[index].init(bytes, keyBegin, keyLength);
    return keyObjects[index];
}

From source file:com.ebay.nest.io.sede.lazy.LazySimpleSerDe.java

License:Apache License

/**
 * Deserialize a row from the Writable to a LazyObject.
 *
 * <p>The Writable's byte array is attached to the shared {@code byteArrayRef} (created on
 * first use) and {@code cachedLazyStruct} is re-initialized over bytes {@code 0..getLength()}.
 * The returned object is that same {@code cachedLazyStruct} field, not a fresh instance.
 *
 * @param field
 *          the Writable that contains the data; must be a BytesWritable or a Text
 * @return The deserialized row Object.
 * @throws SerDeException
 *           if {@code field} is neither a BytesWritable nor a Text
 * @see SerDe#deserialize(Writable)
 */
@Override
public Object deserialize(Writable field) throws SerDeException {
    if (byteArrayRef == null) {
        byteArrayRef = new ByteArrayRef();
    }

    final byte[] rowBytes;
    final int rowLength;
    if (field instanceof BytesWritable) {
        // For backward-compatibility with hadoop 0.17
        final BytesWritable binaryRow = (BytesWritable) field;
        rowBytes = binaryRow.getBytes();
        rowLength = binaryRow.getLength();
    } else if (field instanceof Text) {
        final Text textRow = (Text) field;
        rowBytes = textRow.getBytes();
        rowLength = textRow.getLength();
    } else {
        throw new SerDeException(getClass().toString() + ": expects either BytesWritable or Text object!");
    }

    // Only the first rowLength bytes of the array are handed to the struct.
    byteArrayRef.setData(rowBytes);
    cachedLazyStruct.init(byteArrayRef, 0, rowLength);

    lastOperationDeserialize = true;
    lastOperationSerialize = false;
    return cachedLazyStruct;
}