com.facebook.presto.hive.rcfile.RcBinaryBlockLoader.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.presto.hive.rcfile.RcBinaryBlockLoader.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive.rcfile;

import com.facebook.presto.hive.HiveType;
import com.facebook.presto.hive.rcfile.RcFilePageSource.RcFileColumnsBatch;
import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.block.BlockBuilderStatus;
import com.facebook.presto.spi.block.LazyArrayBlock;
import com.facebook.presto.spi.block.LazyBlockLoader;
import com.facebook.presto.spi.block.LazyFixedWidthBlock;
import com.facebook.presto.spi.block.LazySliceArrayBlock;
import com.facebook.presto.spi.type.Type;
import com.google.common.base.Throwables;
import io.airlift.slice.ByteArrays;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.WritableUtils;

import java.io.IOException;
import java.util.Arrays;

import static com.facebook.presto.hive.HiveType.HIVE_BINARY;
import static com.facebook.presto.hive.HiveType.HIVE_BOOLEAN;
import static com.facebook.presto.hive.HiveType.HIVE_BYTE;
import static com.facebook.presto.hive.HiveType.HIVE_DATE;
import static com.facebook.presto.hive.HiveType.HIVE_DOUBLE;
import static com.facebook.presto.hive.HiveType.HIVE_FLOAT;
import static com.facebook.presto.hive.HiveType.HIVE_INT;
import static com.facebook.presto.hive.HiveType.HIVE_LONG;
import static com.facebook.presto.hive.HiveType.HIVE_SHORT;
import static com.facebook.presto.hive.HiveType.HIVE_STRING;
import static com.facebook.presto.hive.HiveType.HIVE_TIMESTAMP;
import static com.facebook.presto.hive.HiveUtil.isStructuralType;
import static com.facebook.presto.hive.util.SerDeUtils.serializeObject;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.airlift.slice.SizeOf.SIZE_OF_INT;
import static io.airlift.slice.SizeOf.SIZE_OF_LONG;
import static io.airlift.slice.SizeOf.SIZE_OF_SHORT;
import static io.airlift.slice.Slices.wrappedBooleanArray;
import static io.airlift.slice.Slices.wrappedDoubleArray;
import static io.airlift.slice.Slices.wrappedLongArray;

public class RcBinaryBlockLoader implements RcFileBlockLoader {
    private static final byte HIVE_EMPTY_STRING_BYTE = (byte) 0xbf;

    /**
     * Picks the lazy loader that decodes a fixed-width column of the given Hive type.
     *
     * @param batch the column batch the returned loader reads from
     * @param fieldId index of the column inside {@code batch}
     * @param hiveType Hive type of the column; boolean, byte, short, int, long, date,
     *        timestamp, float and double are handled
     * @return a loader for the matching type
     * @throws UnsupportedOperationException if {@code hiveType} is none of the handled types
     */
    @Override
    public LazyBlockLoader<LazyFixedWidthBlock> fixedWidthBlockLoader(RcFileColumnsBatch batch, int fieldId,
            HiveType hiveType) {
        final LazyBlockLoader<LazyFixedWidthBlock> loader;

        if (HIVE_BOOLEAN.equals(hiveType)) {
            loader = new LazyBooleanBlockLoader(batch, fieldId);
        } else if (HIVE_BYTE.equals(hiveType)) {
            loader = new LazyByteBlockLoader(batch, fieldId);
        } else if (HIVE_SHORT.equals(hiveType)) {
            loader = new LazyShortBlockLoader(batch, fieldId);
        } else if (HIVE_INT.equals(hiveType)) {
            loader = new LazyIntBlockLoader(batch, fieldId);
        } else if (HIVE_LONG.equals(hiveType)) {
            loader = new LazyLongBlockLoader(batch, fieldId);
        } else if (HIVE_DATE.equals(hiveType)) {
            loader = new LazyDateBlockLoader(batch, fieldId);
        } else if (HIVE_TIMESTAMP.equals(hiveType)) {
            loader = new LazyTimestampBlockLoader(batch, fieldId);
        } else if (HIVE_FLOAT.equals(hiveType)) {
            loader = new LazyFloatBlockLoader(batch, fieldId);
        } else if (HIVE_DOUBLE.equals(hiveType)) {
            loader = new LazyDoubleBlockLoader(batch, fieldId);
        } else {
            throw new UnsupportedOperationException("Unsupported column type: " + hiveType);
        }

        return loader;
    }

    /**
     * Returns the lazy loader for a variable-width (string or binary) column.
     *
     * @param batch the column batch the returned loader reads from
     * @param fieldId index of the column inside {@code batch}
     * @param hiveType Hive type of the column; only string and binary are handled
     * @param fieldInspector not used by this implementation
     * @return a slice-array loader for the column
     * @throws UnsupportedOperationException if {@code hiveType} is neither string nor binary
     */
    @Override
    public LazyBlockLoader<LazySliceArrayBlock> variableWidthBlockLoader(RcFileColumnsBatch batch, int fieldId,
            HiveType hiveType, ObjectInspector fieldInspector) {
        boolean sliceBacked = HIVE_STRING.equals(hiveType) || HIVE_BINARY.equals(hiveType);
        if (!sliceBacked) {
            throw new UnsupportedOperationException("Unsupported column type: " + hiveType);
        }
        return new LazySliceBlockLoader(batch, fieldId);
    }

    /**
     * Returns the lazy loader for a structural column.
     *
     * @param batch the column batch the returned loader reads from
     * @param fieldId index of the column inside {@code batch}
     * @param hiveType Hive type of the column; must satisfy {@code isStructuralType}
     * @param fieldInspector inspector handed to the loader for decoding each value
     * @param type Presto type handed to the loader for building the block
     * @return a loader that materializes the column into a {@link LazyArrayBlock}
     * @throws IllegalArgumentException if {@code hiveType} is not a structural type
     */
    @Override
    public LazyBlockLoader<LazyArrayBlock> structuralBlockLoader(RcFileColumnsBatch batch, int fieldId,
            HiveType hiveType, ObjectInspector fieldInspector, Type type) {
        boolean structural = isStructuralType(hiveType);
        checkArgument(structural, "hiveType (" + hiveType + ") is not structuralType");

        LazyBlockLoader<LazyArrayBlock> loader = new LazyStructuralBlockLoader(type, batch, fieldId, fieldInspector);
        return loader;
    }

    /**
     * Lazy loader that decodes one boolean column of a column batch into a fixed-width block.
     * Decoding happens on the first successful {@code load} call; later calls return immediately.
     */
    private static final class LazyBooleanBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        public LazyBooleanBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                decodeInto(block);
                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }

        // Reads block.getPositionCount() cells starting at the batch's current position.
        private void decodeInto(LazyFixedWidthBlock block) throws IOException {
            BytesRefArrayWritable column = batch.getColumn(fieldId);
            int firstRow = batch.getPositionInBatch();

            int rowCount = block.getPositionCount();
            boolean[] nulls = new boolean[rowCount];
            boolean[] values = new boolean[rowCount];

            for (int row = 0; row < rowCount; row++) {
                BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                // a zero-length cell is recorded as null
                if (cell.getLength() == 0) {
                    nulls[row] = true;
                    continue;
                }

                // any non-zero first byte counts as true
                values[row] = cell.getData()[cell.getStart()] != 0;
            }

            block.setNullVector(nulls);
            block.setRawSlice(wrappedBooleanArray(values, 0, rowCount));
        }
    }

    /**
     * Lazy loader that decodes one byte column of a column batch into a long-backed fixed-width
     * block. Decoding happens on the first successful {@code load} call; later calls return
     * immediately.
     */
    private static final class LazyByteBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyByteBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                boolean[] nulls = new boolean[rowCount];
                long[] values = new long[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    // a zero-length cell is recorded as null
                    if (cell.getLength() == 0) {
                        nulls[row] = true;
                        continue;
                    }

                    // the first stored byte is widened (sign-extended) into the long slot
                    values[row] = cell.getData()[cell.getStart()];
                }

                block.setNullVector(nulls);
                block.setRawSlice(wrappedLongArray(values));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one short column of a column batch into a long-backed fixed-width
     * block. Decoding happens on the first successful {@code load} call; later calls return
     * immediately.
     */
    private static final class LazyShortBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyShortBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         *
         * @throws IllegalStateException if a non-empty cell is not exactly 2 bytes long
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                boolean[] nulls = new boolean[rowCount];
                long[] values = new long[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    int cellLength = cell.getLength();
                    // a zero-length cell is recorded as null
                    if (cellLength == 0) {
                        nulls[row] = true;
                        continue;
                    }

                    checkState(cellLength == SIZE_OF_SHORT, "Short should be 2 bytes");

                    // the file format uses big endian, so the bytes read are swapped
                    short raw = ByteArrays.getShort(cell.getData(), cell.getStart());
                    values[row] = Short.reverseBytes(raw);
                }

                block.setNullVector(nulls);
                block.setRawSlice(wrappedLongArray(values));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one int column of a column batch into a long-backed fixed-width
     * block. Decoding happens on the first successful {@code load} call; later calls return
     * immediately.
     */
    private static final class LazyIntBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyIntBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                boolean[] nulls = new boolean[rowCount];
                long[] values = new long[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    byte[] data = cell.getData();
                    int offset = cell.getStart();
                    int cellLength = cell.getLength();

                    switch (cellLength) {
                        case 0:
                            // a zero-length cell is recorded as null
                            nulls[row] = true;
                            break;
                        case 1:
                            // a one-byte cell holds the value itself
                            values[row] = data[offset];
                            break;
                        default:
                            // longer cells are decoded as a variable-length integer
                            values[row] = readVInt(data, offset, cellLength);
                            break;
                    }
                }

                block.setNullVector(nulls);
                block.setRawSlice(wrappedLongArray(values));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one long column of a column batch into a long-backed fixed-width
     * block. Decoding happens on the first successful {@code load} call; later calls return
     * immediately.
     */
    private static final class LazyLongBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyLongBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                boolean[] nulls = new boolean[rowCount];
                long[] values = new long[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    byte[] data = cell.getData();
                    int offset = cell.getStart();
                    int cellLength = cell.getLength();

                    // a zero-length cell is recorded as null
                    if (cellLength == 0) {
                        nulls[row] = true;
                        continue;
                    }

                    // a one-byte cell holds the value itself; longer cells are variable-length integers
                    values[row] = (cellLength == 1) ? data[offset] : readVInt(data, offset, cellLength);
                }

                block.setNullVector(nulls);
                block.setRawSlice(wrappedLongArray(values));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one date column of a column batch into a long-backed fixed-width
     * block holding days since the epoch. Decoding happens on the first successful {@code load}
     * call; later calls return immediately.
     */
    private static final class LazyDateBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyDateBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
                int positionInBatch = batch.getPositionInBatch();

                int positionCount = block.getPositionCount();
                boolean[] isNull = new boolean[positionCount];
                long[] vector = new long[positionCount];

                for (int i = 0; i < positionCount; i++) {
                    BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);

                    int length = writable.getLength();
                    if (length != 0) {
                        byte[] bytes = writable.getData();
                        int start = writable.getStart();

                        // A one-byte variable-length integer stores the value in that byte. The
                        // multi-byte decoder skips the first byte, so it must not be used here;
                        // this mirrors the length == 1 handling of the int and long loaders.
                        long daysSinceEpoch = (length == 1) ? bytes[start] : readVInt(bytes, start, length);
                        vector[i] = daysSinceEpoch;
                    } else {
                        // a zero-length cell is recorded as null
                        isNull[i] = true;
                    }
                }

                block.setNullVector(isNull);
                block.setRawSlice(wrappedLongArray(vector));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one timestamp column of a column batch into a long-backed
     * fixed-width block holding milliseconds. Decoding happens on the first successful
     * {@code load} call; later calls return immediately.
     */
    private static final class LazyTimestampBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyTimestampBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
                int positionInBatch = batch.getPositionInBatch();

                int batchSize = block.getPositionCount();
                boolean[] isNull = new boolean[batchSize];
                long[] vector = new long[batchSize];

                for (int i = 0; i < batchSize; i++) {
                    BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);

                    int length = writable.getLength();
                    if (length != 0) {
                        byte[] bytes = writable.getData();
                        int start = writable.getStart();

                        long seconds = TimestampWritable.getSeconds(bytes, start);
                        // Only read the nanos VInt when the value says one follows the 4-byte
                        // seconds field; otherwise the read would run past the end of this value.
                        long nanos = hasNanosVInt(bytes[start])
                                ? TimestampWritable.getNanos(bytes, start + SIZE_OF_INT)
                                : 0;
                        vector[i] = (seconds * 1000) + (nanos / 1_000_000);
                    } else {
                        // a zero-length cell is recorded as null
                        isNull[i] = true;
                    }
                }

                block.setNullVector(isNull);
                block.setRawSlice(wrappedLongArray(vector));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }

        // Hive's binary timestamp sets the most significant bit of the first byte when a VInt
        // (fractional seconds) follows the 4-byte seconds field.
        private static boolean hasNanosVInt(byte firstByte) {
            return (firstByte & 0x80) != 0;
        }
    }

    /**
     * Lazy loader that decodes one float column of a column batch into a double-backed
     * fixed-width block. Decoding happens on the first successful {@code load} call; later calls
     * return immediately.
     */
    private static final class LazyFloatBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyFloatBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         *
         * @throws IllegalStateException if a non-empty cell is not exactly 4 bytes long
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                boolean[] nulls = new boolean[rowCount];
                double[] values = new double[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    int cellLength = cell.getLength();
                    // a zero-length cell is recorded as null
                    if (cellLength == 0) {
                        nulls[row] = true;
                        continue;
                    }

                    checkState(cellLength == SIZE_OF_INT, "Float should be 4 bytes");

                    int rawBits = ByteArrays.getInt(cell.getData(), cell.getStart());

                    // the file format uses big endian, so the bits are byte-swapped first
                    float decoded = Float.intBitsToFloat(Integer.reverseBytes(rawBits));
                    values[row] = decoded;
                }

                block.setNullVector(nulls);
                block.setRawSlice(wrappedDoubleArray(values));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one double column of a column batch into a double-backed
     * fixed-width block. Decoding happens on the first successful {@code load} call; later calls
     * return immediately.
     */
    private static final class LazyDoubleBlockLoader implements LazyBlockLoader<LazyFixedWidthBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazyDoubleBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with the null vector and raw values of the column.
         * An {@link IOException} raised while decoding is rethrown via {@code Throwables.propagate}.
         *
         * @throws IllegalStateException if a non-empty cell is not exactly 8 bytes long
         */
        @Override
        public void load(LazyFixedWidthBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                boolean[] nulls = new boolean[rowCount];
                double[] values = new double[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    int cellLength = cell.getLength();
                    // a zero-length cell is recorded as null
                    if (cellLength == 0) {
                        nulls[row] = true;
                        continue;
                    }

                    checkState(cellLength == SIZE_OF_LONG, "Double should be 8 bytes");

                    long rawBits = ByteArrays.getLong(cell.getData(), cell.getStart());

                    // the file format uses big endian, so the bits are byte-swapped first
                    values[row] = Double.longBitsToDouble(Long.reverseBytes(rawBits));
                }

                block.setNullVector(nulls);
                block.setRawSlice(wrappedDoubleArray(values));

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one string or binary column of a column batch into an array of
     * slices. Decoding happens on the first successful {@code load} call; later calls return
     * immediately.
     */
    private static final class LazySliceBlockLoader implements LazyBlockLoader<LazySliceArrayBlock> {
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private boolean loaded;

        private LazySliceBlockLoader(RcFileColumnsBatch batch, int fieldId) {
            this.batch = batch;
            this.fieldId = fieldId;
        }

        /**
         * Populates {@code block} with one slice per position; positions whose cell has no bytes
         * are left as {@code null} entries. An {@link IOException} raised while decoding is
         * rethrown via {@code Throwables.propagate}.
         */
        @Override
        public void load(LazySliceArrayBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable column = batch.getColumn(fieldId);
                int firstRow = batch.getPositionInBatch();

                int rowCount = block.getPositionCount();
                Slice[] values = new Slice[rowCount];

                for (int row = 0; row < rowCount; row++) {
                    BytesRefWritable cell = column.unCheckedGet(row + firstRow);

                    int cellLength = cell.getLength();
                    if (cellLength <= 0) {
                        // nothing stored: the array entry stays null
                        continue;
                    }

                    byte[] data = cell.getData();
                    int offset = cell.getStart();

                    // a lone marker byte stands for the empty string; anything else is copied out
                    boolean emptyMarker = (cellLength == 1) && data[offset] == HIVE_EMPTY_STRING_BYTE;
                    values[row] = emptyMarker
                            ? Slices.EMPTY_SLICE
                            : Slices.wrappedBuffer(Arrays.copyOfRange(data, offset, offset + cellLength));
                }

                block.setValues(values);

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Lazy loader that decodes one structural column of a column batch by deserializing each
     * value with the field's object inspector and writing it through a block builder. Decoding
     * happens on the first successful {@code load} call; later calls return immediately.
     */
    private static final class LazyStructuralBlockLoader implements LazyBlockLoader<LazyArrayBlock> {
        private final Type type;
        private final RcFileColumnsBatch batch;
        private final int fieldId;
        private final ObjectInspector fieldInspector;
        private boolean loaded;

        private LazyStructuralBlockLoader(Type type, RcFileColumnsBatch batch, int fieldId,
                ObjectInspector fieldInspector) {
            this.type = type;
            this.batch = batch;
            this.fieldId = fieldId;
            this.fieldInspector = fieldInspector;
        }

        /**
         * Builds a block with one entry per position of {@code block} and copies it into
         * {@code block}. An {@link IOException} raised while decoding is rethrown via
         * {@code Throwables.propagate}.
         */
        @Override
        public void load(LazyArrayBlock block) {
            if (loaded) {
                return;
            }

            try {
                BytesRefArrayWritable columnBatch = batch.getColumn(fieldId);
                int positionInBatch = batch.getPositionInBatch();

                int batchSize = block.getPositionCount();
                BlockBuilder blockBuilder = type.createBlockBuilder(new BlockBuilderStatus(), batchSize);

                for (int i = 0; i < batchSize; i++) {
                    BytesRefWritable writable = columnBatch.unCheckedGet(i + positionInBatch);

                    int length = writable.getLength();
                    if (length > 0) {
                        byte[] bytes = writable.getData();
                        int start = writable.getStart();
                        LazyBinaryObject lazyObject = LazyBinaryFactory.createLazyBinaryObject(fieldInspector);
                        ByteArrayRef byteArrayRef = new ByteArrayRef();
                        byteArrayRef.setData(bytes);
                        lazyObject.init(byteArrayRef, start, length);
                        serializeObject(type, blockBuilder, lazyObject.getObject(), fieldInspector);
                    } else {
                        // An empty cell must still occupy a position; otherwise the built block
                        // has fewer entries than batchSize and later rows shift out of place.
                        blockBuilder.appendNull();
                    }
                }

                block.copyFromBlock(blockBuilder.build());

                loaded = true;
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    }

    /**
     * Decodes a variable-length integer whose total encoded size is already known; a faster
     * version of {@code org.apache.hadoop.io.WritableUtils.readVLong}.
     *
     * @param bytes buffer holding the encoded value
     * @param start index of the first (header) byte of the value in {@code bytes}
     * @param length total number of encoded bytes, including the header byte; must be at least 1
     * @return the decoded value
     */
    private static long readVInt(byte[] bytes, int start, int length) {
        // A one-byte encoding stores the value directly in that byte; the loop below would skip
        // it and yield 0 or -1 instead.
        if (length == 1) {
            return bytes[start];
        }

        // Multi-byte encoding: the header byte carries only size and sign, the remaining bytes
        // are the magnitude in big-endian order.
        long value = 0;
        for (int i = 1; i < length; i++) {
            value <<= 8;
            value |= (bytes[start + i] & 0xFF);
        }
        // negative values are stored as their one's complement
        return WritableUtils.isNegativeVInt(bytes[start]) ? ~value : value;
    }
}