com.facebook.presto.hive.DwrfRecordCursorProvider.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.presto.hive.DwrfRecordCursorProvider.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive;

import com.facebook.hive.orc.OrcFile;
import com.facebook.hive.orc.OrcProto;
import com.facebook.hive.orc.OrcProto.Type;
import com.facebook.hive.orc.OrcSerde;
import com.facebook.hive.orc.Reader;
import com.facebook.hive.orc.RecordReader;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.TupleDomain;
import com.facebook.presto.spi.type.TypeManager;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;
import org.joda.time.DateTimeZone;

import java.util.List;
import java.util.Properties;

import static com.facebook.presto.hive.HiveUtil.getDeserializer;
import static com.facebook.presto.hive.HiveUtil.getTableObjectInspector;
import static com.google.common.collect.Iterables.all;

public class DwrfRecordCursorProvider implements HiveRecordCursorProvider {
    @Override
    public Optional<HiveRecordCursor> createHiveRecordCursor(String clientId, Configuration configuration,
            ConnectorSession session, Path path, long start, long length, Properties schema,
            List<HiveColumnHandle> columns, List<HivePartitionKey> partitionKeys,
            TupleDomain<HiveColumnHandle> tupleDomain, DateTimeZone hiveStorageTimeZone, TypeManager typeManager) {
        @SuppressWarnings("deprecation")
        Deserializer deserializer = getDeserializer(schema);
        if (!(deserializer instanceof OrcSerde)) {
            return Optional.absent();
        }

        StructObjectInspector rowInspector = getTableObjectInspector(schema);
        if (!all(rowInspector.getAllStructFieldRefs(), isSupportedDwrfType())) {
            throw new IllegalArgumentException("DWRF does not support DATE type");
        }

        ReaderWriterProfiler.setProfilerOptions(configuration);

        RecordReader recordReader;
        try {
            FileSystem fileSystem = path.getFileSystem(configuration);
            Reader reader = OrcFile.createReader(fileSystem, path, new JobConf(configuration));
            boolean[] include = findIncludedColumns(reader.getTypes(), columns);
            recordReader = reader.rows(start, length, include);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }

        return Optional.<HiveRecordCursor>of(new DwrfHiveRecordCursor(recordReader, length, schema, partitionKeys,
                columns, hiveStorageTimeZone, DateTimeZone.forID(session.getTimeZoneKey().getId()), typeManager));
    }

    private static Predicate<StructField> isSupportedDwrfType() {
        return new Predicate<StructField>() {
            @Override
            public boolean apply(StructField hiveColumnHandle) {
                return !hasDateType(hiveColumnHandle.getFieldObjectInspector());
            }
        };
    }

    private static boolean[] findIncludedColumns(List<Type> types, List<HiveColumnHandle> columns) {
        boolean[] includes = new boolean[types.size()];
        includes[0] = true;

        OrcProto.Type root = types.get(0);
        List<Integer> included = Lists.transform(columns, HiveColumnHandle.hiveColumnIndexGetter());
        for (int i = 0; i < root.getSubtypesCount(); ++i) {
            if (included.contains(i)) {
                includeColumnRecursive(types, includes, root.getSubtypes(i));
            }
        }

        // if we are filtering at least one column, return the boolean array
        for (boolean include : includes) {
            if (!include) {
                return includes;
            }
        }
        return null;
    }

    private static void includeColumnRecursive(List<OrcProto.Type> types, boolean[] result, int typeId) {
        result[typeId] = true;
        OrcProto.Type type = types.get(typeId);
        int children = type.getSubtypesCount();
        for (int i = 0; i < children; ++i) {
            includeColumnRecursive(types, result, type.getSubtypes(i));
        }
    }

    static boolean hasDateType(ObjectInspector objectInspector) {
        if (objectInspector instanceof PrimitiveObjectInspector) {
            PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) objectInspector;
            return primitiveInspector.getPrimitiveCategory() == PrimitiveCategory.DATE;
        }
        if (objectInspector instanceof ListObjectInspector) {
            ListObjectInspector listInspector = (ListObjectInspector) objectInspector;
            return hasDateType(listInspector.getListElementObjectInspector());
        }
        if (objectInspector instanceof MapObjectInspector) {
            MapObjectInspector mapInspector = (MapObjectInspector) objectInspector;
            return hasDateType(mapInspector.getMapKeyObjectInspector())
                    || hasDateType(mapInspector.getMapValueObjectInspector());
        }
        if (objectInspector instanceof StructObjectInspector) {
            for (StructField field : ((StructObjectInspector) objectInspector).getAllStructFieldRefs()) {
                if (hasDateType(field.getFieldObjectInspector())) {
                    return true;
                }
            }
            return false;
        }
        throw new IllegalArgumentException("Unknown object inspector type " + objectInspector);
    }
}