com.splicemachine.orc.OrcTester.java Source code

Introduction

Here is the source code for com.splicemachine.orc.OrcTester.java, a test harness that writes values to ORC/DWRF files and reads them back to verify the Splice Machine ORC reader.
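
A minimal usage sketch, assuming a JUnit 4 test environment with the same Hive and Spark dependencies on the classpath; the OrcTesterUsageExample class, the sample values, and the IntegerType column are illustrative only:

import com.splicemachine.orc.OrcTester;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.spark.sql.types.DataTypes;
import org.junit.Test;

import static java.util.Arrays.asList;

public class OrcTesterUsageExample {
    @Test
    public void intRoundTrip() throws Exception {
        // Write a handful of int values to a temporary ORC file and read them back,
        // using the quick configuration (ORC 0.12, ZLIB, struct/list/null variants).
        OrcTester tester = OrcTester.quickOrcTester();
        tester.testRoundTrip(
                PrimitiveObjectInspectorFactory.javaIntObjectInspector,
                asList(1, 2, 3, 4, 5),
                DataTypes.IntegerType);
    }
}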

Source

/*
 * Copyright (c) 2012 - 2017 Splice Machine, Inc.
 *
 * This file is part of Splice Machine.
 * Splice Machine is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either
 * version 3, or (at your option) any later version.
 * Splice Machine is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License along with Splice Machine.
 * If not, see <http://www.gnu.org/licenses/>.
 */
package com.splicemachine.orc;

import com.google.common.base.Preconditions;
import com.splicemachine.orc.block.BlockFactory;
import com.splicemachine.orc.block.ColumnBlock;
import com.splicemachine.orc.memory.AggregatedMemoryContext;
import com.splicemachine.orc.metadata.DwrfMetadataReader;
import com.splicemachine.orc.metadata.MetadataReader;
import com.splicemachine.orc.metadata.OrcMetadataReader;
import com.google.common.base.Throwables;
import com.google.common.collect.*;
import io.airlift.units.DataSize;
import io.airlift.units.DataSize.Unit;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
import org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
// OrcConf is referenced by createDwrfRecordWriter below but was not imported here; assumed to be
// the hive-dwrf OrcConf, which defines the entropy/dictionary ConfVars used for the DWRF writer.
import com.facebook.hive.orc.OrcConf;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.*;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.spark.sql.execution.vectorized.ColumnVector;
import org.apache.spark.sql.types.*;
import org.apache.spark.sql.types.StructField;
import org.joda.time.DateTimeZone;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.sql.Date;
import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.util.*;
import java.util.Map.Entry;

import static com.splicemachine.orc.OrcTester.Compression.NONE;
import static com.splicemachine.orc.OrcTester.Compression.ZLIB;
import static com.splicemachine.orc.OrcTester.Format.DWRF;
import static com.splicemachine.orc.OrcTester.Format.ORC_12;
import static com.splicemachine.orc.OrcTester.Format.ORC_11;
import static com.google.common.base.Functions.constant;
import static com.google.common.collect.Iterables.transform;
import static com.google.common.io.Files.createTempDir;
import static io.airlift.testing.FileUtils.deleteRecursively;
import static io.airlift.units.DataSize.succinctBytes;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Arrays.asList;
import static org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.*;
import static org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils.getTypeInfoFromTypeString;
import static org.junit.Assert.*;

public class OrcTester {
    public static final DateTimeZone HIVE_STORAGE_TIME_ZONE = DateTimeZone.getDefault();

    public enum Format {
        ORC_12, ORC_11, DWRF
    }

    public enum Compression {
        ZLIB, SNAPPY, NONE
    }

    private boolean structTestsEnabled;
    private boolean mapTestsEnabled;
    private boolean listTestsEnabled;
    private boolean complexStructuralTestsEnabled;
    private boolean structuralNullTestsEnabled;
    private boolean reverseTestsEnabled;
    private boolean nullTestsEnabled;
    private boolean skipBatchTestsEnabled;
    private boolean skipStripeTestsEnabled;
    private Set<Format> formats = ImmutableSet.of();
    private Set<Compression> compressions = ImmutableSet.of();

    public static OrcTester quickOrcTester() {
        OrcTester orcTester = new OrcTester();
        orcTester.structTestsEnabled = true;
        //        orcTester.mapTestsEnabled = true; // ENABLE MAP Streams JL
        orcTester.mapTestsEnabled = false;
        orcTester.listTestsEnabled = true;
        orcTester.nullTestsEnabled = true;
        orcTester.skipBatchTestsEnabled = true;
        orcTester.formats = ImmutableSet.of(ORC_12);
        orcTester.compressions = ImmutableSet.of(ZLIB);
        return orcTester;
    }

    public static OrcTester fullOrcTester() {
        OrcTester orcTester = new OrcTester();
        orcTester.structTestsEnabled = true;
        //        orcTester.mapTestsEnabled = true;
        orcTester.mapTestsEnabled = false;
        orcTester.listTestsEnabled = true;
        orcTester.complexStructuralTestsEnabled = true;
        orcTester.structuralNullTestsEnabled = true;
        orcTester.reverseTestsEnabled = true;
        orcTester.nullTestsEnabled = true;
        orcTester.skipBatchTestsEnabled = true;
        orcTester.skipStripeTestsEnabled = true;
        orcTester.formats = ImmutableSet.copyOf(new Format[] { ORC_12, ORC_11 });
        orcTester.compressions = ImmutableSet.copyOf(Compression.values());
        return orcTester;
    }

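    /**
     * Round-trips {@code readValues} through ORC in several shapes: the plain values, an
     * all-null variant, and (when the corresponding flags are enabled) the values wrapped
     * in structs, maps, and lists.
     */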
    public void testRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, DataType type)
            throws Exception {
        // just the values
        testRoundTripType(objectInspector, readValues, type);

        // all nulls
        assertRoundTrip(objectInspector, transform(readValues, constant(null)), type);

        // values wrapped in struct
        if (structTestsEnabled) {
            testStructRoundTrip(objectInspector, readValues, type);
        }

        // values wrapped in a struct wrapped in a struct
        if (complexStructuralTestsEnabled) {
            testStructRoundTrip(createHiveStructInspector(objectInspector),
                    transform(readValues, OrcTester::toHiveStruct), rowType(type, type, type));
        }

        // values wrapped in map
        if (mapTestsEnabled) {
            testMapRoundTrip(objectInspector, readValues, type);
        }

        // values wrapped in list
        if (listTestsEnabled) {
            testListRoundTrip(objectInspector, readValues, type);
        }

        // values wrapped in a list wrapped in a list
        if (complexStructuralTestsEnabled) {
            testListRoundTrip(createHiveListInspector(objectInspector),
                    transform(readValues, OrcTester::toHiveList), arrayType(type));
        }
    }

    private void testStructRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, DataType elementType)
            throws Exception {
        DataType rowType = rowType(elementType, elementType, elementType);
        // values in simple struct
        testRoundTripType(createHiveStructInspector(objectInspector),
                transform(readValues, OrcTester::toHiveStruct), rowType);

        if (structuralNullTestsEnabled) {
            // values and nulls in simple struct
            testRoundTripType(createHiveStructInspector(objectInspector),
                    transform(insertNullEvery(5, readValues), OrcTester::toHiveStruct), rowType);

            // all null values in simple struct
            testRoundTripType(createHiveStructInspector(objectInspector),
                    transform(transform(readValues, constant(null)), OrcTester::toHiveStruct), rowType);
        }
    }

    private void testMapRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, DataType elementType)
            throws Exception {
        DataType mapType = mapType(elementType, elementType);

        // maps cannot have a null key, so select a value to use for the map key when the value is null
        Object readNullKeyValue = Iterables.getLast(readValues);

        // values in simple map
        testRoundTripType(createHiveMapInspector(objectInspector),
                transform(readValues, value -> toHiveMap(value, readNullKeyValue)), mapType);

        if (structuralNullTestsEnabled) {
            // values and nulls in simple map
            testRoundTripType(createHiveMapInspector(objectInspector),
                    transform(insertNullEvery(5, readValues), value -> toHiveMap(value, readNullKeyValue)),
                    mapType);

            // all null values in simple map
            testRoundTripType(createHiveMapInspector(objectInspector),
                    transform(transform(readValues, constant(null)), value -> toHiveMap(value, readNullKeyValue)),
                    mapType);
        }
    }

    private void testListRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, DataType elementType)
            throws Exception {
        DataType arrayType = arrayType(elementType);
        // values in simple list
        testRoundTripType(createHiveListInspector(objectInspector), transform(readValues, OrcTester::toHiveList),
                arrayType);

        if (structuralNullTestsEnabled) {
            // values and nulls in simple list
            testRoundTripType(createHiveListInspector(objectInspector),
                    transform(insertNullEvery(5, readValues), OrcTester::toHiveList), arrayType);

            // all null values in simple list
            testRoundTripType(createHiveListInspector(objectInspector),
                    transform(transform(readValues, constant(null)), OrcTester::toHiveList), arrayType);
        }
    }

    private void testRoundTripType(ObjectInspector objectInspector, Iterable<?> readValues, DataType type)
            throws Exception {
        // forward order
        assertRoundTrip(objectInspector, readValues, type);

        // reverse order
        if (reverseTestsEnabled) {
            assertRoundTrip(objectInspector, reverse(readValues), type);
        }

        if (nullTestsEnabled) {
            // forward order with nulls
            assertRoundTrip(objectInspector, insertNullEvery(5, readValues), type);

            // reverse order with nulls
            if (reverseTestsEnabled) {
                assertRoundTrip(objectInspector, insertNullEvery(5, reverse(readValues)), type);
            }
        }
    }

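    /**
     * Writes the values once per configured format/compression combination and verifies the file
     * contents, including the skip-batch and skip-stripe read paths when enabled. Combinations
     * DWRF cannot represent (date, decimal, char) are skipped.
     */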
    public void assertRoundTrip(ObjectInspector objectInspector, Iterable<?> readValues, DataType type)
            throws Exception {
        for (Format formatVersion : formats) {
            MetadataReader metadataReader;
            if (DWRF == formatVersion) {
                if (hasType(objectInspector, PrimitiveCategory.DATE)) {
                    // DWRF doesn't support dates
                    return;
                }
                if (hasType(objectInspector, PrimitiveCategory.DECIMAL)) {
                    // DWRF doesn't support decimals
                    return;
                }
                if (hasType(objectInspector, PrimitiveCategory.CHAR)) {
                    // DWRF doesn't support chars
                    return;
                }
                metadataReader = new DwrfMetadataReader();
            } else {
                metadataReader = new OrcMetadataReader();
            }
            for (Compression compression : compressions) {
                try (TempFile tempFile = new TempFile()) {
                    writeOrcColumn(tempFile.getFile(), formatVersion, compression, objectInspector,
                            readValues.iterator());

                    assertFileContents(objectInspector, tempFile, readValues, false, false, metadataReader, type);

                    if (skipBatchTestsEnabled) {
                        assertFileContents(objectInspector, tempFile, readValues, true, false, metadataReader,
                                type);
                    }

                    if (skipStripeTestsEnabled) {
                        assertFileContents(objectInspector, tempFile, readValues, false, true, metadataReader,
                                type);
                    }
                }
            }
        }
    }

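    /**
     * Reads the file back batch by batch and compares every decoded value against the expected
     * iterator; optionally skips decoding of the first batch or of the first stripe (first
     * 10,000 rows) to exercise the reader's skip logic.
     */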
    private static void assertFileContents(ObjectInspector objectInspector, TempFile tempFile,
            Iterable<?> expectedValues, boolean skipFirstBatch, boolean skipStripe, MetadataReader metadataReader,
            DataType type) throws IOException {
        OrcRecordReader recordReader = createCustomOrcRecordReader(tempFile, metadataReader, OrcPredicate.TRUE,
                type);
        assertEquals(recordReader.getReaderPosition(), 0);
        assertEquals(recordReader.getFilePosition(), 0);

        boolean isFirst = true;
        int rowsProcessed = 0;
        Iterator<?> iterator = expectedValues.iterator();
        for (int batchSize = toIntExact(recordReader.nextBatch()); batchSize >= 0; batchSize = toIntExact(
                recordReader.nextBatch())) {
            if (skipStripe && rowsProcessed < 10000) {
                assertEquals(advance(iterator, batchSize), batchSize);
            } else if (skipFirstBatch && isFirst) {
                assertEquals(advance(iterator, batchSize), batchSize);
                isFirst = false;
            } else {
                ColumnVector vector = recordReader.readBlock(type, 0);
                ColumnBlock block = BlockFactory.getColumnBlock(vector, type);
                List<Object> data = new ArrayList<>(vector.getElementsAppended());
                for (int position = 0; position < vector.getElementsAppended(); position++) {
                    data.add(block.getTestObject(position));
                }
                for (int i = 0; i < batchSize; i++) {
                    assertTrue(iterator.hasNext());
                    Object expected = iterator.next();
                    Object actual = data.get(i);
                    assertColumnValueEquals(type, actual, expected);
                }
            }
            assertEquals(recordReader.getReaderPosition(), rowsProcessed);
            assertEquals(recordReader.getFilePosition(), rowsProcessed);
            rowsProcessed += batchSize;
        }
        assertFalse(iterator.hasNext());

        assertEquals(recordReader.getReaderPosition(), rowsProcessed);
        assertEquals(recordReader.getFilePosition(), rowsProcessed);
        recordReader.close();
    }

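    /**
     * Deep equality check on decoded values: recurses into arrays, maps (matching entries
     * regardless of order), and structs, and compares doubles with NaN handling and a small delta.
     */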
    private static void assertColumnValueEquals(DataType type, Object actual, Object expected) {
        if (actual == null) {
            assertNull(expected);
            return;
        }
        if (type instanceof ArrayType) {
            List<?> actualArray = (List<?>) actual;
            List<?> expectedArray = (List<?>) expected;
            assertEquals(actualArray.size(), expectedArray.size());
            DataType elementType = ((ArrayType) type).elementType();
            for (int i = 0; i < actualArray.size(); i++) {
                Object actualElement = actualArray.get(i);
                Object expectedElement = expectedArray.get(i);
                assertColumnValueEquals(elementType, actualElement, expectedElement);
            }
        } else if (type instanceof MapType) {
            Map<?, ?> actualMap = (Map<?, ?>) actual;
            Map<?, ?> expectedMap = (Map<?, ?>) expected;
            assertEquals(actualMap.size(), expectedMap.size());

            DataType keyType = ((MapType) type).keyType();
            DataType valueType = ((MapType) type).valueType();

            List<Entry<?, ?>> expectedEntries = new ArrayList<>(expectedMap.entrySet());
            for (Entry<?, ?> actualEntry : actualMap.entrySet()) {
                Iterator<Entry<?, ?>> iterator = expectedEntries.iterator();
                while (iterator.hasNext()) {
                    Entry<?, ?> expectedEntry = iterator.next();
                    try {
                        assertColumnValueEquals(keyType, actualEntry.getKey(), expectedEntry.getKey());
                        assertColumnValueEquals(valueType, actualEntry.getValue(), expectedEntry.getValue());
                        iterator.remove();
                    } catch (AssertionError ignored) {
                    }
                }
            }
            assertTrue("Unmatched entries " + expectedEntries, expectedEntries.isEmpty());
        } else if (type instanceof StructType) {

            StructField[] fieldTypes = ((StructType) type).fields();

            List<?> actualRow = (List<?>) actual;
            List<?> expectedRow = (List<?>) expected;
            assertEquals(actualRow.size(), fieldTypes.length);
            assertEquals(actualRow.size(), expectedRow.size());

            for (int fieldId = 0; fieldId < actualRow.size(); fieldId++) {
                DataType fieldType = fieldTypes[fieldId].dataType();
                Object actualElement = actualRow.get(fieldId);
                Object expectedElement = expectedRow.get(fieldId);
                assertColumnValueEquals(fieldType, actualElement, expectedElement);
            }
        } else if (type instanceof DoubleType) {
            Double actualDouble = (Double) actual;
            Double expectedDouble = (Double) expected;
            if (actualDouble.isNaN()) {
                assertTrue("expected double to be NaN", expectedDouble.isNaN());
            } else {
                assertEquals(actualDouble, expectedDouble, 0.001);
            }
        } else if (!Objects.equals(actual, expected)) {
            assertEquals(expected, actual);
        }
    }

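    /**
     * Opens the temp file with 1MB data-source and reader sizes, sanity-checks the single "test"
     * column and the 10,000-row row-group size, and returns a record reader for the given type
     * and predicate.
     */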
    static OrcRecordReader createCustomOrcRecordReader(TempFile tempFile, MetadataReader metadataReader,
            OrcPredicate predicate, DataType type) throws IOException {
        OrcDataSource orcDataSource = new FileOrcDataSource(tempFile.getFile(), new DataSize(1, Unit.MEGABYTE),
                new DataSize(1, Unit.MEGABYTE), new DataSize(1, Unit.MEGABYTE));
        OrcReader orcReader = new OrcReader(orcDataSource, metadataReader, new DataSize(1, Unit.MEGABYTE),
                new DataSize(1, Unit.MEGABYTE));

        assertEquals(orcReader.getColumnNames(), ImmutableList.of("test"));
        assertEquals(orcReader.getFooter().getRowsInRowGroup(), 10_000);

        return orcReader.createRecordReader(ImmutableMap.of(0, type), predicate, HIVE_STORAGE_TIME_ZONE,
                new AggregatedMemoryContext(), Collections.emptyList(), Collections.emptyList());
    }

    static DataSize writeOrcColumn(File outputFile, Format format, Compression compression,
            ObjectInspector columnObjectInspector, Iterator<?> values) throws Exception {
        RecordWriter recordWriter;
        if (DWRF == format) {
            recordWriter = createDwrfRecordWriter(outputFile, compression, columnObjectInspector);
        } else {
            recordWriter = createOrcRecordWriter(outputFile, format, compression, columnObjectInspector);
        }
        return writeOrcFileColumnOld(outputFile, format, recordWriter, columnObjectInspector, values);
    }

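    /**
     * Wraps each value in a single-field "test" struct, serializes it with the ORC (or DWRF)
     * SerDe, and writes it through the supplied RecordWriter; returns the size of the resulting file.
     */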
    public static DataSize writeOrcFileColumnOld(File outputFile, Format format, RecordWriter recordWriter,
            ObjectInspector columnObjectInspector, Iterator<?> values) throws Exception {
        SettableStructObjectInspector objectInspector = createSettableStructObjectInspector("test",
                columnObjectInspector);
        Object row = objectInspector.create();

        List<org.apache.hadoop.hive.serde2.objectinspector.StructField> fields = ImmutableList
                .copyOf(objectInspector.getAllStructFieldRefs());

        int i = 0;
        TypeInfo typeInfo = getTypeInfoFromTypeString(columnObjectInspector.getTypeName());
        while (values.hasNext()) {
            Object value = values.next();
            value = preprocessWriteValueOld(typeInfo, value);
            objectInspector.setStructFieldData(row, fields.get(0), value);

            @SuppressWarnings("deprecation")
            Serializer serde;
            if (DWRF == format) {
                serde = new org.apache.hadoop.hive.ql.io.orc.OrcSerde();
                if (i == 142_345) {
                    setDwrfLowMemoryFlag(recordWriter);
                }
            } else {
                serde = new OrcSerde();
            }
            Writable record = serde.serialize(row, objectInspector);
            recordWriter.write(record);
            i++;
        }

        recordWriter.close(false);
        return succinctBytes(outputFile.length());
    }

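    /**
     * Converts a test value into the Java representation the Hive SerDe expects for its type
     * (boxed numerics, HiveDecimal, HiveChar, java.sql.Date/Timestamp, byte[]), recursing into
     * maps, lists, and structs.
     */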
    private static Object preprocessWriteValueOld(TypeInfo typeInfo, Object value) throws IOException {
        if (value == null) {
            return null;
        }
        switch (typeInfo.getCategory()) {
        case PRIMITIVE:
            PrimitiveObjectInspector.PrimitiveCategory primitiveCategory = ((PrimitiveTypeInfo) typeInfo)
                    .getPrimitiveCategory();
            switch (primitiveCategory) {
            case BOOLEAN:
                return value;
            case BYTE:
                return ((Number) value).byteValue();
            case SHORT:
                return ((Number) value).shortValue();
            case INT:
                return ((Number) value).intValue();
            case LONG:
                return ((Number) value).longValue();
            case FLOAT:
                return ((Number) value).floatValue();
            case DOUBLE:
                return ((Number) value).doubleValue();
            case DECIMAL:
                return HiveDecimal.create(((Decimal) value).toBigDecimal().bigDecimal());
            case STRING:
                return value;
            case CHAR:
                return new HiveChar(value.toString(), ((CharTypeInfo) typeInfo).getLength());
            case DATE:
                LocalDate localDate = LocalDate.ofEpochDay((int) value);
                ZonedDateTime zonedDateTime = localDate.atStartOfDay(ZoneId.systemDefault());

                long millis = zonedDateTime.toEpochSecond() * 1000;
                Date date = new Date(0);
                // millis must be set separately to avoid masking
                date.setTime(millis);
                return date;
            case TIMESTAMP:
                long millisUtc = ((Long) value).intValue();
                return new Timestamp(millisUtc);
            case BINARY:
                return ((String) value).getBytes();
            //                        return (byte[])value;
            }
            break;
        case MAP:
            MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
            TypeInfo keyTypeInfo = mapTypeInfo.getMapKeyTypeInfo();
            TypeInfo valueTypeInfo = mapTypeInfo.getMapValueTypeInfo();
            Map<Object, Object> newMap = new HashMap<>();
            for (Entry<?, ?> entry : ((Map<?, ?>) value).entrySet()) {
                newMap.put(preprocessWriteValueOld(keyTypeInfo, entry.getKey()),
                        preprocessWriteValueOld(valueTypeInfo, entry.getValue()));
            }
            return newMap;
        case LIST:
            ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
            TypeInfo elementTypeInfo = listTypeInfo.getListElementTypeInfo();
            List<Object> newList = new ArrayList<>(((Collection<?>) value).size());
            for (Object element : (Iterable<?>) value) {
                newList.add(preprocessWriteValueOld(elementTypeInfo, element));
            }
            return newList;
        case STRUCT:
            StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
            List<?> fieldValues = (List<?>) value;
            List<TypeInfo> fieldTypeInfos = structTypeInfo.getAllStructFieldTypeInfos();
            List<Object> newStruct = new ArrayList<>();
            for (int fieldId = 0; fieldId < fieldValues.size(); fieldId++) {
                newStruct.add(preprocessWriteValueOld(fieldTypeInfos.get(fieldId), fieldValues.get(fieldId)));
            }
            return newStruct;
        }
        throw new IOException(format("Unsupported Hive type: %s", typeInfo));
    }

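    // Uses reflection to reach the underlying DWRF writer and its memory manager and force low-memory mode.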
    private static void setDwrfLowMemoryFlag(RecordWriter recordWriter) {
        Object writer = getFieldValue(recordWriter, "writer");
        Object memoryManager = getFieldValue(writer, "memoryManager");
        setFieldValue(memoryManager, "lowMemoryMode", true);
        try {
            writer.getClass().getMethod("enterLowMemoryMode").invoke(writer);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    private static Object getFieldValue(Object instance, String name) {
        try {
            Field writerField = instance.getClass().getDeclaredField(name);
            writerField.setAccessible(true);
            return writerField.get(instance);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    private static void setFieldValue(Object instance, String name, Object value) {
        try {
            Field writerField = instance.getClass().getDeclaredField(name);
            writerField.setAccessible(true);
            writerField.set(instance, value);
        } catch (Exception e) {
            throw Throwables.propagate(e);
        }
    }

    static RecordWriter createOrcRecordWriter(File outputFile, Format format, Compression compression,
            ObjectInspector columnObjectInspector) throws IOException {
        JobConf jobConf = new JobConf();
        jobConf.set("hive.exec.orc.write.format", format == ORC_12 ? "0.12" : "0.11");
        jobConf.set("hive.exec.orc.default.compress", compression.name());

        return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
                compression != NONE, createTableProperties("test", columnObjectInspector.getTypeName()), () -> {
                });
    }

    private static RecordWriter createDwrfRecordWriter(File outputFile, Compression compressionCodec,
            ObjectInspector columnObjectInspector) throws IOException {
        JobConf jobConf = new JobConf();
        jobConf.set("hive.exec.orc.default.compress", compressionCodec.name());
        jobConf.set("hive.exec.orc.compress", compressionCodec.name());
        OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
        OrcConf.setIntVar(jobConf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
        OrcConf.setBoolVar(jobConf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
        return new OrcOutputFormat().getHiveRecordWriter(jobConf, new Path(outputFile.toURI()), Text.class,
                compressionCodec != NONE, createTableProperties("test", columnObjectInspector.getTypeName()),
                () -> {
                });
    }

    static SettableStructObjectInspector createSettableStructObjectInspector(String name,
            ObjectInspector objectInspector) {
        return getStandardStructObjectInspector(ImmutableList.of(name), ImmutableList.of(objectInspector));
    }

    private static Properties createTableProperties(String name, String type) {
        Properties orderTableProperties = new Properties();
        orderTableProperties.setProperty("columns", name);
        orderTableProperties.setProperty("columns.types", type);
        return orderTableProperties;
    }

    static class TempFile implements Closeable {
        private final File tempDir;
        private final File file;

        public TempFile() {
            tempDir = createTempDir();
            tempDir.mkdirs();
            file = new File(tempDir, "data.rcfile");
        }

        public File getFile() {
            return file;
        }

        @Override
        public void close() {
            FileUtils.deleteQuietly(tempDir);
            // hadoop creates crc files that must be deleted also, so just delete the whole directory
            //            deleteRecursively(tempDir);
        }
    }

    private static <T> Iterable<T> reverse(Iterable<T> iterable) {
        return Lists.reverse(ImmutableList.copyOf(iterable));
    }

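    // Returns a lazy view of the iterable with a null element inserted after every n values.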
    private static <T> Iterable<T> insertNullEvery(int n, Iterable<T> iterable) {
        return () -> new AbstractIterator<T>() {
            private final Iterator<T> delegate = iterable.iterator();
            private int position;

            @Override
            protected T computeNext() {
                position++;
                if (position > n) {
                    position = 0;
                    return null;
                }

                if (!delegate.hasNext()) {
                    return endOfData();
                }

                return delegate.next();
            }
        };
    }

    private static StandardStructObjectInspector createHiveStructInspector(ObjectInspector objectInspector) {
        return getStandardStructObjectInspector(ImmutableList.of("a", "b", "c"),
                ImmutableList.of(objectInspector, objectInspector, objectInspector));
    }

    private static List<Object> toHiveStruct(Object input) {
        return asList(input, input, input);
    }

    private static StandardMapObjectInspector createHiveMapInspector(ObjectInspector objectInspector) {
        return getStandardMapObjectInspector(objectInspector, objectInspector);
    }

    private static Map<Object, Object> toHiveMap(Object input, Object nullKeyValue) {
        Map<Object, Object> map = new HashMap<>();
        map.put(input != null ? input : nullKeyValue, input);
        return map;
    }

    private static StandardListObjectInspector createHiveListInspector(ObjectInspector objectInspector) {
        return getStandardListObjectInspector(objectInspector);
    }

    private static List<Object> toHiveList(Object input) {
        return asList(input, input, input, input);
    }

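    // Returns true if the inspector, or any inspector nested inside it, matches one of the given primitive categories.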
    private static boolean hasType(ObjectInspector objectInspector, PrimitiveCategory... types) {
        if (objectInspector instanceof PrimitiveObjectInspector) {
            PrimitiveObjectInspector primitiveInspector = (PrimitiveObjectInspector) objectInspector;
            PrimitiveCategory primitiveCategory = primitiveInspector.getPrimitiveCategory();
            for (PrimitiveCategory type : types) {
                if (primitiveCategory == type) {
                    return true;
                }
            }
            return false;
        }
        if (objectInspector instanceof ListObjectInspector) {
            ListObjectInspector listInspector = (ListObjectInspector) objectInspector;
            return hasType(listInspector.getListElementObjectInspector(), types);
        }
        if (objectInspector instanceof MapObjectInspector) {
            MapObjectInspector mapInspector = (MapObjectInspector) objectInspector;
            return hasType(mapInspector.getMapKeyObjectInspector(), types)
                    || hasType(mapInspector.getMapValueObjectInspector(), types);
        }
        if (objectInspector instanceof StructObjectInspector) {
            for (org.apache.hadoop.hive.serde2.objectinspector.StructField field : ((StructObjectInspector) objectInspector)
                    .getAllStructFieldRefs()) {
                if (hasType(field.getFieldObjectInspector(), types)) {
                    return true;
                }
            }
            return false;
        }
        throw new IllegalArgumentException("Unknown object inspector type " + objectInspector);
    }

    private static DataType arrayType(DataType elementType) {
        return DataTypes.createArrayType(elementType);
    }

    private static DataType mapType(DataType keyType, DataType valueType) {
        return MapType.apply(keyType, valueType);
    }

    private static DataType rowType(DataType... fieldTypes) {
        StructField[] structField = new StructField[fieldTypes.length];
        for (int i = 0; i < fieldTypes.length; i++) {
            structField[i] = DataTypes.createStructField("field_" + i, fieldTypes[i], true);
        }
        return DataTypes.createStructType(structField);
    }

    public static int advance(Iterator<?> iterator, int numberToAdvance) {
        Preconditions.checkNotNull(iterator);
        Preconditions.checkArgument(numberToAdvance >= 0, "numberToAdvance must be nonnegative");

        int i;
        for (i = 0; i < numberToAdvance && iterator.hasNext(); ++i) {
            iterator.next();
        }

        return i;
    }

}