com.facebook.hive.orc.TestOrcFile.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.hive.orc.TestOrcFile.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.hive.orc;

import static com.facebook.hive.orc.OrcTestUtils.byteBuf;
import static com.facebook.hive.orc.OrcTestUtils.bytes;
import static com.facebook.hive.orc.OrcTestUtils.inner;
import static com.facebook.hive.orc.OrcTestUtils.list;
import static com.facebook.hive.orc.OrcTestUtils.map;
import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertFalse;
import static junit.framework.Assert.assertNotNull;
import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertTrue;

import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;

import com.facebook.hive.orc.compression.CompressionKind;
import com.facebook.hive.orc.statistics.BooleanColumnStatistics;
import com.facebook.hive.orc.statistics.ColumnStatistics;
import com.facebook.hive.orc.statistics.DoubleColumnStatistics;
import com.facebook.hive.orc.statistics.IntegerColumnStatistics;
import com.facebook.hive.orc.statistics.StringColumnStatistics;
import com.google.common.collect.ImmutableList;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.ReaderWriterProfiler;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;

import com.facebook.hive.orc.OrcTestUtils.BigRow;
import com.facebook.hive.orc.OrcTestUtils.DoubleRow;
import com.facebook.hive.orc.OrcTestUtils.InnerStruct;
import com.facebook.hive.orc.OrcTestUtils.IntStruct;
import com.facebook.hive.orc.OrcTestUtils.MiddleStruct;
import com.facebook.hive.orc.OrcTestUtils.ReallyBigRow;
import com.facebook.hive.orc.OrcTestUtils.StringListWithId;
import com.facebook.hive.orc.OrcTestUtils.StringStruct;
import com.facebook.hive.orc.lazy.LazyTreeReader;
import com.facebook.hive.orc.lazy.OrcLazyBinary;
import com.facebook.hive.orc.lazy.OrcLazyBoolean;
import com.facebook.hive.orc.lazy.OrcLazyByte;
import com.facebook.hive.orc.lazy.OrcLazyDouble;
import com.facebook.hive.orc.lazy.OrcLazyFloat;
import com.facebook.hive.orc.lazy.OrcLazyInt;
import com.facebook.hive.orc.lazy.OrcLazyList;
import com.facebook.hive.orc.lazy.OrcLazyLong;
import com.facebook.hive.orc.lazy.OrcLazyMap;
import com.facebook.hive.orc.lazy.OrcLazyObject;
import com.facebook.hive.orc.lazy.OrcLazyObjectInspectorUtils;
import com.facebook.hive.orc.lazy.OrcLazyRow;
import com.facebook.hive.orc.lazy.OrcLazyShort;
import com.facebook.hive.orc.lazy.OrcLazyString;
import com.facebook.hive.orc.lazy.OrcLazyStruct;
import com.facebook.hive.orc.lazy.OrcLazyTimestamp;
import com.facebook.hive.orc.lazy.OrcLazyUnion;

/**
 * Tests for the top-level reader and writer of ORC files.
 */
public class TestOrcFile {

    Path workDir = new Path(
            System.getProperty("test.tmp.dir", "target" + File.separator + "test" + File.separator + "tmp"));

    Configuration conf;
    FileSystem fs;
    Path testFilePath, testFilePath2;

    @Rule
    public TestName testCaseName = new TestName();

    /**
     * Creates a fresh configuration and local file system, derives the two per-test file paths
     * from the running test's method name, and removes any copies left by an earlier run.
     */
    @Before
    public void openFileSystem() throws Exception {
        conf = new Configuration();
        fs = FileSystem.getLocal(conf);

        final String methodName = testCaseName.getMethodName();
        testFilePath = new Path(workDir, "TestOrcFile." + methodName + ".orc");
        testFilePath2 = new Path(workDir, "TestOrcFile2." + methodName + ".orc");

        // Non-recursive delete: each path names a single file.
        fs.delete(testFilePath, false);
        fs.delete(testFilePath2, false);
    }

    /**
     * Writes one populated {@link BigRow} and one {@link BigRow} whose fields are all null, reads
     * both back, and checks the hash code of every field of each materialized row.
     */
    @Test
    public void testHash() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.ZLIB, 10000,
                10000);
        writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1, 1L, (float) 1.0, 1.0, bytes(1), "1",
                new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(inner(3, "good"), inner(4, "bad")),
                map(inner(3, "good"), inner(4, "bad"))));
        writer.addRow(new BigRow(null, null, null, null, null, null, null, null, null, null, null, null));
        writer.close();
        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            // Expected hash code of each field of the populated row, in field order.
            final int[] populatedHashes = { 1, 1, 1, 1, 1, 1065353216, 1072693248, 32, 80, 8417130, 127296452,
                    7 };

            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            for (int i = 0; i < populatedHashes.length; i++) {
                assertEquals("populated row, field " + i, populatedHashes[i], row.getFieldValue(i).hashCode());
            }

            // Every field of the all-null row is expected to hash to 0.
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            row = (OrcStruct) lazyRow.materialize();
            for (int i = 0; i < populatedHashes.length; i++) {
                assertEquals("null row, field " + i, 0, row.getFieldValue(i).hashCode());
            }
        } finally {
            // Release the record reader even when an assertion above fails.
            rows.close();
        }
    }

    /**
     * Writes a single {@link BigRow}, reads it back, and checks that copy-constructing each
     * non-complex lazy field yields an object that materializes to the same value as its source.
     */
    @Test
    public void testDeepCopy() throws Exception {
        // Create a table and write a row to it
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.ZLIB, 10000,
                10000);
        writer.addRow(new BigRow(false, (byte) 1, (short) 1, 1, 1L, (float) 1.0, 1.0, bytes(1), "1",
                new MiddleStruct(inner(1, "bye"), inner(2, "sigh")), list(inner(3, "good"), inner(4, "bad")),
                map(inner(3, "good"), inner(4, "bad"))));

        writer.close();

        // Prepare to read back the row
        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();

            // Check that the object read equals what is expected, then copy the object, and make the
            // same check
            OrcLazyObject obj;
            assertEquals(false, ((BooleanWritable) ((OrcLazyBoolean) row.getFieldValue(0)).materialize()).get());
            obj = new OrcLazyBoolean((OrcLazyBoolean) row.getFieldValue(0));
            assertEquals(false, ((BooleanWritable) obj.materialize()).get());

            assertEquals(1, ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());
            obj = new OrcLazyByte((OrcLazyByte) row.getFieldValue(1));
            assertEquals(1, ((ByteWritable) obj.materialize()).get());

            assertEquals(1, ((ShortWritable) ((OrcLazyShort) row.getFieldValue(2)).materialize()).get());
            obj = new OrcLazyShort((OrcLazyShort) row.getFieldValue(2));
            assertEquals(1, ((ShortWritable) obj.materialize()).get());

            assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(3)).materialize()).get());
            obj = new OrcLazyInt((OrcLazyInt) row.getFieldValue(3));
            assertEquals(1, ((IntWritable) obj.materialize()).get());

            assertEquals(1, ((LongWritable) ((OrcLazyLong) row.getFieldValue(4)).materialize()).get());
            obj = new OrcLazyLong((OrcLazyLong) row.getFieldValue(4));
            assertEquals(1, ((LongWritable) obj.materialize()).get());

            assertEquals(1.0f, ((FloatWritable) ((OrcLazyFloat) row.getFieldValue(5)).materialize()).get());
            obj = new OrcLazyFloat((OrcLazyFloat) row.getFieldValue(5));
            assertEquals(1.0f, ((FloatWritable) obj.materialize()).get());

            assertEquals(1.0, ((DoubleWritable) ((OrcLazyDouble) row.getFieldValue(6)).materialize()).get());
            obj = new OrcLazyDouble((OrcLazyDouble) row.getFieldValue(6));
            assertEquals(1.0, ((DoubleWritable) obj.materialize()).get());

            assertEquals(bytes(1), ((OrcLazyBinary) row.getFieldValue(7)).materialize());
            obj = new OrcLazyBinary((OrcLazyBinary) row.getFieldValue(7));
            assertEquals(bytes(1), obj.materialize());

            assertEquals("1", ((Text) ((OrcLazyString) row.getFieldValue(8)).materialize()).toString());
            obj = new OrcLazyString((OrcLazyString) row.getFieldValue(8));
            assertEquals("1", ((Text) obj.materialize()).toString());

            // Currently copies are not supported for complex types
        } finally {
            // Release the record reader even when an assertion above fails.
            rows.close();
        }
    }

    /**
     * Writes a single double column as four equally sized runs (zeros, random, zeros, random),
     * then reads sequentially through the file with two compression strides per read and checks
     * the values found at rows 40000 and 120000.
     */
    @Test
    public void testSeekAcrossChunks() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(DoubleRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }

        // Create a table consisting of a single column of doubles
        // Add enough values to it to get 3 index strides (doubles are 8 bytes) more is ok
        // Note that the compression buffer size and index stride length are very important
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 2097152, CompressionKind.ZLIB,
                262144, 10000);
        Random rand = new Random(42);
        final int rowsPerBlock = 32768;
        final int blockCount = 4;
        double[] values = new double[blockCount * rowsPerBlock];

        // Blocks 0 and 2 are all 0's (important so the third compresses to the same size as the
        // first); blocks 1 and 3 are random doubles.
        for (int block = 0; block < blockCount; block++) {
            boolean randomBlock = (block % 2) == 1;
            for (int i = 0; i < rowsPerBlock; i++) {
                int index = block * rowsPerBlock + i;
                values[index] = randomBlock ? rand.nextDouble() : 0;
                writer.addRow(new DoubleRow(values[index]));
            }
        }

        writer.close();
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_READ_COMPRESSION_STRIDES, 2);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, false);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);

        StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
        List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
        DoubleObjectInspector columnInspector = (DoubleObjectInspector) fields.get(0).getFieldObjectInspector();

        RecordReader rows = reader.rows(null);
        try {
            Object row = null;

            // Skip enough values to get to the 2nd index stride in the first chunk
            for (int i = 0; i < 40001; i++) {
                row = rows.next(row);
            }

            // This will set previousOffset to be the size of the first compression block and the
            // compressionOffset to some other value (doesn't matter what point is it's different from
            // the start of the compression block)
            assertEquals(values[40000],
                    columnInspector.get(readerInspector.getStructFieldData(row, fields.get(0))));

            // Skip enough values to get to the 2nd index stride of the second chunk.
            // Keep the returned row so the check below does not depend on the reader updating the
            // previous row object in place.
            for (int i = 0; i < 80000; i++) {
                row = rows.next(row);
            }

            // When seek is called, previousOffset will equal newCompressedOffset since the former is
            // the length of the first compression block and the latter is the length of the third
            // compression block (remember the chunks contain 2 index strides), so if we only check
            // this (or for some other reason) we will not adjust compressedIndex, we will read the
            // wrong data
            assertEquals(values[120000],
                    columnInspector.get(readerInspector.getStructFieldData(row, fields.get(0))));
        } finally {
            // Release the record reader even when an assertion above fails.
            rows.close();
        }
    }

    /**
     * Round-trips two {@link BigRow}s through a ZLIB-compressed file and checks the column
     * statistics, the reader's object inspectors, and every field of both rows.
     */
    @Test
    public void test1() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 100000, CompressionKind.ZLIB, 10000,
                10000);
        writer.addRow(new BigRow(false, (byte) 1, (short) 1024, 65536, Long.MAX_VALUE, (float) 1.0, -15.0,
                bytes(0, 1, 2, 3, 4), "hi", new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
                list(inner(3, "good"), inner(4, "bad")), map()));
        writer.addRow(new BigRow(true, (byte) 100, (short) 2048, 65536, Long.MAX_VALUE, (float) 2.0, -5.0, bytes(),
                "bye", new MiddleStruct(inner(1, "bye"), inner(2, "sigh")),
                list(inner(100000000, "cat"), inner(-100000, "in"), inner(1234, "hat")),
                map(inner(5, "chani"), inner(1, "mauddib"))));
        writer.close();
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);

        // ---- column statistics ----
        ColumnStatistics[] stats = reader.getStatistics();
        assertEquals(2, stats[1].getNumberOfValues());
        BooleanColumnStatistics booleanStats = (BooleanColumnStatistics) stats[1];
        assertEquals(1, booleanStats.getFalseCount());
        assertEquals(1, booleanStats.getTrueCount());
        assertEquals("count: 2 true: 1", stats[1].toString());

        IntegerColumnStatistics shortStats = (IntegerColumnStatistics) stats[3];
        assertEquals(2048, shortStats.getMaximum());
        assertEquals(1024, shortStats.getMinimum());
        assertTrue(shortStats.isSumDefined());
        assertEquals(3072, shortStats.getSum());
        assertEquals("count: 2 min: 1024 max: 2048 sum: 3072", stats[3].toString());

        IntegerColumnStatistics longStats = (IntegerColumnStatistics) stats[5];
        assertEquals(Long.MAX_VALUE, longStats.getMaximum());
        assertEquals(Long.MAX_VALUE, longStats.getMinimum());
        // Two Long.MAX_VALUE entries were written, and the sum is expected to be reported undefined.
        assertFalse(longStats.isSumDefined());
        assertEquals("count: 2 min: 9223372036854775807 max: 9223372036854775807", stats[5].toString());

        DoubleColumnStatistics doubleStats = (DoubleColumnStatistics) stats[7];
        assertEquals(-15.0, doubleStats.getMinimum());
        assertEquals(-5.0, doubleStats.getMaximum());
        assertEquals(-20.0, doubleStats.getSum(), 0.00001);
        assertEquals("count: 2 min: -15.0 max: -5.0 sum: -20.0", stats[7].toString());

        assertEquals("count: 2 min: bye max: hi", stats[9].toString());

        // ---- object inspectors ----
        StructObjectInspector readerInspector = (StructObjectInspector) reader.getObjectInspector();
        assertEquals(ObjectInspector.Category.STRUCT, readerInspector.getCategory());
        assertEquals("struct<boolean1:boolean,byte1:tinyint,short1:smallint,"
                + "int1:int,long1:bigint,float1:float,double1:double,bytes1:"
                + "binary,string1:string,middle:struct<list:array<struct<int1:int,"
                + "string1:string>>>,list:array<struct<int1:int,string1:string>>,"
                + "map:map<string,struct<int1:int,string1:string>>>", readerInspector.getTypeName());
        List<? extends StructField> fields = readerInspector.getAllStructFieldRefs();
        BooleanObjectInspector booleanOI = (BooleanObjectInspector) readerInspector
                .getStructFieldRef("boolean1").getFieldObjectInspector();
        ByteObjectInspector byteOI = (ByteObjectInspector) readerInspector
                .getStructFieldRef("byte1").getFieldObjectInspector();
        ShortObjectInspector shortOI = (ShortObjectInspector) readerInspector
                .getStructFieldRef("short1").getFieldObjectInspector();
        IntObjectInspector intOI = (IntObjectInspector) readerInspector
                .getStructFieldRef("int1").getFieldObjectInspector();
        LongObjectInspector longOI = (LongObjectInspector) readerInspector
                .getStructFieldRef("long1").getFieldObjectInspector();
        FloatObjectInspector floatOI = (FloatObjectInspector) readerInspector
                .getStructFieldRef("float1").getFieldObjectInspector();
        DoubleObjectInspector doubleOI = (DoubleObjectInspector) readerInspector
                .getStructFieldRef("double1").getFieldObjectInspector();
        BinaryObjectInspector binaryOI = (BinaryObjectInspector) readerInspector
                .getStructFieldRef("bytes1").getFieldObjectInspector();
        StringObjectInspector stringOI = (StringObjectInspector) readerInspector
                .getStructFieldRef("string1").getFieldObjectInspector();
        StructObjectInspector middleOI = (StructObjectInspector) readerInspector
                .getStructFieldRef("middle").getFieldObjectInspector();
        List<? extends StructField> middleFields = middleOI.getAllStructFieldRefs();
        ListObjectInspector middleListOI = (ListObjectInspector) middleFields.get(0).getFieldObjectInspector();
        StructObjectInspector innerOI = (StructObjectInspector) middleListOI.getListElementObjectInspector();
        List<? extends StructField> innerFields = innerOI.getAllStructFieldRefs();
        IntObjectInspector innerIntOI = (IntObjectInspector) innerFields.get(0).getFieldObjectInspector();
        StringObjectInspector innerStringOI = (StringObjectInspector) innerFields.get(1)
                .getFieldObjectInspector();
        ListObjectInspector listOI = (ListObjectInspector) readerInspector
                .getStructFieldRef("list").getFieldObjectInspector();
        MapObjectInspector mapOI = (MapObjectInspector) readerInspector
                .getStructFieldRef("map").getFieldObjectInspector();
        // The element/value inspectors are not read afterwards, but the casts still verify their
        // types.
        StructObjectInspector listElementOI = (StructObjectInspector) listOI.getListElementObjectInspector();
        StringObjectInspector mapKeyOI = (StringObjectInspector) mapOI.getMapKeyObjectInspector();
        StructObjectInspector mapValueOI = (StructObjectInspector) mapOI.getMapValueObjectInspector();

        RecordReader rows = reader.rows(null);
        Object row = rows.next(null);
        assertNotNull(row);

        // ---- first row ----
        assertFalse(booleanOI.get(readerInspector.getStructFieldData(row, fields.get(0))));
        assertEquals(1, byteOI.get(readerInspector.getStructFieldData(row, fields.get(1))));
        assertEquals(1024, shortOI.get(readerInspector.getStructFieldData(row, fields.get(2))));
        assertEquals(65536, intOI.get(readerInspector.getStructFieldData(row, fields.get(3))));
        assertEquals(Long.MAX_VALUE, longOI.get(readerInspector.getStructFieldData(row, fields.get(4))));
        assertEquals(1.0, floatOI.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
        assertEquals(-15.0, doubleOI.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
        assertEquals(bytes(0, 1, 2, 3, 4),
                binaryOI.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
        assertEquals("hi",
                stringOI.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
        List<?> middleList = middleListOI.getList(middleOI.getStructFieldData(
                readerInspector.getStructFieldData(row, fields.get(9)), middleFields.get(0)));
        assertNotNull(middleList);
        assertEquals(2, middleList.size());
        assertEquals(1, innerIntOI.get(innerOI.getStructFieldData(middleList.get(0), innerFields.get(0))));
        assertEquals("bye", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(middleList.get(0), innerFields.get(1))));
        assertEquals(2, innerIntOI.get(innerOI.getStructFieldData(middleList.get(1), innerFields.get(0))));
        assertEquals("sigh", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(middleList.get(1), innerFields.get(1))));
        List<?> listValue = listOI.getList(readerInspector.getStructFieldData(row, fields.get(10)));
        assertEquals(2, listValue.size());
        assertEquals(3, innerIntOI.get(innerOI.getStructFieldData(listValue.get(0), innerFields.get(0))));
        assertEquals("good", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(listValue.get(0), innerFields.get(1))));
        assertEquals(4, innerIntOI.get(innerOI.getStructFieldData(listValue.get(1), innerFields.get(0))));
        assertEquals("bad", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(listValue.get(1), innerFields.get(1))));
        Map<?, ?> mapValue = mapOI.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
        assertEquals(0, mapValue.size());

        // ---- second row ----
        assertTrue(rows.hasNext());
        row = rows.next(row);
        assertTrue(booleanOI.get(readerInspector.getStructFieldData(row, fields.get(0))));
        assertEquals(100, byteOI.get(readerInspector.getStructFieldData(row, fields.get(1))));
        assertEquals(2048, shortOI.get(readerInspector.getStructFieldData(row, fields.get(2))));
        assertEquals(65536, intOI.get(readerInspector.getStructFieldData(row, fields.get(3))));
        assertEquals(Long.MAX_VALUE, longOI.get(readerInspector.getStructFieldData(row, fields.get(4))));
        assertEquals(2.0, floatOI.get(readerInspector.getStructFieldData(row, fields.get(5))), 0.00001);
        assertEquals(-5.0, doubleOI.get(readerInspector.getStructFieldData(row, fields.get(6))), 0.00001);
        assertEquals(bytes(),
                binaryOI.getPrimitiveWritableObject(readerInspector.getStructFieldData(row, fields.get(7))));
        assertEquals("bye",
                stringOI.getPrimitiveJavaObject(readerInspector.getStructFieldData(row, fields.get(8))));
        middleList = middleListOI.getList(middleOI.getStructFieldData(
                readerInspector.getStructFieldData(row, fields.get(9)), middleFields.get(0)));
        assertNotNull(middleList);
        assertEquals(2, middleList.size());
        assertEquals(1, innerIntOI.get(innerOI.getStructFieldData(middleList.get(0), innerFields.get(0))));
        assertEquals("bye", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(middleList.get(0), innerFields.get(1))));
        assertEquals(2, innerIntOI.get(innerOI.getStructFieldData(middleList.get(1), innerFields.get(0))));
        assertEquals("sigh", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(middleList.get(1), innerFields.get(1))));
        listValue = listOI.getList(readerInspector.getStructFieldData(row, fields.get(10)));
        assertEquals(3, listValue.size());
        assertEquals(100000000,
                innerIntOI.get(innerOI.getStructFieldData(listValue.get(0), innerFields.get(0))));
        assertEquals("cat", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(listValue.get(0), innerFields.get(1))));
        assertEquals(-100000, innerIntOI.get(innerOI.getStructFieldData(listValue.get(1), innerFields.get(0))));
        assertEquals("in", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(listValue.get(1), innerFields.get(1))));
        assertEquals(1234, innerIntOI.get(innerOI.getStructFieldData(listValue.get(2), innerFields.get(0))));
        assertEquals("hat", innerStringOI
                .getPrimitiveJavaObject(innerOI.getStructFieldData(listValue.get(2), innerFields.get(1))));
        mapValue = mapOI.getMap(readerInspector.getStructFieldData(row, fields.get(11)));
        assertEquals(2, mapValue.size());

        // Each expected key must show up exactly once; any other key is an error.
        boolean[] seen = new boolean[2];
        for (Object key : mapValue.keySet()) {
            String keyText = mapKeyOI.getPrimitiveJavaObject(key);
            if (keyText.equals("chani")) {
                assertFalse(seen[0]);
                Object entry = mapValue.get(key);
                assertEquals(5, innerIntOI.get(innerOI.getStructFieldData(entry, innerFields.get(0))));
                assertEquals(keyText,
                        innerStringOI.getPrimitiveJavaObject(innerOI.getStructFieldData(entry, innerFields.get(1))));
                seen[0] = true;
            } else if (keyText.equals("mauddib")) {
                assertFalse(seen[1]);
                Object entry = mapValue.get(key);
                assertEquals(1, innerIntOI.get(innerOI.getStructFieldData(entry, innerFields.get(0))));
                assertEquals(keyText,
                        innerStringOI.getPrimitiveJavaObject(innerOI.getStructFieldData(entry, innerFields.get(1))));
                seen[1] = true;
            } else {
                throw new IllegalArgumentException("Unknown key " + keyText);
            }
        }
        assertTrue(seen[0]);
        assertTrue(seen[1]);

        // ---- end of file ----
        assertFalse(rows.hasNext());
        rows.close();
    }

    /**
     * Writes 21000 pseudo-random {@link InnerStruct} rows, checks the file statistics and type
     * tree, then reads the file through two readers that each select a different single column
     * and compares the values with the regenerated pseudo-random sequences.
     */
    @Test
    public void testColumnProjection() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 1000, CompressionKind.NONE, 100,
                1000);

        // Fixed seeds so the same sequences can be regenerated when reading back.
        Random intSource = new Random(1);
        Random stringSource = new Random(2);
        int minInt = 0;
        int maxInt = 0;
        String minStr = null;
        String maxStr = null;
        for (int i = 0; i < 21000; ++i) {
            int intValue = intSource.nextInt();
            String stringValue = Long.toHexString(stringSource.nextLong());
            // The first row seeds all four extremes; later rows only tighten them.
            minInt = (i == 0 || intValue < minInt) ? intValue : minInt;
            maxInt = (i == 0 || intValue > maxInt) ? intValue : maxInt;
            minStr = (i == 0 || stringValue.compareTo(minStr) < 0) ? stringValue : minStr;
            maxStr = (i == 0 || stringValue.compareTo(maxStr) > 0) ? stringValue : maxStr;
            writer.addRow(inner(intValue, stringValue));
        }
        writer.close();
        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);

        // statistics: one entry per type, all with the full row count
        ColumnStatistics[] stats = reader.getStatistics();
        assertEquals(3, stats.length);
        for (ColumnStatistics columnStats : stats) {
            assertEquals(21000, columnStats.getNumberOfValues());
            if (columnStats instanceof IntegerColumnStatistics) {
                IntegerColumnStatistics intStats = (IntegerColumnStatistics) columnStats;
                assertEquals(minInt, intStats.getMinimum());
                assertEquals(maxInt, intStats.getMaximum());
            } else if (columnStats instanceof StringColumnStatistics) {
                StringColumnStatistics stringStats = (StringColumnStatistics) columnStats;
                assertEquals(maxStr, stringStats.getMaximum());
                assertEquals(minStr, stringStats.getMinimum());
            }
        }

        // types: a struct root with an int child and a string child
        List<OrcProto.Type> types = reader.getTypes();
        assertEquals(3, types.size());
        OrcProto.Type rootType = types.get(0);
        assertEquals(OrcProto.Type.Kind.STRUCT, rootType.getKind());
        assertEquals(2, rootType.getSubtypesCount());
        assertEquals(1, rootType.getSubtypes(0));
        assertEquals(2, rootType.getSubtypes(1));
        OrcProto.Type intType = types.get(1);
        assertEquals(OrcProto.Type.Kind.INT, intType.getKind());
        assertEquals(0, intType.getSubtypesCount());
        OrcProto.Type stringType = types.get(2);
        assertEquals(OrcProto.Type.Kind.STRING, stringType.getKind());
        assertEquals(0, stringType.getSubtypesCount());

        // contents: one reader includes column 1 only, the other column 2 only
        RecordReader intRows = reader.rows(new boolean[] { true, true, false });
        RecordReader stringRows = reader.rows(new boolean[] { true, false, true });
        intSource = new Random(1);
        stringSource = new Random(2);
        OrcLazyStruct intRow = null;
        OrcLazyStruct stringRow = null;
        for (int i = 0; i < 21000; ++i) {
            assertTrue(intRows.hasNext());
            assertTrue(stringRows.hasNext());
            intRow = (OrcLazyStruct) intRows.next(intRow);
            stringRow = (OrcLazyStruct) stringRows.next(stringRow);

            int expectedInt = intSource.nextInt();
            OrcLazyInt lazyInt = (OrcLazyInt) ((OrcStruct) intRow.materialize()).getFieldValue(0);
            assertEquals(expectedInt, ((IntWritable) lazyInt.materialize()).get());

            String expectedString = Long.toHexString(stringSource.nextLong());
            OrcLazyString lazyString = (OrcLazyString) ((OrcStruct) stringRow.materialize()).getFieldValue(1);
            assertEquals(expectedString, lazyString.materialize().toString());
        }
        assertFalse(intRows.hasNext());
        assertFalse(stringRows.hasNext());
        intRows.close();
        stringRows.close();
    }

    /**
     * Writes a file without adding any rows and verifies what the reader
     * reports for it: no rows, no stripes, no user metadata keys, no
     * compression, and the content length asserted below.
     *
     * @throws Exception if writing or reading the file fails
     */
    @Test
    public void testEmptyFile() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 1000, CompressionKind.NONE, 100,
                10000);
        // Closed immediately: no rows are ever added.
        writer.close();
        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);

        // Always release the record reader, even when the assertion fails,
        // so the test does not leave it open.
        RecordReader rows = reader.rows(null);
        try {
            assertFalse(rows.hasNext());
        } finally {
            rows.close();
        }

        assertEquals(CompressionKind.NONE, reader.getCompression());
        assertEquals(0, reader.getNumberOfRows());
        assertEquals(0, reader.getCompressionSize());
        assertFalse(reader.getMetadataKeys().iterator().hasNext());
        assertEquals(3, reader.getContentLength());
        assertFalse(reader.getStripes().iterator().hasNext());
    }

    /**
     * Verifies user metadata round-trips through a file: values can be added
     * before and after rows are written, re-adding a key replaces its earlier
     * value, a large (40000 byte) value is preserved, and looking up a key
     * that was never written raises {@link IllegalArgumentException}.
     *
     * @throws Exception if writing or reading the file fails
     */
    @Test
    public void testMetaData() throws Exception {
        final ObjectInspector rowInspector;
        synchronized (TestOrcFile.class) {
            rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(BigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }

        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer out = OrcFile.createWriter(fs, testFilePath, conf, rowInspector, 1000, CompressionKind.NONE, 100,
                10000);
        out.addUserMetadata("my.meta", byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128));
        out.addUserMetadata("clobber", byteBuf(1, 2, 3));
        out.addUserMetadata("clobber", byteBuf(4, 3, 2, 1));

        // Large value filled with reproducible pseudo-random bytes.
        byte[] bigPayload = new byte[40000];
        new Random(0).nextBytes(bigPayload);
        ByteBuffer bigBuf = ByteBuffer.wrap(bigPayload);
        out.addUserMetadata("big", bigBuf);
        // Reset the position so the buffer can be compared as a whole later on.
        bigBuf.position(0);

        out.addRow(new BigRow(true, (byte) 127, (short) 1024, 42, 42L * 1024 * 1024 * 1024, (float) 3.1415,
                -2.713, null, null, null, null, null));
        // The value set after the row was added is the one expected on read.
        out.addUserMetadata("clobber", byteBuf(5, 7, 11, 13, 17, 19));
        out.close();

        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader in = OrcFile.createReader(fs, testFilePath, conf);
        assertEquals(byteBuf(5, 7, 11, 13, 17, 19), in.getMetadataValue("clobber"));
        assertEquals(byteBuf(1, 2, 3, 4, 5, 6, 7, -1, -2, 127, -128), in.getMetadataValue("my.meta"));
        assertEquals(bigBuf, in.getMetadataValue("big"));

        try {
            in.getMetadataValue("unknown");
            assertTrue(false);
        } catch (IllegalArgumentException expectedFailure) {
            // PASS
        }

        // Exactly the three keys written above must be listed, and nothing else.
        int recognized = 0;
        for (String key : in.getMetadataKeys()) {
            boolean known = "my.meta".equals(key) || "clobber".equals(key) || "big".equals(key);
            if (!known) {
                throw new IllegalArgumentException("unknown key " + key);
            }
            recognized++;
        }
        assertEquals(3, recognized);
    }

    /**
     * Round-trips rows of type
     * {@code struct<time:timestamp,union:uniontype<int,string>>}.
     *
     * We test union and timestamp separately since we need to make the
     * object inspector manually. (The Hive reflection-based one doesn't
     * handle them properly.)
     *
     * The test writes 5309 rows (6 hand-written rows, 300 generated rows,
     * 5000 constant rows and 3 trailing rows), then checks the stripe layout
     * and reads every row back in the same order.
     */
    @Test
    public void testUnionAndTimestamp() throws Exception {
        // Type tree: 0 = struct(time, union), 1 = timestamp, 2 = union(3, 4), 3 = int, 4 = string.
        final List<OrcProto.Type> types = ImmutableList.of(
                OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRUCT).addFieldNames("time")
                        .addFieldNames("union").addSubtypes(1).addSubtypes(2).build(),
                OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.TIMESTAMP).build(),
                OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.UNION).addSubtypes(3).addSubtypes(4).build(),
                OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.INT).build(),
                OrcProto.Type.newBuilder().setKind(OrcProto.Type.Kind.STRING).build());

        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = OrcLazyObjectInspectorUtils.createWritableObjectInspector(0, types);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 15 * 1024, CompressionKind.NONE,
                100, 10000);
        // A single row and union object are mutated and re-added for every row written.
        OrcStruct row = new OrcStruct(types.get(0).getFieldNamesList());
        OrcUnion union = new OrcUnion();
        row.setFieldValue(1, union);
        // Row 1: timestamp + int branch of the union.
        row.setFieldValue(0, Timestamp.valueOf("2000-03-12 15:00:00"));
        union.set((byte) 0, new IntWritable(42));
        writer.addRow(row);
        // Row 2: timestamp with nanoseconds + string branch of the union.
        row.setFieldValue(0, Timestamp.valueOf("2000-03-20 12:00:00.123456789"));
        union.set((byte) 1, new Text("hello"));
        writer.addRow(row);
        // Row 3: both fields null.
        row.setFieldValue(0, null);
        row.setFieldValue(1, null);
        writer.addRow(row);
        // Rows 4 and 5: non-null union holding a null value under tag 0, then tag 1.
        row.setFieldValue(1, union);
        union.set((byte) 0, null);
        writer.addRow(row);
        union.set((byte) 1, null);
        writer.addRow(row);
        // Row 6: a timestamp in 1900 + int branch.
        union.set((byte) 0, new IntWritable(200000));
        row.setFieldValue(0, Timestamp.valueOf("1900-01-01 00:00:00"));
        writer.addRow(row);
        // 300 rows: one per year, alternating int (even years) and string (odd years) branches.
        for (int i = 1900; i < 2200; ++i) {
            row.setFieldValue(0, Timestamp.valueOf(i + "-05-05 12:34:56." + i));
            if ((i & 1) == 0) {
                union.set((byte) 0, new IntWritable(i * i));
            } else {
                union.set((byte) 1, new Text(new Integer(i * i).toString()));
            }
            writer.addRow(row);
        }
        // let's add a lot of constant rows to test the rle
        row.setFieldValue(0, null);
        union.set((byte) 0, new IntWritable(1732050807));
        for (int i = 0; i < 5000; ++i) {
            writer.addRow(row);
        }
        // Three trailing rows with distinct int values.
        union.set((byte) 0, new IntWritable(0));
        writer.addRow(row);
        union.set((byte) 0, new IntWritable(10));
        writer.addRow(row);
        union.set((byte) 0, new IntWritable(138));
        writer.addRow(row);
        writer.close();
        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        assertEquals(false, reader.getMetadataKeys().iterator().hasNext());
        // 6 + 300 + 5000 + 3 rows were written above.
        assertEquals(5309, reader.getNumberOfRows());
        // Walk the stripes: each one must start where the previous one ended, the
        // per-stripe row counts must add up to the file total, and the end of the
        // last stripe must equal the reported content length.
        int stripeCount = 0;
        int rowCount = 0;
        long currentOffset = -1;
        for (StripeInformation stripe : reader.getStripes()) {
            stripeCount += 1;
            rowCount += stripe.getNumberOfRows();
            if (currentOffset < 0) {
                // First stripe: just record where it ends.
                currentOffset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength()
                        + stripe.getFooterLength();
            } else {
                assertEquals(currentOffset, stripe.getOffset());
                currentOffset += stripe.getIndexLength() + stripe.getDataLength() + stripe.getFooterLength();
            }
        }
        assertEquals(reader.getNumberOfRows(), rowCount);
        assertEquals(2, stripeCount);
        assertEquals(reader.getContentLength(), currentOffset);
        RecordReader rows = reader.rows(null);
        assertEquals(0, rows.getRowNumber());
        assertEquals(0.0, rows.getProgress(), 0.000001);
        assertEquals(true, rows.hasNext());
        // Row 1.
        OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
        row = (OrcStruct) lazyRow.materialize();
        inspector = reader.getObjectInspector();
        assertEquals("struct<time:timestamp,union:uniontype<int,string>>", inspector.getTypeName());
        assertEquals(Timestamp.valueOf("2000-03-12 15:00:00"),
                ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
        union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(0, union.getTag());
        assertEquals(new IntWritable(42), union.getObject());
        // Row 2.
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(Timestamp.valueOf("2000-03-20 12:00:00.123456789"),
                ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
        // NOTE(review): the result of materialize() is not assigned here; the
        // assertions below read the `union` reference obtained for the previous
        // row, so they appear to rely on materialize() updating that same object
        // in place -- confirm against OrcLazyUnion.
        ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(1, union.getTag());
        assertEquals(new Text("hello"), union.getObject());
        // Row 3: both fields were written as null.
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
        assertEquals(null, ((OrcLazyObject) row.getFieldValue(1)).materialize());
        // Row 4: non-null union with a null value under tag 0. The union
        // reference is re-fetched here, after the null union of row 3.
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
        union = (OrcUnion) ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(0, union.getTag());
        assertEquals(null, union.getObject());
        // Row 5: null value under tag 1 (again read through the existing `union` reference).
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(null, ((OrcLazyObject) row.getFieldValue(0)).materialize());
        ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(1, union.getTag());
        assertEquals(null, union.getObject());
        // Row 6.
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        assertEquals(Timestamp.valueOf("1900-01-01 00:00:00"),
                ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
        ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(new IntWritable(200000), union.getObject());
        // The 300 per-year rows, mirroring the write loop above.
        for (int i = 1900; i < 2200; ++i) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            row = (OrcStruct) lazyRow.materialize();
            assertEquals(Timestamp.valueOf(i + "-05-05 12:34:56." + i),
                    ((TimestampWritable) ((OrcLazyTimestamp) row.getFieldValue(0)).materialize()).getTimestamp());
            ((OrcLazyUnion) row.getFieldValue(1)).materialize();
            if ((i & 1) == 0) {
                assertEquals(0, union.getTag());
                assertEquals(new IntWritable(i * i), union.getObject());
            } else {
                assertEquals(1, union.getTag());
                assertEquals(new Text(new Integer(i * i).toString()), union.getObject());
            }
        }
        // The 5000 constant rows.
        for (int i = 0; i < 5000; ++i) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            row = (OrcStruct) lazyRow.materialize();
            ((OrcLazyUnion) row.getFieldValue(1)).materialize();
            assertEquals(new IntWritable(1732050807), union.getObject());
        }
        // The three trailing rows: 0, 10, 138.
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(new IntWritable(0), union.getObject());
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(new IntWritable(10), union.getObject());
        lazyRow = (OrcLazyStruct) rows.next(lazyRow);
        row = (OrcStruct) lazyRow.materialize();
        ((OrcLazyUnion) row.getFieldValue(1)).materialize();
        assertEquals(new IntWritable(138), union.getObject());
        // The reader must now be exhausted and report full progress.
        assertEquals(false, rows.hasNext());
        assertEquals(1.0, rows.getProgress(), 0.00001);
        assertEquals(reader.getNumberOfRows(), rows.getRowNumber());
        rows.close();
    }

    /**
     * Writes 10000 pseudo-random {@code InnerStruct} rows to a file created
     * with {@link CompressionKind#SNAPPY}, then reads them back and checks
     * each row against the same seeded random sequence.
     *
     * @throws Exception if writing or reading the file fails
     */
    @Test
    public void testSnappy() throws Exception {
        final ObjectInspector structInspector;
        synchronized (TestOrcFile.class) {
            structInspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        final int rowTotal = 10000;
        final long seed = 12;

        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer out = OrcFile.createWriter(fs, testFilePath, conf, structInspector, 1000, CompressionKind.SNAPPY,
                100, 10000);
        Random source = new Random(seed);
        for (int written = 0; written < rowTotal; ++written) {
            int number = source.nextInt();
            String hex = Integer.toHexString(source.nextInt());
            out.addRow(new InnerStruct(number, hex));
        }
        out.close();

        Reader in = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader cursor = in.rows(null);
        // Replaying the generator with the same seed yields the values written above.
        Random replay = new Random(seed);
        OrcLazyStruct lazy = null;
        for (int seen = 0; seen < rowTotal; ++seen) {
            assertTrue(cursor.hasNext());
            lazy = (OrcLazyStruct) cursor.next(lazy);

            int wantInt = replay.nextInt();
            OrcLazyInt intField = (OrcLazyInt) ((OrcStruct) lazy.materialize()).getFieldValue(0);
            assertEquals(wantInt, ((IntWritable) intField.materialize()).get());

            String wantHex = Integer.toHexString(replay.nextInt());
            OrcLazyString stringField = (OrcLazyString) ((OrcStruct) lazy.materialize()).getFieldValue(1);
            assertEquals(wantHex, stringField.materialize().toString());
        }
        assertFalse(cursor.hasNext());
        cursor.close();
    }

    /**
     * Reads and writes a randomly generated snappy file whose row index
     * stride is 0. The test checks that the reader reports a stride of 0 and
     * an index length of 0 for the first stripe, and that all 50000 rows
     * (10000 distinct rows, each written five times) read back correctly.
     *
     * @throws Exception if writing or reading the file fails
     */
    @Test
    public void testWithoutIndex() throws Exception {
        final ObjectInspector structInspector;
        synchronized (TestOrcFile.class) {
            structInspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        final int distinctRows = 10000;
        final int copiesPerRow = 5;
        final long seed = 24;

        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer out = OrcFile.createWriter(fs, testFilePath, conf, structInspector, 5000, CompressionKind.SNAPPY,
                1000, 0);
        Random source = new Random(seed);
        for (int n = 0; n < distinctRows; ++n) {
            int number = source.nextInt();
            String bits = Integer.toBinaryString(source.nextInt());
            InnerStruct repeated = new InnerStruct(number, bits);
            for (int copy = 0; copy < copiesPerRow; ++copy) {
                out.addRow(repeated);
            }
        }
        out.close();

        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader in = OrcFile.createReader(fs, testFilePath, conf);
        assertEquals(50000, in.getNumberOfRows());
        assertEquals(0, in.getRowIndexStride());
        // The first stripe must carry data but no index.
        StripeInformation firstStripe = in.getStripes().iterator().next();
        assertTrue(firstStripe.getDataLength() != 0);
        assertEquals(0, firstStripe.getIndexLength());

        RecordReader cursor = in.rows(null);
        // Replaying the generator with the same seed yields the values written above.
        Random replay = new Random(seed);
        OrcLazyStruct lazy = null;
        OrcStruct struct = null;
        for (int n = 0; n < distinctRows; ++n) {
            int wantInt = replay.nextInt();
            String wantBits = Integer.toBinaryString(replay.nextInt());
            for (int copy = 0; copy < copiesPerRow; ++copy) {
                assertTrue(cursor.hasNext());
                lazy = (OrcLazyStruct) cursor.next(lazy);
                struct = (OrcStruct) lazy.materialize();
                OrcLazyInt intField = (OrcLazyInt) struct.getFieldValue(0);
                assertEquals(wantInt, ((IntWritable) intField.materialize()).get());
                OrcLazyString stringField = (OrcLazyString) struct.getFieldValue(1);
                assertEquals(wantBits, stringField.materialize().toString());
            }
        }
        assertFalse(cursor.hasNext());
        cursor.close();
    }

    /**
     * Holder for the generated column values that random rows are built
     * from: one entry per row in each of the value arrays, plus a fixed pool
     * of 128 words.
     */
    private static class RandomRowInputs {
        long[] intValues;
        double[] doubleValues;
        String[] stringValues;
        BytesWritable[] byteValues;
        String[] words;

        /**
         * Allocates the (initially empty) arrays.
         *
         * @param count number of entries in each per-row value array
         */
        public RandomRowInputs(int count) {
            this.words = new String[128];
            this.intValues = new long[count];
            this.doubleValues = new double[count];
            this.stringValues = new String[count];
            this.byteValues = new BytesWritable[count];
        }
    }

    /**
     * Generates reproducible pseudo-random inputs (fixed seed 42), writes
     * {@code count} {@code ReallyBigRow} rows built from them to
     * {@code testFilePath} using ZLIB compression, and returns the inputs so
     * callers can rebuild the expected rows.
     *
     * @param count number of rows to write
     * @param lowMemoryMode value stored in {@code conf} for
     *        {@code HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE} before the writer is created
     * @return the generated inputs the rows were built from
     * @throws IOException if writing the file fails
     */
    private RandomRowInputs writeRandomRows(int count, boolean lowMemoryMode) throws IOException {
        final ObjectInspector rowInspector;
        synchronized (TestOrcFile.class) {
            rowInspector = ObjectInspectorFactory.getReflectionObjectInspector(ReallyBigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }

        ReaderWriterProfiler.setProfilerOptions(conf);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE, lowMemoryMode);
        Writer out = new WriterImpl(fs, testFilePath, conf, rowInspector, 8000000, CompressionKind.ZLIB, 65536,
                1000, new MemoryManager(conf));

        final Random generator = new Random(42);
        final RandomRowInputs generated = new RandomRowInputs(count);

        // Word pool that the string values are drawn from.
        for (int w = 0; w < generated.words.length; ++w) {
            generated.words[w] = Integer.toHexString(generator.nextInt());
        }

        // Integers and strings are filled in (even, odd) pairs: two longs first,
        // then two words. With an odd count the last slot is left untouched.
        for (int even = 0; even + 1 < count; even += 2) {
            final int odd = even + 1;
            generated.intValues[even] = generator.nextLong();
            generated.intValues[odd] = generator.nextLong();
            generated.stringValues[even] = generated.words[generator.nextInt(generated.words.length)];
            generated.stringValues[odd] = generated.words[generator.nextInt(generated.words.length)];
        }

        // One double and one 20-byte binary value per row.
        for (int i = 0; i < count; ++i) {
            generated.doubleValues[i] = generator.nextDouble();
            byte[] raw = new byte[20];
            generator.nextBytes(raw);
            generated.byteValues[i] = new BytesWritable(raw);
        }

        for (int rowIndex = 0; rowIndex < count; ++rowIndex) {
            out.addRow(createRandomRow(generated.intValues, generated.doubleValues, generated.stringValues,
                    generated.byteValues, generated.words, rowIndex));
        }
        out.close();
        return generated;
    }

    private static enum NumberOfNulls {
        // No nulls
        NONE,
        // Every nth value is null
        SOME,
        // Every nth value is NOT null
        MANY
    }

    /**
     * Checks the primitive columns of a row through the lazy objects'
     * primitive accessors ({@code materializeBoolean()}, {@code materializeInt()}, ...).
     *
     * For every column except {@code short1}, an {@link IOException} thrown by
     * the primitive accessor is treated as "the value is null": the lazy
     * object must then report {@code nextIsNull()} and the expected field must
     * be null. {@code short1} is checked through {@code nextIsNull()} and
     * {@code materialize()} instead.
     *
     * The null checks use JUnit's {@code assertTrue} rather than the Java
     * {@code assert} keyword, so they are enforced even when the JVM runs
     * without {@code -ea}.
     *
     * @param expected the row holding the expected values (null fields mean a null column)
     * @param boolean1 lazy value of column boolean1
     * @param short1 lazy value of column short1
     * @param int1 lazy value of column int1
     * @param long1 lazy value of column long1
     * @param short2 lazy value of column short2
     * @param int2 lazy value of column int2
     * @param long2 lazy value of column long2
     * @param short3 lazy value of column short3
     * @param int3 lazy value of column int3
     * @param long3 lazy value of column long3
     * @param float1 lazy value of column float1
     * @param double1 lazy value of column double1
     * @throws IOException if a lazy object fails while being inspected
     */
    private void compareRowsUsingPrimitives(ReallyBigRow expected, OrcLazyBoolean boolean1, OrcLazyShort short1,
            OrcLazyInt int1, OrcLazyLong long1, OrcLazyShort short2, OrcLazyInt int2, OrcLazyLong long2,
            OrcLazyShort short3, OrcLazyInt int3, OrcLazyLong long3, OrcLazyFloat float1, OrcLazyDouble double1)
            throws IOException {
        try {
            boolean b1 = boolean1.materializeBoolean();
            assertEquals(expected.boolean1.booleanValue(), b1);
        } catch (IOException e) {
            assertTrue(boolean1.nextIsNull());
            assertNull(expected.boolean1);
        }

        // short1 goes through the object path here.
        if (short1.nextIsNull()) {
            assertNull(expected.short1);
        } else {
            assertEquals(expected.short1.shortValue(), ((ShortWritable) short1.materialize()).get());
        }

        try {
            int i1 = int1.materializeInt();
            assertEquals(expected.int1.intValue(), i1);
        } catch (IOException e) {
            assertTrue(int1.nextIsNull());
            assertNull(expected.int1);
        }

        try {
            long l1 = long1.materializeLong();
            assertEquals(expected.long1.longValue(), l1);
        } catch (IOException e) {
            assertTrue(long1.nextIsNull());
            assertNull(expected.long1);
        }

        try {
            short s2 = short2.materializeShort();
            assertEquals(expected.short2.shortValue(), s2);
        } catch (IOException e) {
            assertTrue(short2.nextIsNull());
            assertNull(expected.short2);
        }

        try {
            int i2 = int2.materializeInt();
            assertEquals(expected.int2.intValue(), i2);
        } catch (IOException e) {
            assertTrue(int2.nextIsNull());
            assertNull(expected.int2);
        }

        try {
            long l2 = long2.materializeLong();
            assertEquals(expected.long2.longValue(), l2);
        } catch (IOException e) {
            assertTrue(long2.nextIsNull());
            assertNull(expected.long2);
        }

        try {
            short s3 = short3.materializeShort();
            assertEquals(expected.short3.shortValue(), s3);
        } catch (IOException e) {
            assertTrue(short3.nextIsNull());
            assertNull(expected.short3);
        }

        try {
            int i3 = int3.materializeInt();
            assertEquals(expected.int3.intValue(), i3);
        } catch (IOException e) {
            assertTrue(int3.nextIsNull());
            assertNull(expected.int3);
        }

        try {
            long l3 = long3.materializeLong();
            assertEquals(expected.long3.longValue(), l3);
        } catch (IOException e) {
            assertTrue(long3.nextIsNull());
            assertNull(expected.long3);
        }

        try {
            float f1 = float1.materializeFloat();
            assertEquals(expected.float1.floatValue(), f1, 0.0001);
        } catch (IOException e) {
            assertTrue(float1.nextIsNull());
            assertNull(expected.float1);
        }

        try {
            double d1 = double1.materializeDouble();
            assertEquals(expected.double1.doubleValue(), d1, 0.0001);
        } catch (IOException e) {
            assertTrue(double1.nextIsNull());
            assertNull(expected.double1);
        }
    }

    /**
     * Compares every column of a row read from the file with the row that is
     * expected at that position.
     *
     * The expected row is rebuilt from {@code inputs}: with
     * {@code createRandomRowWithNulls} for {@code SOME} and {@code MANY}, and
     * with {@code createRandomRow} for {@code NONE}. Each column is checked
     * with {@code nextIsNull()} first, except {@code short1}, which is read
     * through {@code materializeShort()}; there an {@link IOException} is
     * treated as "the value is null".
     *
     * That null check uses JUnit's {@code assertTrue} rather than the Java
     * {@code assert} keyword, so it is enforced even when the JVM runs
     * without {@code -ea}.
     *
     * @param row the materialized row read from the file
     * @param inputs the generated inputs the written rows were built from
     * @param rowNumber index of the row within the file
     * @param numNulls which flavor of expected row to build
     * @param testPrimitives whether to additionally check the primitive
     *        columns through {@code compareRowsUsingPrimitives}
     * @throws Exception if reading a value fails
     */
    private void compareRows(OrcStruct row, RandomRowInputs inputs, int rowNumber, NumberOfNulls numNulls,
            boolean testPrimitives) throws Exception {
        ReallyBigRow expected = null;
        switch (numNulls) {
        case MANY:
        case SOME:
            expected = createRandomRowWithNulls(inputs.intValues, inputs.doubleValues, inputs.stringValues,
                    inputs.byteValues, inputs.words, rowNumber, numNulls);
            break;
        case NONE:
            expected = createRandomRow(inputs.intValues, inputs.doubleValues, inputs.stringValues,
                    inputs.byteValues, inputs.words, rowNumber);
            break;
        }
        OrcLazyBoolean boolean1 = (OrcLazyBoolean) row.getFieldValue(0);
        if (boolean1.nextIsNull()) {
            assertNull(expected.boolean1);
        } else {
            assertEquals(expected.boolean1.booleanValue(), ((BooleanWritable) boolean1.materialize()).get());
        }

        if (((OrcLazyObject) row.getFieldValue(1)).nextIsNull()) {
            assertNull(expected.byte1);
        } else {
            assertEquals(expected.byte1.byteValue(),
                    ((ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize()).get());
        }

        // short1 is the one column read through its primitive accessor here;
        // an IOException from it is taken to mean the value is null.
        OrcLazyShort short1 = (OrcLazyShort) row.getFieldValue(2);
        try {
            short s1 = short1.materializeShort();
            assertEquals(expected.short1.shortValue(), s1);
        } catch (IOException e) {
            assertTrue(short1.nextIsNull());
            assertNull(expected.short1);
        }

        OrcLazyInt int1 = (OrcLazyInt) row.getFieldValue(3);
        if (int1.nextIsNull()) {
            assertNull(expected.int1);
        } else {
            assertEquals(expected.int1.intValue(), ((IntWritable) int1.materialize()).get());
        }

        OrcLazyLong long1 = (OrcLazyLong) row.getFieldValue(4);
        if (long1.nextIsNull()) {
            assertNull(expected.long1);
        } else {
            assertEquals(expected.long1.longValue(), ((LongWritable) long1.materialize()).get());
        }

        OrcLazyShort short2 = (OrcLazyShort) row.getFieldValue(5);
        if (short2.nextIsNull()) {
            assertNull(expected.short2);
        } else {
            assertEquals(expected.short2.shortValue(), ((ShortWritable) short2.materialize()).get());
        }

        OrcLazyInt int2 = (OrcLazyInt) row.getFieldValue(6);
        if (int2.nextIsNull()) {
            assertNull(expected.int2);
        } else {
            assertEquals(expected.int2.intValue(), ((IntWritable) int2.materialize()).get());
        }

        OrcLazyLong long2 = (OrcLazyLong) row.getFieldValue(7);
        if (long2.nextIsNull()) {
            assertNull(expected.long2);
        } else {
            assertEquals(expected.long2.longValue(), ((LongWritable) long2.materialize()).get());
        }

        OrcLazyShort short3 = (OrcLazyShort) row.getFieldValue(8);
        if (short3.nextIsNull()) {
            assertNull(expected.short3);
        } else {
            assertEquals(expected.short3.shortValue(), ((ShortWritable) short3.materialize()).get());
        }

        OrcLazyInt int3 = (OrcLazyInt) row.getFieldValue(9);
        if (int3.nextIsNull()) {
            assertNull(expected.int3);
        } else {
            assertEquals(expected.int3.intValue(), ((IntWritable) int3.materialize()).get());
        }

        OrcLazyLong long3 = (OrcLazyLong) row.getFieldValue(10);
        if (long3.nextIsNull()) {
            assertNull(expected.long3);
        } else {
            assertEquals(expected.long3.longValue(), ((LongWritable) long3.materialize()).get());
        }

        OrcLazyFloat float1 = (OrcLazyFloat) row.getFieldValue(11);
        if (float1.nextIsNull()) {
            assertNull(expected.float1);
        } else {
            assertEquals(expected.float1.floatValue(), ((FloatWritable) float1.materialize()).get(), 0.0001);
        }

        OrcLazyDouble double1 = (OrcLazyDouble) row.getFieldValue(12);
        if (double1.nextIsNull()) {
            assertNull(expected.double1);
        } else {
            assertEquals(expected.double1.doubleValue(), ((DoubleWritable) double1.materialize()).get(), 0.0001);
        }

        if (((OrcLazyObject) row.getFieldValue(13)).nextIsNull()) {
            assertNull(expected.bytes1);
        } else {
            assertEquals(expected.bytes1, ((OrcLazyBinary) row.getFieldValue(13)).materialize());
        }

        if (((OrcLazyObject) row.getFieldValue(14)).nextIsNull()) {
            assertNull(expected.string1);
        } else {
            assertEquals(expected.string1, ((OrcLazyString) row.getFieldValue(14)).materialize());
        }

        if (((OrcLazyString) row.getFieldValue(15)).nextIsNull()) {
            assertNull(expected.string2);
        } else {
            assertEquals(expected.string2, ((OrcLazyString) row.getFieldValue(15)).materialize());
        }

        if (((OrcLazyString) row.getFieldValue(16)).nextIsNull()) {
            assertNull(expected.string3);
        } else {
            assertEquals(expected.string3, ((OrcLazyString) row.getFieldValue(16)).materialize());
        }

        // Nested struct column: compare its single "list" field and its field names.
        if (((OrcLazyObject) row.getFieldValue(17)).nextIsNull()) {
            assertNull(expected.middle);
        } else {
            final List<InnerStruct> expectedList = expected.middle.list;
            final OrcStruct actualMiddle = (OrcStruct) ((OrcLazyStruct) row.getFieldValue(17)).materialize();
            final List<OrcStruct> actualList = (List) actualMiddle.getFieldValue(0);
            compareListOfStructs(expectedList, actualList);
            final List<String> actualFieldNames = actualMiddle.getFieldNames();
            final List<String> expectedFieldNames = ImmutableList.of("list");
            compareLists(expectedFieldNames, actualFieldNames);
        }
        if (((OrcLazyObject) row.getFieldValue(18)).nextIsNull()) {
            assertNull(expected.list);
        } else {
            compareListOfStructs(expected.list, (List) ((OrcLazyList) row.getFieldValue(18)).materialize());
        }
        if (((OrcLazyObject) row.getFieldValue(19)).nextIsNull()) {
            assertNull(expected.map);
        } else {
            compareMap(expected.map, (Map) ((OrcLazyMap) row.getFieldValue(19)).materialize());
        }

        // Optionally re-check the primitive columns through their primitive accessors.
        if (testPrimitives) {
            compareRowsUsingPrimitives(expected, boolean1, short1, int1, long1, short2, int2, long2, short3, int3,
                    long3, float1, double1);
        }
    }

    private void compareRowsWithoutNextIsNull(OrcStruct row, RandomRowInputs inputs, int rowNumber,
            NumberOfNulls numNulls, boolean usingPrimitives) throws Exception {

        ReallyBigRow expected = null;
        switch (numNulls) {
        case MANY:
        case SOME:
            expected = createRandomRowWithNulls(inputs.intValues, inputs.doubleValues, inputs.stringValues,
                    inputs.byteValues, inputs.words, rowNumber, numNulls);
            break;
        case NONE:
            expected = createRandomRow(inputs.intValues, inputs.doubleValues, inputs.stringValues,
                    inputs.byteValues, inputs.words, rowNumber);
            break;
        }

        OrcLazyBoolean lazyBoolean1 = (OrcLazyBoolean) row.getFieldValue(0);
        BooleanWritable boolean1 = (BooleanWritable) lazyBoolean1.materialize();
        if (boolean1 == null) {
            assertNull(expected.boolean1);
        } else {
            assertEquals(expected.boolean1.booleanValue(), boolean1.get());
        }

        ByteWritable byte1 = (ByteWritable) ((OrcLazyByte) row.getFieldValue(1)).materialize();
        if (byte1 == null) {
            assertNull(expected.byte1);
        } else {
            assertEquals(expected.byte1.byteValue(), byte1.get());
        }

        OrcLazyShort lazyShort1 = (OrcLazyShort) row.getFieldValue(2);
        ShortWritable short1 = (ShortWritable) lazyShort1.materialize();
        if (short1 == null) {
            assertNull(expected.short1);
        } else {
            assertEquals(expected.short1.shortValue(), short1.get());
        }

        OrcLazyInt lazyInt1 = (OrcLazyInt) row.getFieldValue(3);
        IntWritable int1 = (IntWritable) lazyInt1.materialize();
        if (int1 == null) {
            assertNull(expected.int1);
        } else {
            assertEquals(expected.int1.intValue(), int1.get());
        }

        OrcLazyLong lazyLong1 = (OrcLazyLong) row.getFieldValue(4);
        LongWritable long1 = (LongWritable) lazyLong1.materialize();
        if (long1 == null) {
            assertNull(expected.long1);
        } else {
            assertEquals(expected.long1.longValue(), long1.get());
        }

        OrcLazyShort lazyShort2 = (OrcLazyShort) row.getFieldValue(5);
        ShortWritable short2 = (ShortWritable) lazyShort2.materialize();
        if (short2 == null) {
            assertNull(expected.short2);
        } else {
            assertEquals(expected.short2.shortValue(), short2.get());
        }

        OrcLazyInt lazyInt2 = (OrcLazyInt) row.getFieldValue(6);
        IntWritable int2 = (IntWritable) lazyInt2.materialize();
        if (int2 == null) {
            assertNull(expected.int2);
        } else {
            assertEquals(expected.int2.intValue(), int2.get());
        }

        OrcLazyLong lazyLong2 = (OrcLazyLong) row.getFieldValue(7);
        LongWritable long2 = (LongWritable) lazyLong2.materialize();
        if (long2 == null) {
            assertNull(expected.long2);
        } else {
            assertEquals(expected.long2.longValue(), long2.get());
        }

        OrcLazyShort lazyShort3 = (OrcLazyShort) row.getFieldValue(8);
        ShortWritable short3 = (ShortWritable) lazyShort3.materialize();
        if (short3 == null) {
            assertNull(expected.short3);
        } else {
            assertEquals(expected.short3.shortValue(), short3.get());
        }

        OrcLazyInt lazyInt3 = (OrcLazyInt) row.getFieldValue(9);
        IntWritable int3 = (IntWritable) lazyInt3.materialize();
        if (int3 == null) {
            assertNull(expected.int3);
        } else {
            assertEquals(expected.int3.intValue(), int3.get());
        }

        OrcLazyLong lazyLong3 = (OrcLazyLong) row.getFieldValue(10);
        LongWritable long3 = (LongWritable) lazyLong3.materialize();
        if (long3 == null) {
            assertNull(expected.long3);
        } else {
            assertEquals(expected.long3.longValue(), long3.get());
        }

        OrcLazyFloat lazyFloat1 = (OrcLazyFloat) row.getFieldValue(11);
        FloatWritable float1 = (FloatWritable) lazyFloat1.materialize();
        if (float1 == null) {
            assertNull(expected.float1);
        } else {
            assertEquals(expected.float1.floatValue(), float1.get(), 0.0001);
        }

        OrcLazyDouble lazyDouble1 = (OrcLazyDouble) row.getFieldValue(12);
        DoubleWritable double1 = (DoubleWritable) lazyDouble1.materialize();
        if (double1 == null) {
            assertNull(expected.double1);
        } else {
            assertEquals(expected.double1.doubleValue(), double1.get(), 0.0001);
        }

        BytesWritable bytes1 = (BytesWritable) ((OrcLazyBinary) row.getFieldValue(13)).materialize();
        if (bytes1 == null) {
            assertNull(expected.bytes1);
        } else {
            assertEquals(expected.bytes1, bytes1);
        }

        Text string1 = (Text) ((OrcLazyString) row.getFieldValue(14)).materialize();
        if (string1 == null) {
            assertNull(expected.string1);
        } else {
            assertEquals(expected.string1, string1);
        }

        Text string2 = (Text) ((OrcLazyString) row.getFieldValue(15)).materialize();
        if (string2 == null) {
            assertNull(expected.string2);
        } else {
            assertEquals(expected.string2, string2);
        }

        Text string3 = (Text) ((OrcLazyString) row.getFieldValue(16)).materialize();
        if (string3 == null) {
            assertNull(expected.string3);
        } else {
            assertEquals(expected.string3, string3);
        }

        OrcStruct middle = (OrcStruct) ((OrcLazyStruct) row.getFieldValue(17)).materialize();
        if (middle == null) {
            assertNull(expected.middle);
        } else {
            final List<InnerStruct> expectedList = expected.middle.list;
            final List<OrcStruct> actualList = (List) middle.getFieldValue(0);
            compareListOfStructs(expectedList, actualList);
            final List<String> actualFieldNames = middle.getFieldNames();
            final List<String> expectedFieldNames = ImmutableList.of("list");
            compareLists(expectedFieldNames, actualFieldNames);
        }

        List list = (List) ((OrcLazyList) row.getFieldValue(18)).materialize();
        if (list == null) {
            assertNull(expected.list);
        } else {
            compareListOfStructs(expected.list, list);
        }

        Map map = (Map) ((OrcLazyMap) row.getFieldValue(19)).materialize();
        if (map == null) {
            assertNull(expected.map);
        } else {
            compareMap(expected.map, map);
        }

        if (usingPrimitives) {
            compareRowsUsingPrimitives(expected, lazyBoolean1, lazyShort1, lazyInt1, lazyLong1, lazyShort2,
                    lazyInt2, lazyLong2, lazyShort3, lazyInt3, lazyLong3, lazyFloat1, lazyDouble1);
        }
    }

    /** Backward seek test with lowMemory off, primitive checks on and lazy HDFS reads off. */
    @Test
    public void testSeek() throws Exception {
        final boolean lowMemory = false;
        final boolean testPrimitives = true;
        final boolean lazyHdfsReads = false;
        testSeek(lowMemory, testPrimitives, lazyHdfsReads);
    }

    /** Backward seek test with lowMemory on, primitive checks off and lazy HDFS reads off. */
    @Test
    public void testSeekLowMemory() throws Exception {
        final boolean lowMemory = true;
        final boolean testPrimitives = false;
        final boolean lazyHdfsReads = false;
        testSeek(lowMemory, testPrimitives, lazyHdfsReads);
    }

    /** Backward seek test with lowMemory off, primitive checks off and lazy HDFS reads on. */
    @Test
    public void testSeekLazyHdfsReads() throws Exception {
        final boolean lowMemory = false;
        final boolean testPrimitives = false;
        final boolean lazyHdfsReads = true;
        testSeek(lowMemory, testPrimitives, lazyHdfsReads);
    }

    /**
     * Writes COUNT random rows, then seeks to every row from the last one down to row 0 and
     * checks each materialized row against the inputs it was written from.
     *
     * @param lowMemory forwarded to writeRandomRows as its second argument
     * @param testPrimitives forwarded to compareRows as its last argument
     * @param lazyHdfsReads when true, HIVE_ORC_EAGER_HDFS_READ is set to false on the shared conf
     *        before the reader is created (and to true otherwise)
     * @throws Exception if writing, reading or a comparison fails
     */
    private void testSeek(boolean lowMemory, boolean testPrimitives, boolean lazyHdfsReads) throws Exception {
        final int COUNT = 32768;
        RandomRowInputs inputs = writeRandomRows(COUNT, lowMemory);
        ReaderWriterProfiler.setProfilerOptions(conf);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_EAGER_HDFS_READ, !lazyHdfsReads);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        assertEquals(COUNT, reader.getNumberOfRows());
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = null;
            // Walk backwards so that every read requires an explicit seek.
            for (int i = COUNT - 1; i >= 0; --i) {
                rows.seekToRow(i);
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                compareRows(row, inputs, i, NumberOfNulls.NONE, testPrimitives);
            }
        } finally {
            // Close the record reader even when a comparison above fails.
            rows.close();
        }
    }

    /**
     * Writes COUNT rows, then seeks to rows 0, n, 2n, ... (COUNT / n seeks in total) and checks
     * each materialized row against the inputs it was written from.
     *
     * @param n distance between the rows that are read
     * @param withoutNextIsNull when true rows are checked with compareRowsWithoutNextIsNull,
     *        otherwise with compareRows
     * @param numNulls selects the writer: writeRandomRows for NONE, writeRandomRowsWithNulls for
     *        SOME and MANY; it is also forwarded to the comparison
     * @throws Exception if writing, reading or a comparison fails
     */
    private void readEveryNthRow(int n, boolean withoutNextIsNull, NumberOfNulls numNulls) throws Exception {
        final int COUNT = 32768;
        RandomRowInputs inputs = null;
        switch (numNulls) {
        case NONE:
            inputs = writeRandomRows(COUNT, false);
            break;
        case SOME:
        case MANY:
            inputs = writeRandomRowsWithNulls(COUNT, numNulls, false);
            break;
        }

        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        assertEquals(COUNT, reader.getNumberOfRows());
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = null;
            for (int i = 0; i < COUNT / n; i++) {
                final int rowIndex = i * n;
                rows.seekToRow(rowIndex);
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                if (withoutNextIsNull) {
                    compareRowsWithoutNextIsNull(row, inputs, rowIndex, numNulls, true);
                } else {
                    compareRows(row, inputs, rowIndex, numNulls, true);
                }
            }
        } finally {
            // Close the record reader even when a comparison above fails.
            rows.close();
        }
    }

    /** Seek-reads every row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryRow() throws Exception {
        final int stride = 1;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 2nd row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryOtherRow() throws Exception {
        final int stride = 2;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 3rd row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryThirdRow() throws Exception {
        final int stride = 3;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 4th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryFourthRow() throws Exception {
        final int stride = 4;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 5th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryFifthRow() throws Exception {
        final int stride = 5;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 6th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEverySixthRow() throws Exception {
        final int stride = 6;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 7th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEverySeventhRow() throws Exception {
        final int stride = 7;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 8th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryEighthRow() throws Exception {
        final int stride = 8;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 9th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryNinthRow() throws Exception {
        final int stride = 9;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 10th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryTenthRow() throws Exception {
        final int stride = 10;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 100th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryHundredthRow() throws Exception {
        final int stride = 100;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 1000th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryThousandthRow() throws Exception {
        final int stride = 1000;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every 10000th row of a NumberOfNulls.NONE file, checking each via compareRows. */
    @Test
    public void testEveryTenThousandthRow() throws Exception {
        final int stride = 10000;
        readEveryNthRow(stride, false, NumberOfNulls.NONE);
    }

    /** Seek-reads every row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryRowWithoutNextIsNull() throws Exception {
        final int stride = 1;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 2nd row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryOtherRowWithoutNextIsNull() throws Exception {
        final int stride = 2;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 3rd row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryThirdRowWithoutNextIsNull() throws Exception {
        final int stride = 3;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 4th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryFourthRowWithoutNextIsNull() throws Exception {
        final int stride = 4;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 5th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryFifthRowWithoutNextIsNull() throws Exception {
        final int stride = 5;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 6th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEverySixthRowWithoutNextIsNull() throws Exception {
        final int stride = 6;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 7th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEverySeventhRowWithoutNextIsNull() throws Exception {
        final int stride = 7;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 8th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryEighthRowWithoutNextIsNull() throws Exception {
        final int stride = 8;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 9th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryNinthRowWithoutNextIsNull() throws Exception {
        final int stride = 9;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 10th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryTenthRowWithoutNextIsNull() throws Exception {
        final int stride = 10;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 100th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryHundredthRowWithoutNextIsNull() throws Exception {
        final int stride = 100;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 1000th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryThousandthRowWithoutNextIsNull() throws Exception {
        final int stride = 1000;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /** Seek-reads every 10000th row of a NumberOfNulls.NONE file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryTenThousandthRowWithoutNextIsNull() throws Exception {
        final int stride = 10000;
        readEveryNthRow(stride, true, NumberOfNulls.NONE);
    }

    /**
     * Generates {@code count} pseudo-random input values from a fixed seed (42), writes one
     * ReallyBigRow per index to testFilePath with ZLIB compression, and returns the generated
     * inputs so that callers can rebuild the expected rows.
     *
     * @param count number of rows to write
     * @param numNulls forwarded to createRandomRowWithNulls for every row
     * @param lowMemoryMode selects 200000 instead of 4000000 for the fifth WriterImpl argument
     * @return the random inputs the rows were built from
     * @throws IOException if the writer fails
     */
    private RandomRowInputs writeRandomRowsWithNulls(int count, NumberOfNulls numNulls, boolean lowMemoryMode)
            throws IOException {
        final ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(ReallyBigRow.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        final Writer writer = new WriterImpl(fs, testFilePath, conf, inspector,
                lowMemoryMode ? 200000 : 4000000, CompressionKind.ZLIB, 65536, 1000, new MemoryManager(conf));

        final Random rand = new Random(42);
        final RandomRowInputs inputs = new RandomRowInputs(count);

        // The order of the rand calls below determines the generated data; keep it stable.
        for (int w = 0; w < inputs.words.length; ++w) {
            inputs.words[w] = Integer.toHexString(rand.nextInt());
        }
        final int pairs = count / 2;
        for (int pair = 0; pair < pairs; ++pair) {
            final int even = 2 * pair;
            final int odd = even + 1;
            inputs.intValues[even] = rand.nextLong();
            inputs.intValues[odd] = rand.nextLong();
            inputs.stringValues[even] = inputs.words[rand.nextInt(inputs.words.length)];
            inputs.stringValues[odd] = inputs.words[rand.nextInt(inputs.words.length)];
        }
        for (int r = 0; r < count; ++r) {
            inputs.doubleValues[r] = rand.nextDouble();
            final byte[] payload = new byte[20];
            rand.nextBytes(payload);
            inputs.byteValues[r] = new BytesWritable(payload);
        }

        for (int r = 0; r < count; ++r) {
            writer.addRow(createRandomRowWithNulls(inputs.intValues, inputs.doubleValues, inputs.stringValues,
                    inputs.byteValues, inputs.words, r, numNulls));
        }
        writer.close();
        return inputs;
    }

    /** Seek-reads every row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryRowWithNulls() throws Exception {
        final int stride = 1;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 2nd row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryOtherRowWithNulls() throws Exception {
        final int stride = 2;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 3rd row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryThirdRowWithNulls() throws Exception {
        final int stride = 3;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 4th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryFourthRowWithNulls() throws Exception {
        final int stride = 4;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 5th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryFifthRowWithNulls() throws Exception {
        final int stride = 5;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 6th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEverySixthRowWithNulls() throws Exception {
        final int stride = 6;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 7th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEverySeventhRowWithNulls() throws Exception {
        final int stride = 7;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 8th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryEighthRowWithNulls() throws Exception {
        final int stride = 8;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 9th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryNinthRowWithNulls() throws Exception {
        final int stride = 9;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 10th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryTenthRowWithNulls() throws Exception {
        final int stride = 10;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 100th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryHundredthRowWithNulls() throws Exception {
        final int stride = 100;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 1000th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryThousandthRowWithNulls() throws Exception {
        final int stride = 1000;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every 10000th row of a NumberOfNulls.SOME file, checking each via compareRows. */
    @Test
    public void testEveryTenThousandthRowWithNulls() throws Exception {
        final int stride = 10000;
        readEveryNthRow(stride, false, NumberOfNulls.SOME);
    }

    /** Seek-reads every row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 1;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 2nd row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryOtherRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 2;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 3rd row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryThirdRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 3;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 4th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryFourthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 4;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 5th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryFifthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 5;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 6th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEverySixthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 6;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 7th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEverySeventhRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 7;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 8th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryEighthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 8;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 9th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryNinthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 9;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 10th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryTenthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 10;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 100th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryHundredthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 100;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 1000th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryThousandthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 1000;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /** Seek-reads every 10000th row of a NumberOfNulls.SOME file using compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryTenThousandthRowWithNullsWithoutNextIsNull() throws Exception {
        final int stride = 10000;
        readEveryNthRow(stride, true, NumberOfNulls.SOME);
    }

    /**
     * Writes COUNT rows, then reads all of them sequentially (no seeking). Rows whose index is a
     * multiple of n are fetched but neither materialized nor compared; every other row is
     * materialized and checked against the inputs it was written from.
     *
     * <p>Note: for n == 1 every index is a multiple of n, so no row is ever compared.
     *
     * @param n rows at indices divisible by n are skipped
     * @param withoutNextIsNull when true rows are checked with compareRowsWithoutNextIsNull,
     *        otherwise with compareRows
     * @param numNulls selects the writer: writeRandomRows for NONE, writeRandomRowsWithNulls for
     *        SOME and MANY; it is also forwarded to the comparison
     * @throws Exception if writing, reading or a comparison fails
     */
    private void skipEveryNthRow(int n, boolean withoutNextIsNull, NumberOfNulls numNulls) throws Exception {
        final int COUNT = 32768;
        RandomRowInputs inputs = null;
        switch (numNulls) {
        case NONE:
            inputs = writeRandomRows(COUNT, false);
            break;
        case SOME:
        case MANY:
            inputs = writeRandomRowsWithNulls(COUNT, numNulls, false);
            break;
        }

        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        assertEquals(COUNT, reader.getNumberOfRows());
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyRow lazyRow = null;
            for (int i = 0; i < COUNT; i++) {
                lazyRow = (OrcLazyRow) rows.next(lazyRow);
                if (i % n == 0) {
                    // Skipped row: advance past it without materializing anything.
                    continue;
                }
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                if (withoutNextIsNull) {
                    compareRowsWithoutNextIsNull(row, inputs, i, numNulls, false);
                } else {
                    compareRows(row, inputs, i, numNulls, false);
                }
            }
        } finally {
            // Close the record reader even when a comparison above fails.
            rows.close();
        }
    }

    /** Scans a NumberOfNulls.MANY file with n = 1; i % 1 is always 0, so no row is actually compared. */
    @Test
    public void testEveryRowWithLotsOfNulls() throws Exception {
        final int skipped = 1;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 2. */
    @Test
    public void testEveryOtherRowWithLotsOfNulls() throws Exception {
        final int skipped = 2;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 3. */
    @Test
    public void testEveryThirdRowWithLotsOfNulls() throws Exception {
        final int skipped = 3;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 4. */
    @Test
    public void testEveryFourthRowWithLotsOfNulls() throws Exception {
        final int skipped = 4;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 5. */
    @Test
    public void testEveryFifthRowWithLotsOfNulls() throws Exception {
        final int skipped = 5;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 6. */
    @Test
    public void testEverySixthRowWithLotsOfNulls() throws Exception {
        final int skipped = 6;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 7. */
    @Test
    public void testEverySeventhRowWithLotsOfNulls() throws Exception {
        final int skipped = 7;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 8. */
    @Test
    public void testEveryEighthRowWithLotsOfNulls() throws Exception {
        final int skipped = 8;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 9. */
    @Test
    public void testEveryNinthRowWithLotsOfNulls() throws Exception {
        final int skipped = 9;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 10. */
    @Test
    public void testEveryTenthRowWithLotsOfNulls() throws Exception {
        final int skipped = 10;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 100. */
    @Test
    public void testEveryHundredthRowWithLotsOfNulls() throws Exception {
        final int skipped = 100;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 1000. */
    @Test
    public void testEveryThousandthRowWithLotsOfNulls() throws Exception {
        final int skipped = 1000;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file, checking via compareRows the rows not at a multiple of 10000. */
    @Test
    public void testEveryTenThousandthRowWithLotsOfNulls() throws Exception {
        final int skipped = 10000;
        skipEveryNthRow(skipped, false, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file with n = 1; i % 1 is always 0, so no row is actually compared. */
    @Test
    public void testEveryRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 1;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 2 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryOtherRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 2;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 3 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryThirdRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 3;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 4 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryFourthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 4;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 5 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryFifthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 5;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 6 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEverySixthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 6;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 7 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEverySeventhRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 7;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 8 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryEighthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 8;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 9 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryNinthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 9;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 10 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryTenthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 10;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 100 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryHundredthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 100;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 1000 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryThousandthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 1000;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /** Scans a NumberOfNulls.MANY file; rows not at a multiple of 10000 go to compareRowsWithoutNextIsNull. */
    @Test
    public void testEveryTenThousandthRowWithLotsOfNullsWithoutNextIsNull() throws Exception {
        final int skipped = 10000;
        skipEveryNthRow(skipped, true, NumberOfNulls.MANY);
    }

    /**
     * Asserts that a struct read back from the file matches the InnerStruct it was written from.
     * If either side is null both must be null; otherwise field 0 is compared with
     * {@code expect.int1} and field 1 with {@code expect.string1}.
     */
    private void compareInner(InnerStruct expect, OrcStruct actual) throws Exception {
        if (expect == null || actual == null) {
            assertEquals(expect, actual);
            return;
        }

        if (actual.getFieldValue(0) != null) {
            final int actualInt = ((IntWritable) actual.getFieldValue(0)).get();
            assertEquals(expect.int1.intValue(), actualInt);
        } else {
            assertNull(expect.int1);
        }
        assertEquals(expect.string1, actual.getFieldValue(1));
    }

    /**
     * Asserts that both lists have the same length and that the elements at each position match
     * according to compareInner.
     */
    private void compareListOfStructs(List<InnerStruct> expect, List<OrcStruct> actual) throws Exception {
        assertEquals(expect.size(), actual.size());
        int position = 0;
        while (position < expect.size()) {
            compareInner(expect.get(position), actual.get(position));
            ++position;
        }
    }

    /**
     * Asserts that two lists have the same size and the same set of distinct elements,
     * regardless of order.
     *
     * <p>Containment is checked in both directions. With the size check plus a one-way
     * {@code expect.containsAll(actual)} alone, an actual list holding duplicates (for example
     * {@code [a, a]} against {@code [a, b]}) would be accepted even though an expected element
     * is missing.
     *
     * @param expect the expected elements
     * @param actual the elements actually produced
     */
    private void compareLists(List expect, List actual) throws Exception {
        assertEquals(expect.size(), actual.size());
        assertTrue(expect.containsAll(actual));
        assertTrue(actual.containsAll(expect));
    }

    /**
     * Asserts that both maps have the same number of entries and that, for every expected key,
     * the value found under that key in {@code actual} matches according to compareInner.
     */
    private void compareMap(Map<Text, InnerStruct> expect, Map<Text, OrcStruct> actual) throws Exception {
        assertEquals(expect.size(), actual.size());
        for (Map.Entry<Text, InnerStruct> expected : expect.entrySet()) {
            final OrcStruct found = actual.get(expected.getKey());
            compareInner(expected.getValue(), found);
        }
    }

    /**
     * Builds the row for index {@code i} from the pre-generated random inputs, with no null
     * fields. The same inputs and index always produce the same row.
     *
     * <p>The "WithUniques" values take an index-derived value on every 10th row and a value
     * derived from {@code intValues[i] % 10} on all other rows.
     *
     * @param intValues random longs, one per row
     * @param doubleValues random doubles, one per row
     * @param stringValues random strings, one per row
     * @param byteValues random byte payloads, one per row
     * @param words word pool; indexed by {@code i % words.length}
     * @param i index of the row to build
     * @return the row for index {@code i}
     */
    private ReallyBigRow createRandomRow(long[] intValues, double[] doubleValues, String[] stringValues,
            BytesWritable[] byteValues, String[] words, int i) {
        InnerStruct inner = new InnerStruct((int) intValues[i], stringValues[i]);
        InnerStruct inner2 = new InnerStruct((int) (intValues[i] >> 32), words[i % words.length] + "-x");
        // Every 10th value of this string should be unique-ish in the file
        String stringWithUniques = i % 10 == 0 ? Integer.toHexString(i) : stringValues[i];
        Short shortWithUniques = i % 10 == 0 ? (short) i : (short) (intValues[i] % 10);
        Integer intWithUniques = i % 10 == 0 ? (int) (Short.MAX_VALUE + i)
                : (int) (Short.MAX_VALUE + (intValues[i] % 10));
        // Widen i before adding: Integer.MAX_VALUE + i evaluated in int arithmetic wraps around
        // to a negative value for any i > 0 instead of producing a value beyond the int range.
        Long longWithUniques = i % 10 == 0 ? Integer.MAX_VALUE + (long) i
                : (long) (Integer.MAX_VALUE + (intValues[i] % 10));
        return new ReallyBigRow((intValues[i] & 1) == 0, (byte) intValues[i], (short) intValues[i],
                (int) (Short.MAX_VALUE + intValues[i]), (long) (Integer.MAX_VALUE + intValues[i]),
                (short) (intValues[i] % 10), (int) (Short.MAX_VALUE + (intValues[i] % 10)),
                (long) (Integer.MAX_VALUE + (intValues[i] % 10)), shortWithUniques, intWithUniques, longWithUniques,
                (float) doubleValues[i], doubleValues[i], byteValues[i], stringValues[i], Integer.toHexString(i),
                stringWithUniques, new MiddleStruct(inner, inner2), list(), map(inner, inner2));
    }

    /**
     * Builds the row for index {@code i} from the pre-generated random inputs, nulling out fields
     * based on {@code intValues[i]}. The same inputs and index always produce the same row.
     *
     * <p>Each field has a modulus between 10 and 19. Unless {@code numNulls} is MANY, the field is
     * null exactly when {@code intValues[i]} is divisible by that modulus. With MANY the XOR
     * inverts the test, so the field is null unless {@code intValues[i]} is divisible by it.
     *
     * @param intValues random longs, one per row
     * @param doubleValues random doubles, one per row
     * @param stringValues random strings, one per row
     * @param byteValues random byte payloads, one per row
     * @param words word pool; indexed by {@code i % words.length}
     * @param i index of the row to build
     * @param numNulls MANY inverts the null condition of every field
     * @return the row for index {@code i}
     */
    private ReallyBigRow createRandomRowWithNulls(long[] intValues, double[] doubleValues, String[] stringValues,
            BytesWritable[] byteValues, String[] words, int i, NumberOfNulls numNulls) {
        boolean lotsOfNulls = numNulls == NumberOfNulls.MANY;
        Boolean booleanVal = intValues[i] % 10 == 0 ^ lotsOfNulls ? null : (intValues[i] & 1) == 0;
        Byte byteVal = intValues[i] % 11 == 0 ^ lotsOfNulls ? null : (byte) intValues[i];
        Short shortVal = intValues[i] % 12 == 0 ^ lotsOfNulls ? null : (short) intValues[i];
        Integer intVal = intValues[i] % 13 == 0 ^ lotsOfNulls ? null : (int) (Short.MAX_VALUE + i);
        // Widen i before adding: Integer.MAX_VALUE + i evaluated in int arithmetic wraps around
        // to a negative value for any i > 0 instead of producing a value beyond the int range.
        Long longVal = intValues[i] % 14 == 0 ^ lotsOfNulls ? null : Integer.MAX_VALUE + (long) i;
        Float floatVal = intValues[i] % 15 == 0 ^ lotsOfNulls ? null : (float) doubleValues[i];
        Double doubleVal = intValues[i] % 16 == 0 ^ lotsOfNulls ? null : doubleValues[i];
        BytesWritable bytesVal = intValues[i] % 17 == 0 ^ lotsOfNulls ? null : byteValues[i];
        String strVal = intValues[i] % 18 == 0 ^ lotsOfNulls ? null : stringValues[i];
        InnerStruct inner = intValues[i] % 19 == 0 ^ lotsOfNulls ? null
                : new InnerStruct(intValues[i] % 10 == 0 ^ lotsOfNulls ? null : (int) intValues[i],
                        intValues[i] % 11 == 0 ^ lotsOfNulls ? null : stringValues[i]);
        InnerStruct inner2 = intValues[i] % 12 == 0 ^ lotsOfNulls ? null
                : new InnerStruct(intValues[i] % 13 == 0 ^ lotsOfNulls ? null : (int) (intValues[i] >> 32),
                        intValues[i] % 14 == 0 ^ lotsOfNulls ? null : words[i % words.length] + "-x");
        MiddleStruct middle = intValues[i] % 15 == 0 ^ lotsOfNulls ? null : new MiddleStruct(inner, inner2);
        List<InnerStruct> list = intValues[i] % 16 == 0 ^ lotsOfNulls ? null : list(inner, inner2);
        Map<Text, InnerStruct> map = intValues[i] % 17 == 0 ^ lotsOfNulls ? null : map(inner, inner2);
        String strVal2 = intValues[i] % 18 == 0 ^ lotsOfNulls ? null : Integer.toHexString(i);
        Short shortVal2 = intValues[i] % 19 == 0 ^ lotsOfNulls ? null : (short) (intValues[i] % 10);
        Integer intVal2 = intValues[i] % 10 == 0 ^ lotsOfNulls ? null
                : (int) (Short.MAX_VALUE + (intValues[i] % 10));
        Long longVal2 = intValues[i] % 11 == 0 ^ lotsOfNulls ? null
                : (long) (Integer.MAX_VALUE + (intValues[i] % 10));
        String strVal3 = intValues[i] % 12 == 0 ^ lotsOfNulls ? null
                : (intValues[i] % 10 == 0 ? Integer.toHexString(i) : stringValues[i]);
        Short shortVal3 = intValues[i] % 13 == 0 ^ lotsOfNulls ? null
                : (intValues[i] % 10 == 0 ? (short) i : (short) (intValues[i] % 10));
        Integer intVal3 = intValues[i] % 14 == 0 ^ lotsOfNulls ? null
                : (intValues[i] % 10 == 0 ? (int) (Short.MAX_VALUE + i)
                        : (int) (Short.MAX_VALUE + (intValues[i] % 10)));
        // Same widening as for longVal above to avoid the int overflow.
        Long longVal3 = intValues[i] % 15 == 0 ^ lotsOfNulls ? null
                : (intValues[i] % 10 == 0 ? Integer.MAX_VALUE + (long) i
                        : (long) (Integer.MAX_VALUE + (intValues[i] % 10)));
        return new ReallyBigRow(booleanVal, byteVal, shortVal, intVal, longVal, shortVal2, intVal2, longVal2,
                shortVal3, intVal3, longVal3, floatVal, doubleVal, bytesVal, strVal, strVal2, strVal3, middle, list,
                map);
    }

    /**
     * MemoryManager replacement for tests: it records the most recently registered writer and
     * reports a fixed memory pool size and allocation scale supplied at construction time.
     */
    private static class MyMemoryManager extends MemoryManager {
        final long totalSpace;
        double rate;
        Path path = null;
        long lastAllocation = 0;
        int rows = 0;
        MemoryManager.Callback callback;

        MyMemoryManager(Configuration conf, long totalSpace, double rate) {
            super(conf);
            this.rate = rate;
            this.totalSpace = totalSpace;
        }

        /** Remembers the writer's path, requested allocation and callback; initialAllocation is ignored. */
        @Override
        void addWriter(Path path, long requestedAllocation, MemoryManager.Callback callback,
                long initialAllocation) {
            this.callback = callback;
            this.lastAllocation = requestedAllocation;
            this.path = path;
        }

        /** Forgets the recorded path and allocation; the argument is not inspected. */
        @Override
        synchronized void removeWriter(Path path) {
            this.lastAllocation = 0;
            this.path = null;
        }

        /** Returns the fixed pool size given to the constructor. */
        @Override
        long getTotalMemoryPool() {
            return totalSpace;
        }

        /** Returns the fixed scale given to the constructor. */
        @Override
        double getAllocationScale() {
            return rate;
        }

        @Override
        void addedRow() throws IOException {
            // Intentionally empty: this override does nothing when a row is added.
        }

        /**
         * Requests a flush once total memory exceeds {@code stripeSize * rate} (rounded), or once
         * dictionary memory exceeds a positive {@code maxDictSize}.
         */
        @Override
        boolean shouldFlush(MemoryEstimate memoryEstimate, Path path, long stripeSize, long maxDictSize) {
            final long limit = Math.round(stripeSize * rate);
            if (memoryEstimate.getTotalMemory() > limit) {
                return true;
            }
            return maxDictSize > 0 && memoryEstimate.getDictionaryMemory() > maxDictSize;
        }
    }

    /**
     * Writes 2500 InnerStruct rows through a MyMemoryManager with an allocation scale of 0.1 and
     * checks that the writer registers and unregisters itself, that the file holds exactly five
     * stripes each shorter than 6000 bytes of data, and that all rows are accounted for.
     */
    @Test
    public void testMemoryManagement() throws Exception {
        final ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(InnerStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        final MyMemoryManager memory = new MyMemoryManager(conf, 10000, 0.1);
        ReaderWriterProfiler.setProfilerOptions(conf);
        final Writer writer = new WriterImpl(fs, testFilePath, conf, inspector, 50000, CompressionKind.NONE, 100,
                0, memory);
        // Creating the writer must have registered it with the memory manager.
        assertEquals(testFilePath, memory.path);

        int written = 0;
        while (written < 2500) {
            writer.addRow(new InnerStruct(written * 300, Integer.toHexString(10 * written)));
            ++written;
        }
        writer.close();
        // Closing the writer must have unregistered it again.
        assertEquals(null, memory.path);

        ReaderWriterProfiler.setProfilerOptions(conf);
        final Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        int stripeCount = 0;
        for (StripeInformation stripe : reader.getStripes()) {
            stripeCount += 1;
            assertTrue("stripe " + stripeCount + " is too long at " + stripe.getDataLength(),
                    stripe.getDataLength() < 6000);
        }
        assertEquals(5, stripeCount);
        assertEquals(2500, reader.getNumberOfRows());
    }

    /**
     * Test a stride dictionary that contains only the empty string.
     *
     * <p>Writes one row holding "" followed by 999 rows holding "123", then reads the file back
     * and checks every value in order.
     */
    @Test
    public void testEmptyStringStrideDictionary() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
        Writer writer = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100, 1000,
                memory);
        writer.addRow(new StringStruct(""));
        for (int i = 0; i < 999; i++) {
            writer.addRow(new StringStruct("123"));
        }
        writer.close();
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = null;
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            // The struct is materialized once; later next() calls reuse lazyRow and the field
            // is re-materialized from this same struct on every check.
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            assertEquals("", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            for (int i = 0; i < 999; i++) {
                rows.next(lazyRow);
                assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            }
        } finally {
            // Close the record reader, as the other tests in this class do.
            rows.close();
        }
    }

    /**
     * Tests stride dictionaries when the dictionary-encoding decision is not re-made on every
     * stripe.
     * <p>
     * Three stripes of a single string column are written with
     * HIVE_ORC_DICTIONARY_ENCODING_INTERVAL set to 2: two stripes of 2000 distinct values each,
     * then a stripe made of two 1000-row index strides that each start with one unique value
     * ("b", then "a") followed by 999 copies of "123".  Per the original author's description,
     * the first stripe is not dictionary encoded, that decision is carried over, and dictionary
     * encoding is then turned back on, which causes the dictionary to be nulled out and then
     * reinitialized.  Every row is read back and checked in write order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testStrideDictionariesWithoutStripeCarryover() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 2);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_SORT_KEYS, true);
        WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector, 1000000,
                CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
        // Write a stripe which is not dictionary encoded
        for (int i = 0; i < 2000; i++) {
            writer.addRow(new StringStruct(Integer.toString(i)));
        }
        writer.forceFlushStripe();
        // Write another stripe (doesn't matter what)
        for (int i = 0; i < 2000; i++) {
            writer.addRow(new StringStruct(Integer.toString(i)));
        }
        writer.forceFlushStripe();
        // Write a stripe which will be dictionary encoded
        // Note: it is important that this string is lexicographically after the string in the next
        // index stride.  This way, if sorting by index strides is not working, this value will appear
        // after the next one, though it should appear before, yielding incorrect results.
        writer.addRow(new StringStruct("b"));
        for (int i = 0; i < 999; i++) {
            writer.addRow(new StringStruct("123"));
        }
        writer.addRow(new StringStruct("a"));
        for (int i = 0; i < 999; i++) {
            writer.addRow(new StringStruct("123"));
        }
        writer.forceFlushStripe();
        writer.close();

        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            // The first two stripes: check the current row, then advance.  After the last
            // iteration the reader is positioned on the first row of the third stripe.
            for (int i = 0; i < 4000; i++) {
                assertEquals(Integer.toString(i % 2000),
                        ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
                rows.next(lazyRow);
            }
            // First index stride of the third stripe
            assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            for (int i = 0; i < 999; i++) {
                rows.next(lazyRow);
                assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            }
            // Second index stride of the third stripe
            rows.next(lazyRow);
            assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            for (int i = 0; i < 999; i++) {
                rows.next(lazyRow);
                assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            }
        } finally {
            // Release the reader even when an assertion fails
            rows.close();
        }
    }

    /**
     * Tests reading a stripe that contains a single string column across two index strides, where
     * the configuration is set so the column is dictionary encoded with a stride dictionary in
     * both strides.
     * <p>
     * Each stride starts with a value written exactly once ("a", then "b") followed by 999
     * copies of "123".  On the read side the "a" row is advanced past without materializing its
     * string, the remaining 999 rows of the first stride are checked, and finally the "b" row at
     * the start of the second stride is checked.  This can expose problems if moving across
     * strides is broken for stride dictionary streams.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testSeekAcrossStrideDictionaries() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce memory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        // Configure the writer so the column is dictionary encoded and stride dictionaries are used
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_BUILD_STRIDE_DICTIONARY, true);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, memory);

        // Each stride: one value that occurs only once (so it lands in the stride dictionary),
        // then 999 repeated values to fill out the rest of the stride
        final String[] strideLeaders = { "a", "b" };
        for (String leader : strideLeaders) {
            orcWriter.addRow(new StringStruct(leader));
            for (int filler = 0; filler < 999; filler++) {
                orcWriter.addRow(new StringStruct("123"));
            }
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
        OrcStruct row = (OrcStruct) lazyRow.materialize();
        // Move past the single stride-dictionary row of the first stride ("a") without reading it
        rows.next(lazyRow);
        // Check the remaining rows of the first stride, advancing after each check
        for (int remaining = 999; remaining > 0; remaining--) {
            assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            rows.next(lazyRow);
        }
        // Now positioned on the stride-dictionary row of the second stride (note that seek won't
        // be called because the previous row was read)
        assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
        rows.close();
    }

    /**
     * Tests writing a stripe with a stride dictionary, followed by a stripe without one, followed
     * by a stripe with one again, for a string column.
     * <p>
     * The first and third stripes each hold "a", "b", "c" followed by 997 copies of "123"; the
     * middle stripe holds 1000 copies of "123".  All 3000 rows are read back and checked in
     * write order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testEmptyInStringDictionaryStream() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_STRING_THRESHOLD, 1);
        WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector, 1000000,
                CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
        // First stripe: three values that occur once, then a repeated value
        writer.addRow(new StringStruct("a"));
        writer.addRow(new StringStruct("b"));
        writer.addRow(new StringStruct("c"));
        for (int i = 0; i < 997; i++) {
            writer.addRow(new StringStruct("123"));
        }
        writer.forceFlushStripe();
        // Second stripe: only the repeated value
        for (int i = 0; i < 1000; i++) {
            writer.addRow(new StringStruct("123"));
        }
        writer.forceFlushStripe();
        // Third stripe: same contents as the first; it is flushed by close()
        writer.addRow(new StringStruct("a"));
        writer.addRow(new StringStruct("b"));
        writer.addRow(new StringStruct("c"));
        for (int i = 0; i < 997; i++) {
            writer.addRow(new StringStruct("123"));
        }
        writer.close();

        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            // First stripe
            assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            rows.next(lazyRow);
            assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            rows.next(lazyRow);
            assertEquals("c", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            for (int i = 0; i < 997; i++) {
                rows.next(lazyRow);
                assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            }
            // Second stripe
            for (int i = 0; i < 1000; i++) {
                rows.next(lazyRow);
                assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            }
            // Third stripe
            rows.next(lazyRow);
            assertEquals("a", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            rows.next(lazyRow);
            assertEquals("b", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            rows.next(lazyRow);
            assertEquals("c", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            for (int i = 0; i < 997; i++) {
                rows.next(lazyRow);
                assertEquals("123", ((OrcLazyString) row.getFieldValue(0)).materialize().toString());
            }
        } finally {
            // Release the reader even when an assertion fails
            rows.close();
        }
    }

    /**
     * Tests writing a stripe with a stride dictionary, followed by a stripe without one, followed
     * by a stripe with one again, for an int column.
     * <p>
     * The first and third stripes each hold 1, 2, 3 followed by 997 copies of 123; the middle
     * stripe holds 1000 copies of 123.  All 3000 rows are read back and checked in write order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testEmptyInIntDictionaryStream() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector, 1000000,
                CompressionKind.NONE, 100, 1000, new MemoryManager(conf));
        // First stripe: three values that occur once, then a repeated value
        writer.addRow(new IntStruct(1));
        writer.addRow(new IntStruct(2));
        writer.addRow(new IntStruct(3));
        for (int i = 0; i < 997; i++) {
            writer.addRow(new IntStruct(123));
        }
        writer.forceFlushStripe();
        // Second stripe: only the repeated value
        for (int i = 0; i < 1000; i++) {
            writer.addRow(new IntStruct(123));
        }
        writer.forceFlushStripe();
        // Third stripe: same contents as the first; it is flushed by close()
        writer.addRow(new IntStruct(1));
        writer.addRow(new IntStruct(2));
        writer.addRow(new IntStruct(3));
        for (int i = 0; i < 997; i++) {
            writer.addRow(new IntStruct(123));
        }
        writer.close();

        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            // First stripe
            assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            rows.next(lazyRow);
            assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            rows.next(lazyRow);
            assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            for (int i = 0; i < 997; i++) {
                rows.next(lazyRow);
                assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            }
            // Second stripe
            for (int i = 0; i < 1000; i++) {
                rows.next(lazyRow);
                assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            }
            // Third stripe
            rows.next(lazyRow);
            assertEquals(1, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            rows.next(lazyRow);
            assertEquals(2, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            rows.next(lazyRow);
            assertEquals(3, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            for (int i = 0; i < 997; i++) {
                rows.next(lazyRow);
                assertEquals(123, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            }
        } finally {
            // Release the reader even when an assertion fails
            rows.close();
        }
    }

    /**
     * Verifies the scenario when {@link com.facebook.hive.orc.BitFieldReader#skip(long)} skips to
     * the last value and doesn't load the next value if it has reached the end of the stream.
     * <p>
     * Eight rows with a two-element string list are written, followed by four rows with an empty
     * list.  The list column's lazy tree reader is then asked for the last non-empty row and,
     * after that, for a row inside the trailing run of empty lists.  The test fails if that second
     * read throws the specific "Read past end of buffer" {@link EOFException}; any other
     * {@link EOFException} is rethrown unchanged.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testSkipWithEmptyArrayInEnd() throws Exception {
        ObjectInspector inspector;
        List<String> emptyList = Collections.emptyList();
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringListWithId.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }

        ReaderWriterProfiler.setProfilerOptions(conf);
        OrcConf.setFloatVar(conf, OrcConf.ConfVars.HIVE_ORC_ENTROPY_KEY_STRING_SIZE_THRESHOLD, 0.01f);
        OrcConf.setBoolVar(conf, OrcConf.ConfVars.HIVE_ORC_FILE_ENABLE_LOW_MEMORY_MODE, false);
        Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector, 1000000, CompressionKind.ZLIB, 1000,
                1000);

        // Despite its name, numNulls counts the rows written with an empty (not null) list
        int numNulls = 4;
        int numNonNulls = 8;
        for (int i = 0; i < numNonNulls; i++) {
            List<String> filledList = new ArrayList<String>(2);
            filledList.add("SomeText");
            filledList.add("SomeMoreText" + i);
            writer.addRow(new StringListWithId(i, filledList));
        }
        for (int j = 0; j < numNulls; j++) {
            writer.addRow(new StringListWithId(numNonNulls + j, emptyList));
        }

        writer.close();

        // Prepare to read back the data
        ReaderWriterProfiler.setProfilerOptions(conf);
        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyList list = ((OrcLazyList) row.getFieldValue(1));
            LazyTreeReader lazyReader = list.getLazyTreeReader();

            // Read the last row that has a non-empty list
            Object prev = lazyReader.get(numNonNulls - 1, null);

            boolean gotException = false;
            String expectedExceptionMessage = "Read past end of buffer RLE byte from compressed stream Stream for column 3 "
                    + "kind IN_DICTIONARY base: 60 limit: 66 current stride: 1 compressed offset: 66 uncompressed: 66 to 66";
            try {
                // Jump into the trailing run of empty lists
                lazyReader.get(numNonNulls + 1, prev);
            } catch (EOFException e) {
                // Compare from the constant's side so an exception without a message is rethrown
                // as-is instead of being replaced by a NullPointerException
                if (!expectedExceptionMessage.equals(e.getMessage())) {
                    throw e;
                }
                gotException = true;
            }

            assertFalse("Got EOFException for reading past end of buffer RLE byte", gotException);
        } finally {
            // Release the reader even when an assertion fails
            rows.close();
        }
    }

    /**
     * Tests writing a stripe with an integer column, where the writer is forced into low memory
     * mode before the first index stride is complete.
     * <p>
     * The values 0..999 are written in order, with low memory mode forced after the first 500,
     * and then read back and checked in the same order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testIntEnterLowMemoryModeInFirstStride() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                10000, forcedMemory);

        final int totalRows = 1000;
        final int lowMemoryAtRow = 500;
        for (int value = 0; value < totalRows; value++) {
            if (value == lowMemoryAtRow) {
                // The stride length was set to 10000, so this still happens inside the first stride
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new IntStruct(value));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int expected = 0; expected < totalRows; expected++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyInt lazyValue = (OrcLazyInt) row.getFieldValue(0);
            IntWritable actual = (IntWritable) lazyValue.materialize();
            assertEquals(expected, actual.get());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with a string column, where the writer is forced into low memory
     * mode before the first index stride is complete.
     * <p>
     * The decimal strings "0".."999" are written in order, with low memory mode forced after the
     * first 500, and then read back and checked in the same order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testStringEnterLowMemoryModeInFirstStride() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                10000, forcedMemory);

        final int totalRows = 1000;
        final int lowMemoryAtRow = 500;
        for (int value = 0; value < totalRows; value++) {
            if (value == lowMemoryAtRow) {
                // The stride length was set to 10000, so this still happens inside the first stride
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new StringStruct(Integer.toString(value)));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int expected = 0; expected < totalRows; expected++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyString lazyValue = (OrcLazyString) row.getFieldValue(0);
            Text actual = (Text) lazyValue.materialize();
            assertEquals(Integer.toString(expected), actual.toString());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with a string column, where the writer is forced into low memory
     * mode before the second index stride is complete, and that stride is never filled.
     * <p>
     * 1500 rows are written whose values cycle through "0".."999" (a full first stride, then half
     * of a second one); low memory mode is forced after 1250 rows.  All rows are read back and
     * checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testStringEnterLowMemoryModeInSecondStride() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, forcedMemory);

        final int strideLength = 1000;
        final int totalRows = 1500;
        // 1000 rows of the first stride plus 250 rows of the second one
        final int lowMemoryAtRow = 1250;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            if (rowIndex == lowMemoryAtRow) {
                // The stride length was set to 1000, so this happens part way through the second
                // stride; the rows written afterwards do not fill that stride
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new StringStruct(Integer.toString(rowIndex % strideLength)));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyString lazyValue = (OrcLazyString) row.getFieldValue(0);
            Text actual = (Text) lazyValue.materialize();
            assertEquals(Integer.toString(rowIndex % strideLength), actual.toString());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with an int column, where the writer is forced into low memory mode
     * before the second index stride is complete, and that stride is never filled.
     * <p>
     * 1500 rows are written whose values cycle through 0..999 (a full first stride, then half of
     * a second one); low memory mode is forced after 1250 rows.  All rows are read back and
     * checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testIntEnterLowMemoryModeInSecondStride() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, forcedMemory);

        final int strideLength = 1000;
        final int totalRows = 1500;
        // 1000 rows of the first stride plus 250 rows of the second one
        final int lowMemoryAtRow = 1250;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            if (rowIndex == lowMemoryAtRow) {
                // The stride length was set to 1000, so this happens part way through the second
                // stride; the rows written afterwards do not fill that stride
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new IntStruct(rowIndex % strideLength));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyInt lazyValue = (OrcLazyInt) row.getFieldValue(0);
            IntWritable actual = (IntWritable) lazyValue.materialize();
            assertEquals(rowIndex % strideLength, actual.get());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with a string column, where the writer is forced into low memory
     * mode right after the first index stride has been written, before any row of the second.
     * <p>
     * 1500 rows are written whose values cycle through "0".."999"; low memory mode is forced
     * after exactly 1000 rows.  All rows are read back and checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testStringEnterLowMemoryModeAtStrideStart() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, forcedMemory);

        final int strideLength = 1000;
        final int totalRows = 1500;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            if (rowIndex == strideLength) {
                // The stride length was set to 1000, so the second stride is just starting
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new StringStruct(Integer.toString(rowIndex % strideLength)));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyString lazyValue = (OrcLazyString) row.getFieldValue(0);
            Text actual = (Text) lazyValue.materialize();
            assertEquals(Integer.toString(rowIndex % strideLength), actual.toString());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with an int column, where the writer is forced into low memory mode
     * right after the first index stride has been written, before any row of the second.
     * <p>
     * 1500 rows are written whose values cycle through 0..999; low memory mode is forced after
     * exactly 1000 rows.  All rows are read back and checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testIntEnterLowMemoryModeAtStrideStart() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, forcedMemory);

        final int strideLength = 1000;
        final int totalRows = 1500;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            if (rowIndex == strideLength) {
                // The stride length was set to 1000, so the second stride is just starting
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new IntStruct(rowIndex % strideLength));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyInt lazyValue = (OrcLazyInt) row.getFieldValue(0);
            IntWritable actual = (IntWritable) lazyValue.materialize();
            assertEquals(rowIndex % strideLength, actual.get());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with a string column, where the writer is forced into low memory
     * mode just after the second index stride has started.
     * <p>
     * 1500 rows are written whose values cycle through "0".."999"; low memory mode is forced
     * after 1001 rows, i.e. once the full first stride and the first row of the second stride
     * have been written.  All rows are read back and checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testStringEnterLowMemoryModeAfterStrideStart() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, forcedMemory);

        final int strideLength = 1000;
        final int totalRows = 1500;
        // The first stride (1000 rows) plus the first row of the second stride
        final int lowMemoryAtRow = strideLength + 1;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            if (rowIndex == lowMemoryAtRow) {
                // The stride length was set to 1000, so the second stride has just started
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new StringStruct(Integer.toString(rowIndex % strideLength)));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyString lazyValue = (OrcLazyString) row.getFieldValue(0);
            Text actual = (Text) lazyValue.materialize();
            assertEquals(Integer.toString(rowIndex % strideLength), actual.toString());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with an int column, where the writer is forced into low memory mode
     * just after the second index stride has started.
     * <p>
     * 1500 rows are written whose values cycle through 0..999; low memory mode is forced after
     * 1001 rows, i.e. once the full first stride and the first row of the second stride have been
     * written.  All rows are read back and checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testIntEnterLowMemoryModeAfterStrideStart() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        Writer orcWriter = new WriterImpl(fs, testFilePath, conf, inspector, 1000000, CompressionKind.NONE, 100,
                1000, forcedMemory);

        final int strideLength = 1000;
        final int totalRows = 1500;
        // The first stride (1000 rows) plus the first row of the second stride
        final int lowMemoryAtRow = strideLength + 1;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            if (rowIndex == lowMemoryAtRow) {
                // The stride length was set to 1000, so the second stride has just started
                forcedMemory.forceEnterLowMemoryMode();
            }
            orcWriter.addRow(new IntStruct(rowIndex % strideLength));
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int rowIndex = 0; rowIndex < totalRows; rowIndex++) {
            lazyRow = (OrcLazyStruct) rows.next(lazyRow);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            OrcLazyInt lazyValue = (OrcLazyInt) row.getFieldValue(0);
            IntWritable actual = (IntWritable) lazyValue.materialize();
            assertEquals(rowIndex % strideLength, actual.get());
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with a string column which, per the original author's description,
     * doesn't do dictionary encoding, followed by a stripe in which the writer re-evaluates
     * whether it should do dictionary encoding.  While that second stripe is being written, the
     * writer is forced into low memory mode.
     * <p>
     * Two stripes of 1000 rows each are written, both holding "0".."999" in order; low memory
     * mode is forced after the first 500 rows of the second stripe.  All 2000 rows are read back
     * and checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testStringEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(StringStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        // Re-evaluate on every stripe whether dictionary encoding should be used
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        WriterImplWithForceFlush orcWriter = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector,
                1000000, CompressionKind.NONE, 100, 10000, forcedMemory);

        final int stripeCount = 2;
        final int rowsPerStripe = 1000;
        final int lowMemoryAtRow = 500;
        for (int stripe = 0; stripe < stripeCount; stripe++) {
            for (int value = 0; value < rowsPerStripe; value++) {
                if (stripe == 1 && value == lowMemoryAtRow) {
                    // Force the writer to enter low memory mode half way through the second stripe
                    forcedMemory.forceEnterLowMemoryMode();
                }
                orcWriter.addRow(new StringStruct(Integer.toString(value)));
            }
            if (stripe == 0) {
                // Flush the first stripe; the second one is flushed by close()
                orcWriter.forceFlushStripe();
            }
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int stripe = 0; stripe < stripeCount; stripe++) {
            for (int expected = 0; expected < rowsPerStripe; expected++) {
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                OrcLazyString lazyValue = (OrcLazyString) row.getFieldValue(0);
                Text actual = (Text) lazyValue.materialize();
                assertEquals(Integer.toString(expected), actual.toString());
            }
        }
        rows.close();
    }

    /**
     * Tests writing a stripe with an int column which, per the original author's description,
     * doesn't do dictionary encoding, followed by a stripe in which the writer re-evaluates
     * whether it should do dictionary encoding.  While that second stripe is being written, the
     * writer is forced into low memory mode.
     * <p>
     * Two stripes of 1000 rows each are written, both holding 0..999 in order; low memory mode is
     * forced after the first 500 rows of the second stripe.  All 2000 rows are read back and
     * checked in order.
     *
     * @throws Exception if writing or reading the test file fails
     */
    @Test
    public void testIntegerEnterLowMemoryModeAndOnNotCarriedOverStripe() throws Exception {
        ObjectInspector inspector;
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        // Re-evaluate on every stripe whether dictionary encoding should be used
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
        MemoryManagerWithForce forcedMemory = new MemoryManagerWithForce(conf);
        ReaderWriterProfiler.setProfilerOptions(conf);
        WriterImplWithForceFlush orcWriter = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector,
                1000000, CompressionKind.NONE, 100, 10000, forcedMemory);

        final int stripeCount = 2;
        final int rowsPerStripe = 1000;
        final int lowMemoryAtRow = 500;
        for (int stripe = 0; stripe < stripeCount; stripe++) {
            for (int value = 0; value < rowsPerStripe; value++) {
                if (stripe == 1 && value == lowMemoryAtRow) {
                    // Force the writer to enter low memory mode half way through the second stripe
                    forcedMemory.forceEnterLowMemoryMode();
                }
                orcWriter.addRow(new IntStruct(value));
            }
            if (stripe == 0) {
                // Flush the first stripe; the second one is flushed by close()
                orcWriter.forceFlushStripe();
            }
        }
        orcWriter.close();

        Reader orcReader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = orcReader.rows(null);
        OrcLazyStruct lazyRow = null;
        for (int stripe = 0; stripe < stripeCount; stripe++) {
            for (int expected = 0; expected < rowsPerStripe; expected++) {
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                OrcLazyInt lazyValue = (OrcLazyInt) row.getFieldValue(0);
                IntWritable actual = (IntWritable) lazyValue.materialize();
                assertEquals(expected, actual.get());
            }
        }
        rows.close();
    }

    /**
     * Tests calling seekToRow to make sure it updates the stripe accordingly.
     *
     * <p>Two stripes of 100 rows each are written (values 0..99 and 100..199).  The test reads
     * row 0, seeks to row 98 and then reads sequentially through row 199, which crosses from the
     * first stripe into the second.
     *
     * @throws Exception if writing, reading or closing the test file fails
     */
    @Test
    public void testSeekToRow() throws Exception {
        ObjectInspector inspector;
        // NOTE(review): inspector creation is guarded by a class-level lock here; presumably the
        // reflection inspector factory is not safe for concurrent use - confirm.
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        ReaderWriterProfiler.setProfilerOptions(conf);
        WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector, 1000000,
                CompressionKind.NONE, 100, 10000, new MemoryManager(conf));

        // Write 100 rows (values 0..99)
        for (int i = 0; i < 100; i++) {
            writer.addRow(new IntStruct(i));
        }

        // Flush the first stripe
        writer.forceFlushStripe();

        // Write 100 more rows (values 100..199)
        for (int i = 0; i < 100; i++) {
            writer.addRow(new IntStruct(i + 100));
        }

        writer.close();

        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        RecordReader rows = reader.rows(null);
        try {
            // Read the very first row so the reader is positioned inside the first stripe
            OrcLazyStruct lazyRow = (OrcLazyStruct) rows.next(null);
            OrcStruct row = (OrcStruct) lazyRow.materialize();
            assertEquals(0, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());

            // Seek to row 98 which is almost at the end of a stripe, this way it stays in the current
            // stripe, and if the row is not updated correctly will read off the end of a stream.
            rows.seekToRow(98);
            for (int i = 98; i < 200; i++) {
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                row = (OrcStruct) lazyRow.materialize();
                assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            }
        } finally {
            // Close the reader even when an assertion above fails
            rows.close();
        }
    }

    /**
     * Tests that when a reader is initialized using offset, length the stripes included are
     * those that start in the range [offset, offset + length).
     *
     * <p>Two stripes of 100 rows each are written (values 0..99 and 100..199).  A record reader
     * is then opened over each stripe's byte range in turn, and each one must return exactly the
     * 100 rows of its own stripe and nothing more.
     *
     * @throws Exception if writing, reading or closing the test file fails
     */
    @Test
    public void testSplitStripe() throws Exception {
        ObjectInspector inspector;
        // NOTE(review): inspector creation is guarded by a class-level lock here; presumably the
        // reflection inspector factory is not safe for concurrent use - confirm.
        synchronized (TestOrcFile.class) {
            inspector = ObjectInspectorFactory.getReflectionObjectInspector(IntStruct.class,
                    ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
        }
        // Reevaluate if we should use dictionary encoding on every stripe
        OrcConf.setIntVar(conf, OrcConf.ConfVars.HIVE_ORC_DICTIONARY_ENCODING_INTERVAL, 1);
        ReaderWriterProfiler.setProfilerOptions(conf);
        WriterImplWithForceFlush writer = new WriterImplWithForceFlush(fs, testFilePath, conf, inspector, 1000000,
                CompressionKind.NONE, 100, 10000, new MemoryManager(conf));

        // Write 100 rows (values 0..99)
        for (int i = 0; i < 100; i++) {
            writer.addRow(new IntStruct(i));
        }

        // Flush the first stripe
        writer.forceFlushStripe();

        // Write 100 more rows (values 100..199)
        for (int i = 0; i < 100; i++) {
            writer.addRow(new IntStruct(i + 100));
        }

        writer.close();

        Reader reader = OrcFile.createReader(fs, testFilePath, conf);
        Iterator<StripeInformation> stripes = reader.getStripes().iterator();

        StripeInformation firstStripe = stripes.next();
        StripeInformation secondStripe = stripes.next();

        // The same lazy row object is handed to both record readers below
        OrcLazyStruct lazyRow = null;

        // Create a record reader that has the offset and length of the first stripe
        RecordReader rows = reader.rows(firstStripe.getOffset(), secondStripe.getOffset() - firstStripe.getOffset(),
                null);
        try {
            // Read what we wrote for the first stripe
            for (int i = 0; i < 100; i++) {
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                assertEquals(i, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            }

            // Make sure that there's no additional data
            assertFalse(rows.hasNext());
        } finally {
            // Close the reader even when an assertion above fails
            rows.close();
        }

        // Create a record reader that has the offset and length of the second stripe
        // Since this is the last stripe it has length equal to the length of the file containing
        // stripes - the offset of the second stripe
        rows = reader.rows(secondStripe.getOffset(), reader.getContentLength() - secondStripe.getOffset(), null);
        try {
            // Read what we wrote for the second stripe
            for (int i = 0; i < 100; i++) {
                lazyRow = (OrcLazyStruct) rows.next(lazyRow);
                OrcStruct row = (OrcStruct) lazyRow.materialize();
                assertEquals(i + 100, ((IntWritable) ((OrcLazyInt) row.getFieldValue(0)).materialize()).get());
            }

            // Make sure that there's no additional data
            assertFalse(rows.hasNext());
        } finally {
            // Close the reader even when an assertion above fails
            rows.close();
        }
    }
}