Java tutorial: transactional streaming ingest into Hive ACID tables, illustrated by the Apache Hive TestStreaming test class (package org.apache.hive.streaming), reproduced below.
/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hive.streaming; import static org.apache.hadoop.hive.metastore.api.hive_metastoreConstants.BUCKET_COUNT; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileFilter; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import java.net.URI; import java.net.URISyntaxException; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RawLocalFileSystem; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.common.JavaUtils; import org.apache.hadoop.hive.common.TableName; import org.apache.hadoop.hive.common.ValidTxnList; import org.apache.hadoop.hive.common.ValidWriteIdList; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.Validator; import org.apache.hadoop.hive.metastore.HiveMetaStoreClient; import org.apache.hadoop.hive.metastore.IMetaStoreClient; import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse; import org.apache.hadoop.hive.metastore.api.LockState; import org.apache.hadoop.hive.metastore.api.LockType; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.ShowLocksRequest; import org.apache.hadoop.hive.metastore.api.ShowLocksResponse; import org.apache.hadoop.hive.metastore.api.ShowLocksResponseElement; import org.apache.hadoop.hive.metastore.api.TableValidWriteIds; import org.apache.hadoop.hive.metastore.api.TxnAbortedException; import org.apache.hadoop.hive.metastore.api.TxnInfo; import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants; import org.apache.hadoop.hive.metastore.txn.AcidHouseKeeperService; import org.apache.hadoop.hive.metastore.txn.TxnCommonUtils; import org.apache.hadoop.hive.metastore.txn.TxnDbUtil; import org.apache.hadoop.hive.metastore.txn.TxnStore; import org.apache.hadoop.hive.metastore.txn.TxnUtils; import org.apache.hadoop.hive.ql.DriverFactory; import org.apache.hadoop.hive.ql.IDriver; import org.apache.hadoop.hive.ql.io.AcidUtils; import 
org.apache.hadoop.hive.ql.io.BucketCodec; import org.apache.hadoop.hive.ql.io.IOConstants; import org.apache.hadoop.hive.ql.io.orc.OrcFile; import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat; import org.apache.hadoop.hive.ql.io.orc.OrcStruct; import org.apache.hadoop.hive.ql.io.orc.Reader; import org.apache.hadoop.hive.ql.io.orc.RecordReader; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; import org.apache.hadoop.hive.ql.session.SessionState; import org.apache.hadoop.hive.ql.txn.compactor.Worker; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.Reporter; import org.apache.orc.impl.OrcAcidUtils; import org.apache.orc.tools.FileDump; import org.apache.thrift.TException; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class TestStreaming { private static final Logger LOG = LoggerFactory.getLogger(TestStreaming.class); public static class RawFileSystem extends RawLocalFileSystem { private static final URI NAME; static { try { NAME = new URI("raw:///"); } catch (URISyntaxException se) { throw new IllegalArgumentException("bad uri", se); } } @Override public URI getUri() { return NAME; } @Override public String getScheme() { return "raw"; } @Override public FileStatus getFileStatus(Path path) throws IOException { File file = pathToFile(path); if (!file.exists()) { throw new FileNotFoundException("Can't find " + path); } // get close enough short mod = 0; if (file.canRead()) { mod |= 0444; } if (file.canWrite()) { mod |= 0200; } if (file.canExecute()) { mod |= 0111; } return new FileStatus(file.length(), file.isDirectory(), 1, 1024, file.lastModified(), file.lastModified(), FsPermission.createImmutable(mod), "owen", "users", path); } } private static final String COL1 = "id"; private static final String COL2 = "msg"; private static HiveConf conf = null; private IDriver driver; private final IMetaStoreClient msClient; // partitioned table private final static String dbName = "testing"; private final static String tblName = "alerts"; private final static String[] fieldNames = new String[] { COL1, COL2 }; static List<String> partitionVals; private static Path partLoc; private static Path partLoc2; // unpartitioned table private final static String dbName2 = "testing2"; private final static String tblName2 = "alerts"; private final static String[] fieldNames2 = new String[] { COL1, COL2 }; // for bucket join testing private final static String dbName3 = "testing3"; private final static String tblName3 = "dimensionTable"; private final static String dbName4 = "testing4"; private final static String tblName4 = "factTable"; List<String> partitionVals2; private final String PART1_CONTINENT = "Asia"; private final String PART1_COUNTRY = 
"India"; @Rule public TemporaryFolder dbFolder = new TemporaryFolder(); public TestStreaming() throws Exception { partitionVals = new ArrayList<String>(2); partitionVals.add(PART1_CONTINENT); partitionVals.add(PART1_COUNTRY); partitionVals2 = new ArrayList<String>(1); partitionVals2.add(PART1_COUNTRY); conf = new HiveConf(this.getClass()); conf.set("fs.raw.impl", RawFileSystem.class.getName()); conf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); TxnDbUtil.setConfValues(conf); conf.setBoolVar(HiveConf.ConfVars.METASTORE_EXECUTE_SET_UGI, true); conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true); dbFolder.create(); //1) Start from a clean slate (metastore) TxnDbUtil.cleanDb(conf); TxnDbUtil.prepDb(conf); //2) obtain metastore clients msClient = new HiveMetaStoreClient(conf); } @Before public void setup() throws Exception { SessionState.start(new CliSessionState(conf)); driver = DriverFactory.newDriver(conf); driver.setMaxRows(200002);//make sure Driver returns all results // drop and recreate the necessary databases and tables dropDB(msClient, dbName); String[] colNames = new String[] { COL1, COL2 }; String[] colTypes = new String[] { serdeConstants.INT_TYPE_NAME, serdeConstants.STRING_TYPE_NAME }; String[] bucketCols = new String[] { COL1 }; String loc1 = dbFolder.newFolder(dbName + ".db").toString(); String[] partNames = new String[] { "Continent", "Country" }; partLoc = createDbAndTable(driver, dbName, tblName, partitionVals, colNames, colTypes, bucketCols, partNames, loc1, 1); dropDB(msClient, dbName2); String loc2 = dbFolder.newFolder(dbName2 + ".db").toString(); partLoc2 = createDbAndTable(driver, dbName2, tblName2, null, colNames, colTypes, bucketCols, null, loc2, 2); String loc3 = dbFolder.newFolder("testing5.db").toString(); createStoreSales("testing5", loc3); runDDL(driver, "drop table testBucketing3.streamedtable"); runDDL(driver, "drop table testBucketing3.finaltable"); runDDL(driver, "drop table testBucketing3.nobucket"); } @After public void cleanup() { msClient.close(); driver.close(); } private void createStoreSales(String dbName, String loc) throws Exception { String dbUri = "raw://" + new Path(loc).toUri().toString(); String tableLoc = dbUri + Path.SEPARATOR + "store_sales"; boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'"); Assert.assertTrue(success); success = runDDL(driver, "use " + dbName); Assert.assertTrue(success); success = runDDL(driver, "drop table if exists store_sales"); Assert.assertTrue(success); success = runDDL(driver, "create table store_sales\n" + "(\n" + " ss_sold_date_sk int,\n" + " ss_sold_time_sk int,\n" + " ss_item_sk int,\n" + " ss_customer_sk int,\n" + " ss_cdemo_sk int,\n" + " ss_hdemo_sk int,\n" + " ss_addr_sk int,\n" + " ss_store_sk int,\n" + " ss_promo_sk int,\n" + " ss_ticket_number int,\n" + " ss_quantity int,\n" + " ss_wholesale_cost decimal(7,2),\n" + " ss_list_price decimal(7,2),\n" + " ss_sales_price decimal(7,2),\n" + " ss_ext_discount_amt decimal(7,2),\n" + " ss_ext_sales_price decimal(7,2),\n" + " ss_ext_wholesale_cost decimal(7,2),\n" + " ss_ext_list_price decimal(7,2),\n" + " ss_ext_tax decimal(7,2),\n" + " ss_coupon_amt decimal(7,2),\n" + " ss_net_paid decimal(7,2),\n" + " ss_net_paid_inc_tax decimal(7,2),\n" + " ss_net_profit decimal(7,2)\n" + ")\n" + " partitioned by (dt string)\n" + "clustered by (ss_store_sk, ss_promo_sk)\n" + "INTO 4 BUCKETS stored as orc " + " 
location '" + tableLoc + "'" + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')"); Assert.assertTrue(success); success = runDDL(driver, "alter table store_sales add partition(dt='2015')"); Assert.assertTrue(success); } /** * make sure it works with table where bucket col is not 1st col * * @throws Exception */ @Test public void testBucketingWhereBucketColIsNotFirstCol() throws Exception { List<String> partitionVals = new ArrayList<String>(); partitionVals.add("2015"); StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("testing5") .withTable("store_sales").withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); StringBuilder row = new StringBuilder(); for (int i = 0; i < 10; i++) { for (int ints = 0; ints < 11; ints++) { row.append(ints).append(','); } for (int decs = 0; decs < 12; decs++) { row.append(i + 0.1).append(','); } row.setLength(row.length() - 1); connection.write(row.toString().getBytes()); } connection.commitTransaction(); connection.close(); ArrayList<String> res = queryTable(driver, "select row__id.bucketid, * from testing5.store_sales"); for (String re : res) { System.out.println(re); } } /** * Test that streaming can write to unbucketed table. */ @Test public void testNoBuckets() throws Exception { queryTable(driver, "drop table if exists default.streamingnobuckets"); queryTable(driver, "create table default.streamingnobuckets (a string, b string) stored as orc " + "TBLPROPERTIES('transactional'='true')"); queryTable(driver, "insert into default.streamingnobuckets values('foo','bar')"); List<String> rs = queryTable(driver, "select * from default.streamingnobuckets"); Assert.assertEquals(1, rs.size()); Assert.assertEquals("foo\tbar", rs.get(0)); StrictDelimitedInputWriter wr = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("Default") .withTable("streamingNoBuckets").withAgentInfo("UT_" + Thread.currentThread().getName()) .withTransactionBatchSize(2).withRecordWriter(wr).withHiveConf(conf).connect(); connection.beginTransaction(); connection.write("a1,b2".getBytes()); connection.write("a3,b4".getBytes()); TxnStore txnHandler = TxnUtils.getTxnStore(conf); ShowLocksResponse resp = txnHandler.showLocks(new ShowLocksRequest()); Assert.assertEquals(resp.getLocksSize(), 1); Assert.assertEquals("streamingnobuckets", resp.getLocks().get(0).getTablename()); Assert.assertEquals("default", resp.getLocks().get(0).getDbname()); connection.commitTransaction(); connection.beginTransaction(); connection.write("a5,b6".getBytes()); connection.write("a7,b8".getBytes()); connection.commitTransaction(); connection.close(); Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912)); rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID"); Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar")); Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/delta_0000001_0000001_0000/bucket_00000")); Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2")); Assert.assertTrue(rs.get(1), 
rs.get(1).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6")); Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\ta7\tb8")); Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a7'"); queryTable(driver, "delete from default.streamingnobuckets where a='a1'"); rs = queryTable(driver, "select a, b from default.streamingnobuckets order by a, b"); int row = 0; Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++)); Assert.assertEquals("at row=" + row, "a3\tb4", rs.get(row++)); Assert.assertEquals("at row=" + row, "a5\tb6", rs.get(row++)); Assert.assertEquals("at row=" + row, "foo\tbar", rs.get(row++)); queryTable(driver, "alter table default.streamingnobuckets compact 'major'"); runWorker(conf); rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID"); Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar")); Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000")); Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4")); Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000")); Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000")); Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\t0\t0")); Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/base_0000005_v0000025/bucket_00000")); } @Test public void testGetDeltaPath() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withRecordWriter(writer).withHiveConf(conf).connect(); Path path = connection.getDeltaFileLocation(partitionVals, 0, 5L, 5L, 9); Assert.assertTrue(path.toString().endsWith( "testing.db/alerts/continent" + "=Asia/country=India/delta_0000005_0000005_0009/bucket_00000")); } @Test public void testCommitWithKeyValue() throws Exception { queryTable(driver, "drop table if exists default.keyvalue"); queryTable(driver, "create table default.keyvalue (a string, b string) stored as orc " + "TBLPROPERTIES('transactional'='true')"); queryTable(driver, "insert into default.keyvalue values('foo','bar')"); queryTable(driver, "ALTER TABLE default.keyvalue SET TBLPROPERTIES('_metamykey' = 'myvalue')"); List<String> rs = queryTable(driver, "select * from default.keyvalue"); Assert.assertEquals(1, rs.size()); Assert.assertEquals("foo\tbar", rs.get(0)); StrictDelimitedInputWriter wr = 
StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("Default") .withTable("keyvalue").withAgentInfo("UT_" + Thread.currentThread().getName()) .withTransactionBatchSize(2).withRecordWriter(wr).withHiveConf(conf).connect(); connection.beginTransaction(); connection.write("a1,b2".getBytes()); connection.write("a3,b4".getBytes()); connection.commitTransaction(null, "_metamykey", "myvalue"); connection.close(); rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.keyvalue order by ROW__ID"); Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2")); Assert.assertTrue(rs.get(1), rs.get(1).endsWith("keyvalue/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("keyvalue/delta_0000002_0000003/bucket_00000")); rs = queryTable(driver, "SHOW TBLPROPERTIES default.keyvalue('_metamykey')"); Assert.assertEquals(rs.get(0), "_metamykey\tmyvalue", rs.get(0)); } @Test public void testConnectionWithWriteId() throws Exception { queryTable(driver, "drop table if exists default.writeidconnection"); queryTable(driver, "create table default.writeidconnection (a string, b string) stored as orc " + "TBLPROPERTIES('transactional'='true')"); queryTable(driver, "insert into default.writeidconnection values('a0','bar')"); List<String> rs = queryTable(driver, "select * from default.writeidconnection"); Assert.assertEquals(1, rs.size()); Assert.assertEquals("a0\tbar", rs.get(0)); StrictDelimitedInputWriter writerT = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection transactionConnection = HiveStreamingConnection.newBuilder().withDatabase("Default") .withTable("writeidconnection").withRecordWriter(writerT).withHiveConf(conf).connect(); transactionConnection.beginTransaction(); Table tObject = transactionConnection.getTable(); Long writeId = transactionConnection.getCurrentWriteId(); Assert.assertNotNull(tObject); Assert.assertNotNull(writeId); StrictDelimitedInputWriter writerOne = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connectionOne = HiveStreamingConnection.newBuilder().withDatabase("Default") .withTable("writeidconnection").withRecordWriter(writerOne).withHiveConf(conf).withWriteId(writeId) .withStatementId(1).withTableObject(tObject).connect(); StrictDelimitedInputWriter writerTwo = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connectionTwo = HiveStreamingConnection.newBuilder().withDatabase("Default") .withRecordWriter(writerTwo).withHiveConf(conf).withWriteId(writeId).withStatementId(2) .withTableObject(tObject).connect(); Assert.assertNotNull(connectionOne); Assert.assertNotNull(connectionTwo); connectionOne.beginTransaction(); connectionTwo.beginTransaction(); connectionOne.write("a1,b2".getBytes()); connectionTwo.write("a5,b6".getBytes()); connectionOne.write("a3,b4".getBytes()); connectionOne.commitTransaction(); connectionTwo.commitTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.PREPARED_FOR_COMMIT, connectionOne.getCurrentTransactionState()); Assert.assertEquals(HiveStreamingConnection.TxnState.PREPARED_FOR_COMMIT, connectionTwo.getCurrentTransactionState()); try { connectionOne.beginTransaction(); 
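/*
 * Tutorial note: testConnectionWithWriteId (the method this code belongs to) shows several connections sharing
 * one write id: a coordinating connection allocates the write id and table object, secondary connections attach
 * to it with withWriteId/withStatementId/withTableObject, and their rows become visible only when the
 * coordinator commits. A condensed sketch of that pattern, reusing this class's conf field; the variable names
 * are illustrative:
 *
 *   HiveStreamingConnection coordinator = HiveStreamingConnection.newBuilder()
 *       .withDatabase("default").withTable("writeidconnection")
 *       .withRecordWriter(StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build())
 *       .withHiveConf(conf).connect();
 *   coordinator.beginTransaction();
 *   Table tableObject = coordinator.getTable();
 *   Long writeId = coordinator.getCurrentWriteId();
 *
 *   HiveStreamingConnection delegate = HiveStreamingConnection.newBuilder()
 *       .withDatabase("default").withTable("writeidconnection")
 *       .withRecordWriter(StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build())
 *       .withHiveConf(conf)
 *       .withWriteId(writeId).withStatementId(1).withTableObject(tableObject)
 *       .connect();
 *   delegate.beginTransaction();
 *   delegate.write("a1,b2".getBytes());
 *   delegate.commitTransaction();      // leaves the delegate in PREPARED_FOR_COMMIT
 *   delegate.close();
 *   coordinator.commitTransaction();   // only now do the delegate's rows become readable
 */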
Assert.fail("second beginTransaction should have thrown a " + "StreamingException"); } catch (StreamingException e) { } connectionOne.close(); connectionTwo.close(); rs = queryTable(driver, "select ROW__ID, a, b, " + "INPUT__FILE__NAME from default.writeidconnection order by ROW__ID"); // Nothing here since it hasn't been committed Assert.assertEquals(1, rs.size()); transactionConnection.commitTransaction(); rs = queryTable(driver, "select ROW__ID, a, b, " + "INPUT__FILE__NAME from default.writeidconnection order by a"); Assert.assertEquals(4, rs.size()); Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\ta0\tbar")); Assert.assertTrue(rs.get(0), rs.get(0).endsWith("bucket_00000")); Assert.assertTrue(rs.get(1), rs.get(1).contains("\"rowid\":0}\ta1\tb2")); Assert.assertTrue(rs.get(1), rs.get(1).endsWith("bucket_00000")); Assert.assertTrue(rs.get(2), rs.get(2).contains("\"rowid\":1}\ta3\tb4")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("bucket_00000")); Assert.assertTrue(rs.get(3), rs.get(3).contains("\ta5\tb6")); Assert.assertTrue(rs.get(3), rs.get(3).endsWith("bucket_00000")); } @Test public void testAllTypesDelimitedWriter() throws Exception { queryTable(driver, "drop table if exists default.alltypes"); queryTable(driver, "create table if not exists default.alltypes ( bo boolean, ti tinyint, si smallint, i int, bi bigint, " + "f float, d double, de decimal(10,3), ts timestamp, da date, s string, c char(5), vc varchar(5), " + "m map<string, string>, l array<int>, st struct<c1:int, c2:string> ) " + "stored as orc TBLPROPERTIES('transactional'='true')"); StrictDelimitedInputWriter wr = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter('|') .withCollectionDelimiter(',').withMapKeyDelimiter(':').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("default") .withTable("alltypes").withAgentInfo("UT_" + Thread.currentThread().getName()) .withTransactionBatchSize(2).withRecordWriter(wr).withHiveConf(conf).connect(); String row1 = "true|10|100|1000|10000|4.0|20.0|4.2222|1969-12-31 " + "15:59:58.174|1970-01-01|string|hello|hello|k1:v1|100,200|10,foo"; String row2 = "false|20|200|2000|20000|8.0|40.0|2.2222|1970-12-31 15:59:58.174|1971-01-01|abcd|world|world|" + "k4:v4|200,300|20,bar"; connection.beginTransaction(); connection.write(row1.getBytes()); connection.write(row2.getBytes()); connection.commitTransaction(); connection.close(); List<String> rs = queryTable(driver, "select ROW__ID, bo, ti, si, i, bi, f, d, de, ts, da, s, c, vc, m, l, st," + " INPUT__FILE__NAME from default.alltypes order by ROW__ID"); Assert.assertEquals(2, rs.size()); String gotRow1 = rs.get(0); String expectedPrefixRow1 = "{\"writeid\":1,\"bucketid\":536870912," + "\"rowid\":0}\ttrue\t10\t100\t1000\t10000\t4.0\t20.0\t4.222\t1969-12-31 15:59:58.174\t1970-01-01\tstring" + "\thello\thello\t{\"k1\":\"v1\"}\t[100,200]\t{\"c1\":10,\"c2\":\"foo\"}"; String expectedSuffixRow1 = "alltypes/delta_0000001_0000002/bucket_00000"; String gotRow2 = rs.get(1); String expectedPrefixRow2 = "{\"writeid\":1,\"bucketid\":536870912," + "\"rowid\":1}\tfalse\t20\t200\t2000\t20000\t8.0\t40.0\t2.222\t1970-12-31 15:59:58.174\t1971-01-01\tabcd" + "\tworld\tworld\t{\"k4\":\"v4\"}\t[200,300]\t{\"c1\":20,\"c2\":\"bar\"}"; String expectedSuffixRow2 = "alltypes/delta_0000001_0000002/bucket_00000"; Assert.assertTrue(gotRow1, gotRow1.startsWith(expectedPrefixRow1)); Assert.assertTrue(gotRow1, gotRow1.endsWith(expectedSuffixRow1)); 
Assert.assertTrue(gotRow2, gotRow2.startsWith(expectedPrefixRow2)); Assert.assertTrue(gotRow2, gotRow2.endsWith(expectedSuffixRow2)); } @Test public void testAllTypesDelimitedWriterInputStream() throws Exception { queryTable(driver, "drop table if exists default.alltypes"); queryTable(driver, "create table if not exists default.alltypes ( bo boolean, ti tinyint, si smallint, i int, bi bigint, " + "f float, d double, de decimal(10,3), ts timestamp, da date, s string, c char(5), vc varchar(5), " + "m map<string, string>, l array<int>, st struct<c1:int, c2:string> ) " + "stored as orc TBLPROPERTIES('transactional'='true')"); StrictDelimitedInputWriter wr = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter('|') .withCollectionDelimiter(',').withMapKeyDelimiter(':').withLineDelimiterPattern("\n").build(); StreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("default") .withTable("alltypes").withAgentInfo("UT_" + Thread.currentThread().getName()) .withTransactionBatchSize(2).withRecordWriter(wr).withHiveConf(conf).connect(); String row1 = "true|10|100|1000|10000|4.0|20.0|4.2222|1969-12-31 " + "15:59:58.174|1970-01-01|string|hello|hello|k1:v1|100,200|10,foo"; String row2 = "false|20|200|2000|20000|8.0|40.0|2.2222|1970-12-31 15:59:58.174|1971-01-01|abcd|world|world|" + "k4:v4|200,300|20,bar"; String allRows = row1 + "\n" + row2 + "\n"; ByteArrayInputStream bais = new ByteArrayInputStream(allRows.getBytes()); connection.beginTransaction(); connection.write(bais); connection.commitTransaction(); connection.close(); bais.close(); List<String> rs = queryTable(driver, "select ROW__ID, bo, ti, si, i, bi, f, d, de, ts, da, s, c, vc, m, l, st," + " INPUT__FILE__NAME from default.alltypes order by ROW__ID"); Assert.assertEquals(2, rs.size()); String gotRow1 = rs.get(0); String expectedPrefixRow1 = "{\"writeid\":1,\"bucketid\":536870912," + "\"rowid\":0}\ttrue\t10\t100\t1000\t10000\t4.0\t20.0\t4.222\t1969-12-31 15:59:58.174\t1970-01-01\tstring" + "\thello\thello\t{\"k1\":\"v1\"}\t[100,200]\t{\"c1\":10,\"c2\":\"foo\"}"; String expectedSuffixRow1 = "alltypes/delta_0000001_0000002/bucket_00000"; String gotRow2 = rs.get(1); String expectedPrefixRow2 = "{\"writeid\":1,\"bucketid\":536870912," + "\"rowid\":1}\tfalse\t20\t200\t2000\t20000\t8.0\t40.0\t2.222\t1970-12-31 15:59:58.174\t1971-01-01\tabcd" + "\tworld\tworld\t{\"k4\":\"v4\"}\t[200,300]\t{\"c1\":20,\"c2\":\"bar\"}"; String expectedSuffixRow2 = "alltypes/delta_0000001_0000002/bucket_00000"; Assert.assertTrue(gotRow1, gotRow1.startsWith(expectedPrefixRow1)); Assert.assertTrue(gotRow1, gotRow1.endsWith(expectedSuffixRow1)); Assert.assertTrue(gotRow2, gotRow2.startsWith(expectedPrefixRow2)); Assert.assertTrue(gotRow2, gotRow2.endsWith(expectedSuffixRow2)); } @Test public void testAutoRollTransactionBatch() throws Exception { queryTable(driver, "drop table if exists default.streamingnobuckets"); queryTable(driver, "create table default.streamingnobuckets (a string, b string) stored as orc " + "TBLPROPERTIES('transactional'='true')"); queryTable(driver, "insert into default.streamingnobuckets values('foo','bar')"); List<String> rs = queryTable(driver, "select * from default.streamingnobuckets"); Assert.assertEquals(1, rs.size()); Assert.assertEquals("foo\tbar", rs.get(0)); StrictDelimitedInputWriter wr = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("default") .withTable("streamingnobuckets").withAgentInfo("UT_" 
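/*
 * Tutorial note: besides byte[] records, a connection can consume an entire InputStream, splitting it into
 * records with the writer's line-delimiter pattern (a regex), as testAllTypesDelimitedWriterInputStream above
 * demonstrates. Minimal sketch with an in-memory stream; "mydb"/"mytable" are hypothetical and conf is assumed
 * to be a configured HiveConf:
 *
 *   StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
 *       .withFieldDelimiter(',')
 *       .withLineDelimiterPattern("\n")          // one record per line
 *       .build();
 *   StreamingConnection conn = HiveStreamingConnection.newBuilder()
 *       .withDatabase("mydb").withTable("mytable")
 *       .withRecordWriter(writer).withHiveConf(conf).connect();
 *   ByteArrayInputStream in = new ByteArrayInputStream("1,foo\n2,bar\n".getBytes());
 *   conn.beginTransaction();
 *   conn.write(in);                              // the writer splits the stream into two records
 *   conn.commitTransaction();
 *   conn.close();
 *   in.close();
 */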
+ Thread.currentThread().getName()) .withRecordWriter(wr).withHiveConf(conf).withTransactionBatchSize(2).connect(); connection.beginTransaction(); connection.write("a1,b2".getBytes()); connection.write("a3,b4".getBytes()); connection.commitTransaction(); connection.beginTransaction(); connection.write("a5,b6".getBytes()); connection.write("a7,b8".getBytes()); connection.commitTransaction(); // should have rolled over to next transaction batch connection.beginTransaction(); connection.write("a9,b10".getBytes()); connection.write("a11,b12".getBytes()); connection.commitTransaction(); connection.beginTransaction(); connection.write("a13,b14".getBytes()); connection.write("a15,b16".getBytes()); connection.commitTransaction(); connection.close(); Assert.assertEquals("", 0, BucketCodec.determineVersion(536870912).decodeWriterId(536870912)); rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID"); Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar")); Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/delta_0000001_0000001_0000/bucket_00000")); Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":0}\ta1\tb2")); Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6")); Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":1}\ta7\tb8")); Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/delta_0000002_0000003/bucket_00000")); Assert.assertTrue(rs.get(5), rs.get(5).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":0}\ta9\tb10")); Assert.assertTrue(rs.get(5), rs.get(5).endsWith("streamingnobuckets/delta_0000004_0000005/bucket_00000")); Assert.assertTrue(rs.get(6), rs.get(6).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\ta11\tb12")); Assert.assertTrue(rs.get(6), rs.get(6).endsWith("streamingnobuckets/delta_0000004_0000005/bucket_00000")); Assert.assertTrue(rs.get(7), rs.get(7).startsWith("{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\ta13\tb14")); Assert.assertTrue(rs.get(7), rs.get(7).endsWith("streamingnobuckets/delta_0000004_0000005/bucket_00000")); Assert.assertTrue(rs.get(8), rs.get(8).startsWith("{\"writeid\":5,\"bucketid\":536870912,\"rowid\":1}\ta15\tb16")); Assert.assertTrue(rs.get(8), rs.get(8).endsWith("streamingnobuckets/delta_0000004_0000005/bucket_00000")); queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a7'"); queryTable(driver, "delete from default.streamingnobuckets where a='a1'"); queryTable(driver, "update default.streamingnobuckets set a=0, b=0 where a='a15'"); queryTable(driver, "delete from default.streamingnobuckets where a='a9'"); rs = queryTable(driver, "select a, b from default.streamingnobuckets order by a, b"); int row = 0; Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++)); Assert.assertEquals("at row=" + row, "0\t0", rs.get(row++)); Assert.assertEquals("at row=" + row, "a11\tb12", 
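/*
 * Tutorial note: withTransactionBatchSize(n) pre-allocates n transactions per batch, and the connection rolls
 * over to a new batch transparently once they are used up; the delta directory names asserted in this test
 * (delta_0000002_0000003, then delta_0000004_0000005) are the visible effect of a batch size of 2. Sketch of
 * the client-side pattern, with illustrative data:
 *
 *   HiveStreamingConnection conn = HiveStreamingConnection.newBuilder()
 *       .withDatabase("default").withTable("streamingnobuckets")
 *       .withRecordWriter(StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build())
 *       .withHiveConf(conf)
 *       .withTransactionBatchSize(2)             // two transactions per batch
 *       .connect();
 *   for (int i = 0; i < 4; i++) {                // four commits => two batches, rolled over automatically
 *     conn.beginTransaction();
 *     conn.write(("row" + i + ",value" + i).getBytes());
 *     conn.commitTransaction();
 *   }
 *   conn.close();
 */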
rs.get(row++)); Assert.assertEquals("at row=" + row, "a13\tb14", rs.get(row++)); Assert.assertEquals("at row=" + row, "a3\tb4", rs.get(row++)); Assert.assertEquals("at row=" + row, "a5\tb6", rs.get(row++)); Assert.assertEquals("at row=" + row, "foo\tbar", rs.get(row++)); queryTable(driver, "alter table default.streamingnobuckets compact 'major'"); runWorker(conf); rs = queryTable(driver, "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID"); Assert.assertTrue(rs.get(0), rs.get(0).startsWith("{\"writeid\":1,\"bucketid\":536870912,\"rowid\":0}\tfoo\tbar")); Assert.assertTrue(rs.get(0), rs.get(0).endsWith("streamingnobuckets/base_0000009_v0000029/bucket_00000")); Assert.assertTrue(rs.get(1), rs.get(1).startsWith("{\"writeid\":2,\"bucketid\":536870912,\"rowid\":1}\ta3\tb4")); Assert.assertTrue(rs.get(1), rs.get(1).endsWith("streamingnobuckets/base_0000009_v0000029/bucket_00000")); Assert.assertTrue(rs.get(2), rs.get(2).startsWith("{\"writeid\":3,\"bucketid\":536870912,\"rowid\":0}\ta5\tb6")); Assert.assertTrue(rs.get(2), rs.get(2).endsWith("streamingnobuckets/base_0000009_v0000029/bucket_00000")); Assert.assertTrue(rs.get(3), rs.get(3).startsWith("{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\ta11\tb12")); Assert.assertTrue(rs.get(3), rs.get(3).endsWith("streamingnobuckets/base_0000009_v0000029/bucket_00000")); Assert.assertTrue(rs.get(4), rs.get(4).startsWith("{\"writeid\":5,\"bucketid\":536870912,\"rowid\":0}\ta13\tb14")); Assert.assertTrue(rs.get(4), rs.get(4).endsWith("streamingnobuckets/base_0000009_v0000029/bucket_00000")); Assert.assertTrue(rs.get(5), rs.get(5).startsWith("{\"writeid\":6,\"bucketid\":536870912,\"rowid\":0}\t0\t0")); Assert.assertTrue(rs.get(5), rs.get(5).endsWith("streamingnobuckets/base_0000009_v0000029/bucket_00000")); } /** * this is a clone from TestHiveStreamingConnection.TxnStatement2.... 
*/ public static void runWorker(HiveConf hiveConf) throws Exception { AtomicBoolean stop = new AtomicBoolean(true); Worker t = new Worker(); t.setThreadId((int) t.getId()); t.setConf(hiveConf); AtomicBoolean looped = new AtomicBoolean(); t.init(stop, looped); t.run(); } // stream data into streaming table with N buckets, then copy the data into another bucketed table // check if bucketing in both was done in the same way @Test public void testStreamBucketingMatchesRegularBucketing() throws Exception { int bucketCount = 100; String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString(); String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'"; String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'"; String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'"; // disabling vectorization as this test yields incorrect results with vectorization conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false); try (IDriver driver = DriverFactory.newDriver(conf)) { runDDL(driver, "create database testBucketing3"); runDDL(driver, "use testBucketing3"); runDDL(driver, "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into " + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='true')"); // In 'nobucket' table we capture bucketid from streamedtable to workaround a hive bug that prevents joins two identically bucketed tables runDDL(driver, "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3); runDDL(driver, "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into " + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='true')"); String[] records = new String[] { "PSFAHYLZVC,29,EPNMA", "PPPRKWAYAU,96,VUTEE", "MIAOFERCHI,3,WBDSI", "CEGQAZOWVN,0,WCUZL", "XWAKMNSVQF,28,YJVHU", "XBWTSAJWME,2,KDQFO", "FUVLQTAXAY,5,LDSDG", "QTQMDJMGJH,6,QBOMA", "EFLOTLWJWN,71,GHWPS", "PEQNAOJHCM,82,CAAFI", "MOEKQLGZCP,41,RUACR", "QZXMCOPTID,37,LFLWE", "EYALVWICRD,13,JEZLC", "VYWLZAYTXX,16,DMVZX", "OSALYSQIXR,47,HNZVE", "JGKVHKCEGQ,25,KSCJB", "WQFMMYDHET,12,DTRWA", "AJOVAYZKZQ,15,YBKFO", "YAQONWCUAU,31,QJNHZ", "DJBXUEUOEB,35,IYCBL" }; StrictDelimitedInputWriter wr = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("testBucketing3") .withTable("streamedtable").withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(wr).withHiveConf(conf).connect(); connection.beginTransaction(); for (String record : records) { connection.write(record.getBytes()); } connection.commitTransaction(); connection.close(); ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2"); for (String re : res1) { LOG.error(re); } driver.run("insert into nobucket select row__id.bucketid,* from streamedtable"); runDDL(driver, "insert into finaltable select * from nobucket"); ArrayList<String> res2 = queryTable(driver, "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid"); for (String s : res2) { LOG.error(s); } Assert.assertTrue(res2.isEmpty()); } finally { conf.unset(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname); } } @Test public void testTableValidation() throws Exception { int bucketCount = 100; String dbUri = "raw://" + new 
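/*
 * Tutorial note: several tests in this class schedule a major compaction through SQL and then drive the
 * compactor synchronously with the runWorker(...) helper defined just above, so the streamed delta_* files are
 * rewritten into a base_* directory before the follow-up assertions. Condensed sketch, assuming this class's
 * driver/conf fields and its queryTable helper:
 *
 *   queryTable(driver, "alter table default.streamingnobuckets compact 'major'");
 *   runWorker(conf);   // runs one compactor Worker pass inline instead of waiting for a background thread
 *   List<String> rows = queryTable(driver,
 *       "select ROW__ID, a, b, INPUT__FILE__NAME from default.streamingnobuckets order by ROW__ID");
 *   // after compaction, INPUT__FILE__NAME points into a base_* directory rather than delta_* directories
 */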
Path(dbFolder.newFolder().toString()).toUri().toString(); String tbl1 = "validation1"; String tbl2 = "validation2"; String tableLoc = "'" + dbUri + Path.SEPARATOR + tbl1 + "'"; String tableLoc2 = "'" + dbUri + Path.SEPARATOR + tbl2 + "'"; runDDL(driver, "create database testBucketing3"); runDDL(driver, "use testBucketing3"); runDDL(driver, "create table " + tbl1 + " ( key1 string, data string ) clustered by ( key1 ) into " + bucketCount + " buckets stored as orc location " + tableLoc + " TBLPROPERTIES ('transactional'='false')"); runDDL(driver, "create table " + tbl2 + " ( key1 string, data string ) clustered by ( key1 ) into " + bucketCount + " buckets stored as orc location " + tableLoc2 + " TBLPROPERTIES ('transactional'='false')"); StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = null; try { connection = HiveStreamingConnection.newBuilder().withDatabase("testBucketing3") .withTable("validation2").withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withHiveConf(conf).connect(); Assert.assertTrue("InvalidTable exception was not thrown", false); } catch (InvalidTable e) { // expecting this exception } finally { if (connection != null) { connection.close(); } } try { connection = HiveStreamingConnection.newBuilder().withDatabase("testBucketing3") .withTable("validation2").withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withHiveConf(conf).connect(); Assert.assertTrue("InvalidTable exception was not thrown", false); } catch (InvalidTable e) { // expecting this exception } finally { if (connection != null) { connection.close(); } } } /** * @deprecated use {@link #checkDataWritten2(Path, long, long, int, String, boolean, String...)} - * there is little value in using InputFormat directly */ @Deprecated private void checkDataWritten(Path partitionPath, long minTxn, long maxTxn, int buckets, int numExpectedFiles, String... 
records) throws Exception { ValidWriteIdList writeIds = getTransactionContext(conf); AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, writeIds); Assert.assertEquals(0, dir.getObsolete().size()); Assert.assertEquals(0, dir.getOriginalFiles().size()); List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories(); System.out.println("Files found: "); for (AcidUtils.ParsedDelta pd : current) { System.out.println(pd.getPath().toString()); } Assert.assertEquals(numExpectedFiles, current.size()); // find the absolute minimum transaction long min = Long.MAX_VALUE; long max = Long.MIN_VALUE; for (AcidUtils.ParsedDelta pd : current) { if (pd.getMaxWriteId() > max) { max = pd.getMaxWriteId(); } if (pd.getMinWriteId() < min) { min = pd.getMinWriteId(); } } Assert.assertEquals(minTxn, min); Assert.assertEquals(maxTxn, max); InputFormat inf = new OrcInputFormat(); JobConf job = new JobConf(); job.set("mapred.input.dir", partitionPath.toString()); job.set(BUCKET_COUNT, Integer.toString(buckets)); job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS, "id,msg"); job.set(IOConstants.SCHEMA_EVOLUTION_COLUMNS_TYPES, "bigint:string"); AcidUtils.setAcidOperationalProperties(job, true, null); job.setBoolean(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, true); job.set(ValidWriteIdList.VALID_WRITEIDS_KEY, writeIds.writeToString()); job.set(ValidTxnList.VALID_TXNS_KEY, conf.get(ValidTxnList.VALID_TXNS_KEY)); InputSplit[] splits = inf.getSplits(job, buckets); Assert.assertEquals(numExpectedFiles, splits.length); org.apache.hadoop.mapred.RecordReader<NullWritable, OrcStruct> rr = inf.getRecordReader(splits[0], job, Reporter.NULL); NullWritable key = rr.createKey(); OrcStruct value = rr.createValue(); for (String record : records) { Assert.assertEquals(true, rr.next(key, value)); Assert.assertEquals(record, value.toString()); } Assert.assertEquals(false, rr.next(key, value)); } /** * @param validationQuery query to read from table to compare data against {@code records} * @param records expected data. each row is CVS list of values */ private void checkDataWritten2(Path partitionPath, long minTxn, long maxTxn, int numExpectedFiles, String validationQuery, boolean vectorize, String... 
records) throws Exception { AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, getTransactionContext(conf)); Assert.assertEquals(0, dir.getObsolete().size()); Assert.assertEquals(0, dir.getOriginalFiles().size()); List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories(); System.out.println("Files found: "); for (AcidUtils.ParsedDelta pd : current) { System.out.println(pd.getPath().toString()); } Assert.assertEquals(numExpectedFiles, current.size()); // find the absolute minimum transaction long min = Long.MAX_VALUE; long max = Long.MIN_VALUE; for (AcidUtils.ParsedDelta pd : current) { if (pd.getMaxWriteId() > max) { max = pd.getMaxWriteId(); } if (pd.getMinWriteId() < min) { min = pd.getMinWriteId(); } } Assert.assertEquals(minTxn, min); Assert.assertEquals(maxTxn, max); boolean isVectorizationEnabled = conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED); if (vectorize) { conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); } String currStrategy = conf.getVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY); for (String strategy : ((Validator.StringSet) HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY.getValidator()) .getExpected()) { //run it with each split strategy - make sure there are differences conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, strategy.toUpperCase()); List<String> actualResult = queryTable(driver, validationQuery); for (int i = 0; i < actualResult.size(); i++) { Assert.assertEquals( "diff at [" + i + "]. actual=" + actualResult + " expected=" + Arrays.toString(records), records[i], actualResult.get(i)); } } conf.setVar(HiveConf.ConfVars.HIVE_ORC_SPLIT_STRATEGY, currStrategy); conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, isVectorizationEnabled); } private ValidWriteIdList getTransactionContext(Configuration conf) throws Exception { ValidTxnList validTxnList = msClient.getValidTxns(); conf.set(ValidTxnList.VALID_TXNS_KEY, validTxnList.writeToString()); List<TableValidWriteIds> v = msClient.getValidWriteIds( Collections.singletonList(TableName.getDbTable(dbName, tblName)), validTxnList.writeToString()); return TxnCommonUtils.createValidReaderWriteIdList(v.get(0)); } private void checkNothingWritten(Path partitionPath) throws Exception { AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, getTransactionContext(conf)); Assert.assertEquals(0, dir.getObsolete().size()); Assert.assertEquals(0, dir.getOriginalFiles().size()); List<AcidUtils.ParsedDelta> current = dir.getCurrentDirectories(); Assert.assertEquals(0, current.size()); } @Test public void testEndpointConnection() throws Exception { // For partitioned table, partitionVals are specified StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.close(); // For unpartitioned table, partitionVals are not specified connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.close(); // For unpartitioned table, partition values are specified try { connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) 
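/*
 * Tutorial note: the validation helpers above inspect the ACID directory layout directly, which requires a
 * transaction snapshot; getTransactionContext shows the recipe of fetching the ValidTxnList from the metastore
 * and converting the table's TableValidWriteIds into a ValidWriteIdList. Condensed sketch of the same recipe,
 * reusing this class's msClient, conf, dbName and tblName fields and a partitionPath of interest:
 *
 *   ValidTxnList txns = msClient.getValidTxns();
 *   conf.set(ValidTxnList.VALID_TXNS_KEY, txns.writeToString());
 *   List<TableValidWriteIds> tableWriteIds = msClient.getValidWriteIds(
 *       Collections.singletonList(TableName.getDbTable(dbName, tblName)), txns.writeToString());
 *   ValidWriteIdList writeIds = TxnCommonUtils.createValidReaderWriteIdList(tableWriteIds.get(0));
 *   AcidUtils.Directory dir = AcidUtils.getAcidState(partitionPath, conf, writeIds);
 *   // dir.getCurrentDirectories() now lists the delta directories visible to this snapshot
 */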
.withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer) .withHiveConf(conf).connect(); Assert.assertTrue("ConnectionError was not thrown", false); connection.close(); } catch (ConnectionError e) { // expecting this exception String errMsg = "specifies partitions for un-partitioned table"; Assert.assertTrue(e.toString().endsWith(errMsg)); } } @Test public void testAddPartition() throws Exception { List<String> newPartVals = new ArrayList<String>(2); newPartVals.add(PART1_CONTINENT); newPartVals.add("Nepal"); StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(newPartVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); // Create partition Assert.assertNotNull(connection); connection.beginTransaction(); connection.write("3,Hello streaming - once again".getBytes()); connection.commitTransaction(); // Ensure partition is present Partition p = msClient.getPartition(dbName, tblName, newPartVals); Assert.assertNotNull("Did not find added partition", p); } @Test public void testAddPartitionWithWriteId() throws Exception { List<String> newPartVals = new ArrayList<String>(2); newPartVals.add("WriteId_continent"); newPartVals.add("WriteId_country"); StrictDelimitedInputWriter writerT = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection transactionConnection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(newPartVals).withRecordWriter(writerT) .withHiveConf(conf).connect(); transactionConnection.beginTransaction(); Table tObject = transactionConnection.getTable(); Long writeId = transactionConnection.getCurrentWriteId(); Assert.assertNotNull(tObject); Assert.assertNotNull(writeId); StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(newPartVals).withRecordWriter(writer) .withHiveConf(conf).withWriteId(writeId).withStatementId(1).withTableObject(tObject).connect(); Assert.assertNotNull(connection); connection.beginTransaction(); connection.write("3,Hello streaming - once again".getBytes()); connection.commitTransaction(); Set<String> partitions = new HashSet<>(connection.getPartitions()); connection.close(); // Ensure partition is not present try { msClient.getPartition(dbName, tblName, newPartVals); Assert.fail("Partition shouldn't exist so a NoSuchObjectException should have been raised"); } catch (NoSuchObjectException e) { } transactionConnection.commitTransaction(partitions); // Ensure partition is present Partition p = msClient.getPartition(dbName, tblName, newPartVals); Assert.assertNotNull("Did not find added partition", p); } @Test public void testAddDynamicPartitionWithWriteId() throws Exception { queryTable(driver, "drop table if exists default.writeiddynamic"); queryTable(driver, "create table default.writeiddynamic (a" + " string, b string) partitioned by (c string, d string)" + " stored as orc TBLPROPERTIES('transactional'='true')"); StrictDelimitedInputWriter writerT = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection 
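/*
 * Tutorial note: partitions created by secondary (write-id) connections are only registered in the metastore
 * when the coordinating connection commits them, which is what testAddPartitionWithWriteId above checks with
 * msClient.getPartition(...). Condensed sketch; delegate and coordinator stand for the two connections built in
 * that test and are illustrative names:
 *
 *   Set<String> createdPartitions = new HashSet<>(delegate.getPartitions());
 *   delegate.close();
 *   // at this point msClient.getPartition(dbName, tblName, newPartVals) still throws NoSuchObjectException
 *   coordinator.commitTransaction(createdPartitions);
 *   // now the partition exists and msClient.getPartition(dbName, tblName, newPartVals) returns it
 */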
transactionConnection = HiveStreamingConnection.newBuilder().withDatabase("default") .withTable("writeiddynamic").withRecordWriter(writerT).withHiveConf(conf).connect(); transactionConnection.beginTransaction(); Table tObject = transactionConnection.getTable(); Long writeId = transactionConnection.getCurrentWriteId(); Assert.assertNotNull(tObject); Assert.assertNotNull(writeId); StrictDelimitedInputWriter writerOne = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connectionOne = HiveStreamingConnection.newBuilder().withDatabase("default") .withTable("writeiddynamic").withRecordWriter(writerOne).withHiveConf(conf).withWriteId(writeId) .withStatementId(1).withTableObject(tObject).connect(); StrictDelimitedInputWriter writerTwo = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connectionTwo = HiveStreamingConnection.newBuilder().withDatabase("default") .withTable("writeiddynamic").withRecordWriter(writerTwo).withHiveConf(conf).withWriteId(writeId) .withStatementId(1).withTableObject(tObject).connect(); Assert.assertNotNull(connectionOne); connectionTwo.beginTransaction(); connectionOne.beginTransaction(); connectionOne.write("1,2,3,4".getBytes()); connectionOne.write("1,2,5,6".getBytes()); connectionTwo.write("1,2,30,40".getBytes()); connectionOne.write("1,2,7,8".getBytes()); connectionTwo.write("1,2,50,60".getBytes()); connectionOne.write("1,2,9,10".getBytes()); connectionOne.commitTransaction(); connectionTwo.commitTransaction(); Set<String> partitionsOne = new HashSet<>(connectionOne.getPartitions()); Assert.assertEquals(4, partitionsOne.size()); Set<String> partitionsTwo = new HashSet<>(connectionTwo.getPartitions()); Assert.assertEquals(2, partitionsTwo.size()); connectionOne.close(); connectionTwo.close(); try { String partitionName = partitionsOne.iterator().next(); msClient.getPartition("default", "writeiddynamic", partitionName); Assert.fail("Partition shouldn't exist so a NoSuchObjectException should have been raised"); } catch (NoSuchObjectException e) { } partitionsOne.addAll(partitionsTwo); Set<String> allPartitions = partitionsOne; transactionConnection.commitTransaction(allPartitions); // Ensure partition is present for (String partition : allPartitions) { Partition p = msClient.getPartition("default", "writeiddynamic", partition); Assert.assertNotNull("Did not find added partition", p); } } @Test public void testTransactionBatchEmptyCommit() throws Exception { // 1) to partitioned table StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.commitTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); connection.close(); // 2) To unpartitioned table writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.commitTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, 
connection.getCurrentTransactionState()); connection.close(); } /** * check that transactions that have not heartbeated and timed out get properly aborted * * @throws Exception */ @Test public void testTimeOutReaper() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2) .withTable(tblName2).withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withHiveConf(conf).connect(); connection.beginTransaction(); conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS); //ensure txn times out conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 2, TimeUnit.MILLISECONDS); AcidHouseKeeperService houseKeeperService = new AcidHouseKeeperService(); houseKeeperService.setConf(conf); houseKeeperService.run(); try { //should fail because the TransactionBatch timed out connection.commitTransaction(); } catch (TransactionError e) { Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException); } connection.close(); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.commitTransaction(); connection.beginTransaction(); houseKeeperService.run(); try { //should fail because the TransactionBatch timed out connection.commitTransaction(); } catch (TransactionError e) { Assert.assertTrue("Expected aborted transaction", e.getCause() instanceof TxnAbortedException); } connection.close(); } @Test public void testHeartbeat() throws Exception { int transactionBatch = 20; conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 200, TimeUnit.MILLISECONDS); StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2) .withTable(tblName2).withAgentInfo("UT_" + Thread.currentThread().getName()) .withTransactionBatchSize(transactionBatch).withRecordWriter(writer).withHiveConf(conf).connect(); try { connection.beginTransaction(); ShowLocksRequest request = new ShowLocksRequest(); request.setDbname(dbName2); request.setTablename(tblName2); ShowLocksResponse response = msClient.showLocks(request); Assert.assertEquals("Wrong number of locks: " + response, 1, response.getLocks().size()); ShowLocksResponseElement lock = response.getLocks().get(0); long acquiredAt = lock.getAcquiredat(); long heartbeatAt = lock.getLastheartbeat(); response = msClient.showLocks(request); Assert.assertEquals("Wrong number of locks2: " + response, 1, response.getLocks().size()); lock = response.getLocks().get(0); Assert.assertEquals("Acquired timestamp didn't match", acquiredAt, lock.getAcquiredat()); Assert.assertTrue("Expected new heartbeat (" + lock.getLastheartbeat() + ") == old heartbeat(" + heartbeatAt + ")", lock.getLastheartbeat() == heartbeatAt); for (int i = 0; i < transactionBatch * 3; i++) { connection.beginTransaction(); if (i % 10 == 0) { connection.abortTransaction(); } else { connection.commitTransaction(); } Thread.sleep(10); } } finally { conf.unset(HiveConf.ConfVars.HIVE_TXN_TIMEOUT.varname); connection.close(); } } @Test public void testTransactionBatchEmptyAbort() throws Exception { // 1) to partitioned table StrictDelimitedInputWriter writer = 
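/*
 * Tutorial note: testTimeOutReaper above shrinks the transaction timeout and runs the metastore's
 * AcidHouseKeeperService by hand, so an idle streaming transaction is aborted and the next commit fails with a
 * TransactionError wrapping TxnAbortedException. Condensed sketch of that pattern, assuming an open streaming
 * connection and this class's conf field:
 *
 *   conf.setTimeVar(HiveConf.ConfVars.HIVE_TIMEDOUT_TXN_REAPER_START, 0, TimeUnit.SECONDS);
 *   conf.setTimeVar(HiveConf.ConfVars.HIVE_TXN_TIMEOUT, 2, TimeUnit.MILLISECONDS);
 *   AcidHouseKeeperService reaper = new AcidHouseKeeperService();
 *   reaper.setConf(conf);
 *   reaper.run();                        // aborts transactions that have missed their heartbeat
 *   try {
 *     connection.commitTransaction();    // the open transaction was reaped, so this must fail
 *   } catch (TransactionError e) {
 *     Assert.assertTrue(e.getCause() instanceof TxnAbortedException);
 *   }
 *
 * In normal operation the reaper runs inside the metastore on its own schedule; forcing it here just makes the
 * timeout deterministic for the test.
 */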
StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.abortTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.ABORTED, connection.getCurrentTransactionState()); connection.close(); // 2) to unpartitioned table writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.abortTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.ABORTED, connection.getCurrentTransactionState()); connection.close(); } @Test public void testTransactionBatchCommitDelimited() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer) .withTransactionBatchSize(10).connect(); // 1st Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write("1,Hello streaming".getBytes()); connection.commitTransaction(); checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}"); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); // 2nd Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write("2,Welcome to streaming".getBytes()); // data should not be visible checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}"); connection.commitTransaction(); checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); connection.close(); Assert.assertEquals(HiveStreamingConnection.TxnState.INACTIVE, connection.getCurrentTransactionState()); // To Unpartitioned table writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) .withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer) .connect(); // 1st Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write("1,Hello streaming".getBytes()); connection.commitTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); connection.close(); } @Test public void testTransactionBatchCommitRegex() throws Exception { String regex = "([^,]*),(.*)"; StrictRegexWriter writer = StrictRegexWriter.newBuilder().withRegex(regex).build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + 
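/*
 * Tutorial note: StrictRegexWriter, used by testTransactionBatchCommitRegex starting here, parses each record
 * with a regular expression whose capture groups map to the table columns in order. Minimal sketch against a
 * two-column (id, msg) table; "mydb"/"mytable" are hypothetical and conf is assumed to be a configured HiveConf:
 *
 *   StrictRegexWriter writer = StrictRegexWriter.newBuilder()
 *       .withRegex("([^,]*),(.*)")               // group 1 -> id, group 2 -> msg
 *       .build();
 *   HiveStreamingConnection conn = HiveStreamingConnection.newBuilder()
 *       .withDatabase("mydb").withTable("mytable")
 *       .withRecordWriter(writer).withHiveConf(conf).connect();
 *   conn.beginTransaction();
 *   conn.write("1,Hello streaming".getBytes());
 *   conn.commitTransaction();
 *   conn.close();
 */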
Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer) .withTransactionBatchSize(10).connect(); // 1st Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write("1,Hello streaming".getBytes()); connection.commitTransaction(); checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}"); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); // 2nd Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write("2,Welcome to streaming".getBytes()); // data should not be visible checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}"); connection.commitTransaction(); checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); connection.close(); Assert.assertEquals(HiveStreamingConnection.TxnState.INACTIVE, connection.getCurrentTransactionState()); // To Unpartitioned table regex = "([^:]*):(.*)"; writer = StrictRegexWriter.newBuilder().withRegex(regex).build(); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName2).withTable(tblName2) .withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer) .connect(); // 1st Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write("1:Hello streaming".getBytes()); connection.commitTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); connection.close(); } @Test public void testRegexInputStream() throws Exception { String regex = "([^,]*),(.*)"; StrictRegexWriter writer = StrictRegexWriter.newBuilder() // if unspecified, default one or [\r\n] will be used for line break .withRegex(regex).build(); StreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withHiveConf(conf).withRecordWriter(writer) .connect(); String rows = "1,foo\r2,bar\r3,baz"; ByteArrayInputStream bais = new ByteArrayInputStream(rows.getBytes()); connection.beginTransaction(); connection.write(bais); connection.commitTransaction(); bais.close(); connection.close(); List<String> rs = queryTable(driver, "select * from " + dbName + "." 
+ tblName); Assert.assertEquals(3, rs.size()); Assert.assertEquals("1\tfoo\tAsia\tIndia", rs.get(0)); Assert.assertEquals("2\tbar\tAsia\tIndia", rs.get(1)); Assert.assertEquals("3\tbaz\tAsia\tIndia", rs.get(2)); } @Test public void testTransactionBatchCommitJson() throws Exception { StrictJsonWriter writer = StrictJsonWriter.newBuilder().build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .withTransactionBatchSize(10).connect(); // 1st Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); String rec1 = "{\"id\" : 1, \"msg\": \"Hello streaming\"}"; connection.write(rec1.getBytes()); connection.commitTransaction(); checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}"); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); connection.close(); Assert.assertEquals(HiveStreamingConnection.TxnState.INACTIVE, connection.getCurrentTransactionState()); List<String> rs = queryTable(driver, "select * from " + dbName + "." + tblName); Assert.assertEquals(1, rs.size()); } @Test public void testJsonInputStream() throws Exception { StrictJsonWriter writer = StrictJsonWriter.newBuilder().withLineDelimiterPattern("\\|").build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); // 1st Txn connection.beginTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); String records = "{\"id\" : 1, \"msg\": \"Hello streaming\"}|{\"id\" : 2, \"msg\": \"Hello world\"}|{\"id\" : 3, " + "\"msg\": \"Hello world!!\"}"; ByteArrayInputStream bais = new ByteArrayInputStream(records.getBytes()); connection.write(bais); connection.commitTransaction(); bais.close(); connection.close(); List<String> rs = queryTable(driver, "select * from " + dbName + "." 
+ tblName); Assert.assertEquals(3, rs.size()); Assert.assertEquals("1\tHello streaming\tAsia\tIndia", rs.get(0)); Assert.assertEquals("2\tHello world\tAsia\tIndia", rs.get(1)); Assert.assertEquals("3\tHello world!!\tAsia\tIndia", rs.get(2)); } @Test public void testRemainingTransactions() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); // 1) test with txn.Commit() int batch = 0; int initialCount = connection.remainingTransactions(); while (connection.remainingTransactions() > 0) { connection.beginTransaction(); Assert.assertEquals(--initialCount, connection.remainingTransactions()); for (int rec = 0; rec < 2; ++rec) { Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write((batch * rec + ",Hello streaming").getBytes()); } connection.commitTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); ++batch; } Assert.assertEquals(0, connection.remainingTransactions()); connection.close(); Assert.assertEquals(HiveStreamingConnection.TxnState.INACTIVE, connection.getCurrentTransactionState()); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName) .withStaticPartitionValues(partitionVals).withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withHiveConf(conf).connect(); // 2) test with txn.Abort() connection.beginTransaction(); batch = 0; initialCount = connection.remainingTransactions(); while (connection.remainingTransactions() > 0) { connection.beginTransaction(); Assert.assertEquals(--initialCount, connection.remainingTransactions()); for (int rec = 0; rec < 2; ++rec) { Assert.assertEquals(HiveStreamingConnection.TxnState.OPEN, connection.getCurrentTransactionState()); connection.write((batch * rec + ",Hello streaming").getBytes()); } connection.abortTransaction(); Assert.assertEquals(HiveStreamingConnection.TxnState.ABORTED, connection.getCurrentTransactionState()); ++batch; } Assert.assertEquals(0, connection.remainingTransactions()); connection.close(); Assert.assertEquals(HiveStreamingConnection.TxnState.INACTIVE, connection.getCurrentTransactionState()); } @Test public void testTransactionBatchAbort() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.write("1,Hello streaming".getBytes()); connection.write("2,Welcome to streaming".getBytes()); connection.abortTransaction(); checkNothingWritten(partLoc); Assert.assertEquals(HiveStreamingConnection.TxnState.ABORTED, connection.getCurrentTransactionState()); connection.close(); checkNothingWritten(partLoc); } @Test public void testTransactionBatchAbortAndCommit() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); StrictDelimitedInputWriter writer = 
StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals).withAgentInfo(agentInfo) .withRecordWriter(writer).withHiveConf(conf).withTransactionBatchSize(10).connect(); connection.beginTransaction(); connection.write("1,Hello streaming".getBytes()); connection.write("2,Welcome to streaming".getBytes()); ShowLocksResponse resp = msClient.showLocks(new ShowLocksRequest()); Assert.assertEquals("LockCount", 1, resp.getLocksSize()); Assert.assertEquals("LockType", LockType.SHARED_READ, resp.getLocks().get(0).getType()); Assert.assertEquals("LockState", LockState.ACQUIRED, resp.getLocks().get(0).getState()); Assert.assertEquals("AgentInfo", agentInfo, resp.getLocks().get(0).getAgentInfo()); connection.abortTransaction(); checkNothingWritten(partLoc); Assert.assertEquals(HiveStreamingConnection.TxnState.ABORTED, connection.getCurrentTransactionState()); connection.beginTransaction(); connection.write("1,Hello streaming".getBytes()); connection.write("2,Welcome to streaming".getBytes()); connection.commitTransaction(); checkDataWritten(partLoc, 1, 10, 1, 1, "{1, Hello streaming}", "{2, Welcome to streaming}"); connection.close(); } @Test public void testMultipleTransactionBatchCommits() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer) .withTransactionBatchSize(10).withHiveConf(conf).connect(); connection.beginTransaction(); connection.write("1,Hello streaming".getBytes()); connection.commitTransaction(); String validationQuery = "select id, msg from " + dbName + "." 
+ tblName + " order by id, msg"; checkDataWritten2(partLoc, 1, 10, 1, validationQuery, false, "1\tHello streaming"); connection.beginTransaction(); connection.write("2,Welcome to streaming".getBytes()); connection.commitTransaction(); checkDataWritten2(partLoc, 1, 10, 1, validationQuery, true, "1\tHello streaming", "2\tWelcome to streaming"); connection.close(); connection = HiveStreamingConnection.newBuilder().withDatabase(dbName).withTable(tblName) .withStaticPartitionValues(partitionVals).withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withTransactionBatchSize(10).withHiveConf(conf).connect(); // 2nd Txn Batch connection.beginTransaction(); connection.write("3,Hello streaming - once again".getBytes()); connection.commitTransaction(); checkDataWritten2(partLoc, 1, 20, 2, validationQuery, false, "1\tHello streaming", "2\tWelcome to streaming", "3\tHello streaming - once again"); connection.beginTransaction(); connection.write("4,Welcome to streaming - once again".getBytes()); connection.commitTransaction(); checkDataWritten2(partLoc, 1, 20, 2, validationQuery, true, "1\tHello streaming", "2\tWelcome to streaming", "3\tHello streaming - once again", "4\tWelcome to streaming - once again"); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); connection.close(); } @Test public void testInterleavedTransactionBatchCommits() throws Exception { StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer).withHiveConf(conf) .withTransactionBatchSize(10).connect(); // Acquire 1st Txn Batch connection.beginTransaction(); // Acquire 2nd Txn Batch StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connection2 = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals) .withAgentInfo("UT_" + Thread.currentThread().getName()).withRecordWriter(writer2) .withHiveConf(conf).withTransactionBatchSize(10).connect(); connection2.beginTransaction(); // Interleaved writes to both batches connection.write("1,Hello streaming".getBytes()); connection2.write("3,Hello streaming - once again".getBytes()); checkNothingWritten(partLoc); connection2.commitTransaction(); String validationQuery = "select id, msg from " + dbName + "." + tblName + " order by id, msg"; checkDataWritten2(partLoc, 11, 20, 1, validationQuery, true, "3\tHello streaming - once again"); connection.commitTransaction(); /*now both batches have committed (but not closed) so we for each primary file we expect a side file to exist and indicate the true length of primary file*/ FileSystem fs = partLoc.getFileSystem(conf); AcidUtils.Directory dir = AcidUtils.getAcidState(partLoc, conf, getTransactionContext(conf)); for (AcidUtils.ParsedDelta pd : dir.getCurrentDirectories()) { for (FileStatus stat : fs.listStatus(pd.getPath(), AcidUtils.bucketFileFilter)) { Path lengthFile = OrcAcidUtils.getSideFile(stat.getPath()); Assert.assertTrue(lengthFile + " missing", fs.exists(lengthFile)); long lengthFileSize = fs.getFileStatus(lengthFile).getLen(); Assert.assertTrue("Expected " + lengthFile + " to be non empty. 
lengh=" + lengthFileSize, lengthFileSize > 0); long logicalLength = AcidUtils.getLogicalLength(fs, stat); long actualLength = stat.getLen(); Assert.assertTrue("", logicalLength == actualLength); } } checkDataWritten2(partLoc, 1, 20, 2, validationQuery, false, "1\tHello streaming", "3\tHello streaming - once again"); connection.beginTransaction(); connection.write("2,Welcome to streaming".getBytes()); connection2.beginTransaction(); connection2.write("4,Welcome to streaming - once again".getBytes()); //here each batch has written data and committed (to bucket0 since table only has 1 bucket) //so each of 2 deltas has 1 bucket0 and 1 bucket0_flush_length. Furthermore, each bucket0 //has now received more data(logically - it's buffered) but it is not yet committed. //lets check that side files exist, etc dir = AcidUtils.getAcidState(partLoc, conf, getTransactionContext(conf)); for (AcidUtils.ParsedDelta pd : dir.getCurrentDirectories()) { for (FileStatus stat : fs.listStatus(pd.getPath(), AcidUtils.bucketFileFilter)) { Path lengthFile = OrcAcidUtils.getSideFile(stat.getPath()); Assert.assertTrue(lengthFile + " missing", fs.exists(lengthFile)); long lengthFileSize = fs.getFileStatus(lengthFile).getLen(); Assert.assertTrue("Expected " + lengthFile + " to be non empty. lengh=" + lengthFileSize, lengthFileSize > 0); long logicalLength = AcidUtils.getLogicalLength(fs, stat); long actualLength = stat.getLen(); Assert.assertTrue("", logicalLength <= actualLength); } } checkDataWritten2(partLoc, 1, 20, 2, validationQuery, true, "1\tHello streaming", "3\tHello streaming - once again"); connection.commitTransaction(); checkDataWritten2(partLoc, 1, 20, 2, validationQuery, false, "1\tHello streaming", "2\tWelcome to streaming", "3\tHello streaming - once again"); connection2.commitTransaction(); checkDataWritten2(partLoc, 1, 20, 2, validationQuery, true, "1\tHello streaming", "2\tWelcome to streaming", "3\tHello streaming - once again", "4\tWelcome to streaming - once again"); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection.getCurrentTransactionState()); Assert.assertEquals(HiveStreamingConnection.TxnState.COMMITTED, connection2.getCurrentTransactionState()); connection.close(); connection2.close(); } private static class WriterThd extends Thread { private final StreamingConnection conn; private final String data; private Throwable error; WriterThd(String data) throws Exception { super("Writer_" + data); RecordWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName) .withTable(tblName).withStaticPartitionValues(partitionVals).withRecordWriter(writer) .withHiveConf(conf).connect(); this.conn = connection; this.data = data; setUncaughtExceptionHandler((thread, throwable) -> { error = throwable; LOG.error(connection.toTransactionString()); LOG.error("Thread " + thread.getName() + " died: " + throwable.getMessage(), throwable); }); } @Override public void run() { try { for (int i = 0; i < 10; i++) { conn.beginTransaction(); conn.write(data.getBytes()); conn.write(data.getBytes()); conn.commitTransaction(); } // while } catch (Exception e) { throw new RuntimeException(e); } finally { if (conn != null) { try { conn.close(); } catch (Exception e) { LOG.error("txnBatch.close() failed: " + e.getMessage(), e); } } } } } @Test public void testConcurrentTransactionBatchCommits() throws Exception { List<WriterThd> writers = new ArrayList<WriterThd>(3); 
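    // Note: each WriterThd above opens its own HiveStreamingConnection in its constructor and runs
    // its own beginTransaction()/write()/commitTransaction() loop; connections are not shared
    // between threads. The three writers added below therefore stream into the same partition
    // concurrently, each through an independent transaction batch.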
writers.add(new WriterThd("1,Matrix")); writers.add(new WriterThd("2,Gandhi")); writers.add(new WriterThd("3,Silence")); for (WriterThd w : writers) { w.start(); } for (WriterThd w : writers) { w.join(); } for (WriterThd w : writers) { if (w.error != null) { Assert.assertFalse("Writer thread" + w.getName() + " died: " + w.error.getMessage() + " See log file for stack trace", true); } } } private ArrayList<SampleRec> dumpBucket(Path orcFile) throws IOException { org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(new Configuration()); Reader reader = OrcFile.createReader(orcFile, OrcFile.readerOptions(conf).filesystem(fs)); RecordReader rows = reader.rows(); StructObjectInspector inspector = (StructObjectInspector) reader.getObjectInspector(); System.out.format("Found Bucket File : %s \n", orcFile.getName()); ArrayList<SampleRec> result = new ArrayList<SampleRec>(); while (rows.hasNext()) { Object row = rows.next(null); SampleRec rec = (SampleRec) deserializeDeltaFileRow(row, inspector)[5]; result.add(rec); } return result; } // Assumes stored data schema = [acid fields],string,int,string // return array of 6 fields, where the last field has the actual data private static Object[] deserializeDeltaFileRow(Object row, StructObjectInspector inspector) { List<? extends StructField> fields = inspector.getAllStructFieldRefs(); WritableIntObjectInspector f0ins = (WritableIntObjectInspector) fields.get(0).getFieldObjectInspector(); WritableLongObjectInspector f1ins = (WritableLongObjectInspector) fields.get(1).getFieldObjectInspector(); WritableIntObjectInspector f2ins = (WritableIntObjectInspector) fields.get(2).getFieldObjectInspector(); WritableLongObjectInspector f3ins = (WritableLongObjectInspector) fields.get(3).getFieldObjectInspector(); WritableLongObjectInspector f4ins = (WritableLongObjectInspector) fields.get(4).getFieldObjectInspector(); StructObjectInspector f5ins = (StructObjectInspector) fields.get(5).getFieldObjectInspector(); int f0 = f0ins.get(inspector.getStructFieldData(row, fields.get(0))); long f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1))); int f2 = f2ins.get(inspector.getStructFieldData(row, fields.get(2))); long f3 = f3ins.get(inspector.getStructFieldData(row, fields.get(3))); long f4 = f4ins.get(inspector.getStructFieldData(row, fields.get(4))); SampleRec f5 = deserializeInner(inspector.getStructFieldData(row, fields.get(5)), f5ins); return new Object[] { f0, f1, f2, f3, f4, f5 }; } // Assumes row schema => string,int,string private static SampleRec deserializeInner(Object row, StructObjectInspector inspector) { List<? 
extends StructField> fields = inspector.getAllStructFieldRefs(); WritableStringObjectInspector f0ins = (WritableStringObjectInspector) fields.get(0) .getFieldObjectInspector(); WritableIntObjectInspector f1ins = (WritableIntObjectInspector) fields.get(1).getFieldObjectInspector(); WritableStringObjectInspector f2ins = (WritableStringObjectInspector) fields.get(2) .getFieldObjectInspector(); String f0 = f0ins.getPrimitiveJavaObject(inspector.getStructFieldData(row, fields.get(0))); int f1 = f1ins.get(inspector.getStructFieldData(row, fields.get(1))); String f2 = f2ins.getPrimitiveJavaObject(inspector.getStructFieldData(row, fields.get(2))); return new SampleRec(f0, f1, f2); } @Test public void testBucketing() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); dropDB(msClient, dbName3); dropDB(msClient, dbName4); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\", "/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames, null, dbLocation, bucketCount); String dbLocation2 = dbFolder.newFolder(dbName4).getCanonicalPath() + ".db"; dbLocation2 = dbLocation2.replaceAll("\\\\", "/"); // for windows paths String[] colNames2 = "key3,key4,data2".split(","); String[] colTypes2 = "string,int,string".split(","); String[] bucketNames2 = "key3,key4".split(","); createDbAndTable(driver, dbName4, tblName4, null, colNames2, colTypes2, bucketNames2, null, dbLocation2, bucketCount); // 2) Insert data into both tables StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName3) .withTable(tblName3).withAgentInfo(agentInfo).withRecordWriter(writer).withHiveConf(conf).connect(); connection.beginTransaction(); connection.write("name0,1,Hello streaming".getBytes()); connection.write("name2,2,Welcome to streaming".getBytes()); connection.write("name4,2,more Streaming unlimited".getBytes()); connection.write("name5,2,even more Streaming unlimited".getBytes()); connection.commitTransaction(); connection.close(); StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connection2 = HiveStreamingConnection.newBuilder().withDatabase(dbName4) .withTable(tblName4).withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer2).withHiveConf(conf).connect(); connection2.beginTransaction(); connection2.write("name5,2,fact3".getBytes()); // bucket 0 connection2.write("name8,2,fact3".getBytes()); // bucket 1 connection2.write("name0,1,fact1".getBytes()); // bucket 2 connection2.commitTransaction(); connection2.close(); // 3 Check data distribution in buckets HashMap<Integer, ArrayList<SampleRec>> actual1 = dumpAllBuckets(dbLocation, tblName3); HashMap<Integer, ArrayList<SampleRec>> actual2 = dumpAllBuckets(dbLocation2, tblName4); System.err.println("\n Table 1"); System.err.println(actual1); System.err.println("\n Table 2"); System.err.println(actual2); // assert bucket listing is as expected Assert.assertEquals("number of buckets does not match expectation", actual1.values().size(), 3); Assert.assertTrue("bucket 0 shouldn't have been created", 
actual1.get(0) == null); Assert.assertEquals("records in bucket does not match expectation", actual1.get(1).size(), 1); Assert.assertEquals("records in bucket does not match expectation", actual1.get(2).size(), 2); Assert.assertEquals("records in bucket does not match expectation", actual1.get(3).size(), 1); } private void runCmdOnDriver(String cmd) { boolean t = runDDL(driver, cmd); Assert.assertTrue(cmd + " failed", t); } @Test public void testFileDump() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); dropDB(msClient, dbName3); dropDB(msClient, dbName4); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\", "/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames, null, dbLocation, bucketCount); String dbLocation2 = dbFolder.newFolder(dbName4).getCanonicalPath() + ".db"; dbLocation2 = dbLocation2.replaceAll("\\\\", "/"); // for windows paths String[] colNames2 = "key3,key4,data2".split(","); String[] colTypes2 = "string,int,string".split(","); String[] bucketNames2 = "key3,key4".split(","); createDbAndTable(driver, dbName4, tblName4, null, colNames2, colTypes2, bucketNames2, null, dbLocation2, bucketCount); StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName3) .withTable(tblName3).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer).connect(); // 2) Insert data into both tables connection.beginTransaction(); connection.write("name0,1,Hello streaming".getBytes()); connection.write("name2,2,Welcome to streaming".getBytes()); connection.write("name4,2,more Streaming unlimited".getBytes()); connection.write("name5,2,even more Streaming unlimited".getBytes()); connection.commitTransaction(); connection.close(); PrintStream origErr = System.err; ByteArrayOutputStream myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[] { dbLocation }); System.err.flush(); System.setErr(origErr); String errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); // since this test runs on local file system which does not have an API to tell if files or // open or not, we are testing for negative case even though the bucket files are still open // for writes (transaction batch not closed yet) Assert.assertEquals(false, errDump.contains("is still open for writes.")); StrictDelimitedInputWriter writer2 = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',') .build(); HiveStreamingConnection connection2 = HiveStreamingConnection.newBuilder().withDatabase(dbName4) .withTable(tblName4).withAgentInfo(agentInfo).withRecordWriter(writer2).withHiveConf(conf) .connect(); connection2.beginTransaction(); connection2.write("name5,2,fact3".getBytes()); // bucket 0 connection2.write("name8,2,fact3".getBytes()); // bucket 1 connection2.write("name0,1,fact1".getBytes()); // bucket 2 // no data for bucket 3 -- expect 0 length bucket file connection2.commitTransaction(); connection2.close(); origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stderr and run 
command System.setErr(new PrintStream(myErr)); FileDump.main(new String[] { dbLocation }); System.out.flush(); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); } @Test public void testFileDumpDeltaFilesWithStreamingOptimizations() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); dropDB(msClient, dbName3); dropDB(msClient, dbName4); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\", "/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames, null, dbLocation, bucketCount); // 2) Insert data into both tables StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName3) .withTable(tblName3).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer) .withStreamingOptimizations(true).connect(); connection.beginTransaction(); connection.write("name0,1,streaming".getBytes()); connection.write("name2,2,streaming".getBytes()); connection.write("name4,2,unlimited".getBytes()); connection.write("name5,2,unlimited".getBytes()); for (int i = 0; i < 6000; i++) { if (i % 2 == 0) { connection.write(("name" + i + "," + i + "," + "streaming").getBytes()); } else { connection.write(("name" + i + "," + i + "," + "unlimited").getBytes()); } } connection.commitTransaction(); connection.close(); connection.close(); PrintStream origOut = System.out; ByteArrayOutputStream myOut = new ByteArrayOutputStream(); // replace stderr and run command System.setOut(new PrintStream(myOut)); FileDump.main(new String[] { dbLocation }); System.out.flush(); System.setOut(origOut); String outDump = new String(myOut.toByteArray()); // make sure delta files are written with no indexes and no dictionary Assert.assertEquals(true, outDump.contains("Compression: ZLIB")); // no stats/indexes Assert.assertEquals(true, outDump.contains("Column 0: count: 0 hasNull: false")); Assert.assertEquals(true, outDump.contains("Column 1: count: 0 hasNull: false bytesOnDisk: 15 sum: 0")); Assert.assertEquals(true, outDump.contains("Column 2: count: 0 hasNull: false bytesOnDisk: 15 sum: 0")); Assert.assertEquals(true, outDump.contains("Column 3: count: 0 hasNull: false bytesOnDisk: 19 sum: 0")); Assert.assertEquals(true, outDump.contains("Column 4: count: 0 hasNull: false bytesOnDisk: 17 sum: 0")); Assert.assertEquals(true, outDump.contains("Column 5: count: 0 hasNull: false bytesOnDisk: 15 sum: 0")); Assert.assertEquals(true, outDump.contains("Column 6: count: 0 hasNull: false")); Assert.assertEquals(true, outDump.contains("Column 7: count: 0 hasNull: false bytesOnDisk: 3929")); Assert.assertEquals(true, outDump.contains("Column 8: count: 0 hasNull: false bytesOnDisk: 1484 sum: 0")); Assert.assertEquals(true, outDump.contains("Column 9: count: 0 hasNull: false bytesOnDisk: 816")); // no dictionary Assert.assertEquals(true, outDump.contains("Encoding column 7: DIRECT_V2")); Assert.assertEquals(true, 
outDump.contains("Encoding column 9: DIRECT_V2")); } @Test public void testFileDumpDeltaFilesWithoutStreamingOptimizations() throws Exception { String agentInfo = "UT_" + Thread.currentThread().getName(); dropDB(msClient, dbName3); dropDB(msClient, dbName4); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\", "/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames, null, dbLocation, bucketCount); // 2) Insert data into both tables StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName3) .withTable(tblName3).withAgentInfo(agentInfo).withHiveConf(conf).withRecordWriter(writer) .withStreamingOptimizations(false).connect(); connection.beginTransaction(); connection.write("name0,1,streaming".getBytes()); connection.write("name2,2,streaming".getBytes()); connection.write("name4,2,unlimited".getBytes()); connection.write("name5,2,unlimited".getBytes()); for (int i = 0; i < 6000; i++) { if (i % 2 == 0) { connection.write(("name" + i + "," + i + "," + "streaming").getBytes()); } else { connection.write(("name" + i + "," + i + "," + "unlimited").getBytes()); } } connection.commitTransaction(); connection.close(); PrintStream origOut = System.out; ByteArrayOutputStream myOut = new ByteArrayOutputStream(); // replace stderr and run command System.setOut(new PrintStream(myOut)); FileDump.main(new String[] { dbLocation }); System.out.flush(); System.setOut(origOut); String outDump = new String(myOut.toByteArray()); Assert.assertEquals(true, outDump.contains("Compression: ZLIB")); Assert.assertEquals(true, outDump.contains("Encoding column 9: DICTIONARY")); } @Test public void testFileDumpCorruptDataFiles() throws Exception { dropDB(msClient, dbName3); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\", "/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames, null, dbLocation, bucketCount); // 2) Insert data into both tables StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName3) .withTable(tblName3).withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withHiveConf(conf).withTransactionBatchSize(10).connect(); // we need side file for this test, so we create 2 txn batch and test with only one connection.beginTransaction(); connection.write("name0,1,Hello streaming".getBytes()); connection.write("name2,2,Welcome to streaming".getBytes()); connection.write("name4,2,more Streaming unlimited".getBytes()); connection.write("name5,2,even more Streaming unlimited".getBytes()); connection.commitTransaction(); // intentionally corrupt some files Path path = new Path(dbLocation); Collection<String> files = FileDump.getAllFilesInPath(path, conf); for (String file 
: files) {
      if (file.contains("bucket_00000")) {
        // empty out the file
        corruptDataFile(file, conf, Integer.MIN_VALUE);
      } else if (file.contains("bucket_00001")) {
        corruptDataFile(file, conf, -1);
      } else if (file.contains("bucket_00002")) {
        corruptDataFile(file, conf, 100);
      } else if (file.contains("bucket_00003")) {
        corruptDataFile(file, conf, 100);
      }
    }

    PrintStream origErr = System.err;
    ByteArrayOutputStream myErr = new ByteArrayOutputStream();
    // replace stderr and run command
    System.setErr(new PrintStream(myErr));
    FileDump.main(new String[] { dbLocation });
    System.err.flush();
    System.setErr(origErr);
    String errDump = new String(myErr.toByteArray());
    Assert.assertEquals(false, errDump.contains("Exception"));
    Assert.assertEquals(true, errDump.contains("3 file(s) are corrupted"));
    Assert.assertEquals(false, errDump.contains("is still open for writes."));

    origErr = System.err;
    myErr = new ByteArrayOutputStream();
    // replace stderr and run command
    System.setErr(new PrintStream(myErr));
    FileDump.main(new String[] { dbLocation, "--recover", "--skip-dump" });
    System.err.flush();
    System.setErr(origErr);
    errDump = new String(myErr.toByteArray());
    Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!"));
    Assert.assertEquals(true, errDump.contains("No readable footers found. Creating empty orc file."));
    Assert.assertEquals(true, errDump.contains("bucket_00002 recovered successfully!"));
    Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!"));
    Assert.assertEquals(false, errDump.contains("Exception"));
    Assert.assertEquals(false, errDump.contains("is still open for writes."));

    // test after recovery
    origErr = System.err;
    myErr = new ByteArrayOutputStream();
    // replace stderr and run command
    System.setErr(new PrintStream(myErr));
    FileDump.main(new String[] { dbLocation });
    System.err.flush();
    System.setErr(origErr);
    errDump = new String(myErr.toByteArray());
    Assert.assertEquals(false, errDump.contains("Exception"));
    Assert.assertEquals(false, errDump.contains("file(s) are corrupted"));
    Assert.assertEquals(false, errDump.contains("is still open for writes."));

    // after recovery there shouldn't be any *_flush_length files
    files = FileDump.getAllFilesInPath(path, conf);
    for (String file : files) {
      Assert.assertEquals(false, file.contains("_flush_length"));
    }
    connection.close();
  }

  private void corruptDataFile(final String file, final Configuration conf, final int addRemoveBytes)
      throws Exception {
    Path bPath = new Path(file);
    Path cPath = new Path(bPath.getParent(), bPath.getName() + ".corrupt");
    FileSystem fs = bPath.getFileSystem(conf);
    FileStatus fileStatus = fs.getFileStatus(bPath);
    int len = addRemoveBytes == Integer.MIN_VALUE ?
0 : (int) fileStatus.getLen() + addRemoveBytes; byte[] buffer = new byte[len]; FSDataInputStream fdis = fs.open(bPath); fdis.readFully(0, buffer, 0, (int) Math.min(fileStatus.getLen(), buffer.length)); fdis.close(); FSDataOutputStream fdos = fs.create(cPath, true); fdos.write(buffer, 0, buffer.length); fdos.close(); fs.delete(bPath, false); fs.rename(cPath, bPath); } @Test public void testFileDumpCorruptSideFiles() throws Exception { dropDB(msClient, dbName3); // 1) Create two bucketed tables String dbLocation = dbFolder.newFolder(dbName3).getCanonicalPath() + ".db"; dbLocation = dbLocation.replaceAll("\\\\", "/"); // for windows paths String[] colNames = "key1,key2,data".split(","); String[] colTypes = "string,int,string".split(","); String[] bucketNames = "key1,key2".split(","); int bucketCount = 4; createDbAndTable(driver, dbName3, tblName3, null, colNames, colTypes, bucketNames, null, dbLocation, bucketCount); // 2) Insert data into both tables StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',').build(); HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase(dbName3) .withTable(tblName3).withAgentInfo("UT_" + Thread.currentThread().getName()) .withRecordWriter(writer).withHiveConf(conf).withTransactionBatchSize(10).connect(); connection.beginTransaction(); connection.write("name0,1,Hello streaming".getBytes()); connection.write("name2,2,Welcome to streaming".getBytes()); connection.write("name4,2,more Streaming unlimited".getBytes()); connection.write("name5,2,even more Streaming unlimited".getBytes()); connection.write("name6,3,aHello streaming".getBytes()); connection.commitTransaction(); Map<String, List<Long>> offsetMap = new HashMap<String, List<Long>>(); recordOffsets(conf, dbLocation, offsetMap); connection.beginTransaction(); connection.write("name01,11,-Hello streaming".getBytes()); connection.write("name21,21,-Welcome to streaming".getBytes()); connection.write("name41,21,-more Streaming unlimited".getBytes()); connection.write("name51,21,-even more Streaming unlimited".getBytes()); connection.write("name02,12,--Hello streaming".getBytes()); connection.write("name22,22,--Welcome to streaming".getBytes()); connection.write("name42,22,--more Streaming unlimited".getBytes()); connection.write("name52,22,--even more Streaming unlimited".getBytes()); connection.write("name7,4,aWelcome to streaming".getBytes()); connection.write("name8,5,amore Streaming unlimited".getBytes()); connection.write("name9,6,aeven more Streaming unlimited".getBytes()); connection.write("name10,7,bHello streaming".getBytes()); connection.write("name11,8,bWelcome to streaming".getBytes()); connection.write("name12,9,bmore Streaming unlimited".getBytes()); connection.write("name13,10,beven more Streaming unlimited".getBytes()); connection.commitTransaction(); recordOffsets(conf, dbLocation, offsetMap); // intentionally corrupt some files Path path = new Path(dbLocation); Collection<String> files = FileDump.getAllFilesInPath(path, conf); for (String file : files) { if (file.contains("bucket_00000")) { corruptSideFile(file, conf, offsetMap, "bucket_00000", -1); // corrupt last entry } else if (file.contains("bucket_00001")) { corruptSideFile(file, conf, offsetMap, "bucket_00001", 0); // empty out side file } else if (file.contains("bucket_00002")) { corruptSideFile(file, conf, offsetMap, "bucket_00002", 3); // total 3 entries (2 valid + 1 fake) } else if (file.contains("bucket_00003")) { corruptSideFile(file, conf, offsetMap, 
"bucket_00003", 10); // total 10 entries (2 valid + 8 fake) } } PrintStream origErr = System.err; ByteArrayOutputStream myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[] { dbLocation }); System.err.flush(); System.setErr(origErr); String errDump = new String(myErr.toByteArray()); Assert.assertEquals(true, errDump.contains("bucket_00000_flush_length [length: 11")); Assert.assertEquals(true, errDump.contains("bucket_00001_flush_length [length: 0")); Assert.assertEquals(true, errDump.contains("bucket_00002_flush_length [length: 24")); Assert.assertEquals(true, errDump.contains("bucket_00003_flush_length [length: 80")); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(true, errDump.contains("4 file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stderr and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[] { dbLocation, "--recover", "--skip-dump" }); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(true, errDump.contains("bucket_00000 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00001 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00002 recovered successfully!")); Assert.assertEquals(true, errDump.contains("bucket_00003 recovered successfully!")); List<Long> offsets = offsetMap.get("bucket_00000"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); offsets = offsetMap.get("bucket_00001"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); offsets = offsetMap.get("bucket_00002"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); offsets = offsetMap.get("bucket_00003"); Assert.assertEquals(true, errDump.contains("Readable footerOffsets: " + offsets.toString())); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); // test after recovery origErr = System.err; myErr = new ByteArrayOutputStream(); // replace stdout and run command System.setErr(new PrintStream(myErr)); FileDump.main(new String[] { dbLocation }); System.err.flush(); System.setErr(origErr); errDump = new String(myErr.toByteArray()); Assert.assertEquals(false, errDump.contains("Exception")); Assert.assertEquals(false, errDump.contains("file(s) are corrupted")); Assert.assertEquals(false, errDump.contains("is still open for writes.")); // after recovery there shouldn'table be any *_flush_length files files = FileDump.getAllFilesInPath(path, conf); for (String file : files) { Assert.assertEquals(false, file.contains("_flush_length")); } connection.close(); } private void corruptSideFile(final String file, final HiveConf conf, final Map<String, List<Long>> offsetMap, final String key, final int numEntries) throws IOException { Path dataPath = new Path(file); Path sideFilePath = OrcAcidUtils.getSideFile(dataPath); Path cPath = new Path(sideFilePath.getParent(), sideFilePath.getName() + ".corrupt"); FileSystem fs = sideFilePath.getFileSystem(conf); List<Long> offsets = offsetMap.get(key); long lastOffset = offsets.get(offsets.size() - 1); FSDataOutputStream fdos = fs.create(cPath, true); // corrupt last entry if (numEntries < 0) { byte[] lastOffsetBytes 
= longToBytes(lastOffset);
      for (int i = 0; i < offsets.size() - 1; i++) {
        fdos.writeLong(offsets.get(i));
      }
      fdos.write(lastOffsetBytes, 0, 3);
    } else if (numEntries > 0) {
      int firstRun = Math.min(offsets.size(), numEntries);
      // add original entries
      for (int i = 0; i < firstRun; i++) {
        fdos.writeLong(offsets.get(i));
      }
      // add fake entries
      int remaining = numEntries - firstRun;
      for (int i = 0; i < remaining; i++) {
        fdos.writeLong(lastOffset + ((i + 1) * 100));
      }
    }
    fdos.close();
    fs.delete(sideFilePath, false);
    fs.rename(cPath, sideFilePath);
  }

  private byte[] longToBytes(long x) {
    ByteBuffer buffer = ByteBuffer.allocate(8);
    buffer.putLong(x);
    return buffer.array();
  }

  private void recordOffsets(final HiveConf conf, final String dbLocation,
      final Map<String, List<Long>> offsetMap) throws IOException {
    Path path = new Path(dbLocation);
    Collection<String> files = FileDump.getAllFilesInPath(path, conf);
    for (String file : files) {
      Path bPath = new Path(file);
      FileSystem fs = bPath.getFileSystem(conf);
      FileStatus fileStatus = fs.getFileStatus(bPath);
      long len = fileStatus.getLen();
      if (file.contains("bucket_00000")) {
        if (offsetMap.containsKey("bucket_00000")) {
          List<Long> offsets = offsetMap.get("bucket_00000");
          offsets.add(len);
          offsetMap.put("bucket_00000", offsets);
        } else {
          List<Long> offsets = new ArrayList<Long>();
          offsets.add(len);
          offsetMap.put("bucket_00000", offsets);
        }
      } else if (file.contains("bucket_00001")) {
        if (offsetMap.containsKey("bucket_00001")) {
          List<Long> offsets = offsetMap.get("bucket_00001");
          offsets.add(len);
          offsetMap.put("bucket_00001", offsets);
        } else {
          List<Long> offsets = new ArrayList<Long>();
          offsets.add(len);
          offsetMap.put("bucket_00001", offsets);
        }
      } else if (file.contains("bucket_00002")) {
        if (offsetMap.containsKey("bucket_00002")) {
          List<Long> offsets = offsetMap.get("bucket_00002");
          offsets.add(len);
          offsetMap.put("bucket_00002", offsets);
        } else {
          List<Long> offsets = new ArrayList<Long>();
          offsets.add(len);
          offsetMap.put("bucket_00002", offsets);
        }
      } else if (file.contains("bucket_00003")) {
        if (offsetMap.containsKey("bucket_00003")) {
          List<Long> offsets = offsetMap.get("bucket_00003");
          offsets.add(len);
          offsetMap.put("bucket_00003", offsets);
        } else {
          List<Long> offsets = new ArrayList<Long>();
          offsets.add(len);
          offsetMap.put("bucket_00003", offsets);
        }
      }
    }
  }

  @Test
  public void testErrorHandling() throws Exception {
    String agentInfo = "UT_" + Thread.currentThread().getName();
    runCmdOnDriver("create database testErrors");
    runCmdOnDriver("use testErrors");
    runCmdOnDriver(
        "create table T(a int, b int) clustered by (b) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')");
    StrictDelimitedInputWriter innerWriter = StrictDelimitedInputWriter.newBuilder().withFieldDelimiter(',')
        .build();
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder().withDatabase("testErrors")
        .withTable("T").withAgentInfo(agentInfo).withTransactionBatchSize(2).withRecordWriter(innerWriter)
        .withHiveConf(conf).connect();
    connection.beginTransaction();
    FaultyWriter writer = new FaultyWriter(innerWriter);
    connection.close();
    Exception expectedEx = null;
    GetOpenTxnsInfoResponse r = msClient.showTxns();
    Assert.assertEquals("HWM didn't match", 17, r.getTxn_high_water_mark());
    List<TxnInfo> ti = r.getOpen_txns();
    Assert.assertEquals("wrong status ti(0)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(0).getState());
    Assert.assertEquals("wrong status ti(1)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(1).getState());
    try
{ connection.beginTransaction(); } catch (StreamingException ex) { expectedEx = ex; } Assert.assertTrue("beginTransaction() should have failed", expectedEx != null && expectedEx.getMessage().contains("Streaming connection is closed already.")); connection = HiveStreamingConnection.newBuilder().withDatabase("testErrors").withTable("T") .withAgentInfo(agentInfo).withTransactionBatchSize(2).withRecordWriter(innerWriter) .withHiveConf(conf).connect(); expectedEx = null; try { connection.write("name0,1,Hello streaming".getBytes()); } catch (StreamingException ex) { expectedEx = ex; } Assert.assertTrue("write() should have failed", expectedEx != null && expectedEx.getMessage().equals("Transaction batch is null. Missing beginTransaction?")); expectedEx = null; try { connection.commitTransaction(); } catch (StreamingException ex) { expectedEx = ex; } Assert.assertTrue("commitTransaction() should have failed", expectedEx != null && expectedEx.getMessage().equals("Transaction batch is null. Missing beginTransaction?")); connection = HiveStreamingConnection.newBuilder().withDatabase("testErrors").withTable("T") .withAgentInfo(agentInfo).withTransactionBatchSize(2).withRecordWriter(writer).withHiveConf(conf) .connect(); connection.beginTransaction(); connection.write("name2,2,Welcome to streaming".getBytes()); connection.write("name4,2,more Streaming unlimited".getBytes()); connection.write("name5,2,even more Streaming unlimited".getBytes()); connection.commitTransaction(); //test toString() String s = connection.toTransactionString(); Assert.assertTrue("Actual: " + s, s.contains("LastUsed " + JavaUtils.txnIdToString(connection.getCurrentTxnId()))); Assert.assertTrue("Actual: " + s, s.contains("TxnStatus[CO]")); expectedEx = null; connection.beginTransaction(); writer.enableErrors(); try { connection.write("name6,2,Doh!".getBytes()); } catch (StreamingIOFailure ex) { expectedEx = ex; } Assert.assertTrue("Wrong exception: " + (expectedEx != null ? expectedEx.getMessage() : "?"), expectedEx != null && expectedEx.getMessage().contains("Simulated fault occurred")); expectedEx = null; try { connection.commitTransaction(); } catch (StreamingException ex) { expectedEx = ex; } Assert.assertTrue("commitTransaction() should have failed", expectedEx != null && expectedEx.getMessage().equals("Transaction state is not OPEN. 
Missing beginTransaction?"));
    //test toString()
    s = connection.toTransactionString();
    Assert.assertTrue("Actual: " + s,
        s.contains("LastUsed " + JavaUtils.txnIdToString(connection.getCurrentTxnId())));
    Assert.assertTrue("Actual: " + s, s.contains("TxnStatus[CA]"));
    r = msClient.showTxns();
    Assert.assertEquals("HWM didn't match", 19, r.getTxn_high_water_mark());
    ti = r.getOpen_txns();
    Assert.assertEquals("wrong status ti(0)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(0).getState());
    Assert.assertEquals("wrong status ti(1)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(1).getState());
    //txnid 3 was committed and thus not open
    Assert.assertEquals("wrong status ti(2)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(2).getState());
    connection.close();

    writer.disableErrors();
    connection = HiveStreamingConnection.newBuilder().withDatabase("testErrors").withTable("T")
        .withAgentInfo(agentInfo).withTransactionBatchSize(2).withRecordWriter(writer).withHiveConf(conf)
        .connect();
    connection.beginTransaction();
    connection.write("name2,2,Welcome to streaming".getBytes());
    writer.enableErrors();
    expectedEx = null;
    try {
      connection.commitTransaction();
    } catch (StreamingIOFailure ex) {
      expectedEx = ex;
    }
    Assert.assertTrue("Wrong exception: " + (expectedEx != null ? expectedEx.getMessage() : "?"),
        expectedEx != null && expectedEx.getMessage().contains("Simulated fault occurred"));
    r = msClient.showTxns();
    Assert.assertEquals("HWM didn't match", 21, r.getTxn_high_water_mark());
    ti = r.getOpen_txns();
    Assert.assertEquals("wrong status ti(3)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(3).getState());
    Assert.assertEquals("wrong status ti(4)", org.apache.hadoop.hive.metastore.api.TxnState.ABORTED,
        ti.get(4).getState());
  }

  // assumes unpartitioned table
  // returns a map<bucketNum, list<record> >
  private HashMap<Integer, ArrayList<SampleRec>> dumpAllBuckets(String dbLocation, String tableName)
      throws IOException {
    HashMap<Integer, ArrayList<SampleRec>> result = new HashMap<Integer, ArrayList<SampleRec>>();
    for (File deltaDir : new File(dbLocation + "/" + tableName).listFiles()) {
      if (!deltaDir.getName().startsWith("delta")) {
        continue;
      }
      File[] bucketFiles = deltaDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
          String name = pathname.getName();
          return !name.startsWith("_") && !name.startsWith(".");
        }
      });
      for (File bucketFile : bucketFiles) {
        if (bucketFile.toString().endsWith("length")) {
          continue;
        }
        Integer bucketNum = getBucketNumber(bucketFile);
        ArrayList<SampleRec> recs = dumpBucket(new Path(bucketFile.toString()));
        result.put(bucketNum, recs);
      }
    }
    return result;
  }

  //assumes bucket_NNNNN format of file name
  private Integer getBucketNumber(File bucketFile) {
    String fname = bucketFile.getName();
    int start = fname.indexOf('_');
    String number = fname.substring(start + 1, fname.length());
    return Integer.parseInt(number);
  }

  // delete db and all tables in it
  public static void dropDB(IMetaStoreClient client, String databaseName) {
    try {
      for (String table : client.listTableNamesByFilter(databaseName, "", (short) -1)) {
        client.dropTable(databaseName, table, true, true);
      }
      client.dropDatabase(databaseName);
    } catch (TException e) {
    }
  }

  ///////// -------- UTILS ------- /////////

  // returns Path of the partition created (if any) else Path of table
  private static Path createDbAndTable(IDriver driver, String databaseName, String tableName,
      List<String> partVals, String[] colNames, String[] colTypes, String[]
bucketCols, String[] partNames, String dbLocation, int bucketCount) throws Exception { String dbUri = "raw://" + new Path(dbLocation).toUri().toString(); String tableLoc = dbUri + Path.SEPARATOR + tableName; runDDL(driver, "create database IF NOT EXISTS " + databaseName + " location '" + dbUri + "'"); runDDL(driver, "use " + databaseName); String crtTbl = "create table " + tableName + " ( " + getTableColumnsStr(colNames, colTypes) + " )" + getPartitionStmtStr(partNames) + " clustered by ( " + join(bucketCols, ",") + " )" + " into " + bucketCount + " buckets " + " stored as orc " + " location '" + tableLoc + "'" + " TBLPROPERTIES ('transactional'='true') "; runDDL(driver, crtTbl); if (partNames != null && partNames.length != 0) { return addPartition(driver, tableName, partVals, partNames); } return new Path(tableLoc); } private static Path addPartition(IDriver driver, String tableName, List<String> partVals, String[] partNames) throws Exception { String partSpec = getPartsSpec(partNames, partVals); String addPart = "alter table " + tableName + " add partition ( " + partSpec + " )"; runDDL(driver, addPart); return getPartitionPath(driver, tableName, partSpec); } private static Path getPartitionPath(IDriver driver, String tableName, String partSpec) throws Exception { ArrayList<String> res = queryTable(driver, "describe extended " + tableName + " PARTITION (" + partSpec + ")"); String partInfo = res.get(res.size() - 1); int start = partInfo.indexOf("location:") + "location:".length(); int end = partInfo.indexOf(",", start); return new Path(partInfo.substring(start, end)); } private static String getTableColumnsStr(String[] colNames, String[] colTypes) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < colNames.length; ++i) { sb.append(colNames[i]).append(" ").append(colTypes[i]); if (i < colNames.length - 1) { sb.append(","); } } return sb.toString(); } // converts partNames into "partName1 string, partName2 string" private static String getTablePartsStr(String[] partNames) { if (partNames == null || partNames.length == 0) { return ""; } StringBuilder sb = new StringBuilder(); for (int i = 0; i < partNames.length; ++i) { sb.append(partNames[i]).append(" string"); if (i < partNames.length - 1) { sb.append(","); } } return sb.toString(); } // converts partNames,partVals into "partName1=val1, partName2=val2" private static String getPartsSpec(String[] partNames, List<String> partVals) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < partVals.size(); ++i) { sb.append(partNames[i]).append(" = '").append(partVals.get(i)).append("'"); if (i < partVals.size() - 1) { sb.append(","); } } return sb.toString(); } private static String join(String[] values, String delimiter) { if (values == null) { return null; } StringBuilder strbuf = new StringBuilder(); boolean first = true; for (Object value : values) { if (!first) { strbuf.append(delimiter); } else { first = false; } strbuf.append(value.toString()); } return strbuf.toString(); } private static String getPartitionStmtStr(String[] partNames) { if (partNames == null || partNames.length == 0) { return ""; } return " partitioned by (" + getTablePartsStr(partNames) + " )"; } private static boolean runDDL(IDriver driver, String sql) { LOG.debug(sql); System.out.println(sql); //LOG.debug("Running Hive Query: "+ sql); CommandProcessorResponse cpr = driver.run(sql); if (cpr.getResponseCode() == 0) { return true; } LOG.error("Statement: " + sql + " failed: " + cpr); return false; } private static ArrayList<String> queryTable(IDriver 
driver, String query) throws IOException { CommandProcessorResponse cpr = driver.run(query); if (cpr.getResponseCode() != 0) { throw new RuntimeException(query + " failed: " + cpr); } ArrayList<String> res = new ArrayList<String>(); driver.getResults(res); return res; } private static class SampleRec { public String field1; public int field2; public String field3; public SampleRec(String field1, int field2, String field3) { this.field1 = field1; this.field2 = field2; this.field3 = field3; } @Override public boolean equals(Object o) { if (this == o) { return true; } if (o == null || getClass() != o.getClass()) { return false; } SampleRec that = (SampleRec) o; if (field2 != that.field2) { return false; } if (field1 != null ? !field1.equals(that.field1) : that.field1 != null) { return false; } return !(field3 != null ? !field3.equals(that.field3) : that.field3 != null); } @Override public int hashCode() { int result = field1 != null ? field1.hashCode() : 0; result = 31 * result + field2; result = 31 * result + (field3 != null ? field3.hashCode() : 0); return result; } @Override public String toString() { return " { " + "'" + field1 + '\'' + "," + field2 + ",'" + field3 + '\'' + " }"; } } /** * This is test-only wrapper around the real RecordWriter. * It can simulate faults from lower levels to test error handling logic. */ private static final class FaultyWriter implements RecordWriter { private final RecordWriter delegate; private boolean shouldThrow = false; private FaultyWriter(RecordWriter delegate) { assert delegate != null; this.delegate = delegate; } @Override public void init(final StreamingConnection connection, final long minWriteId, final long maxWriteID) throws StreamingException { delegate.init(connection, minWriteId, maxWriteID); } @Override public void write(long writeId, byte[] record) throws StreamingException { delegate.write(writeId, record); produceFault(); } @Override public void write(final long writeId, final InputStream inputStream) throws StreamingException { delegate.write(writeId, inputStream); produceFault(); } @Override public void flush() throws StreamingException { delegate.flush(); produceFault(); } @Override public void close() throws StreamingException { delegate.close(); } @Override public Set<String> getPartitions() { return delegate.getPartitions(); } /** * allows testing of "unexpected" errors * * @throws StreamingIOFailure */ private void produceFault() throws StreamingIOFailure { if (shouldThrow) { throw new StreamingIOFailure("Simulated fault occurred"); } } void enableErrors() { shouldThrow = true; } void disableErrors() { shouldThrow = false; } @Override public Path getDeltaFileLocation(List<String> partitionValues, Integer bucketId, Long minWriteId, Long maxWriteId, Integer statementId, Table table) throws StreamingException { return null; } } }
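The tests above all exercise the same basic streaming pattern: build a RecordWriter, build a HiveStreamingConnection through its builder, then loop over beginTransaction(), write() and commitTransaction() before closing the connection. The standalone sketch below pulls that pattern out of the test harness for reference. It is an illustration only, not part of the test class: the database name, table name, agent info and HiveConf setup are placeholder assumptions, and the target table is assumed to already exist as a bucketed ORC table created with TBLPROPERTIES ('transactional'='true'), as the tests above create theirs.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.streaming.HiveStreamingConnection;
import org.apache.hive.streaming.StreamingException;
import org.apache.hive.streaming.StrictDelimitedInputWriter;

public class HiveStreamingExample {
  public static void main(String[] args) throws Exception {
    // Assumes hive-site.xml (metastore URI etc.) is available on the classpath.
    HiveConf hiveConf = new HiveConf();
    StrictDelimitedInputWriter writer = StrictDelimitedInputWriter.newBuilder()
        .withFieldDelimiter(',')
        .build();
    // "streaming_db" and "streaming_tbl" are placeholder names for an existing ACID table.
    HiveStreamingConnection connection = HiveStreamingConnection.newBuilder()
        .withDatabase("streaming_db")
        .withTable("streaming_tbl")
        .withAgentInfo("example-agent")
        .withRecordWriter(writer)
        .withHiveConf(hiveConf)
        .withTransactionBatchSize(10)
        .connect();
    try {
      connection.beginTransaction();
      connection.write("1,Hello streaming".getBytes());
      connection.write("2,Welcome to streaming".getBytes());
      connection.commitTransaction();
    } catch (StreamingException e) {
      // Best-effort abort so the buffered records of the failed transaction are discarded.
      connection.abortTransaction();
      throw e;
    } finally {
      connection.close();
    }
  }
}

As in the tests above, records written in a transaction become visible to readers only after commitTransaction(); abortTransaction() discards them.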