org.apache.hadoop.hive.upgrade.acid.UpgradeTool.java Source code

Introduction

Here is the source code for org.apache.hadoop.hive.upgrade.acid.UpgradeTool.java
Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.upgrade.acid;

import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.CompactionResponse;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
import org.apache.hadoop.hive.metastore.api.ShowCompactResponseElement;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.txn.TxnStore;
import org.apache.hadoop.hive.metastore.txn.TxnUtils;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hive.common.util.HiveVersionInfo;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.impl.AcidStats;
import org.apache.orc.impl.OrcAcidUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.escapeSQLString;

/**
 * This utility is designed to help with upgrading to Hive 3.0.  On-disk layout for transactional
 * tables has changed in 3.0 and require pre-processing before upgrade to ensure they are readable
 * by Hive 3.0.  Some transactional tables (identified by this utility) require Major compaction
 * to be run on them before upgrading to 3.0.  Once this compaction starts, no more
 * update/delete/merge statements may be executed on these tables until upgrade is finished.
 *
 * Additionally, a new type of transactional tables was added in 3.0 - insert-only tables.  These
 * tables support ACID semantics and work with any Input/OutputFormat.  Any Managed tables may
 * be made insert-only transactional table. These tables don't support Update/Delete/Merge commands.
 *
 * This utility works in 2 modes: preUpgrade and postUpgrade.
 * In preUpgrade mode it has to have 2.x Hive jars on the classpath.  It will perform analysis on
 * existing transactional tables, determine which require compaction and generate a set of SQL
 * commands to launch all of these compactions.
 *
 * Note that depending on the number of tables/partitions and amount of data in them compactions
 * may take a significant amount of time and resources.  The script output by this utility includes
 * some heuristics that may help estimate the time required.  If no script is produced, no action
 * is needed.  For compactions to run an instance of standalone Hive Metastore must be running.
 * Please make sure hive.compactor.worker.threads is sufficiently high - this specifies the limit
 * of concurrent compactions that may be run.  Each compaction job is a Map-Reduce job.
 * hive.compactor.job.queue may be used to set a Yarn queue ame where all compaction jobs will be
 * submitted.
 *
 * In postUpgrade mode, Hive 3.0 jars/hive-site.xml should be on the classpath. This utility will
 * find all the tables that may be made transactional (with ful CRUD support) and generate
 * Alter Table commands to do so.  It will also find all tables that may not support full CRUD
 * but can be made insert-only transactional tables and generate corresponding Alter Table commands.
 *
 * TODO: rename files
 *
 * "execute" option may be supplied in both modes to have the utility automatically execute the
 * equivalent of the generated commands
 *
 * "location" option may be supplied followed by a path to set the location for the generated
 * scripts.
 */
public class UpgradeTool {
    private static final Logger LOG = LoggerFactory.getLogger(UpgradeTool.class);
    private static final int PARTITION_BATCH_SIZE = 10000;
    private final Options cmdLineOptions = new Options();

    public static void main(String[] args) throws Exception {
        UpgradeTool tool = new UpgradeTool();
        tool.init();
        CommandLineParser parser = new GnuParser();
        CommandLine line;
        String outputDir = ".";
        boolean preUpgrade = false, postUpgrade = false, execute = false, nonBlocking = false;
        try {
            line = parser.parse(tool.cmdLineOptions, args);
        } catch (ParseException e) {
            System.err.println("UpgradeTool: Parsing failed.  Reason: " + e.getLocalizedMessage());
            printAndExit(tool);
            return;
        }
        if (line.hasOption("help")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("upgrade-acid", tool.cmdLineOptions);
            return;
        }
        if (line.hasOption("location")) {
            outputDir = line.getOptionValue("location");
        }
        if (line.hasOption("execute")) {
            execute = true;
        }
        if (line.hasOption("preUpgrade")) {
            preUpgrade = true;
        }
        if (line.hasOption("postUpgrade")) {
            postUpgrade = true;
        }
        LOG.info("Starting with preUpgrade=" + preUpgrade + ", postUpgrade=" + postUpgrade + ", execute=" + execute
                + ", location=" + outputDir);
        if (preUpgrade && postUpgrade) {
            throw new IllegalArgumentException("Cannot specify both preUpgrade and postUpgrade");
        }

        try {
            String hiveVer = HiveVersionInfo.getShortVersion();
            if (preUpgrade) {
                if (!hiveVer.startsWith("2.")) {
                    throw new IllegalStateException("preUpgrade requires Hive 2.x.  Actual: " + hiveVer);
                }
            }
            if (postUpgrade && execute && !isTestMode) {
                if (!hiveVer.startsWith("3.")) {
                    throw new IllegalStateException("postUpgrade w/execute requires Hive 3.x.  Actual: " + hiveVer);
                }
            }
            tool.prepareAcidUpgradeInternal(outputDir, preUpgrade, postUpgrade, execute);
        } catch (Exception ex) {
            LOG.error("UpgradeTool failed", ex);
            throw ex;
        }
    }

    private static void printAndExit(UpgradeTool tool) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp("upgrade-acid", tool.cmdLineOptions);
        System.exit(1);
    }

    private void init() {
        try {
            cmdLineOptions.addOption(new Option("help", "print this message"));
            cmdLineOptions.addOption(new Option("preUpgrade",
                    "Generates a script to execute on 2.x cluster.  This requires 2.x binaries"
                            + " on the classpath and hive-site.xml."));
            cmdLineOptions.addOption(new Option("postUpgrade",
                    "Generates a script to execute on 3.x cluster.  This requires 3.x binaries"
                            + " on the classpath and hive-site.xml."));
            Option exec = new Option("execute", "Executes commands equivalent to generated scrips");
            exec.setOptionalArg(true);
            cmdLineOptions.addOption(exec);
            cmdLineOptions.addOption(new Option("location", true, "Location to write scripts to. Default is CWD."));
        } catch (Exception ex) {
            LOG.error("init()", ex);
            throw ex;
        }
    }

    /**
     * todo: this should accept a file of table names to exclude from non-acid to acid conversion
     * todo: change script comments to a preamble instead of a footer
     *
     * how does rename script work?  "hadoop fs -mv oldname newname"    * and what what about S3?
     * How does this actually get executed?
     * all other actions are done via embedded JDBC
     *
     *
     */
    private void prepareAcidUpgradeInternal(String scriptLocation, boolean preUpgrade, boolean postUpgrade,
            boolean execute) throws HiveException, TException, IOException {
        HiveConf conf = hiveConf != null ? hiveConf : new HiveConf();
        boolean isAcidEnabled = isAcidEnabled(conf);
        HiveMetaStoreClient hms = new HiveMetaStoreClient(conf);//MetaException
        LOG.debug("Looking for databases");
        List<String> databases = hms.getAllDatabases();//TException
        LOG.debug("Found " + databases.size() + " databases to process");
        List<String> compactions = new ArrayList<>();
        List<String> convertToAcid = new ArrayList<>();
        List<String> convertToMM = new ArrayList<>();
        final CompactionMetaInfo compactionMetaInfo = new CompactionMetaInfo();
        ValidTxnList txns = null;
        Hive db = null;
        if (execute) {
            db = Hive.get(conf);
        }

        for (String dbName : databases) {
            List<String> tables = hms.getAllTables(dbName);
            LOG.debug("found " + tables.size() + " tables in " + dbName);
            for (String tableName : tables) {
                Table t = hms.getTable(dbName, tableName);
                LOG.debug("processing table " + Warehouse.getQualifiedName(t));
                if (preUpgrade && isAcidEnabled) {
                    //if acid is off, there can't be any acid tables - nothing to compact
                    if (execute && txns == null) {
                        /*
                         This API changed from 2.x to 3.0.  so this won't even compile with 3.0
                         but it doesn't need to since we only run this preUpgrade
                        */
                        TxnStore txnHandler = TxnUtils.getTxnStore(conf);
                        txns = TxnUtils.createValidCompactTxnList(txnHandler.getOpenTxnsInfo());
                    }
                    List<String> compactionCommands = getCompactionCommands(t, conf, hms, compactionMetaInfo,
                            execute, db, txns);
                    compactions.addAll(compactionCommands);
                }
                if (postUpgrade && isAcidEnabled) {
                    //if acid is off post upgrade, you can't make any tables acid - will throw
                    processConversion(t, convertToAcid, convertToMM, hms, db, execute);
                }
                /*todo: handle renaming files somewhere*/
            }
        }
        makeCompactionScript(compactions, scriptLocation, compactionMetaInfo);
        makeConvertTableScript(convertToAcid, convertToMM, scriptLocation);
        makeRenameFileScript(scriptLocation);//todo: is this pre or post upgrade?
        //todo: can different tables be in different FileSystems?
        if (preUpgrade && execute) {
            while (compactionMetaInfo.compactionIds.size() > 0) {
                LOG.debug("Will wait for " + compactionMetaInfo.compactionIds.size() + " compactions to complete");
                ShowCompactResponse resp = db.showCompactions();
                for (ShowCompactResponseElement e : resp.getCompacts()) {
                    final String state = e.getState();
                    boolean removed;
                    switch (state) {
                    case TxnStore.CLEANING_RESPONSE:
                    case TxnStore.SUCCEEDED_RESPONSE:
                        removed = compactionMetaInfo.compactionIds.remove(e.getId());
                        if (removed) {
                            LOG.debug("Required compaction succeeded: " + e.toString());
                        }
                        break;
                    case TxnStore.ATTEMPTED_RESPONSE:
                    case TxnStore.FAILED_RESPONSE:
                        removed = compactionMetaInfo.compactionIds.remove(e.getId());
                        if (removed) {
                            LOG.warn("Required compaction failed: " + e.toString());
                        }
                        break;
                    case TxnStore.INITIATED_RESPONSE:
                        //may flood the log
                        //LOG.debug("Still waiting  on: " + e.toString());
                        break;
                    case TxnStore.WORKING_RESPONSE:
                        LOG.debug("Still working on: " + e.toString());
                        break;
                    default://shouldn't be any others
                        LOG.error("Unexpected state for : " + e.toString());
                    }
                }
                if (compactionMetaInfo.compactionIds.size() > 0) {
                    try {
                        if (callback != null) {
                            callback.onWaitForCompaction();
                        }
                        Thread.sleep(pollIntervalMs);
                    } catch (InterruptedException ex) {
                        ;//this only responds to ^C
                    }
                }
            }
        }
    }

    /**
     * Actualy makes the table transactional
     */
    private static void alterTable(Table t, Hive db, boolean isMM) throws HiveException, InvalidOperationException {
        org.apache.hadoop.hive.ql.metadata.Table metaTable =
                //clone to make sure new prop doesn't leak
                new org.apache.hadoop.hive.ql.metadata.Table(t.deepCopy());
        metaTable.getParameters().put(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL, "true");
        if (isMM) {
            metaTable.getParameters().put(hive_metastoreConstants.TABLE_TRANSACTIONAL_PROPERTIES, "insert_only");
        }
        db.alterTable(Warehouse.getQualifiedName(t), metaTable, false, null);
    }

    /**
     * todo: handle exclusion list
     * Figures out which tables to make Acid, MM and (optionally, performs the operation)
     */
    private static void processConversion(Table t, List<String> convertToAcid, List<String> convertToMM,
            HiveMetaStoreClient hms, Hive db, boolean execute) throws TException, HiveException {
        if (isFullAcidTable(t)) {
            return;
        }
        if (!TableType.MANAGED_TABLE.name().equalsIgnoreCase(t.getTableType())) {
            return;
        }
        String fullTableName = Warehouse.getQualifiedName(t);
        if (t.getPartitionKeysSize() <= 0) {
            if (canBeMadeAcid(fullTableName, t.getSd())) {
                convertToAcid.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES ("
                        + "'transactional'='true')");
                if (execute) {
                    alterTable(t, db, false);
                }
            } else {
                convertToMM.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES ("
                        + "'transactional'='true', 'transactional_properties'='insert_only')");
                if (execute) {
                    alterTable(t, db, true);
                }
            }
        } else {
            /*
              each Partition may have different I/O Format so have to check them all before deciding to
              make a full CRUD table.
              Run in batches to prevent OOM
             */
            List<String> partNames = hms.listPartitionNames(t.getDbName(), t.getTableName(), (short) -1);
            int batchSize = PARTITION_BATCH_SIZE;
            int numWholeBatches = partNames.size() / batchSize;
            for (int i = 0; i < numWholeBatches; i++) {
                List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(),
                        partNames.subList(i * batchSize, (i + 1) * batchSize));
                if (alterTable(fullTableName, partitionList, convertToMM, t, db, execute)) {
                    return;
                }
            }
            if (numWholeBatches * batchSize < partNames.size()) {
                //last partial batch
                List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(),
                        partNames.subList(numWholeBatches * batchSize, partNames.size()));
                if (alterTable(fullTableName, partitionList, convertToMM, t, db, execute)) {
                    return;
                }
            }
            //if here checked all parts and they are Acid compatible - make it acid
            convertToAcid.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES ("
                    + "'transactional'='true')");
            if (execute) {
                alterTable(t, db, false);
            }
        }
    }

    /**
     * @return true if table was converted/command generated
     */
    private static boolean alterTable(String fullTableName, List<Partition> partitionList, List<String> convertToMM,
            Table t, Hive db, boolean execute) throws InvalidOperationException, HiveException {
        for (Partition p : partitionList) {
            if (!canBeMadeAcid(fullTableName, p.getSd())) {
                convertToMM.add("ALTER TABLE " + Warehouse.getQualifiedName(t) + " SET TBLPROPERTIES ("
                        + "'transactional'='true', 'transactional_properties'='insert_only')");
                if (execute) {
                    alterTable(t, db, true);
                }
                return true;
            }
        }
        return false;
    }

    private static boolean canBeMadeAcid(String fullTableName, StorageDescriptor sd) {
        return isAcidInputOutputFormat(fullTableName, sd) && sd.getSortColsSize() <= 0;
    }

    private static boolean isAcidInputOutputFormat(String fullTableName, StorageDescriptor sd) {
        try {
            Class inputFormatClass = sd.getInputFormat() == null ? null : Class.forName(sd.getInputFormat());
            Class outputFormatClass = sd.getOutputFormat() == null ? null : Class.forName(sd.getOutputFormat());

            if (inputFormatClass != null && outputFormatClass != null
                    && Class.forName("org.apache.hadoop.hive.ql.io.AcidInputFormat")
                            .isAssignableFrom(inputFormatClass)
                    && Class.forName("org.apache.hadoop.hive.ql.io.AcidOutputFormat")
                            .isAssignableFrom(outputFormatClass)) {
                return true;
            }
        } catch (ClassNotFoundException e) {
            //if a table is using some custom I/O format and it's not in the classpath, we won't mark
            //the table for Acid, but today (Hive 3.1 and earlier) OrcInput/OutputFormat is the only
            //Acid format
            LOG.error("Could not determine if " + fullTableName + " can be made Acid due to: " + e.getMessage(), e);
            return false;
        }
        return false;
    }

    /**
     * Generates a set compaction commands to run on pre Hive 3 cluster
     */
    private static void makeCompactionScript(List<String> commands, String scriptLocation,
            CompactionMetaInfo compactionMetaInfo) throws IOException {
        if (commands.isEmpty()) {
            LOG.info("No compaction is necessary");
            return;
        }
        String fileName = "compacts_" + System.currentTimeMillis() + ".sql";
        LOG.debug("Writing compaction commands to " + fileName);
        try (PrintWriter pw = createScript(commands, fileName, scriptLocation)) {
            //add post script
            pw.println("-- Generated total of " + commands.size() + " compaction commands");
            if (compactionMetaInfo.numberOfBytes < Math.pow(2, 20)) {
                //to see it working in UTs
                pw.println("-- The total volume of data to be compacted is "
                        + String.format("%.6fMB", compactionMetaInfo.numberOfBytes / Math.pow(2, 20)));
            } else {
                pw.println("-- The total volume of data to be compacted is "
                        + String.format("%.3fGB", compactionMetaInfo.numberOfBytes / Math.pow(2, 30)));
            }
            pw.println();
            //todo: should be at the top of the file...
            pw.println("-- Please note that compaction may be a heavyweight and time consuming process.\n"
                    + "-- Submitting all of these commands will enqueue them to a scheduling queue from\n"
                    + "-- which they will be picked up by compactor Workers.  The max number of\n"
                    + "-- concurrent Workers is controlled by hive.compactor.worker.threads configured\n"
                    + "-- for the standalone metastore process.  Compaction itself is a Map-Reduce job\n"
                    + "-- which is submitted to the YARN queue identified by hive.compactor.job.queue\n"
                    + "-- property if defined or 'default' if not defined.  It's advisable to set the\n"
                    + "-- capacity of this queue appropriately");
        }
    }

    private static void makeConvertTableScript(List<String> alterTableAcid, List<String> alterTableMm,
            String scriptLocation) throws IOException {
        if (alterTableAcid.isEmpty()) {
            LOG.info("No acid conversion is necessary");
        } else {
            String fileName = "convertToAcid_" + System.currentTimeMillis() + ".sql";
            LOG.debug("Writing CRUD conversion commands to " + fileName);
            try (PrintWriter pw = createScript(alterTableAcid, fileName, scriptLocation)) {
                //todo: fix this - it has to run in 3.0 since tables may be unbucketed
                pw.println("-- These commands may be executed by Hive 1.x later");
            }
        }

        if (alterTableMm.isEmpty()) {
            LOG.info("No managed table conversion is necessary");
        } else {
            String fileName = "convertToMM_" + System.currentTimeMillis() + ".sql";
            LOG.debug("Writing managed table conversion commands to " + fileName);
            try (PrintWriter pw = createScript(alterTableMm, fileName, scriptLocation)) {
                pw.println("-- These commands must be executed by Hive 3.0 or later");
            }
        }
    }

    private static PrintWriter createScript(List<String> commands, String fileName, String scriptLocation)
            throws IOException {
        FileWriter fw = new FileWriter(scriptLocation + "/" + fileName);
        PrintWriter pw = new PrintWriter(fw);
        for (String cmd : commands) {
            pw.println(cmd + ";");
        }
        return pw;
    }

    private static void makeRenameFileScript(String scriptLocation) throws IOException {
        List<String> commands = Collections.emptyList();
        if (commands.isEmpty()) {
            LOG.info("No file renaming is necessary");
        } else {
            String fileName = "normalizeFileNames_" + System.currentTimeMillis() + ".sh";
            LOG.debug("Writing file renaming commands to " + fileName);
            PrintWriter pw = createScript(commands, fileName, scriptLocation);
            pw.close();
        }
    }

    /**
     * @return any compaction commands to run for {@code Table t}
     */
    private static List<String> getCompactionCommands(Table t, HiveConf conf, HiveMetaStoreClient hms,
            CompactionMetaInfo compactionMetaInfo, boolean execute, Hive db, ValidTxnList txns)
            throws IOException, TException, HiveException {
        if (!isFullAcidTable(t)) {
            return Collections.emptyList();
        }
        if (t.getPartitionKeysSize() <= 0) {
            //not partitioned
            if (!needsCompaction(new Path(t.getSd().getLocation()), conf, compactionMetaInfo, txns)) {
                return Collections.emptyList();
            }

            List<String> cmds = new ArrayList<>();
            cmds.add(getCompactionCommand(t, null));
            if (execute) {
                scheduleCompaction(t, null, db, compactionMetaInfo);
            }
            return cmds;
        }
        List<String> partNames = hms.listPartitionNames(t.getDbName(), t.getTableName(), (short) -1);
        int batchSize = PARTITION_BATCH_SIZE;
        int numWholeBatches = partNames.size() / batchSize;
        List<String> compactionCommands = new ArrayList<>();
        for (int i = 0; i < numWholeBatches; i++) {
            List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(),
                    partNames.subList(i * batchSize, (i + 1) * batchSize));
            getCompactionCommands(t, partitionList, db, execute, compactionCommands, compactionMetaInfo, conf,
                    txns);
        }
        if (numWholeBatches * batchSize < partNames.size()) {
            //last partial batch
            List<Partition> partitionList = hms.getPartitionsByNames(t.getDbName(), t.getTableName(),
                    partNames.subList(numWholeBatches * batchSize, partNames.size()));
            getCompactionCommands(t, partitionList, db, execute, compactionCommands, compactionMetaInfo, conf,
                    txns);
        }
        return compactionCommands;
    }

    private static void getCompactionCommands(Table t, List<Partition> partitionList, Hive db, boolean execute,
            List<String> compactionCommands, CompactionMetaInfo compactionMetaInfo, HiveConf conf,
            ValidTxnList txns) throws IOException, TException, HiveException {
        for (Partition p : partitionList) {
            if (needsCompaction(new Path(p.getSd().getLocation()), conf, compactionMetaInfo, txns)) {
                compactionCommands.add(getCompactionCommand(t, p));
                if (execute) {
                    scheduleCompaction(t, p, db, compactionMetaInfo);
                }
            }
        }
    }

    private static void scheduleCompaction(Table t, Partition p, Hive db, CompactionMetaInfo compactionMetaInfo)
            throws HiveException, MetaException {
        String partName = p == null ? null : Warehouse.makePartName(t.getPartitionKeys(), p.getValues());
        CompactionResponse resp =
                //this gives an easy way to get at compaction ID so we can only wait for those this
                //utility started
                db.compact2(t.getDbName(), t.getTableName(), partName, "major", null);
        if (!resp.isAccepted()) {
            LOG.info(Warehouse.getQualifiedName(t) + (p == null ? "" : "/" + partName)
                    + " is already being compacted with id=" + resp.getId());
        } else {
            LOG.info("Scheduled compaction for " + Warehouse.getQualifiedName(t) + (p == null ? "" : "/" + partName)
                    + " with id=" + resp.getId());
        }
        compactionMetaInfo.compactionIds.add(resp.getId());
    }

    /**
     *
     * @param location - path to a partition (or table if not partitioned) dir
     */
    private static boolean needsCompaction2(Path location, HiveConf conf, CompactionMetaInfo compactionMetaInfo)
            throws IOException {
        FileSystem fs = location.getFileSystem(conf);
        FileStatus[] deltas = fs.listStatus(location, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                //checking for delete_delta is only so that this functionality can be exercised by code 3.0
                //which cannot produce any deltas with mix of update/insert events
                return path.getName().startsWith("delta_") || path.getName().startsWith("delete_delta_");
            }
        });
        if (deltas == null || deltas.length == 0) {
            //base_n cannot contain update/delete.  Original files are all 'insert' and we need to compact
            //only if there are update/delete events.
            return false;
        }
        deltaLoop: for (FileStatus delta : deltas) {
            if (!delta.isDirectory()) {
                //should never happen - just in case
                continue;
            }
            FileStatus[] buckets = fs.listStatus(delta.getPath(), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    //since this is inside a delta dir created by Hive 2.x or earlier it can only contain
                    //bucket_x or bucket_x__flush_length
                    return path.getName().startsWith("bucket_");
                }
            });
            for (FileStatus bucket : buckets) {
                if (bucket.getPath().getName().endsWith("_flush_length")) {
                    //streaming ingest dir - cannot have update/delete events
                    continue deltaLoop;
                }
                if (needsCompaction(bucket, fs)) {
                    //found delete events - this 'location' needs compacting
                    compactionMetaInfo.numberOfBytes += getDataSize(location, conf);
                    //todo: this is not remotely accurate if you have many (relevant) original files
                    return true;
                }
            }
        }
        return false;
    }

    /**
     *
     * @param location - path to a partition (or table if not partitioned) dir
     */
    private static boolean needsCompaction(Path location, HiveConf conf, CompactionMetaInfo compactionMetaInfo,
            ValidTxnList txns) throws IOException {
        FileSystem fs = location.getFileSystem(conf);
        FileStatus[] deltas = fs.listStatus(location, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                //checking for delete_delta is only so that this functionality can be exercised by code 3.0
                //which cannot produce any deltas with mix of update/insert events
                return path.getName().startsWith("delta_") || path.getName().startsWith("delete_delta_");
            }
        });
        if (deltas == null || deltas.length == 0) {
            //base_n cannot contain update/delete.  Original files are all 'insert' and we need to compact
            //only if there are update/delete events.
            return false;
        }
        /*getAcidState() is smart not to return any deltas in current if there is a base that covers
        * them, i.e. if they were compacted but not yet cleaned.  This means re-checking if
        * compaction is needed should cheap(er)*/
        AcidUtils.Directory dir = AcidUtils.getAcidState(location, conf, txns);
        deltaLoop: for (AcidUtils.ParsedDelta delta : dir.getCurrentDirectories()) {
            FileStatus[] buckets = fs.listStatus(delta.getPath(), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    //since this is inside a delta dir created by Hive 2.x or earlier it can only contain
                    //bucket_x or bucket_x__flush_length
                    return path.getName().startsWith("bucket_");
                }
            });
            for (FileStatus bucket : buckets) {
                if (bucket.getPath().getName().endsWith("_flush_length")) {
                    //streaming ingest dir - cannot have update/delete events
                    continue deltaLoop;
                }
                if (needsCompaction(bucket, fs)) {
                    //found delete events - this 'location' needs compacting
                    compactionMetaInfo.numberOfBytes += getDataSize(location, conf);

                    //if there are un-compacted original files, they will be included in compaction, so
                    //count at the size for 'cost' estimation later
                    for (HadoopShims.HdfsFileStatusWithId origFile : dir.getOriginalFiles()) {
                        FileStatus fileStatus = origFile.getFileStatus();
                        if (fileStatus != null) {
                            compactionMetaInfo.numberOfBytes += fileStatus.getLen();
                        }
                    }
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * @param location - path to a partition (or table if not partitioned) dir
     */
    private static long getDataSize(Path location, HiveConf conf) throws IOException {
        FileSystem fs = location.getFileSystem(conf);
        ContentSummary cs = fs.getContentSummary(location);
        return cs.getLength();
    }

    private static boolean needsCompaction(FileStatus bucket, FileSystem fs) throws IOException {
        //create reader, look at footer
        //no need to check side file since it can only be in a streaming ingest delta
        Reader orcReader = OrcFile.createReader(bucket.getPath(),
                OrcFile.readerOptions(fs.getConf()).filesystem(fs));
        AcidStats as = OrcAcidUtils.parseAcidStats(orcReader);
        if (as == null) {
            //should never happen since we are reading bucket_x written by acid write
            throw new IllegalStateException("AcidStats missing in " + bucket.getPath());
        }
        return as.deletes > 0 || as.updates > 0;
    }

    private static String getCompactionCommand(Table t, Partition p) {
        StringBuilder sb = new StringBuilder("ALTER TABLE ").append(Warehouse.getQualifiedName(t));
        if (t.getPartitionKeysSize() > 0) {
            assert p != null : "must supply partition for partitioned table " + Warehouse.getQualifiedName(t);
            sb.append(" PARTITION(");
            for (int i = 0; i < t.getPartitionKeysSize(); i++) {
                sb.append(t.getPartitionKeys().get(i).getName()).append('=')
                        .append(genPartValueString(t.getPartitionKeys().get(i).getType(), p.getValues().get(i)))
                        .append(",");
            }
            sb.setCharAt(sb.length() - 1, ')');//replace trailing ','
        }
        return sb.append(" COMPACT 'major'").toString();
    }

    /**
     * This is copy-pasted from {@link org.apache.hadoop.hive.ql.parse.ColumnStatsSemanticAnalyzer},
     * which can't be refactored since this is linked against Hive 2.x
     */
    private static String genPartValueString(String partColType, String partVal) {
        String returnVal = partVal;
        if (partColType.equals(serdeConstants.STRING_TYPE_NAME)
                || partColType.contains(serdeConstants.VARCHAR_TYPE_NAME)
                || partColType.contains(serdeConstants.CHAR_TYPE_NAME)) {
            returnVal = "'" + escapeSQLString(partVal) + "'";
        } else if (partColType.equals(serdeConstants.TINYINT_TYPE_NAME)) {
            returnVal = partVal + "Y";
        } else if (partColType.equals(serdeConstants.SMALLINT_TYPE_NAME)) {
            returnVal = partVal + "S";
        } else if (partColType.equals(serdeConstants.INT_TYPE_NAME)) {
            returnVal = partVal;
        } else if (partColType.equals(serdeConstants.BIGINT_TYPE_NAME)) {
            returnVal = partVal + "L";
        } else if (partColType.contains(serdeConstants.DECIMAL_TYPE_NAME)) {
            returnVal = partVal + "BD";
        } else if (partColType.equals(serdeConstants.DATE_TYPE_NAME)
                || partColType.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) {
            returnVal = partColType + " '" + escapeSQLString(partVal) + "'";
        } else {
            //for other usually not used types, just quote the value
            returnVal = "'" + escapeSQLString(partVal) + "'";
        }

        return returnVal;
    }

    private static boolean isFullAcidTable(Table t) {
        if (t.getParametersSize() <= 0) {
            //cannot be acid
            return false;
        }
        String transacationalValue = t.getParameters().get(hive_metastoreConstants.TABLE_IS_TRANSACTIONAL);
        if (transacationalValue != null && "true".equalsIgnoreCase(transacationalValue)) {
            System.out.println("Found Acid table: " + Warehouse.getQualifiedName(t));
            return true;
        }
        return false;
    }

    private static boolean isAcidEnabled(HiveConf hiveConf) {
        String txnMgr = hiveConf.getVar(HiveConf.ConfVars.HIVE_TXN_MANAGER);
        boolean concurrency = hiveConf.getBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY);
        String dbTxnMgr = "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager";
        return txnMgr.equals(dbTxnMgr) && concurrency;
    }

    private static class CompactionMetaInfo {
        /**
         * total number of bytes to be compacted across all compaction commands
         */
        long numberOfBytes;
        /**
         * IDs of compactions launched by this utility
         */
        Set<Long> compactionIds = new HashSet<>();
    }

    @VisibleForTesting
    static abstract class Callback {
        /**
         * This is a hack enable Unit testing.  Derby can't handle multiple concurrent threads but
         * somehow Compactor needs to run to test "execute" mode.  This callback can be used
         * to run Worker.  For TESTING ONLY.
         */
        void onWaitForCompaction() throws MetaException {
        }
    }

    @VisibleForTesting
    static Callback callback;
    @VisibleForTesting
    static int pollIntervalMs = 1000 * 30;
    /**
     * Also to enable testing until I set up Maven profiles to be able to run with 3.0 jars
     */
    @VisibleForTesting
    static boolean isTestMode = false;
    /**
     * can set it from tests to test when config needs something other than default values
     */
    @VisibleForTesting
    static HiveConf hiveConf = null;
}