co.cask.tigon.data.util.hbase.HBaseTableUtil.java Source code

Introduction

Here is the source code for co.cask.tigon.data.util.hbase.HBaseTableUtil.java
Source

/*
 * Copyright © 2014 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.tigon.data.util.hbase;

import co.cask.tigon.api.common.Bytes;
import co.cask.tigon.data.co.cask.tigon.data.hbase.wd.AbstractRowKeyDistributor;
import co.cask.tigon.data.transaction.queue.hbase.HBaseQueueAdmin;
import com.google.common.base.Preconditions;
import com.google.common.base.Stopwatch;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import com.google.common.hash.Hasher;
import com.google.common.hash.Hashing;
import com.google.common.io.Files;
import com.google.common.io.OutputSupplier;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.twill.filesystem.Location;
import org.apache.twill.internal.utils.Dependencies;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.regex.Matcher;

/**
 * Common utilities for dealing with HBase.
 */
public abstract class HBaseTableUtil {
    /**
     * Compression types that can be requested for HBase tables.
     */
    public enum CompressionType {
        LZO,
        SNAPPY,
        GZIP,
        NONE
    }

    /**
     * Bloom filter types that can be requested for HBase tables.
     */
    public enum BloomType {
        ROW,
        ROWCOL,
        NONE
    }

    /** Logger used by both the static helpers and the instance methods of this class. */
    private static final Logger LOG = LoggerFactory.getLogger(HBaseTableUtil.class);

    /**
     * Default time to wait for a table to appear after a concurrent creation attempt: 5 seconds, expressed in
     * milliseconds (it is passed together with {@link TimeUnit#MILLISECONDS}).
     */
    public static final long MAX_CREATE_TABLE_WAIT = 5000L;

    /** Default write buffer size: 4 MB (4 * 1024 * 1024 bytes). Not referenced within this class. */
    public static final int DEFAULT_WRITE_BUFFER_SIZE = 4 * 1024 * 1024;

    /** Size of the buffer used when copying class bytes into the coprocessor jar: 4 KB. */
    private static final int COPY_BUFFER_SIZE = 0x1000;
    /** Compression type applied when {@link #CFG_HBASE_TABLE_COMPRESSION} is not set in the configuration. */
    private static final CompressionType DEFAULT_COMPRESSION_TYPE = CompressionType.SNAPPY;
    /** Configuration key holding the name of the {@link CompressionType} to apply to newly created tables. */
    public static final String CFG_HBASE_TABLE_COMPRESSION = "hbase.table.compression.default";

    /**
     * Returns the name to use in HBase for the given table name, which is its URL-encoded form.
     *
     * @param tableName the table name to convert
     * @return the URL-encoded table name
     */
    public static String getHBaseTableName(String tableName) {
        String hbaseTableName = encodeTableName(tableName);
        return hbaseTableName;
    }

    /**
     * URL-encodes a table name using the ASCII character set.
     *
     * @param tableName the table name to encode
     * @return the encoded name
     * @throws RuntimeException wrapping the {@link UnsupportedEncodingException} if ASCII is not available
     */
    private static String encodeTableName(String tableName) {
        String encoded;
        try {
            encoded = URLEncoder.encode(tableName, "ASCII");
        } catch (UnsupportedEncodingException unsupported) {
            // Not expected in practice: ASCII is a character set every JVM has to provide.
            LOG.error("Error encoding table name '" + tableName + "'", unsupported);
            throw new RuntimeException(unsupported);
        }
        return encoded;
    }

    /**
     * Creates an HBase table, without pre-split keys, unless it already exists. The table name is converted to
     * bytes and the call is forwarded to the {@code byte[]} based overload, which handles concurrent creation
     * by another client.
     *
     * @param admin the hbase admin
     * @param tableName the name of the table
     * @param tableDescriptor hbase table descriptor for the new table
     * @throws IOException if the underlying HBase admin calls fail
     */
    public void createTableIfNotExists(HBaseAdmin admin, String tableName, HTableDescriptor tableDescriptor)
            throws IOException {
        byte[] tableNameBytes = Bytes.toBytes(tableName);
        byte[][] noSplitKeys = null;
        createTableIfNotExists(admin, tableNameBytes, tableDescriptor, noSplitKeys);
    }

    /**
     * Creates an HBase table if it does not exist yet, using the default wait time. Equivalent to calling
     * {@link #createTableIfNotExists(org.apache.hadoop.hbase.client.HBaseAdmin, byte[],
     * org.apache.hadoop.hbase.HTableDescriptor, byte[][], long, java.util.concurrent.TimeUnit)}
     * with a timeout of {@link #MAX_CREATE_TABLE_WAIT} milliseconds.
     *
     * @param admin the hbase admin
     * @param tableName the name of the table
     * @param tableDescriptor hbase table descriptor for the new table
     * @param splitKeys keys to pre-split the table with; passed through unchanged
     * @throws IOException if the underlying HBase admin calls fail
     */
    public void createTableIfNotExists(HBaseAdmin admin, byte[] tableName, HTableDescriptor tableDescriptor,
            byte[][] splitKeys) throws IOException {
        final long defaultWait = MAX_CREATE_TABLE_WAIT;
        final TimeUnit defaultWaitUnit = TimeUnit.MILLISECONDS;
        createTableIfNotExists(admin, tableName, tableDescriptor, splitKeys, defaultWait, defaultWaitUnit);
    }

    /**
     * Create a hbase table if it does not exist. Deals with race conditions when two clients concurrently attempt to
     * create the table: if creation fails because the table already exists, this method polls until the table is
     * reported as existing or the timeout elapses. When the timeout elapses an error is logged and the method
     * returns normally.
     *
     * @param admin the hbase admin
     * @param tableName the name of the table
     * @param tableDescriptor hbase table descriptor for the new table; default compression and bloom filter
     *                        settings are applied to its column families before creation
     * @param splitKeys keys to pre-split the table with; may be {@code null} for no pre-splitting
     * @param timeout Maximum time to wait for table creation.
     * @param timeoutUnit The TimeUnit for timeout.
     * @throws IOException if the underlying HBase admin calls fail
     */
    public void createTableIfNotExists(HBaseAdmin admin, byte[] tableName, HTableDescriptor tableDescriptor,
            byte[][] splitKeys, long timeout, TimeUnit timeoutUnit) throws IOException {
        if (admin.tableExists(tableName)) {
            return;
        }
        setDefaultConfiguration(tableDescriptor, admin.getConfiguration());

        // Always log the decoded name: logging the byte[] itself would print its identity, not its content.
        String tableNameString = Bytes.toString(tableName);

        try {
            LOG.info("Creating table '{}'", tableNameString);
            // HBaseAdmin.createTable can handle null splitKeys.
            admin.createTable(tableDescriptor, splitKeys);
            LOG.info("Table created '{}'", tableNameString);
            return;
        } catch (TableExistsException e) {
            // table may exist because someone else is creating it at the same
            // time. But it may not be available yet, and opening it might fail.
            LOG.info("Failed to create table '{}'. {}.", tableNameString, e.getMessage(), e);
        }

        // Wait for table to materialize
        try {
            Stopwatch stopwatch = new Stopwatch();
            stopwatch.start();
            // Poll roughly ten times within the timeout, but never sleep for less than one nanosecond.
            long sleepTime = timeoutUnit.toNanos(timeout) / 10;
            sleepTime = sleepTime <= 0 ? 1 : sleepTime;
            do {
                if (admin.tableExists(tableName)) {
                    LOG.info("Table '{}' exists now. Assuming that another process concurrently created it.",
                            tableNameString);
                    return;
                } else {
                    TimeUnit.NANOSECONDS.sleep(sleepTime);
                }
            } while (stopwatch.elapsedTime(timeoutUnit) < timeout);
        } catch (InterruptedException e) {
            // Restore the interrupt status so that callers further up the stack can react to it.
            Thread.currentThread().interrupt();
            LOG.warn("Sleeping thread interrupted.");
        }
        // Report the timeout that was actually requested, not the class-level default.
        LOG.error("Table '{}' does not exist after waiting {} ms. Giving up.", tableNameString,
                timeoutUnit.toMillis(timeout));
    }

    /**
     * Applies the default compression type and a {@link BloomType#ROW} bloom filter to every column family of the
     * given table descriptor.
     *
     * The compression type is read from {@link #CFG_HBASE_TABLE_COMPRESSION} and falls back to
     * {@link #DEFAULT_COMPRESSION_TYPE}. Making it configurable is a workaround for unit tests, which should run
     * even in environments that do not support certain compression types.
     *
     * @param tableDescriptor descriptor whose column families are updated in place
     * @param conf configuration to read the compression type from
     * @throws IllegalArgumentException if the configured value does not name a {@link CompressionType}
     */
    private void setDefaultConfiguration(HTableDescriptor tableDescriptor, Configuration conf) {
        String compression = conf.get(CFG_HBASE_TABLE_COMPRESSION, DEFAULT_COMPRESSION_TYPE.name());
        // Enum constants are upper-case; normalize so that values such as "snappy" or " GZIP " are accepted too.
        // Locale.ROOT keeps the conversion independent of the JVM's default locale.
        String normalized = compression.trim().toUpperCase(java.util.Locale.ROOT);
        CompressionType compressionAlgo = CompressionType.valueOf(normalized);
        for (HColumnDescriptor hcd : tableDescriptor.getColumnFamilies()) {
            setCompression(hcd, compressionAlgo);
            setBloomFilter(hcd, BloomType.ROW);
        }
    }

    // For simplicity we allow at most 255 splits per bucket for now. The position of a split within its bucket
    // is stored in a single extra byte of the split key.
    private static final int MAX_SPLIT_COUNT_PER_BUCKET = 0xff;

    /**
     * Computes split keys for the given number of splits, using the bucket count and row key distributor
     * declared by {@link HBaseQueueAdmin}.
     *
     * @param splits the requested number of splits
     * @return the split keys; empty if {@code splits} is 1
     */
    public static byte[][] getSplitKeys(int splits) {
        int buckets = HBaseQueueAdmin.ROW_KEY_DISTRIBUTION_BUCKETS;
        AbstractRowKeyDistributor keyDistributor = HBaseQueueAdmin.ROW_KEY_DISTRIBUTOR;
        return getSplitKeys(splits, buckets, keyDistributor);
    }

    /**
     * Computes split keys for pre-splitting a table whose row keys are prefixed with a bucket byte.
     *
     * Split keys have the format {@code <salt bucket byte><extra byte>}. The extra byte is only present when
     * more than one split per bucket is needed; it allows more splits than buckets, because bucket bytes are
     * usually sequential and no value can be inserted between them.
     *
     * @param splits the requested number of splits; 1 means no pre-splitting
     * @param buckets the number of buckets used to work out how many splits fall into each bucket
     * @param keyDistributor distributor whose distributed keys for an empty key determine the bucket count
     * @return the split keys, excluding the first region's start key; empty if {@code splits} is 1
     * @throws IllegalArgumentException if {@code splits} is outside the supported range
     */
    public static byte[][] getSplitKeys(int splits, int buckets, AbstractRowKeyDistributor keyDistributor) {
        // "1" can be used for queue tables that we know are not "hot": no pre-splitting in that case.
        if (splits == 1) {
            return new byte[0][];
        }

        byte[][] bucketSplits = keyDistributor.getAllDistributedKeys(Bytes.EMPTY_BYTE_ARRAY);
        int bucketCount = bucketSplits.length;
        int maxSplits = MAX_SPLIT_COUNT_PER_BUCKET * bucketCount;
        Preconditions.checkArgument(1 <= splits && splits <= maxSplits,
                "Number of pre-splits should be in [1.." + maxSplits + "] range");

        // Round up so that the requested number of splits is covered.
        int splitsPerBucket = (splits + buckets - 1) / buckets;
        if (splitsPerBucket == 0) {
            splitsPerBucket = 1;
        }

        int regionCount = bucketCount * splitsPerBucket;
        byte[][] splitKeys = new byte[regionCount - 1][];

        // Distance between the extra-byte values of two neighbouring splits within one bucket.
        int prefixStep = (MAX_SPLIT_COUNT_PER_BUCKET + 1) / splitsPerBucket;
        boolean useExtraByte = splitsPerBucket > 1;

        // Region 0 is skipped: hbase will figure out the first split on its own.
        for (int region = 1; region < regionCount; region++) {
            int bucket = region / splitsPerBucket;
            int positionInBucket = region % splitsPerBucket;
            byte[] splitKey;
            if (useExtraByte) {
                splitKey = new byte[] { (byte) bucket, (byte) (positionInBucket * prefixStep) };
            } else {
                splitKey = new byte[] { (byte) bucket };
            }
            splitKeys[region - 1] = splitKey;
        }

        return splitKeys;
    }

    /**
     * Builds a jar containing the given coprocessor classes together with their dependencies from the
     * {@code co.cask} packages, and copies it to {@code jarDir}.
     *
     * The jar is first written to a local temporary file, which is deleted before this method returns. The
     * target file is named {@code coprocessor<md5>.jar}, where the MD5 is computed over the bytes of all
     * included classes. If a file with that name and the same length already exists, it is reused.
     *
     * @param filePrefix prefix for the name of the local temporary jar file
     * @param jarDir directory in which the jar is placed; created if it does not exist
     * @param classes the coprocessor classes to package
     * @return location of the jar, or {@code null} if {@code classes} is empty or no class to include was found
     * @throws IOException if the temporary file or the target directory cannot be created, or the copy fails;
     *         a failure while writing a class into the jar is rethrown through {@code Throwables.propagate}
     */
    public static Location createCoProcessorJar(String filePrefix, Location jarDir,
            Iterable<? extends Class<? extends Coprocessor>> classes) throws IOException {
        StringBuilder buf = new StringBuilder();
        for (Class<? extends Coprocessor> c : classes) {
            buf.append(c.getName()).append(", ");
        }
        if (buf.length() == 0) {
            return null;
        }

        LOG.debug("Creating jar file for coprocessor classes: " + buf.toString());
        final Hasher hasher = Hashing.md5().newHasher();
        final byte[] buffer = new byte[COPY_BUFFER_SIZE];

        final Map<String, URL> dependentClasses = new HashMap<String, URL>();
        for (Class<? extends Coprocessor> clz : classes) {
            Dependencies.findClassDependencies(clz.getClassLoader(), new Dependencies.ClassAcceptor() {
                @Override
                public boolean accept(String className, final URL classUrl, URL classPathUrl) {
                    // Assuming the endpoint and protocol classes don't have dependencies
                    // other than those that come with HBase and Java.
                    if (className.startsWith("co.cask")) {
                        if (!dependentClasses.containsKey(className)) {
                            dependentClasses.put(className, classUrl);
                        }
                        return true;
                    }
                    return false;
                }
            }, clz.getName());
        }

        if (!dependentClasses.isEmpty()) {
            LOG.debug("Adding " + dependentClasses.size() + " classes to jar");

            // Process classes in name order so that the MD5, and with it the name of the jar, is the same for
            // the same set of classes regardless of the iteration order of the HashMap.
            Map<String, URL> sortedClasses = Maps.newTreeMap();
            sortedClasses.putAll(dependentClasses);

            File jarFile = File.createTempFile(filePrefix, ".jar");
            try {
                JarOutputStream jarOutput = null;
                try {
                    jarOutput = new JarOutputStream(new FileOutputStream(jarFile));
                    for (Map.Entry<String, URL> entry : sortedClasses.entrySet()) {
                        try {
                            // Jar entry names always use '/' as separator, whatever the platform's
                            // file separator is.
                            jarOutput.putNextEntry(new JarEntry(entry.getKey().replace('.', '/') + ".class"));
                            InputStream inputStream = entry.getValue().openStream();

                            try {
                                int len = inputStream.read(buffer);
                                while (len >= 0) {
                                    hasher.putBytes(buffer, 0, len);
                                    jarOutput.write(buffer, 0, len);
                                    len = inputStream.read(buffer);
                                }
                            } finally {
                                inputStream.close();
                            }
                        } catch (IOException e) {
                            LOG.info("Error writing to jar", e);
                            throw Throwables.propagate(e);
                        }
                    }
                } finally {
                    if (jarOutput != null) {
                        jarOutput.close();
                    }
                }

                // Copy jar file into HDFS
                // Target path is the jarDir + jarMD5.jar
                final Location targetPath = jarDir.append("coprocessor" + hasher.hash().toString() + ".jar");

                // If the file exists and has the same size, assume the file has not changed
                if (targetPath.exists() && targetPath.length() == jarFile.length()) {
                    return targetPath;
                }

                // Copy jar file into filesystem
                if (!jarDir.mkdirs() && !jarDir.exists()) {
                    throw new IOException("Fails to create directory: " + jarDir.toURI());
                }
                Files.copy(jarFile, new OutputSupplier<OutputStream>() {
                    @Override
                    public OutputStream getOutput() throws IOException {
                        return targetPath.getOutputStream();
                    }
                });
                return targetPath;
            } finally {
                jarFile.delete();
            }
        }
        // no dependent classes to add
        return null;
    }

    /**
     * Returns information for all coprocessors configured for the table.
     *
     * Every table attribute whose key matches the coprocessor key pattern is parsed into a
     * {@link CoprocessorInfo}. Attributes whose value does not match the expected specification are skipped;
     * attributes that fail to parse are skipped with a warning.
     *
     * @param tableDescriptor descriptor of the table to inspect
     * @return a Map from coprocessor class name to CoprocessorInfo
     */
    public static Map<String, CoprocessorInfo> getCoprocessorInfo(HTableDescriptor tableDescriptor) {
        Map<String, CoprocessorInfo> coprocessors = Maps.newHashMap();

        // The parsing below is copied from RegionCoprocessorHost in HBase
        for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> attribute
                : tableDescriptor.getValues().entrySet()) {
            String key = Bytes.toString(attribute.getKey().get()).trim();
            String spec = Bytes.toString(attribute.getValue().get()).trim();

            boolean coprocessorAttribute = HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(key).matches();
            if (coprocessorAttribute) {
                try {
                    Matcher specMatcher = HConstants.CP_HTD_ATTR_VALUE_PATTERN.matcher(spec);
                    if (specMatcher.matches()) {
                        String className = specMatcher.group(2).trim();

                        // An empty path group means that no jar path was given for the coprocessor.
                        String jarPath = specMatcher.group(1).trim();
                        Path path = jarPath.isEmpty() ? null : new Path(jarPath);

                        int priority;
                        String prioritySpec = specMatcher.group(3);
                        if (prioritySpec.trim().isEmpty()) {
                            priority = Coprocessor.PRIORITY_USER;
                        } else {
                            priority = Integer.valueOf(prioritySpec);
                        }

                        String cfgSpec = null;
                        try {
                            cfgSpec = specMatcher.group(4);
                        } catch (IndexOutOfBoundsException ex) {
                            // ignore: the pattern in use has no such group
                        }

                        Map<String, String> properties = Maps.newHashMap();
                        if (cfgSpec != null) {
                            // Drop everything up to and including the first '|', then collect the parameters.
                            String params = cfgSpec.substring(cfgSpec.indexOf('|') + 1);
                            Matcher paramMatcher = HConstants.CP_HTD_ATTR_VALUE_PARAM_PATTERN.matcher(params);
                            while (paramMatcher.find()) {
                                properties.put(paramMatcher.group(1), paramMatcher.group(2));
                            }
                        }
                        coprocessors.put(className, new CoprocessorInfo(className, path, priority, properties));
                    }
                } catch (Exception ex) {
                    LOG.warn("Coprocessor attribute '{}' has invalid coprocessor specification '{}'",
                            key, spec, ex);
                }
            }
        }

        return coprocessors;
    }

    /**
     * Sets the given compression type on a column family descriptor. Within this class it is called for every
     * column family of a table that is about to be created.
     *
     * @param columnDescriptor the column family descriptor to modify
     * @param type the compression type to apply
     */
    public abstract void setCompression(HColumnDescriptor columnDescriptor, CompressionType type);

    /**
     * Sets the given bloom filter type on a column family descriptor. Within this class it is called with
     * {@link BloomType#ROW} for every column family of a table that is about to be created.
     *
     * @param columnDescriptor the column family descriptor to modify
     * @param type the bloom filter type to apply
     */
    public abstract void setBloomFilter(HColumnDescriptor columnDescriptor, BloomType type);

    /**
     * Returns the compression type of a column family descriptor.
     * NOTE(review): presumably the counterpart of {@link #setCompression}; not called within this class.
     */
    public abstract CompressionType getCompression(HColumnDescriptor columnDescriptor);

    /**
     * Returns the bloom filter type of a column family descriptor.
     * NOTE(review): presumably the counterpart of {@link #setBloomFilter}; not called within this class.
     */
    public abstract BloomType getBloomFilter(HColumnDescriptor columnDescriptor);

    // The four methods below each return a coprocessor class. NOTE(review): the "ForVersion" suffix suggests
    // that subclasses pick an implementation matching a specific HBase version - confirm against the subclasses.

    public abstract Class<? extends Coprocessor> getTransactionDataJanitorClassForVersion();

    public abstract Class<? extends Coprocessor> getQueueRegionObserverClassForVersion();

    public abstract Class<? extends Coprocessor> getDequeueScanObserverClassForVersion();

    public abstract Class<? extends Coprocessor> getIncrementHandlerClassForVersion();

    /**
     * Collects HBase table stats.
     * @param admin instance of {@link org.apache.hadoop.hbase.client.HBaseAdmin} to communicate with HBase
     * @return map of table name -> table stats
     * @throws java.io.IOException if the stats cannot be collected
     */
    public abstract Map<String, TableStats> getTableStats(HBaseAdmin admin) throws IOException;

    /**
     * Holds the size of a table's store files and of its memstore, both in megabytes.
     */
    public static final class TableStats {
        private int storeFileSizeMB;
        private int memStoreSizeMB;

        /**
         * @param storeFileSizeMB initial store file size in MB
         * @param memStoreSizeMB initial memstore size in MB
         */
        public TableStats(int storeFileSizeMB, int memStoreSizeMB) {
            this.storeFileSizeMB = storeFileSizeMB;
            this.memStoreSizeMB = memStoreSizeMB;
        }

        /** Returns the store file size in MB. */
        public int getStoreFileSizeMB() {
            return this.storeFileSizeMB;
        }

        /** Returns the memstore size in MB. */
        public int getMemStoreSizeMB() {
            return this.memStoreSizeMB;
        }

        /** Adds {@code deltaMB} to the store file size. */
        void incStoreFileSizeMB(int deltaMB) {
            storeFileSizeMB = storeFileSizeMB + deltaMB;
        }

        /** Adds {@code deltaMB} to the memstore size. */
        void incMemStoreSizeMB(int deltaMB) {
            memStoreSizeMB = memStoreSizeMB + deltaMB;
        }

        /**
         * Returns the sum of store file size and memstore size in MB. Both contribute to the size of the
         * dataset; counting only the size on the file system would confuse users with zeroes in the dataset
         * size even after something was written.
         */
        public int getTotalSizeMB() {
            return getStoreFileSizeMB() + getMemStoreSizeMB();
        }
    }

    /**
     * Describes one coprocessor configured on a table: its class name, jar path, priority and properties.
     */
    public static final class CoprocessorInfo {
        private final String className;
        private final Path path;
        private final int priority;
        private final Map<String, String> properties;

        private CoprocessorInfo(String coprocessorClassName, Path jarPath, int coprocessorPriority,
                Map<String, String> coprocessorProperties) {
            // Keep an immutable copy so that later changes to the caller's map are not visible here.
            properties = ImmutableMap.copyOf(coprocessorProperties);
            priority = coprocessorPriority;
            path = jarPath;
            className = coprocessorClassName;
        }

        /** Returns the class name of the coprocessor. */
        public String getClassName() {
            return this.className;
        }

        /** Returns the path this instance was created with; may be {@code null}. */
        public Path getPath() {
            return this.path;
        }

        /** Returns the priority of the coprocessor. */
        public int getPriority() {
            return this.priority;
        }

        /** Returns the immutable map of coprocessor properties. */
        public Map<String, String> getProperties() {
            return this.properties;
        }
    }
}