co.cask.cdap.data2.transaction.queue.coprocessor.hbase96.HBaseQueueRegionObserver.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.data2.transaction.queue.coprocessor.hbase96.HBaseQueueRegionObserver.java

Source

/*
 * Copyright  2014-2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package co.cask.cdap.data2.transaction.queue.coprocessor.hbase96;

import co.cask.cdap.common.queue.QueueName;
import co.cask.cdap.data2.transaction.coprocessor.DefaultTransactionStateCacheSupplier;
import co.cask.cdap.data2.transaction.queue.ConsumerEntryState;
import co.cask.cdap.data2.transaction.queue.QueueEntryRow;
import co.cask.cdap.data2.transaction.queue.hbase.HBaseQueueAdmin;
import co.cask.cdap.data2.transaction.queue.hbase.SaltedHBaseQueueStrategy;
import co.cask.cdap.data2.transaction.queue.hbase.coprocessor.CConfigurationReader;
import co.cask.cdap.data2.transaction.queue.hbase.coprocessor.ConsumerConfigCache;
import co.cask.cdap.data2.transaction.queue.hbase.coprocessor.ConsumerInstance;
import co.cask.cdap.data2.transaction.queue.hbase.coprocessor.QueueConsumerConfig;
import co.cask.cdap.data2.util.TableId;
import co.cask.cdap.data2.util.hbase.HTable96NameConverter;
import co.cask.tephra.coprocessor.TransactionStateCache;
import co.cask.tephra.persist.TransactionSnapshot;
import com.google.common.base.Supplier;
import com.google.common.io.InputSupplier;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CoprocessorEnvironment;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver;
import org.apache.hadoop.hbase.coprocessor.ObserverContext;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.regionserver.InternalScanner;
import org.apache.hadoop.hbase.regionserver.ScanType;
import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.compactions.CompactionRequest;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.Iterator;
import java.util.List;

/**
 * RegionObserver for queue table. This class should only have JSE and HBase classes dependencies only.
 * It can also has dependencies on CDAP classes provided that all the transitive dependencies stay within
 * the mentioned scope.
 *
 * This region observer does queue eviction during flush time and compact time by using queue consumer state
 * information to determine if a queue entry row can be omitted during flush/compact.
 */
public final class HBaseQueueRegionObserver extends BaseRegionObserver {

    private static final Log LOG = LogFactory.getLog(HBaseQueueRegionObserver.class);

    private TableName configTableName;
    private CConfigurationReader cConfReader;
    private TransactionStateCache txStateCache;
    private Supplier<TransactionSnapshot> txSnapshotSupplier;
    private ConsumerConfigCache configCache;

    private int prefixBytes;
    private String namespaceId;
    private String appName;
    private String flowName;

    @Override
    public void start(CoprocessorEnvironment env) {
        if (env instanceof RegionCoprocessorEnvironment) {
            HTableDescriptor tableDesc = ((RegionCoprocessorEnvironment) env).getRegion().getTableDesc();
            String hTableName = tableDesc.getNameAsString();

            String prefixBytes = tableDesc.getValue(HBaseQueueAdmin.PROPERTY_PREFIX_BYTES);
            try {
                // Default to SALT_BYTES for the older salted queue implementation.
                this.prefixBytes = prefixBytes == null ? SaltedHBaseQueueStrategy.SALT_BYTES
                        : Integer.parseInt(prefixBytes);
            } catch (NumberFormatException e) {
                // Shouldn't happen for table created by cdap.
                LOG.error("Unable to parse value of '" + HBaseQueueAdmin.PROPERTY_PREFIX_BYTES + "' property. "
                        + "Default to " + SaltedHBaseQueueStrategy.SALT_BYTES, e);
                this.prefixBytes = SaltedHBaseQueueStrategy.SALT_BYTES;
            }

            HTable96NameConverter nameConverter = new HTable96NameConverter();
            namespaceId = nameConverter.from(tableDesc).getNamespace().getId();
            appName = HBaseQueueAdmin.getApplicationName(hTableName);
            flowName = HBaseQueueAdmin.getFlowName(hTableName);

            Configuration conf = env.getConfiguration();
            String hbaseNamespacePrefix = nameConverter.getNamespacePrefix(tableDesc);
            TableId queueConfigTableId = HBaseQueueAdmin.getConfigTableId(namespaceId);
            final String sysConfigTablePrefix = nameConverter.getSysConfigTablePrefix(tableDesc);
            txStateCache = new DefaultTransactionStateCacheSupplier(sysConfigTablePrefix, conf).get();
            txSnapshotSupplier = new Supplier<TransactionSnapshot>() {
                @Override
                public TransactionSnapshot get() {
                    return txStateCache.getLatestState();
                }
            };
            configTableName = nameConverter.toTableName(hbaseNamespacePrefix, queueConfigTableId);
            cConfReader = new CConfigurationReader(conf, sysConfigTablePrefix);
            configCache = createConfigCache(env);
        }
    }

    @Override
    public InternalScanner preFlush(ObserverContext<RegionCoprocessorEnvironment> e, Store store,
            InternalScanner scanner) throws IOException {
        if (!e.getEnvironment().getRegion().isAvailable()) {
            return scanner;
        }

        LOG.info("preFlush, creates EvictionInternalScanner");
        return new EvictionInternalScanner("flush", e.getEnvironment(), scanner);
    }

    @Override
    public InternalScanner preCompact(ObserverContext<RegionCoprocessorEnvironment> e, Store store,
            InternalScanner scanner, ScanType type, CompactionRequest request) throws IOException {
        if (!e.getEnvironment().getRegion().isAvailable()) {
            return scanner;
        }

        LOG.info("preCompact, creates EvictionInternalScanner");
        return new EvictionInternalScanner("compaction", e.getEnvironment(), scanner);
    }

    // needed for queue unit-test
    @SuppressWarnings("unused")
    private void updateCache() throws IOException {
        ConsumerConfigCache configCache = this.configCache;
        if (configCache != null) {
            configCache.updateCache();
        }
    }

    private ConsumerConfigCache getConfigCache(CoprocessorEnvironment env) {
        if (!configCache.isAlive()) {
            configCache = createConfigCache(env);
        }
        return configCache;
    }

    private ConsumerConfigCache createConfigCache(final CoprocessorEnvironment env) {
        return ConsumerConfigCache.getInstance(configTableName, cConfReader, txSnapshotSupplier,
                new InputSupplier<HTableInterface>() {
                    @Override
                    public HTableInterface getInput() throws IOException {
                        return env.getTable(configTableName);
                    }
                });
    }

    // need for queue unit-test
    private TransactionStateCache getTxStateCache() {
        return txStateCache;
    }

    /**
     * An {@link InternalScanner} that will skip queue entries that are safe to be evicted.
     */
    private final class EvictionInternalScanner implements InternalScanner {

        private final String triggeringAction;
        private final RegionCoprocessorEnvironment env;
        private final InternalScanner scanner;
        // This is just for object reused to reduce objects creation.
        private final ConsumerInstance consumerInstance;
        private byte[] currentQueue;
        private byte[] currentQueueRowPrefix;
        private QueueConsumerConfig consumerConfig;
        private long totalRows = 0;
        private long rowsEvicted = 0;
        // couldn't be evicted due to incomplete view of row
        private long skippedIncomplete = 0;

        private EvictionInternalScanner(String action, RegionCoprocessorEnvironment env, InternalScanner scanner) {
            this.triggeringAction = action;
            this.env = env;
            this.scanner = scanner;
            this.consumerInstance = new ConsumerInstance(0, 0);
        }

        @Override
        public boolean next(List<Cell> results) throws IOException {
            return next(results, -1);
        }

        @Override
        public boolean next(List<Cell> results, int limit) throws IOException {
            boolean hasNext = scanner.next(results, limit);

            while (!results.isEmpty()) {
                totalRows++;
                // Check if it is eligible for eviction.
                Cell cell = results.get(0);

                // If current queue is unknown or the row is not a queue entry of current queue,
                // it either because it scans into next queue entry or simply current queue is not known.
                // Hence needs to find the currentQueue
                if (currentQueue == null || !QueueEntryRow.isQueueEntry(currentQueueRowPrefix, prefixBytes,
                        cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())) {
                    // If not eligible, it either because it scans into next queue entry or simply current queue is not known.
                    currentQueue = null;
                }

                // This row is a queue entry. If currentQueue is null, meaning it's a new queue encountered during scan.
                if (currentQueue == null) {
                    QueueName queueName = QueueEntryRow.getQueueName(namespaceId, appName, flowName, prefixBytes,
                            cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
                    currentQueue = queueName.toBytes();
                    currentQueueRowPrefix = QueueEntryRow.getQueueRowPrefix(queueName);
                    consumerConfig = getConfigCache(env).getConsumerConfig(currentQueue);
                }

                if (consumerConfig == null) {
                    // no config is present yet, so cannot evict
                    return hasNext;
                }

                if (canEvict(consumerConfig, results)) {
                    rowsEvicted++;
                    results.clear();
                    hasNext = scanner.next(results, limit);
                } else {
                    break;
                }
            }

            return hasNext;
        }

        @Override
        public void close() throws IOException {
            LOG.info("Region " + env.getRegion().getRegionNameAsString() + " " + triggeringAction
                    + ", rows evicted: " + rowsEvicted + " / " + totalRows + ", skipped incomplete: "
                    + skippedIncomplete);
            scanner.close();
        }

        /**
         * Determines the given queue entry row can be evicted.
         * @param result All KeyValues of a queue entry row.
         * @return true if it can be evicted, false otherwise.
         */
        private boolean canEvict(QueueConsumerConfig consumerConfig, List<Cell> result) {
            // If no consumer group, this queue is dead, should be ok to evict.
            if (consumerConfig.getNumGroups() == 0) {
                return true;
            }

            // If unknown consumer config (due to error), keep the queue.
            if (consumerConfig.getNumGroups() < 0) {
                return false;
            }

            // TODO (terence): Right now we can only evict if we see all the data columns.
            // It's because it's possible that in some previous flush, only the data columns are flush,
            // then consumer writes the state columns. In the next flush, it'll only see the state columns and those
            // should not be evicted otherwise the entry might get reprocessed, depending on the consumer start row state.
            // This logic is not perfect as if flush happens after enqueue and before dequeue, that entry may never get
            // evicted (depends on when the next compaction happens, whether the queue configuration has been change or not).

            // There are two data columns, "d" and "m".
            // If the size == 2, it should not be evicted as well,
            // as state columns (dequeue) always happen after data columns (enqueue).
            if (result.size() <= 2) {
                skippedIncomplete++;
                return false;
            }

            // "d" and "m" columns always comes before the state columns, prefixed with "s".
            Iterator<Cell> iterator = result.iterator();
            Cell cell = iterator.next();
            if (!QueueEntryRow.isDataColumn(cell.getQualifierArray(), cell.getQualifierOffset())) {
                skippedIncomplete++;
                return false;
            }
            cell = iterator.next();
            if (!QueueEntryRow.isMetaColumn(cell.getQualifierArray(), cell.getQualifierOffset())) {
                skippedIncomplete++;
                return false;
            }

            // Need to determine if this row can be evicted iff all consumer groups have committed process this row.
            int consumedGroups = 0;
            // Inspect each state column
            while (iterator.hasNext()) {
                cell = iterator.next();
                if (!QueueEntryRow.isStateColumn(cell.getQualifierArray(), cell.getQualifierOffset())) {
                    continue;
                }
                // If any consumer has a state != PROCESSED, it should not be evicted
                if (!isProcessed(cell, consumerInstance)) {
                    break;
                }
                // If it is PROCESSED, check if this row is smaller than the consumer instance startRow.
                // Essentially a loose check of committed PROCESSED.
                byte[] startRow = consumerConfig.getStartRow(consumerInstance);
                if (startRow != null && compareRowKey(cell, startRow) < 0) {
                    consumedGroups++;
                }
            }

            // It can be evicted if from the state columns, it's been processed by all consumer groups
            // Otherwise, this row has to be less than smallest among all current consumers.
            // The second condition is for handling consumer being removed after it consumed some entries.
            // However, the second condition alone is not good enough as it's possible that in hash partitioning,
            // only one consumer is keep consuming when the other consumer never proceed.
            return consumedGroups == consumerConfig.getNumGroups()
                    || compareRowKey(result.get(0), consumerConfig.getSmallestStartRow()) < 0;
        }

        private int compareRowKey(Cell cell, byte[] row) {
            return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset() + prefixBytes,
                    cell.getRowLength() - prefixBytes, row, 0, row.length);
        }

        /**
         * Returns {@code true} if the given {@link KeyValue} has a {@link ConsumerEntryState#PROCESSED} state and
         * also put the consumer information into the given {@link ConsumerInstance}.
         * Otherwise, returns {@code false} and the {@link ConsumerInstance} is left untouched.
         */
        private boolean isProcessed(Cell cell, ConsumerInstance consumerInstance) {
            int stateIdx = cell.getValueOffset() + cell.getValueLength() - 1;
            boolean processed = cell.getValueArray()[stateIdx] == ConsumerEntryState.PROCESSED.getState();

            if (processed) {
                // Column is "s<groupId>"
                long groupId = Bytes.toLong(cell.getQualifierArray(), cell.getQualifierOffset() + 1);
                // Value is "<writePointer><instanceId><state>"
                int instanceId = Bytes.toInt(cell.getValueArray(), cell.getValueOffset() + Bytes.SIZEOF_LONG);
                consumerInstance.setGroupInstance(groupId, instanceId);
            }
            return processed;
        }
    }
}