org.apache.hadoop.hbase.master.CatalogJanitor.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.master.CatalogJanitor.java

Source

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.master;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.backup.HFileArchiver;
import org.apache.hadoop.hbase.catalog.MetaEditor;
import org.apache.hadoop.hbase.catalog.MetaReader;
import org.apache.hadoop.hbase.client.MetaScanner;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.PairOfSameType;
import org.apache.hadoop.hbase.util.Triple;

/**
 * A janitor for the catalog tables.  Periodically scans the <code>hbase:meta</code>
 * catalog table looking for unused regions to garbage collect.
 */
@InterfaceAudience.Private
public class CatalogJanitor extends Chore {
    // Logger shared by all instances of this class.
    private static final Log LOG = LogFactory.getLog(CatalogJanitor.class.getName());
    // Server passed to the Chore superclass; also the source of the configuration
    // and catalog tracker used by the scan and cleanup methods.
    private final Server server;
    // Master services: gives access to the master file system, configuration,
    // table descriptors and catalog tracker.
    private final MasterServices services;
    // When false, chore() and initialChore() do not run scan().
    private AtomicBoolean enabled = new AtomicBoolean(true);
    // Claimed by scan() via compareAndSet(false, true) while a scan is in
    // progress and reset to false in scan()'s finally block.
    private AtomicBoolean alreadyRunning = new AtomicBoolean(false);

    /**
     * Creates the janitor chore. The chore name is <code>"CatalogJanitor-"</code>
     * followed by the server's short name, and the value handed to the
     * {@link Chore} constructor as its period is read from the
     * <code>hbase.catalogjanitor.interval</code> configuration key, defaulting to
     * 300000 (presumably milliseconds -- confirm against {@link Chore}).
     * @param server server this chore runs in; supplies the configuration and is
     *        also passed to the superclass constructor
     * @param services master services used for file system and table descriptor
     *        access
     */
    CatalogJanitor(final Server server, final MasterServices services) {
        super("CatalogJanitor-" + server.getServerName().toShortString(),
                server.getConfiguration().getInt("hbase.catalogjanitor.interval", 300000), server);
        this.server = server;
        this.services = services;
    }

    /**
     * Runs one scan up front, provided the janitor is enabled.
     * @return <code>false</code> only when the scan threw an
     *         {@link IOException}; <code>true</code> otherwise (including when
     *         the janitor is disabled and nothing was scanned)
     */
    @Override
    protected boolean initialChore() {
        if (!enabled.get()) {
            // Disabled: nothing to do, and that is not a failure.
            return true;
        }
        try {
            scan();
            return true;
        } catch (IOException ioe) {
            LOG.warn("Failed initial scan of catalog table", ioe);
            return false;
        }
    }

    /**
     * Switches the janitor on or off.
     * @param enabled <code>true</code> to let the chore run scans,
     *        <code>false</code> to make it skip them
     * @return the value the flag held before this call
     */
    public boolean setEnabled(final boolean enabled) {
        final boolean previous = this.enabled.getAndSet(enabled);
        return previous;
    }

    /**
     * @return <code>true</code> if the janitor is currently enabled
     */
    boolean getEnabled() {
        final boolean current = enabled.get();
        return current;
    }

    /**
     * Periodic work of this chore: runs {@link #scan()} when the janitor is
     * enabled, otherwise logs that the scan was skipped. An {@link IOException}
     * from the scan is logged and not propagated.
     */
    @Override
    protected void chore() {
        if (!enabled.get()) {
            LOG.warn("CatalogJanitor disabled! Not running scan.");
            return;
        }
        try {
            scan();
        } catch (IOException ioe) {
            LOG.warn("Failed scan of catalog table", ioe);
        }
    }

    /**
     * Scans hbase:meta for all tables. Convenience overload that delegates to
     * {@link #getMergedRegionsAndSplitParents(TableName)} with a
     * <code>null</code> table name.
     * @return triple of (number of scanned rows, map of merged regions, ordered
     *         map of split parent regioninfos)
     * @throws IOException if the scan of hbase:meta fails
     */
    Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents()
            throws IOException {
        // null means "do not restrict the scan to a single table".
        final TableName everyTable = null;
        return getMergedRegionsAndSplitParents(everyTable);
    }

    /**
     * Scans hbase:meta, counting rows and collecting two kinds of regions:
     * regions whose row carries a merge qualifier, and regions flagged as split
     * parents. When a table name is given, the scan is started for that table and
     * stops as soon as a row of a table sorting after it is seen; when it is
     * <code>null</code> all tables are covered.
     * @param tableName table to restrict the scan to; <code>null</code>
     *        represents all tables
     * @return triple of (number of non-empty rows visited, map of merged regions,
     *         map of split parent regioninfos ordered so that a parent sorts
     *         before its daughters)
     * @throws IOException if the scan of hbase:meta fails
     */
    Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> getMergedRegionsAndSplitParents(
            final TableName tableName) throws IOException {
        // TODO: Only works with single hbase:meta region currently.  Fix.
        final AtomicInteger rowCounter = new AtomicInteger(0);
        final Map<HRegionInfo, Result> mergedByRegion = new TreeMap<HRegionInfo, Result>();
        // Split parents are candidates for cleanup. The comparator makes a split
        // parent come before its daughters.
        final Map<HRegionInfo, Result> parentsByRegion = new TreeMap<HRegionInfo, Result>(
                new SplitParentFirstComparator());

        // Visitor that counts rows and files each interesting region into the maps above.
        MetaScannerVisitor collector = new MetaScanner.MetaScannerVisitorBase() {
            @Override
            public boolean processRow(Result row) throws IOException {
                // The return value tells the scanner whether to keep going.
                if (row == null || row.isEmpty()) {
                    return true;
                }
                rowCounter.incrementAndGet();
                HRegionInfo regionInfo = HRegionInfo.getHRegionInfo(row);
                if (regionInfo == null) {
                    return true;
                }
                if (tableName != null && regionInfo.getTable().compareTo(tableName) > 0) {
                    // Reached another table: stop scanning.
                    return false;
                }
                if (regionInfo.isSplitParent()) {
                    parentsByRegion.put(regionInfo, row);
                }
                boolean carriesMergeQualifier = row.getValue(HConstants.CATALOG_FAMILY,
                        HConstants.MERGEA_QUALIFIER) != null;
                if (carriesMergeQualifier) {
                    mergedByRegion.put(regionInfo, row);
                }
                return true;
            }
        };

        // Scan hbase:meta with the visitor; tableName (possibly null) selects the start row.
        MetaScanner.metaScan(server.getConfiguration(), null, collector, tableName);

        return new Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>>(rowCounter.get(),
                mergedByRegion, parentsByRegion);
    }

    /**
     * If the merged region no longer holds references to the two regions it was
     * merged from (or the merged region's directory no longer exists), archive
     * those two regions on the file system and delete the merge qualifiers from
     * the merged region's row in hbase:meta.
     * <p>
     * An I/O error while probing the merged region is NOT treated as "region is
     * gone": in that case nothing is archived and <code>false</code> is returned
     * so the cleanup is retried on a later pass.
     * @param mergedRegion the region produced by the merge
     * @param regionA first region that was merged
     * @param regionB second region that was merged
     * @return true if we delete references in merged region on hbase:meta and archive
     *         the files on the file system
     * @throws IOException if reading the table descriptor, checking references,
     *         archiving or updating hbase:meta fails
     */
    boolean cleanMergeRegion(final HRegionInfo mergedRegion, final HRegionInfo regionA, final HRegionInfo regionB)
            throws IOException {
        FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
        Path rootdir = this.services.getMasterFileSystem().getRootDir();
        Path tabledir = FSUtils.getTableDir(rootdir, mergedRegion.getTable());
        HTableDescriptor htd = getTableDescriptor(mergedRegion.getTable());
        // NOTE(review): assumes a region's directory is <tabledir>/<encoded region name>,
        // i.e. the layout HRegionFileSystem itself uses -- confirm.
        Path mergedRegionDir = new Path(tabledir, mergedRegion.getEncodedName());
        HRegionFileSystem regionFs = null;
        try {
            if (fs.exists(mergedRegionDir)) {
                regionFs = HRegionFileSystem.openRegionFromFileSystem(this.services.getConfiguration(), fs,
                        tabledir, mergedRegion, true);
            } else {
                LOG.warn("Merged region does not exist: " + mergedRegion.getEncodedName());
            }
        } catch (IOException e) {
            // A failed probe tells us nothing about whether references remain.
            // Archiving the merge parents now could remove files that are still
            // referenced, so leave everything in place and retry later.
            LOG.warn("Unable to determine whether merged region " + mergedRegion.getEncodedName()
                    + " exists; skipping cleanup of its merge parents for now", e);
            return false;
        }
        if (regionFs == null || !regionFs.hasReferences(htd)) {
            LOG.debug(
                    "Deleting region " + regionA.getRegionNameAsString() + " and " + regionB.getRegionNameAsString()
                            + " from fs because merged region no longer holds references");
            HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionA);
            HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, regionB);
            MetaEditor.deleteMergeQualifiers(server.getCatalogTracker(), mergedRegion);
            return true;
        }
        return false;
    }

    /**
     * Run janitorial scan of catalog <code>hbase:meta</code> table looking for
     * garbage to collect. Merged regions are cleaned first, then split parents.
     * If another scan is already in progress, this call returns immediately
     * without doing any work.
     * @return number of cleaned regions (merged regions plus split parents);
     *         0 when another scan was already running
     * @throws IOException if scanning hbase:meta or cleaning a region fails
     */
    int scan() throws IOException {
        // Claim the flag BEFORE entering try/finally. If the claim happened inside
        // the try, a caller that loses the race would still run the finally block
        // and reset the flag while the winning scan is in progress, letting a
        // third caller start a concurrent scan.
        if (!alreadyRunning.compareAndSet(false, true)) {
            return 0;
        }
        try {
            Triple<Integer, Map<HRegionInfo, Result>, Map<HRegionInfo, Result>> scanTriple = getMergedRegionsAndSplitParents();
            int count = scanTriple.getFirst();

            // Clean merged regions first.
            int mergeCleaned = 0;
            Map<HRegionInfo, Result> mergedRegions = scanTriple.getSecond();
            for (Map.Entry<HRegionInfo, Result> e : mergedRegions.entrySet()) {
                HRegionInfo regionA = HRegionInfo.getHRegionInfo(e.getValue(), HConstants.MERGEA_QUALIFIER);
                HRegionInfo regionB = HRegionInfo.getHRegionInfo(e.getValue(), HConstants.MERGEB_QUALIFIER);
                if (regionA == null || regionB == null) {
                    LOG.warn("Unexpected references regionA="
                            + (regionA == null ? "null" : regionA.getRegionNameAsString()) + ",regionB="
                            + (regionB == null ? "null" : regionB.getRegionNameAsString()) + " in merged region "
                            + e.getKey().getRegionNameAsString());
                } else if (cleanMergeRegion(e.getKey(), regionA, regionB)) {
                    mergeCleaned++;
                }
            }

            // Then clean split parents. The map iterates parents before their
            // daughters, so a parent that could not be cleaned is seen first.
            Map<HRegionInfo, Result> splitParents = scanTriple.getThird();
            int splitCleaned = 0;
            // Encoded names of regions whose parents are still around.
            HashSet<String> parentNotCleaned = new HashSet<String>();
            for (Map.Entry<HRegionInfo, Result> e : splitParents.entrySet()) {
                if (!parentNotCleaned.contains(e.getKey().getEncodedName())
                        && cleanParent(e.getKey(), e.getValue())) {
                    splitCleaned++;
                } else {
                    // We could not clean the parent, so its daughters should not be cleaned either (HBASE-6160)
                    PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(e.getValue());
                    // Either daughter may be absent from the row (checkDaughterInFs
                    // tolerates a null daughter too), so guard before dereferencing.
                    if (daughters.getFirst() != null) {
                        parentNotCleaned.add(daughters.getFirst().getEncodedName());
                    }
                    if (daughters.getSecond() != null) {
                        parentNotCleaned.add(daughters.getSecond().getEncodedName());
                    }
                }
            }

            int totalCleaned = mergeCleaned + splitCleaned;
            // Same summary either way; INFO when something was collected, TRACE otherwise.
            if (totalCleaned != 0 || LOG.isTraceEnabled()) {
                String summary = "Scanned " + count + " catalog row(s), gc'd " + mergeCleaned
                        + " unreferenced merged region(s) and " + splitCleaned + " unreferenced parent region(s)";
                if (totalCleaned != 0) {
                    LOG.info(summary);
                } else {
                    LOG.trace(summary);
                }
            }
            return totalCleaned;
        } finally {
            // Only reached by the caller that successfully claimed the flag above.
            alreadyRunning.set(false);
        }
    }

    /**
     * Compare HRegionInfos in a way that has split parents sort BEFORE their
     * daughters. Ordering is by table, then start key ascending, then end key
     * descending (as ordered by {@link Bytes.RowEndKeyComparator}), so of two
     * regions sharing a start key the one reaching further sorts first.
     * A <code>null</code> argument sorts before any non-null one.
     */
    static class SplitParentFirstComparator implements Comparator<HRegionInfo> {
        Comparator<byte[]> rowEndKeyComparator = new Bytes.RowEndKeyComparator();

        @Override
        public int compare(HRegionInfo left, HRegionInfo right) {
            // This comparator differs from the one HRegionInfo in that it sorts
            // parent before daughters.

            // Same reference (including both null) is equal. Without this,
            // compare(null, null) would report -1 and break the Comparator contract.
            if (left == right)
                return 0;
            if (left == null)
                return -1;
            if (right == null)
                return 1;
            // Same table name.
            int result = left.getTable().compareTo(right.getTable());
            if (result != 0)
                return result;
            // Compare start keys.
            result = Bytes.compareTo(left.getStartKey(), right.getStartKey());
            if (result != 0)
                return result;
            // Compare end keys.
            result = rowEndKeyComparator.compare(left.getEndKey(), right.getEndKey());

            return -result; // Flip the result so parent comes first.
        }
    }

    /**
     * Deletes a split parent once neither daughter holds references to it.
     * A parent whose row still carries a merge qualifier is left alone until the
     * merge cleanup has removed that qualifier.
     * @param parent HRegionInfo of split offlined parent
     * @param rowContent Content of the <code>parent</code> row in hbase:meta
     * @return True if we removed <code>parent</code> from meta table and from
     * the filesystem.
     * @throws IOException if checking the daughters, archiving or updating
     *         hbase:meta fails
     */
    boolean cleanParent(final HRegionInfo parent, Result rowContent) throws IOException {
        // Only MERGEA_QUALIFIER is checked: the two merge qualifiers are
        // inserted and deleted together.
        boolean stillHasMergeQualifier = rowContent.getValue(HConstants.CATALOG_FAMILY,
                HConstants.MERGEA_QUALIFIER) != null;
        if (stillHasMergeQualifier) {
            // Wait for the merge region cleanup to run first.
            return false;
        }

        // Inspect both daughters before deciding anything.
        PairOfSameType<HRegionInfo> daughters = HRegionInfo.getDaughterRegions(rowContent);
        Pair<Boolean, Boolean> firstDaughter = checkDaughterInFs(parent, daughters.getFirst());
        Pair<Boolean, Boolean> secondDaughter = checkDaughterInFs(parent, daughters.getSecond());
        if (!hasNoReferences(firstDaughter) || !hasNoReferences(secondDaughter)) {
            return false;
        }

        LOG.debug("Deleting region " + parent.getRegionNameAsString()
                + " because daughter splits no longer hold references");
        FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
        if (LOG.isTraceEnabled()) {
            LOG.trace("Archiving parent region: " + parent);
        }
        HFileArchiver.archiveRegion(this.services.getConfiguration(), fs, parent);
        MetaEditor.deleteRegion(this.server.getCatalogTracker(), parent);
        return true;
    }

    /**
     * Interprets the result of a daughter check.
     * @param p A pair where the first boolean says whether or not the daughter
     * region directory exists in the filesystem and the second boolean says
     * whether the daughter has references to the parent.
     * @return True if the passed <code>p</code> signifies no references, i.e. the
     * daughter directory is missing or it holds no references.
     */
    private boolean hasNoReferences(final Pair<Boolean, Boolean> p) {
        final boolean existsWithReferences = p.getFirst() && p.getSecond();
        return !existsWithReferences;
    }

    /**
     * Checks if a daughter region -- either splitA or splitB -- still holds
     * references to parent.
     * <p>
     * Only a daughter whose directory is positively known to be absent (or a
     * <code>null</code> daughter) is reported as "does not exist". If the file
     * system cannot be queried, the daughter is conservatively reported as
     * existing and holding references, so that a transient I/O error can never
     * cause the parent to be archived while it may still be referenced.
     * @param parent Parent region
     * @param daughter Daughter region; may be <code>null</code>
     * @return A pair where the first boolean says whether or not the daughter
     * region directory exists in the filesystem and then the second boolean says
     * whether the daughter has references to the parent.
     * @throws IOException if reading the table descriptor or checking a column
     *         family for references fails
     */
    Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent, final HRegionInfo daughter)
            throws IOException {
        if (daughter == null) {
            return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
        }

        FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
        Path rootdir = this.services.getMasterFileSystem().getRootDir();
        Path tabledir = FSUtils.getTableDir(rootdir, daughter.getTable());
        // NOTE(review): assumes a region's directory is <tabledir>/<encoded region name>,
        // i.e. the layout HRegionFileSystem itself uses -- confirm.
        Path daughterRegionDir = new Path(tabledir, daughter.getEncodedName());

        try {
            if (!fs.exists(daughterRegionDir)) {
                LOG.warn("Daughter region does not exist: " + daughter.getEncodedName() + ", parent is: "
                        + parent.getEncodedName());
                return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
            }
        } catch (IOException ioe) {
            // Cannot tell whether the directory is there: err on the side of keeping the parent.
            LOG.warn("Error trying to determine if daughter region " + daughter.getEncodedName()
                    + " exists, assuming it exists and has references; parent is: "
                    + parent.getEncodedName(), ioe);
            return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.TRUE);
        }

        HRegionFileSystem regionFs = null;
        try {
            regionFs = HRegionFileSystem.openRegionFromFileSystem(this.services.getConfiguration(), fs, tabledir,
                    daughter, true);
        } catch (IOException e) {
            // The directory existed a moment ago, so this failure is not proof of
            // absence; keep the parent and let a later scan retry.
            LOG.warn("Error trying to open daughter region " + daughter.getEncodedName()
                    + ", assuming it has references; parent is: " + parent.getEncodedName(), e);
            return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.TRUE);
        }

        // Stop at the first column family of the parent's table that still holds a reference.
        boolean references = false;
        HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTable());
        for (HColumnDescriptor family : parentDescriptor.getFamilies()) {
            if ((references = regionFs.hasReferences(family.getNameAsString()))) {
                break;
            }
        }
        return new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.valueOf(references));
    }

    /**
     * Looks up a table descriptor through the master services.
     * @param tableName table whose descriptor is wanted
     * @return whatever the master's table descriptors return for
     *         <code>tableName</code>
     * @throws FileNotFoundException propagated from the lookup
     * @throws IOException propagated from the lookup
     */
    private HTableDescriptor getTableDescriptor(final TableName tableName)
            throws FileNotFoundException, IOException {
        final HTableDescriptor descriptor = this.services.getTableDescriptors().get(tableName);
        return descriptor;
    }

    /**
     * Checks whether the specified region still carries merge qualifiers in
     * hbase:meta and, if it does, tries to clean them via
     * {@link #cleanMergeRegion(HRegionInfo, HRegionInfo, HRegionInfo)}.
     * @param region region to check
     * @return true if the specified region doesn't have merge qualifier now
     * @throws IOException if reading hbase:meta or the cleanup fails
     */
    public boolean cleanMergeQualifier(final HRegionInfo region) throws IOException {
        // Fetch the regions recorded in the merge qualifiers, if any.
        Pair<HRegionInfo, HRegionInfo> mergeRegions = MetaReader
                .getRegionsFromMergeQualifier(this.services.getCatalogTracker(), region.getRegionName());
        if (mergeRegions == null) {
            // No merge qualifier, nothing to clean.
            return true;
        }
        final HRegionInfo mergeA = mergeRegions.getFirst();
        final HRegionInfo mergeB = mergeRegions.getSecond();
        if (mergeA == null && mergeB == null) {
            // No merge qualifier, nothing to clean.
            return true;
        }
        if (mergeA == null || mergeB == null) {
            // Should not happen: the two qualifiers are inserted and deleted together.
            LOG.error("Merged region " + region.getRegionNameAsString() + " has only one merge qualifier in META.");
            return false;
        }
        return cleanMergeRegion(region, mergeA, mergeB);
    }
}