org.apache.hadoop.hbase.util.HFileV1Detector.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.hadoop.hbase.util.HFileV1Detector.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.io.FileLink;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Tool to detect presence of any HFileV1 in the given directory. It prints all such regions which
 * have such files.
 * <p>
 * To print the help section of the tool:
 * <ul>
 * <li>./bin/hbase org.apache.hadoop.hbase.util.HFileV1Detector --h or,
 * <li>java -cp `hbase classpath` org.apache.hadoop.hbase.util.HFileV1Detector --h
 * </ul>
 * It also supports -h, --help, -help options.
 * <p>
 * Each region directory is scanned by its own task on a fixed-size thread pool; the sets of
 * detected V1 files and corrupted files are therefore concurrent sets, while the set of processed
 * tables is only touched by the thread that calls {@link #run(String[])}.
 */
public class HFileV1Detector extends Configured implements Tool {
    private FileSystem fs;
    private static final Log LOG = LogFactory.getLog(HFileV1Detector.class);
    private static final int DEFAULT_NUM_OF_THREADS = 10;
    /**
     * Largest thread count accepted from the command line.
     */
    private static final int MAX_NUM_OF_THREADS = 100;
    /**
     * Pre-namespace archive directory
     */
    private static final String PRE_NS_DOT_ARCHIVE = ".archive";
    /**
     * Pre-namespace tmp directory
     */
    private static final String PRE_NS_DOT_TMP = ".tmp";
    private int numOfThreads;
    /**
     * directory to start the processing.
     */
    private Path targetDirPath;
    /**
     * executor for processing regions.
     */
    private ExecutorService exec;

    /**
     * Keeps record of processed tables.
     */
    private final Set<Path> processedTables = new HashSet<Path>();
    /**
     * set of corrupted HFiles (with undetermined major version)
     */
    private final Set<Path> corruptedHFiles = Collections.newSetFromMap(new ConcurrentHashMap<Path, Boolean>());
    /**
     * set of HfileV1;
     */
    private final Set<Path> hFileV1Set = Collections.newSetFromMap(new ConcurrentHashMap<Path, Boolean>());

    private final Options options = new Options();
    /**
     * used for computing pre-namespace paths for hfilelinks. It is initialised lazily and may be
     * read/written by the region-processing threads, hence volatile.
     */
    private volatile Path defaultNamespace;

    /**
     * Registers the supported command line options: {@code -p/--path}, {@code -n/--numberOfThreads}
     * and {@code -h/--help}. None of them is mandatory.
     */
    public HFileV1Detector() {
        Option pathOption = new Option("p", "path", true, "Path to a table, or hbase installation");
        pathOption.setRequired(false);
        options.addOption(pathOption);
        Option threadOption = new Option("n", "numberOfThreads", true,
                "Number of threads to use while processing HFiles.");
        threadOption.setRequired(false);
        options.addOption(threadOption);
        options.addOption("h", "help", false, "Help");
    }

    /**
     * Parses the command line and updates {@link #targetDirPath} and {@link #numOfThreads}
     * accordingly. Fields keep the values they had before the call for options that are absent.
     * @param args command line arguments
     * @return true when processing should go on; false when help was printed or the thread count
     *         is not a number
     * @throws ParseException when the arguments cannot be parsed against the registered options
     * @throws IOException when the hbase root directory cannot be determined
     */
    private boolean parseOption(String[] args) throws ParseException, IOException {
        if (args.length == 0) {
            return true; // no args will process with default values.
        }
        CommandLineParser parser = new GnuParser();
        CommandLine cmd = parser.parse(options, args);
        if (cmd.hasOption("h")) {
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("HFileV1Detector", options, true);
            System.out.println("In case no option is provided, it processes hbase.rootdir using "
                    + DEFAULT_NUM_OF_THREADS + " threads.");
            System.out.println("Example:");
            System.out.println(" To detect any HFileV1 in a given hbase installation '/myhbase':");
            System.out.println(" $ $HBASE_HOME/bin/hbase " + this.getClass().getName() + " -p /myhbase");
            System.out.println();
            return false;
        }

        if (cmd.hasOption("p")) {
            this.targetDirPath = new Path(FSUtils.getRootDir(getConf()), cmd.getOptionValue("p"));
        }
        if (cmd.hasOption("n")) {
            int n;
            try {
                n = Integer.parseInt(cmd.getOptionValue("n"));
            } catch (NumberFormatException nfe) {
                LOG.error("Please select a valid number for threads");
                return false;
            }
            // Zero must be rejected as well: a fixed thread pool cannot be created with 0 threads.
            if (n < 1 || n > MAX_NUM_OF_THREADS) {
                LOG.warn("Please use a positive number <= " + MAX_NUM_OF_THREADS + " for number of threads."
                        + " Continuing with default value " + DEFAULT_NUM_OF_THREADS);
            } else {
                this.numOfThreads = n;
            }
        }
        return true;
    }

    /**
     * Checks for HFileV1.
     * @param args command line arguments, see the constructor for the supported options
     * @return 0 when no HFileV1 is present.
     *         1 when a HFileV1 is present or, when there is a file with corrupt major version
     *          (neither V1 nor V2).
     *        -1 in case of any error/exception, and also when only the help text was requested
     *          or the thread count argument is not a number
     * @throws IOException when the file system cannot be obtained or closed
     * @throws ParseException when the command line cannot be parsed
     */
    @Override
    public int run(String args[]) throws IOException, ParseException {
        FSUtils.setFsDefault(getConf(), new Path(FSUtils.getRootDir(getConf()).toUri()));
        fs = FileSystem.get(getConf());
        try {
            numOfThreads = DEFAULT_NUM_OF_THREADS;
            targetDirPath = FSUtils.getRootDir(getConf());
            if (!parseOption(args)) {
                // Report the failure through the return value instead of killing the JVM here;
                // main() turns it into the process exit code.
                return -1;
            }
            this.exec = Executors.newFixedThreadPool(numOfThreads);
            try {
                return processResult(checkForV1Files(targetDirPath));
            } catch (Exception e) {
                LOG.error("Failed while looking for HFileV1 under " + targetDirPath, e);
                return -1;
            } finally {
                exec.shutdown();
            }
        } finally {
            fs.close();
        }
    }

    /**
     * Computes the default namespace directory below the hbase root directory and stores it in
     * {@link #defaultNamespace}.
     */
    private void setDefaultNamespaceDir() throws IOException {
        Path dataDir = new Path(FSUtils.getRootDir(getConf()), HConstants.BASE_NAMESPACE_DIR);
        defaultNamespace = new Path(dataDir, NamespaceDescriptor.DEFAULT_NAMESPACE_NAME_STR);
    }

    /**
     * Logs the summary of the scan.
     * @param regionsWithHFileV1 regions in which a HFileV1 was found
     * @return 0 when neither a HFileV1 nor a corrupted file was recorded, 1 otherwise
     */
    private int processResult(Set<Path> regionsWithHFileV1) {
        LOG.info("Result: \n");
        printSet(processedTables, "Tables Processed: ");

        int count = hFileV1Set.size();
        LOG.info("Count of HFileV1: " + count);
        if (count > 0) {
            printSet(hFileV1Set, "HFileV1:");
        }

        count = corruptedHFiles.size();
        LOG.info("Count of corrupted files: " + count);
        if (count > 0) {
            printSet(corruptedHFiles, "Corrupted Files: ");
        }

        count = regionsWithHFileV1.size();
        LOG.info("Count of Regions with HFileV1: " + count);
        if (count > 0) {
            printSet(regionsWithHFileV1, "Regions to Major Compact: ");
        }

        return (hFileV1Set.isEmpty() && corruptedHFiles.isEmpty()) ? 0 : 1;
    }

    /**
     * Logs the given message followed by every path of the set, one per log line.
     */
    private void printSet(Set<Path> result, String msg) {
        LOG.info(msg);
        for (Path p : result) {
            LOG.info(p);
        }
    }

    /**
     * Takes a directory path, and lists out any HFileV1, if present.
     * @param targetDir directory to start looking for HFilev1.
     * @return set of Regions that have HFileV1
     * @throws IOException when the directory does not exist or cannot be listed
     */
    private Set<Path> checkForV1Files(Path targetDir) throws IOException {
        LOG.info("Target dir is: " + targetDir);
        if (!fs.exists(targetDir)) {
            throw new IOException("The given path does not exist: " + targetDir);
        }
        if (isTableDir(fs, targetDir)) {
            processedTables.add(targetDir);
            return processTable(targetDir);
        }
        Set<Path> regionsWithHFileV1 = new HashSet<Path>();
        FileStatus[] fsStats = fs.listStatus(targetDir);
        for (FileStatus fsStat : fsStats) {
            if (isTableDir(fs, fsStat.getPath()) && !isRootTable(fsStat.getPath())) {
                processedTables.add(fsStat.getPath());
                // look for regions and find out any v1 file.
                regionsWithHFileV1.addAll(processTable(fsStat.getPath()));
            } else {
                LOG.info("Ignoring path: " + fsStat.getPath());
            }
        }
        return regionsWithHFileV1;
    }

    /**
     * Ignore ROOT table as it doesn't exist in 0.96.
     * @param path path to check, may be null
     * @return true when the path ends with "-ROOT-"
     */
    private boolean isRootTable(Path path) {
        return path != null && path.toString().endsWith("-ROOT-");
    }

    /**
     * Find out regions in the table which have HFileV1.
     * @param tableDir table directory whose region directories are scanned
     * @return the set of regions containing HFile v1.
     * @throws IOException when the table directory cannot be listed, or (as
     *         {@link InterruptedIOException}) when the calling thread is interrupted while waiting
     *         for the region tasks
     */
    private Set<Path> processTable(Path tableDir) throws IOException {
        // list out the regions and then process each file in it.
        LOG.debug("processing table: " + tableDir);
        List<Future<Path>> regionLevelResults = new ArrayList<Future<Path>>();
        Set<Path> regionsWithHFileV1 = new HashSet<Path>();

        FileStatus[] fsStats = fs.listStatus(tableDir);
        for (FileStatus fsStat : fsStats) {
            // process each region
            if (isRegionDir(fs, fsStat.getPath())) {
                regionLevelResults.add(processRegion(fsStat.getPath()));
            }
        }
        for (Future<Path> f : regionLevelResults) {
            try {
                Path regionWithV1 = f.get();
                if (regionWithV1 != null) {
                    regionsWithHFileV1.add(regionWithV1);
                }
            } catch (InterruptedException e) {
                // Keep the interrupt visible to callers and stop: going on would report a
                // result that silently misses the regions which were not waited for.
                Thread.currentThread().interrupt();
                InterruptedIOException iioe = new InterruptedIOException(
                        "Interrupted while waiting for region results of table " + tableDir);
                iioe.initCause(e);
                throw iioe;
            } catch (ExecutionException e) {
                // Best effort: a region that could not be scanned is logged and skipped.
                LOG.error("Failed to process a region of table " + tableDir, e);
            }
        }
        return regionsWithHFileV1;
    }

    /**
     * Each region is processed by a separate handler. If a HRegion has a hfileV1, its path is
     * returned as the future result, otherwise, a null value is returned. The scan of a region
     * stops at the first HFileV1 found, so at most one file per region ends up in the set of
     * detected V1 files.
     * @param regionDir Region to process.
     * @return corresponding Future object.
     */
    private Future<Path> processRegion(final Path regionDir) {
        LOG.debug("processing region: " + regionDir);
        Callable<Path> regionCallable = new Callable<Path>() {
            @Override
            public Path call() throws Exception {
                for (Path familyDir : FSUtils.getFamilyDirs(fs, regionDir)) {
                    FileStatus[] storeFiles = FSUtils.listStatus(fs, familyDir);
                    if (storeFiles == null || storeFiles.length == 0) {
                        continue;
                    }
                    for (FileStatus storeFile : storeFiles) {
                        if (isHFileV1(storeFile)) {
                            // return this region path, as it needs to be compacted.
                            return regionDir;
                        }
                    }
                }
                return null;
            }
        };
        return exec.submit(regionCallable);
    }

    /**
     * Inspects one store file. References are skipped, HFileLinks are resolved (including
     * pre-namespace locations) and regular files are opened directly. A file with major version 1
     * is added to the set of V1 files; a file that cannot be read, or whose major version is
     * neither 1 nor 2, is added to the set of corrupted files.
     * @param storeFile status of the store file to inspect
     * @return true only when the file is a HFileV1
     */
    private boolean isHFileV1(FileStatus storeFile) {
        Path storeFilePath = storeFile.getPath();
        FSDataInputStream fsdis = null;
        try {
            // check whether this path is a reference.
            if (StoreFileInfo.isReference(storeFilePath)) {
                return false;
            }
            long lenToRead;
            if (HFileLink.isHFileLink(storeFilePath)) {
                // a HFileLink: read through the link, and take the length of the linked file.
                FileLink fLink = getFileLinkWithPreNSPath(storeFilePath);
                fsdis = fLink.open(fs);
                lenToRead = fLink.getFileStatus(fs).getLen();
            } else {
                // a regular hfile
                fsdis = fs.open(storeFilePath);
                lenToRead = storeFile.getLen();
            }
            int majorVersion = computeMajorVersion(fsdis, lenToRead);
            if (majorVersion == 1) {
                hFileV1Set.add(storeFilePath);
                return true;
            }
            if (majorVersion > 2 || majorVersion < 1) {
                throw new IllegalArgumentException("Incorrect major version: " + majorVersion);
            }
        } catch (Exception e) {
            corruptedHFiles.add(storeFilePath);
            LOG.error("Got exception while reading trailer for file: " + storeFilePath, e);
        } finally {
            if (fsdis != null) {
                try {
                    fsdis.close();
                } catch (IOException e) {
                    // A failed close must neither hide the verdict for this file nor abort the
                    // scan of the remaining files of the region.
                    LOG.warn("Failed to close stream for file: " + storeFilePath, e);
                }
            }
        }
        return false;
    }

    /**
     * Reads the last int of the stream and extracts the major version from its lower 3 bytes.
     * @param istream stream positioned anywhere; it is moved to the last int of the file
     * @param fileSize length of the file behind the stream
     * @return the major version
     * @throws IllegalArgumentException when the file is shorter than one int
     * @throws IOException when seeking or reading fails
     */
    private static int computeMajorVersion(FSDataInputStream istream, long fileSize) throws IOException {
        //read up the last int of the file. Major version is in the last 3 bytes.
        long seekPoint = fileSize - Bytes.SIZEOF_INT;
        if (seekPoint < 0) {
            throw new IllegalArgumentException("File too small, no major version found");
        }

        // Read the version from the last int of the file.
        istream.seek(seekPoint);
        int version = istream.readInt();
        // Extract and return the major version
        return version & 0x00ffffff;
    }

    /**
     * Creates a FileLink which adds pre-namespace paths in its list of available paths. This is used
     * when reading a snapshot file in a pre-namespace file layout, for example, while upgrading.
     * @param storeFilePath path of the HFileLink
     * @return a FileLink which could read from pre-namespace paths.
     * @throws IOException when the link or the root directory cannot be resolved
     */
    public FileLink getFileLinkWithPreNSPath(Path storeFilePath) throws IOException {
        HFileLink link = new HFileLink(getConf(), storeFilePath);
        // pre-namespace locations come first, followed by the regular locations of the link.
        List<Path> pathsToProcess = getPreNSPathsForHFileLink(link);
        pathsToProcess.addAll(Arrays.asList(link.getLocations()));
        return new FileLink(pathsToProcess);
    }

    /**
     * Builds the pre-namespace candidates for the origin of a HFileLink: below the archive
     * directory, below the tmp directory and directly below the root directory, in that order.
     */
    private List<Path> getPreNSPathsForHFileLink(HFileLink fileLink) throws IOException {
        if (defaultNamespace == null) {
            setDefaultNamespaceDir();
        }
        List<Path> p = new ArrayList<Path>();
        String relativeTablePath = removeDefaultNSPath(fileLink.getOriginPath());
        p.add(getPreNSPath(PRE_NS_DOT_ARCHIVE, relativeTablePath));
        p.add(getPreNSPath(PRE_NS_DOT_TMP, relativeTablePath));
        p.add(getPreNSPath(null, relativeTablePath));
        return p;
    }

    /**
     * Removes the prefix of defaultNamespace from the path.
     * @param originalPath path that may live below the default namespace directory
     * @return the part of the path after the default namespace directory and its separator, or the
     *         unchanged path string when the path is not located below that directory
     */
    private String removeDefaultNSPath(Path originalPath) {
        String pathStr = originalPath.toString();
        // Match the directory including its trailing separator, so that a sibling whose name
        // merely starts with the namespace name is left alone and the substring is always in range.
        String nsPrefix = defaultNamespace.toString() + Path.SEPARATOR;
        if (!pathStr.startsWith(nsPrefix)) {
            return pathStr;
        }
        return pathStr.substring(nsPrefix.length());
    }

    /**
     * Resolves a table-relative path against the hbase root directory, optionally below the given
     * prefix directory.
     * @param prefix directory name to put in front of the relative path, or null for none
     * @param relativeTablePath path relative to the (pre-namespace) root
     */
    private Path getPreNSPath(String prefix, String relativeTablePath) throws IOException {
        String relativePath = (prefix == null ? relativeTablePath : prefix + Path.SEPARATOR + relativeTablePath);
        return new Path(FSUtils.getRootDir(getConf()), relativePath);
    }

    /**
     * Tells whether the path looks like a table directory: it is not a file and either has table
     * info, or its name ends with ".META.".
     */
    private static boolean isTableDir(final FileSystem fs, final Path path) throws IOException {
        // check for old format, of having /table/.tableinfo; hbase:meta doesn't has .tableinfo,
        // include it.
        if (fs.isFile(path)) {
            return false;
        }
        return (FSTableDescriptors.getTableInfoPath(fs, path) != null
                || FSTableDescriptors.getCurrentTableInfoStatus(fs, path, false) != null)
                || path.toString().endsWith(".META.");
    }

    /**
     * Tells whether the path is a region directory, i.e. a directory containing a region info file.
     */
    private static boolean isRegionDir(final FileSystem fs, final Path path) throws IOException {
        if (fs.isFile(path)) {
            return false;
        }
        Path regionInfo = new Path(path, HRegionFileSystem.REGION_INFO_FILE);
        return fs.exists(regionInfo);
    }

    /**
     * Runs the tool through {@link ToolRunner} and exits the JVM with the value returned by
     * {@link #run(String[])}.
     */
    public static void main(String args[]) throws Exception {
        System.exit(ToolRunner.run(HBaseConfiguration.create(), new HFileV1Detector(), args));
    }

}