com.asakusafw.operation.tools.hadoop.fs.Clean.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.operation.tools.hadoop.fs.Clean.java

Source

/**
 * Copyright 2011-2017 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.operation.tools.hadoop.fs;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.URI;
import java.sql.Date;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * CLI for Hadoop FS cleaning tool.
 * @since 0.4.0
 */
public class Clean extends Configured implements Tool {

    static final Logger LOG = LoggerFactory.getLogger(Clean.class);

    static final Option OPT_RECURSIVE;
    static final Option OPT_DRY_RUN;
    static final Option OPT_KEEP_DAYS;

    private static final Options OPTIONS;
    static {
        OPT_RECURSIVE = new Option("r", "recursive", false, "remove recursively"); //$NON-NLS-1$ //$NON-NLS-2$
        OPT_DRY_RUN = new Option("s", "dry-run", false, "do not delete actually"); //$NON-NLS-1$ //$NON-NLS-2$
        OPT_KEEP_DAYS = new Option("k", "keep-days", true, "keep files lecent days"); //$NON-NLS-1$ //$NON-NLS-2$

        OPTIONS = new Options();
        OPTIONS.addOption(OPT_RECURSIVE);
        OPTIONS.addOption(OPT_DRY_RUN);
        OPTIONS.addOption(OPT_KEEP_DAYS);
    }

    private final long currentTime;

    /**
     * Creates a new instance.
     */
    public Clean() {
        this(System.currentTimeMillis());
    }

    Clean(long currentTime) {
        this.currentTime = currentTime;
    }

    /**
     * Program entry.
     * @param args arguments
     * @throws Exception if failed to execute command
     */
    public static void main(String... args) throws Exception {
        LOG.info("[OT-CLEAN-I00000] Start Hadoop FS cleaning tool");
        long start = System.currentTimeMillis();
        Tool tool = new Clean();
        tool.setConf(new Configuration());
        int exit = tool.run(args); // no generic options
        long end = System.currentTimeMillis();
        LOG.info(MessageFormat.format(
                "[OT-CLEAN-I00999] Finish Hadoop FS cleaning tool (exit-code={0}, elapsed={1}ms)", exit,
                end - start));
        if (exit != 0) {
            System.exit(exit);
        }
    }

    @Override
    public int run(String[] args) {
        if (args == null) {
            throw new IllegalArgumentException("args must not be null"); //$NON-NLS-1$
        }
        Opts opts;
        try {
            opts = parseOptions(args);
            if (opts == null) {
                return 2;
            }
        } catch (Exception e) {
            LOG.error(MessageFormat.format("[OT-CLEAN-E00001] Invalid options: {0}", Arrays.toString(args)), e);
            return 2;
        }
        long period = currentTime - (long) (opts.keepDays * TimeUnit.DAYS.toMillis(1));
        if (LOG.isDebugEnabled()) {
            LOG.debug("Keep switching-time: {}", new Date(period)); //$NON-NLS-1$
        }
        Context context = new Context(opts.recursive, period, opts.dryRun);
        for (Path path : opts.paths) {
            remove(path, context);
        }
        if (context.hasError()) {
            return 1;
        }
        return 0;
    }

    private Opts parseOptions(String[] args) throws ParseException {
        assert args != null;
        if (LOG.isDebugEnabled()) {
            LOG.debug("Parsing options: {}", Arrays.toString(args)); //$NON-NLS-1$
        }

        CommandLineParser parser = new BasicParser();
        CommandLine cmd = parser.parse(OPTIONS, args);

        boolean recursive = cmd.hasOption(OPT_RECURSIVE.getOpt());
        String keepString = cmd.getOptionValue(OPT_KEEP_DAYS.getOpt());
        boolean dryRun = cmd.hasOption(OPT_DRY_RUN.getOpt());
        String[] rest = cmd.getArgs();

        if (keepString == null) {
            LOG.error(MessageFormat.format("[OT-CLEAN-E00001] Missing option: -{0}", OPT_KEEP_DAYS.getLongOpt()));
            return null;
        }
        if (rest == null) {
            rest = new String[0];
        }

        LOG.debug("Option {}: {}", OPT_RECURSIVE.getLongOpt(), recursive); //$NON-NLS-1$
        double keepDays;
        try {
            keepDays = Double.parseDouble(keepString);
        } catch (NumberFormatException e) {
            LOG.error(MessageFormat.format("[OT-CLEAN-E00001] -{0} must be a number: {1}",
                    OPT_KEEP_DAYS.getLongOpt(), keepString));
            return null;
        }
        LOG.debug("Option {}: {}", OPT_KEEP_DAYS.getLongOpt(), keepDays); //$NON-NLS-1$
        LOG.debug("Option {}: {}", OPT_DRY_RUN.getLongOpt(), dryRun); //$NON-NLS-1$

        List<Path> paths = new ArrayList<>();
        for (String pathString : rest) {
            if (pathString.trim().isEmpty()) {
                continue;
            }
            try {
                Path path = new Path(pathString);
                paths.add(path);
                LOG.debug("Option --: {}", path); //$NON-NLS-1$
            } catch (RuntimeException e) {
                LOG.error(MessageFormat.format("[OT-CLEAN-E00001] Invalid target path: {0}", pathString), e);
                return null;
            }
        }
        if (paths.isEmpty()) {
            LOG.error(MessageFormat.format("[OT-CLEAN-E00001] Missing target paths", new Object[0]));
            return null;
        }
        return new Opts(recursive, keepDays, dryRun, paths);
    }

    boolean remove(Path path, Context context) {
        LOG.info(MessageFormat.format("[OT-CLEAN-I01000] Start cleaning: {0}", path));
        FileSystem fs;
        try {
            fs = FileSystem.get(path.toUri(), getConf());
        } catch (Exception e) {
            LOG.error(MessageFormat.format("[OT-CLEAN-E01001] Failed to connect to filesystem: {0}", path), e);
            context.setError();
            return false;
        }
        List<FileStatus> files;
        try {
            files = asList(fs.globStatus(path));
        } catch (Exception e) {
            LOG.error(MessageFormat.format("[OT-CLEAN-E01002] Failed to glob path pattern: {0}", path), e);
            context.setError();
            return false;
        }
        if (files.isEmpty()) {
            LOG.warn(MessageFormat.format("[OT-CLEAN-W01001] Target file is not found: {0}", path));
            context.setError();
            return false;
        }
        boolean removed = true;
        long start = System.currentTimeMillis();
        for (FileStatus file : files) {
            removed &= remove(fs, file, context);
        }
        long end = System.currentTimeMillis();
        LOG.info(MessageFormat.format("[OT-CLEAN-I01999] Finish cleaning: {0} (all-removed={1}, elapsed={2}ms)",
                path, removed, end - start));
        return removed;
    }

    private boolean remove(FileSystem fs, FileStatus file, Context context) {
        LOG.debug("Attempt to remove {}", file.getPath()); //$NON-NLS-1$
        boolean isSymlink = context.isSymlink(fs, file);
        if (isSymlink) {
            LOG.error(MessageFormat.format("[OT-CLEAN-W01001] Symlink is currenty not supported: {0}",
                    file.getPath()));
            context.setError();
            return false;
        }
        if (file.isDirectory()) {
            if (context.isRecursive()) {
                List<FileStatus> children;
                try {
                    children = asList(fs.listStatus(file.getPath()));
                } catch (IOException e) {
                    LOG.error(
                            MessageFormat.format("[OT-CLEAN-E01003] Failed to list directory: {0}", file.getPath()),
                            e);
                    context.setError();
                    return false;
                }
                boolean deleteChildren = true;
                for (FileStatus child : children) {
                    deleteChildren &= remove(fs, child, context);
                }
                if (deleteChildren == false) {
                    LOG.info(MessageFormat.format("[OT-CLEAN-I01004] Skipped: {0} (is no-empty directory)",
                            file.getPath(), new Date(file.getModificationTime())));
                    return false;
                }
            } else {
                LOG.info(MessageFormat.format("[OT-CLEAN-I01003] Skipped: {0} (is directory)", file.getPath(),
                        new Date(file.getModificationTime())));
                return false;
            }
        }
        if (context.canDelete(file)) {
            LOG.debug("Removing {}", file.getPath()); //$NON-NLS-1$
            if (context.isDryRun() == false) {
                try {
                    boolean removed = fs.delete(file.getPath(), false);
                    if (removed == false) {
                        LOG.error(MessageFormat.format("[OT-CLEAN-E01004] Failed to remove: {0}", file.getPath()));
                        context.setError();
                        return false;
                    }
                } catch (IOException e) {
                    LOG.warn(MessageFormat.format("[OT-CLEAN-E01004] Failed to remove: {0}", file.getPath()), e);
                    context.setError();
                    return false;
                }
            }
            LOG.info(MessageFormat.format("[OT-CLEAN-I01001] Removed: {0} (timestamp={1})", file.getPath(),
                    new Date(file.getModificationTime())));
        } else {
            LOG.info(MessageFormat.format("[OT-CLEAN-I01002] Kept: {0} (timestamp={1})", file.getPath(),
                    new Date(file.getModificationTime())));
            return false;
        }
        return true;
    }

    private List<FileStatus> asList(FileStatus[] files) {
        if (files == null) {
            return Collections.emptyList();
        } else {
            return Arrays.asList(files);
        }
    }

    private static final class Opts {

        final boolean recursive;

        final double keepDays;

        final boolean dryRun;

        final List<Path> paths;

        Opts(boolean recursive, double keepDays, boolean dryRun, List<Path> paths) {
            this.recursive = recursive;
            this.keepDays = keepDays;
            this.dryRun = dryRun;
            this.paths = paths;
        }
    }

    private static final class Context {

        private final boolean recursive;

        private final long keepPeriod;

        private final boolean dryRun;

        private boolean sawError;

        private static final Method FILE_STATUS_IS_SYMLINK;
        static {
            Method m;
            try {
                m = FileStatus.class.getMethod("isSymlink"); //$NON-NLS-1$
            } catch (Exception e) {
                m = null;
                LOG.debug("FileStatus.isSymlink does not supported"); //$NON-NLS-1$
            }
            FILE_STATUS_IS_SYMLINK = m;
        }

        Context(boolean recursive, long keepPeriod, boolean dryRun) {
            this.recursive = recursive;
            this.keepPeriod = keepPeriod;
            this.dryRun = dryRun;
            this.sawError = false;
        }

        public boolean isSymlink(FileSystem fs, FileStatus file) {
            try {
                return isSymlink0(fs, file);
            } catch (Exception e) {
                if (LOG.isDebugEnabled()) {
                    LOG.debug(MessageFormat.format("Failed to resolve symlink", //$NON-NLS-1$
                            file.getPath()), e);
                }
                return false;
            }
        }

        private boolean isSymlink0(FileSystem fs, FileStatus file) throws IOException {
            assert fs != null;
            assert file != null;
            URI uri = file.getPath().toUri();
            if (uri.getScheme() == null) {
                uri = fs.makeQualified(file.getPath()).toUri();
                if (uri == null) {
                    return false;
                }
            }
            // NOTE: It seems that Hadoop 2.0 LocalFileSystem still does not support symlink.
            if (uri.getScheme().equals("file")) { //$NON-NLS-1$
                File f = new File(uri);
                File c = f.getCanonicalFile();
                if (f.equals(c)) {
                    return false;
                }
                if (f.getName().equals(c.getName()) == false) {
                    return true;
                }
                File p = f.getParentFile().getCanonicalFile();
                if (p.equals(c.getParentFile()) == false) {
                    return true;
                }
            } else if (FILE_STATUS_IS_SYMLINK != null) {
                try {
                    return Boolean.TRUE.equals(FILE_STATUS_IS_SYMLINK.invoke(file));
                } catch (Exception e) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug(MessageFormat.format("Failed to invoke {0}({1})", //$NON-NLS-1$
                                FILE_STATUS_IS_SYMLINK.getName(), file.getPath()), e);
                    }
                    return false;
                }
            }
            return false;
        }

        public boolean isRecursive() {
            return recursive;
        }

        public boolean isDryRun() {
            return dryRun;
        }

        public boolean canDelete(FileStatus file) {
            long lastModified = file.getModificationTime();
            return lastModified < keepPeriod;
        }

        public void setError() {
            this.sawError = true;
        }

        public boolean hasError() {
            return this.sawError;
        }
    }
}