com.asakusafw.cleaner.main.HDFSCleaner.java Source code

Java tutorial

Introduction

Here is the source code for com.asakusafw.cleaner.main.HDFSCleaner.java

Source

/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.cleaner.main;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;

import com.asakusafw.cleaner.bean.DFSCleanerBean;
import com.asakusafw.cleaner.common.CleanerInitializer;
import com.asakusafw.cleaner.common.ConfigurationLoader;
import com.asakusafw.cleaner.common.Constants;
import com.asakusafw.cleaner.common.MessageIdConst;
import com.asakusafw.cleaner.exception.CleanerSystemException;
import com.asakusafw.cleaner.log.Log;

/**
 * Command-line tool that deletes expired files and directories from the configured HDFS locations.
 * @author yuta.shirai
 *
 */
public class HDFSCleaner extends Configured implements Tool {
    /** This class object, passed as the class argument of the {@code Log.log} calls in this file. */
    private static final Class<?> CLASS = HDFSCleaner.class;

    /**
     * Creates a new instance.
     */
    public HDFSCleaner() {
        super();
    }

    /**
     * Creates a new instance with configuration object.
     * @param conf configuration handed to the {@code Configured} superclass constructor
     *     (presumably the object later returned by {@code getConf()} - confirm against Hadoop)
     */
    public HDFSCleaner(Configuration conf) {
        super(conf);
    }

    /**
     * Program entry point: runs the cleaner and terminates the JVM with its exit code.
     *
     * <p>Exactly three arguments are expected:</p>
     * <ul>
     * <li>{@code args[0]} - clean mode</li>
     * <li>{@code args[1]} - user name substituted into the configured clean directories</li>
     * <li>{@code args[2]} - value handed to {@code CleanerInitializer.initDFSCleaner}
     *     (presumably the configuration file to load - confirm against that class)</li>
     * </ul>
     *
     * @param args command-line arguments
     * @throws Exception if failed to execute
     */
    public static void main(String[] args) throws Exception {
        // Supply an explicit Configuration: the no-argument constructor leaves the tool without
        // one, and the file system lookup in execute() needs a non-null configuration.
        HDFSCleaner tool = new HDFSCleaner(new Configuration());
        int result = tool.run(args);
        System.exit(result);
    }

    /**
     * Runs the cleaner by delegating to {@link #execute(String[])}.
     * @param args command-line arguments, forwarded unchanged
     * @return the exit code produced by {@code execute}
     * @throws Exception declared by the {@code Tool} contract
     */
    @Override
    public int run(String[] args) throws Exception {
        final int exitCode = execute(args);
        return exitCode;
    }

    /**
     * Validates the arguments, initializes the cleaner and cleans every configured directory.
     *
     * <p>The arguments are, in order: the clean mode, the user name and the value passed to
     * {@code CleanerInitializer.initDFSCleaner}. The mode must equal either
     * {@code Constants.CLEAN_MODE_NOMAL} (sub-directories are left alone) or
     * {@code Constants.CLEAN_MODE_RECURSIVE} (sub-directories are cleaned as well).</p>
     *
     * @param args command-line arguments; exactly three are required
     * @return {@code Constants.EXIT_CODE_SUCCESS} if every directory was cleaned,
     *     {@code Constants.EXIT_CODE_WARNING} if at least one directory could not be cleaned,
     *     {@code Constants.EXIT_CODE_ERROR} for invalid arguments, failed initialization,
     *     invalid clean-directory settings or an unexpected runtime exception
     */
    protected int execute(String[] args) {
        String[] prop = new String[1];
        String mode = null;
        String user = null;

        // Pick up whatever was supplied so that the error output below can show it.
        if (args.length > 0) {
            mode = args[0];
        }
        if (args.length > 1) {
            user = args[1];
        }
        if (args.length > 2) {
            prop[0] = args[2];
        }

        // Exactly three arguments are required.
        if (args.length != 3) {
            System.err.println("ERROR????? ?" + args.length
                    + " " + mode + " ??" + user
                    + " " + prop[0]);
            Log.log(CLASS, MessageIdConst.HCLN_PARAMCHECK_ERROR, "?", args.length, new Date(), mode,
                    prop[0]);
            return Constants.EXIT_CODE_ERROR;
        }

        try {
            // Initialize the cleaner.
            if (!CleanerInitializer.initDFSCleaner(prop)) {
                Log.log(CLASS, MessageIdConst.HCLN_INIT_ERROR, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_ERROR;
            }

            // Log the start of processing.
            Log.log(CLASS, MessageIdConst.HCLN_START, new Date(), mode, prop[0]);

            // Translate the mode argument into the recursive flag.
            boolean recursive;
            if (Constants.CLEAN_MODE_NOMAL.equals(mode)) {
                recursive = false;
            } else if (Constants.CLEAN_MODE_RECURSIVE.equals(mode)) {
                recursive = true;
            } else {
                Log.log(CLASS, MessageIdConst.HCLN_PARAMCHECK_ERROR, "", mode, new Date(), mode,
                        prop[0]);
                return Constants.EXIT_CODE_ERROR;
            }

            // Resolve the directories to clean and their file-name patterns.
            DFSCleanerBean[] bean = null;
            try {
                bean = getCleanLocalPath(user);
            } catch (CleanerSystemException e) {
                Log.log(e.getCause(), e.getClazz(), e.getMessageId(), e.getMessageArgs());
                return Constants.EXIT_CODE_ERROR;
            }

            // Retention period in days.
            int keepDate = getHDFSFileKeepDate();

            // The configuration does not change between directories, so look it up once.
            // Fall back to a default one when the tool was created without a configuration;
            // the file system lookup below cannot work with null.
            Configuration conf = getConf();
            if (conf == null) {
                conf = new Configuration();
            }

            boolean cleanResult = true;
            Date now = new Date();
            for (int i = 0; i < bean.length; i++) {
                // One reference per directory: a failed lookup must not leave the previous
                // directory's (already closed) file system behind for the finally block.
                FileSystem fs = null;
                try {
                    Path cleanDir = bean[i].getCleanDir();
                    // Obtain the file system that owns the directory.
                    try {
                        fs = cleanDir.getFileSystem(conf);
                        if (fs == null) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                                    "Path.getFileSystem??null", cleanDir.toString());
                            cleanResult = false;
                            continue;
                        }
                    } catch (IOException e) {
                        Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                                "HDFS????", cleanDir.toString());
                        cleanResult = false;
                        continue;
                    }

                    boolean target = bean[i].hasExecutionId();
                    String pattern = bean[i].getPattern();
                    Log.log(CLASS, MessageIdConst.HCLN_CLEN_FILE, cleanDir.toString(), pattern, keepDate, mode,
                            target, now);
                    if (cleanDir(fs, cleanDir, target, pattern, keepDate, now, recursive)) {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_SUCCESS, cleanDir.toString(), keepDate, mode);
                    } else {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_FAIL, cleanDir.toString(), keepDate, mode);
                        cleanResult = false;
                    }
                } catch (CleanerSystemException e) {
                    Log.log(e.getCause(), e.getClazz(), e.getMessageId(), e.getMessageArgs());
                    cleanResult = false;
                } finally {
                    if (fs != null) {
                        // CHECKSTYLE:OFF EmptyBlockCheck
                        try {
                            fs.close();
                        } catch (IOException ignored) {
                            // A failure to close must not change the result of the clean-up.
                        }
                        // CHECKSTYLE:ON EmptyBlockCheck
                    }
                }
            }

            // Report the overall outcome.
            if (cleanResult) {
                Log.log(CLASS, MessageIdConst.HCLN_EXIT_SUCCESS, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_SUCCESS;
            } else {
                Log.log(CLASS, MessageIdConst.HCLN_EXIT_WARNING, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_WARNING;
            }
        } catch (RuntimeException e) {
            try {
                Log.log(e, CLASS, MessageIdConst.HCLN_EXCEPRION, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_ERROR;
            } catch (Exception e1) {
                // Logging itself failed; standard error is the only channel left.
                System.err.print("HDFSCleaner????????");
                e1.printStackTrace();
                return Constants.EXIT_CODE_ERROR;
            }
        }
    }

    /**
     * Deletes the expired entries of one directory.
     *
     * <p>Files are deleted when they are expired and match {@code pattern}. Sub-directories are
     * only visited when {@code recursive} is set: their content is cleaned first, and the
     * sub-directory itself is removed when it is empty afterwards and was already expired
     * according to the modification time read before its content was touched.</p>
     *
     * @param fs file system that owns {@code cleanPath}
     * @param cleanPath directory to clean
     * @param isSetExecutionId whether the names of the direct sub-directories are passed to
     *     {@link #isRunningJobFlow(String)} so that directories of running job flows are skipped;
     *     nested levels are always processed with {@code false}
     * @param pattern regular expression a file path must match to be deleted
     * @param keepDate retention period in days
     * @param now reference time used for the expiry check
     * @param recursive whether sub-directories are cleaned as well
     * @return {@code true} if nothing failed, {@code false} if the directory is missing, is not a
     *     directory, a deletion failed, a sub-directory could not be cleaned or an I/O error occurred
     * @throws CleanerSystemException if {@code pattern} is not a valid regular expression
     */
    private boolean cleanDir(FileSystem fs, Path cleanPath, boolean isSetExecutionId, String pattern, int keepDate,
            Date now, boolean recursive) throws CleanerSystemException {
        try {
            if (!fs.exists(cleanPath)) {
                // The target does not exist.
                Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                        "??????", cleanPath.toString());
                return false;
            }
            if (!fs.getFileStatus(cleanPath).isDir()) {
                // The target is not a directory.
                Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                        "??????", cleanPath.toString());
                return false;
            }

            Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE, cleanPath.toString());
            int cleanFileCount = 0;
            int cleanDirCount = 0;
            boolean result = true;
            // Work on the statuses returned by the listing itself. Asking the file system again
            // for every entry costs one more round trip each, and an entry that disappears in
            // between would raise FileNotFoundException and abort the whole directory.
            for (FileStatus status : getListStatus(fs, cleanPath)) {
                Path path = status.getPath();
                long lastModifiedTime = status.getModificationTime();

                if (!status.isDir()) {
                    // Plain file: delete it when it is expired and matches the pattern.
                    if (isExpired(lastModifiedTime, keepDate, now) && isMatchPattern(path, pattern)) {
                        if (!fs.delete(path, false)) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                            result = false;
                        } else {
                            Log.log(CLASS, MessageIdConst.HCLN_DELETE_FILE, path.toString());
                            cleanFileCount++;
                        }
                    }
                    continue;
                }
                if (!recursive) {
                    // Sub-directories are left untouched in non-recursive mode.
                    continue;
                }
                if (isSetExecutionId && isRunningJobFlow(path.getName())) {
                    // The directory name is an execution ID whose job flow is still running.
                    Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_EXEC, path.toString());
                    continue;
                }

                boolean empty = getListStatus(fs, path).length == 0;
                if (!empty) {
                    // Clean the content first, then look again whether anything is left.
                    if (!cleanDir(fs, path, false, pattern, keepDate, now, recursive)) {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                        result = false;
                        continue;
                    }
                    empty = getListStatus(fs, path).length == 0;
                }
                // lastModifiedTime was read before the content was cleaned, so deleting children
                // does not make the directory itself look freshly modified.
                if (empty && isExpired(lastModifiedTime, keepDate, now)) {
                    if (!fs.delete(path, false)) {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                        result = false;
                    } else {
                        cleanDirCount++;
                        Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                    }
                }
            }

            Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE_SUCCESS, cleanPath.toString(), cleanDirCount,
                    cleanFileCount);

            return result;
        } catch (IOException e) {
            Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_EXCEPTION, cleanPath.getName());
            return false;
        }
    }

    /**
     * Lists the entries of a path, never returning {@code null}.
     * @param fs file system to query
     * @param path path whose entries are listed
     * @return the listed statuses, or an empty array when {@code listStatus} returns {@code null}
     *     or reports the path as not found
     * @throws IOException if the listing fails for any other reason
     */
    private FileStatus[] getListStatus(FileSystem fs, Path path) throws IOException {
        try {
            FileStatus[] listed = fs.listStatus(path);
            if (listed != null) {
                return listed;
            }
        } catch (FileNotFoundException ignored) {
            // A missing path is reported as "no entries", same as a null result.
        }
        return new FileStatus[0];
    }

    /**
     * Checks whether a path matches the deletion pattern.
     * @param path path to test; its full string form is matched, not just the last segment
     * @param pattern regular expression; {@code null} or empty matches every path
     * @return {@code true} if the whole path string matches the pattern
     * @throws CleanerSystemException if the pattern is not a valid regular expression
     */
    private boolean isMatchPattern(Path path, String pattern) throws CleanerSystemException {
        if (pattern == null || pattern.length() == 0) {
            return true;
        }
        String candidate = path.toString();
        final Pattern compiled;
        try {
            compiled = Pattern.compile(pattern);
        } catch (PatternSyntaxException e) {
            throw new CleanerSystemException(e, this.getClass(), MessageIdConst.HCLN_PATTERN_FAIL, pattern);
        }
        return compiled.matcher(candidate).matches();
    }

    /**
     * Checks whether the retention period of an entry has passed.
     * @param lastModifiedTime modification time of the entry, in milliseconds
     * @param keepDate retention period in days
     * @param now reference time
     * @return {@code true} if {@code now} is strictly later than the modification time plus the
     *     retention period
     */
    private boolean isExpired(long lastModifiedTime, int keepDate, Date now) {
        final long millisPerDay = 24L * 60L * 60L * 1000L;
        long expiresAt = lastModifiedTime + keepDate * millisPerDay;
        return expiresAt < now.getTime();
    }

    /**
     * Reports whether the job flow with the given execution ID is currently running.
     * This is not implemented yet and always answers {@code false}, so {@code cleanDir} never
     * skips a directory on that basis unless a subclass overrides this method.
     * @param executionId execution ID; {@code cleanDir} passes the name of a sub-directory
     * @return {@code true} if running, {@code false} otherwise (currently always {@code false})
     */
    protected boolean isRunningJobFlow(String executionId) {
        // TODO not implemented: every job flow is reported as not running
        return false;
    }

    /**
     * Reads the retention period from the loaded configuration.
     * @return the value of the {@code Constants.PROP_KEY_HDFS_FILE_KEEP_DATE} property parsed
     *     as an integer; {@code isExpired} interprets it as a number of days
     */
    private int getHDFSFileKeepDate() {
        String configured = ConfigurationLoader.getProperty(Constants.PROP_KEY_HDFS_FILE_KEEP_DATE);
        return Integer.parseInt(configured);
    }

    /**
     * Builds the list of directories to clean from the loaded configuration.
     *
     * <p>Every non-empty property whose key starts with
     * {@code Constants.PROP_KEY_HDFS_FILE_CLEAN_DIR + "."} describes one directory. The user
     * placeholder in its value is replaced by {@code user}. A trailing execution ID placeholder
     * is cut off together with the single character in front of it, and the bean is flagged as
     * holding execution ID directories. The file-name pattern is read from the property
     * {@code Constants.PROP_KEY_HDFS_FILE_CLEAN_PATTERN + "." + suffix}, where the suffix is the
     * part of the directory key after its last dot.</p>
     *
     * @param user user name substituted for the user placeholder
     * @return one bean per configured directory, in the order the keys were returned
     * @throws CleanerSystemException if a directory has no corresponding pattern property
     */
    private DFSCleanerBean[] getCleanLocalPath(String user) throws CleanerSystemException {
        // Keys of all clean-directory settings that carry a value.
        List<String> cleanDirList = ConfigurationLoader
                .getPropStartWithString(Constants.PROP_KEY_HDFS_FILE_CLEAN_DIR + ".");
        List<String> noEmptyDirList = ConfigurationLoader.getNoEmptyList(cleanDirList);

        List<DFSCleanerBean> list = new ArrayList<DFSCleanerBean>(noEmptyDirList.size());
        for (String dirKey : noEmptyDirList) {
            DFSCleanerBean bean = new DFSCleanerBean();
            // Resolve the user placeholder.
            String strPath = ConfigurationLoader.getProperty(dirKey);
            String strCleanPath = strPath.replace(Constants.HDFS_PATH_REPLACE_STR_USER, user);
            // Strip a trailing execution ID placeholder.
            boolean isSetexecutionId = strCleanPath.endsWith(Constants.HDFS_PATH_REPLACE_STR_ID);
            if (isSetexecutionId) {
                // Cut at the LAST occurrence, which is the trailing one that endsWith() matched;
                // the first occurrence may sit earlier in the path. The character in front of
                // the placeholder (presumably the path separator) is dropped as well, but never
                // below index 0 when the value consists of the placeholder alone.
                int placeholderStart = strCleanPath.lastIndexOf(Constants.HDFS_PATH_REPLACE_STR_ID);
                strCleanPath = strCleanPath.substring(0, Math.max(placeholderStart - 1, 0));
            }
            bean.setCleanDir(createPath(strCleanPath));
            bean.setExecutionId(isSetexecutionId);

            // Look up the pattern that shares the directory key's suffix.
            String number = dirKey.substring(dirKey.lastIndexOf(".") + 1);
            String pattarnKey = Constants.PROP_KEY_HDFS_FILE_CLEAN_PATTERN + "." + number;
            String pattern = ConfigurationLoader.getProperty(pattarnKey);
            if (pattern == null || pattern.equals("")) {
                throw new CleanerSystemException(this.getClass(), MessageIdConst.HCLN_PATTERN_NOT_FOUND, dirKey,
                        strPath, pattarnKey);
            }
            bean.setPattern(pattern);

            list.add(bean);
        }
        return list.toArray(new DFSCleanerBean[list.size()]);
    }

    /**
     * Builds the full path of a clean directory.
     * @param strCleanPath configured directory with its placeholders already resolved
     * @return a path made of the {@code Constants.PROP_KEY_HDFS_PROTCOL_HOST} property value,
     *     {@code Constants.HDFSFIXED_PATH} and {@code strCleanPath}, concatenated in that order
     */
    protected Path createPath(String strCleanPath) {
        StringBuilder location = new StringBuilder(
                ConfigurationLoader.getProperty(Constants.PROP_KEY_HDFS_PROTCOL_HOST));
        location.append(Constants.HDFSFIXED_PATH).append(strCleanPath);
        return new Path(location.toString());
    }
}