// NOTE(review): stray non-Java text ("Java tutorial") was prepended to this file, breaking compilation; kept here only as a comment.
/**
 * Copyright 2011-2016 Asakusa Framework Team.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.asakusafw.cleaner.main;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;

import com.asakusafw.cleaner.bean.DFSCleanerBean;
import com.asakusafw.cleaner.common.CleanerInitializer;
import com.asakusafw.cleaner.common.ConfigurationLoader;
import com.asakusafw.cleaner.common.Constants;
import com.asakusafw.cleaner.common.MessageIdConst;
import com.asakusafw.cleaner.exception.CleanerSystemException;
import com.asakusafw.cleaner.log.Log;

/**
 * Command-line tool that deletes expired files and directories from
 * configured clean-target directories on HDFS.
 * <p>
 * Targets, file-name patterns and the keep period (in days) are read via
 * {@link ConfigurationLoader}; files older than the keep period whose names
 * match the configured pattern are removed. In recursive mode, emptied
 * sub-directories are removed as well.
 * <p>
 * NOTE(review): the original Javadoc/comments in this file were mojibake
 * (non-ASCII text lost in encoding); they have been rewritten in English
 * from the visible code. Runtime message strings are left untouched.
 *
 * @author yuta.shirai
 */
public class HDFSCleaner extends Configured implements Tool {
    /** Class literal used as the logging source. */
    private static final Class<?> CLASS = HDFSCleaner.class;

    /**
     * Creates a new instance.
     */
    public HDFSCleaner() {
        super();
    }

    /**
     * Creates a new instance with configuration object.
     * @param conf configuration
     */
    public HDFSCleaner(Configuration conf) {
        super(conf);
    }

    /**
     * Program entry point.
     * <ul>
     * <li>{@code args[0]} - clean mode (normal or recursive)</li>
     * <li>{@code args[1]} - user name (substituted into target paths)</li>
     * <li>{@code args[2]} - configuration (properties) file path</li>
     * </ul>
     * @param args command line arguments (see above)
     * @throws Exception if failed to execute
     */
    public static void main(String[] args) throws Exception {
        HDFSCleaner tool = new HDFSCleaner();
        int result = tool.run(args);
        System.exit(result);
    }

    @Override
    public int run(String[] args) throws Exception {
        return execute(args);
    }

    /**
     * Executes the HDFS cleaning process.
     * @param args command line arguments: mode, user, configuration file path
     * @return exit code ({@code Constants.EXIT_CODE_SUCCESS},
     *     {@code EXIT_CODE_WARNING} or {@code EXIT_CODE_ERROR})
     */
    protected int execute(String[] args) {
        String[] prop = new String[1];
        String mode = null;
        String user = null;
        FileSystem fs = null;

        if (args.length > 0) {
            mode = args[0];
        }
        if (args.length > 1) {
            user = args[1];
        }
        if (args.length > 2) {
            prop[0] = args[2];
        }

        // Argument check: exactly three arguments are required.
        if (args.length != 3) {
            System.err.println("ERROR????? ?" + args.length + " " + mode + " ??" + user + " " + prop[0]);
            Log.log(CLASS, MessageIdConst.HCLN_PARAMCHECK_ERROR, "?", args.length, new Date(), mode, prop[0]);
            return Constants.EXIT_CODE_ERROR;
        }

        try {
            // Initialize the cleaner from the given configuration file.
            if (!CleanerInitializer.initDFSCleaner(prop)) {
                Log.log(CLASS, MessageIdConst.HCLN_INIT_ERROR, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_ERROR;
            }

            // Start message.
            Log.log(CLASS, MessageIdConst.HCLN_START, new Date(), mode, prop[0]);

            // Resolve the clean mode: normal (non-recursive) or recursive.
            boolean recursive = false;
            if (Constants.CLEAN_MODE_NOMAL.equals(mode)) {
                recursive = false;
            } else if (Constants.CLEAN_MODE_RECURSIVE.equals(mode)) {
                recursive = true;
            } else {
                Log.log(CLASS, MessageIdConst.HCLN_PARAMCHECK_ERROR, "", mode, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_ERROR;
            }

            // Load the clean-target directories on HDFS from configuration.
            DFSCleanerBean[] bean = null;
            try {
                bean = getCleanLocalPath(user);
            } catch (CleanerSystemException e) {
                Log.log(e.getCause(), e.getClazz(), e.getMessageId(), e.getMessageArgs());
                return Constants.EXIT_CODE_ERROR;
            }

            // Number of days to keep files before they expire.
            int keepDate = getHDFSFileKeepDate();

            boolean cleanResult = true;
            Date now = new Date();
            for (int i = 0; i < bean.length; i++) {
                try {
                    // Directory to clean.
                    Path cleanDir = bean[i].getCleanDir();
                    // Obtain the FileSystem for the target path.
                    try {
                        Configuration conf = getConf();
                        fs = cleanDir.getFileSystem(conf);
                        if (fs == null) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                                    "Path.getFileSystem??null", cleanDir.toString());
                            cleanResult = false;
                            continue;
                        }
                    } catch (IOException e) {
                        Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                                "HDFS????", cleanDir.toString());
                        cleanResult = false;
                        continue;
                    }
                    boolean target = bean[i].hasExecutionId();
                    String pattern = bean[i].getPattern();
                    Log.log(CLASS, MessageIdConst.HCLN_CLEN_FILE,
                            cleanDir.toString(), pattern, keepDate, mode, target, now);
                    if (cleanDir(fs, cleanDir, target, pattern, keepDate, now, recursive)) {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_SUCCESS,
                                cleanDir.toString(), keepDate, mode);
                    } else {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_FAIL,
                                cleanDir.toString(), keepDate, mode);
                        cleanResult = false;
                    }
                } catch (CleanerSystemException e) {
                    Log.log(e.getCause(), e.getClazz(), e.getMessageId(), e.getMessageArgs());
                    cleanResult = false;
                } finally {
                    if (fs != null) {
                        // CHECKSTYLE:OFF EmptyBlockCheck
                        try {
                            fs.close();
                        } catch (IOException ignored) {
                            // ignored
                        }
                        // CHECKSTYLE:ON EmptyBlockCheck
                    }
                }
            }

            // Report the overall result.
            if (cleanResult) {
                Log.log(CLASS, MessageIdConst.HCLN_EXIT_SUCCESS, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_SUCCESS;
            } else {
                Log.log(CLASS, MessageIdConst.HCLN_EXIT_WARNING, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_WARNING;
            }
        } catch (RuntimeException e) {
            try {
                Log.log(e, CLASS, MessageIdConst.HCLN_EXCEPRION, new Date(), mode, prop[0]);
                return Constants.EXIT_CODE_ERROR;
            } catch (Exception e1) {
                // Logging itself failed; fall back to stderr.
                System.err.print("HDFSCleaner????????");
                e1.printStackTrace();
                return Constants.EXIT_CODE_ERROR;
            }
        }
    }

    /**
     * Cleans a single directory: deletes expired files matching the pattern,
     * and (in recursive mode) descends into sub-directories, removing those
     * that end up empty and expired.
     * @param fs file system holding the target directory
     * @param cleanPath directory to clean
     * @param isSetExecutionId {@code true} if child directory names are execution IDs
     *     that must be skipped while the corresponding jobflow is running
     * @param pattern file name pattern (regex); empty/null matches everything
     * @param keepDate number of days to keep files
     * @param now reference time for expiration checks
     * @param recursive whether to descend into sub-directories
     * @return {@code true} if everything was cleaned successfully
     * @throws CleanerSystemException if the pattern is an invalid regex
     */
    private boolean cleanDir(
            FileSystem fs,
            Path cleanPath,
            boolean isSetExecutionId,
            String pattern,
            int keepDate,
            Date now,
            boolean recursive) throws CleanerSystemException {
        try {
            if (!fs.exists(cleanPath)) {
                // Target does not exist.
                Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "??????", cleanPath.toString());
                return false;
            }
            if (!fs.getFileStatus(cleanPath).isDir()) {
                // Target is not a directory.
                Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "??????", cleanPath.toString());
                return false;
            }

            // Start cleaning this directory.
            Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE, cleanPath.toString());
            int cleanFileCount = 0;
            int cleanDirCount = 0;
            boolean result = true;
            FileStatus[] dirStatus = getListStatus(fs, cleanPath);
            Path[] listedPaths = FileUtil.stat2Paths(dirStatus);
            for (Path path : listedPaths) {
                FileStatus status = fs.getFileStatus(path);
                long lastModifiedTime = status.getModificationTime();
                if (status.isDir() && recursive) {
                    // Sub-directory (recursive mode only).
                    if (isSetExecutionId) {
                        // Directory name is an execution ID: skip it while the jobflow is running.
                        String executionId = path.getName();
                        if (isRunningJobFlow(executionId)) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_EXEC, path.toString());
                            continue;
                        }
                    }
                    FileStatus[] childdirStatus = getListStatus(fs, path);
                    if (childdirStatus.length == 0) {
                        // Already empty: remove it if expired.
                        if (isExpired(lastModifiedTime, keepDate, now)) {
                            if (!fs.delete(path, false)) {
                                Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                                result = false;
                            } else {
                                cleanDirCount++;
                                Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                            }
                        }
                    } else {
                        // Not empty: clean it first, then remove it if it became empty.
                        if (cleanDir(fs, path, false, pattern, keepDate, now, recursive)) {
                            childdirStatus = getListStatus(fs, path);
                            if (childdirStatus.length == 0) {
                                if (isExpired(lastModifiedTime, keepDate, now)) {
                                    if (!fs.delete(path, false)) {
                                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                                        result = false;
                                    } else {
                                        cleanDirCount++;
                                        Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                                    }
                                }
                            }
                        } else {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                            result = false;
                        }
                    }
                } else if (!status.isDir()) {
                    // Regular file: delete when expired and matching the pattern.
                    if (isExpired(lastModifiedTime, keepDate, now) && isMatchPattern(path, pattern)) {
                        if (!fs.delete(path, false)) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString());
                            result = false;
                        } else {
                            Log.log(CLASS, MessageIdConst.HCLN_DELETE_FILE, path.toString());
                            cleanFileCount++;
                        }
                    }
                }
            }
            Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE_SUCCESS,
                    cleanPath.toString(), cleanDirCount, cleanFileCount);
            return result;
        } catch (IOException e) {
            Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_EXCEPTION, cleanPath.getName());
            return false;
        }
    }

    /**
     * Lists the entries of a directory, treating a vanished directory as empty.
     * @param fs file system to query
     * @param path directory to list
     * @return the directory entries; never {@code null} (empty array when missing)
     * @throws IOException if the listing fails for a reason other than absence
     */
    private FileStatus[] getListStatus(FileSystem fs, Path path) throws IOException {
        FileStatus[] status;
        try {
            status = fs.listStatus(path);
        } catch (FileNotFoundException e) {
            // Directory disappeared between checks; treat as empty.
            status = null;
        }
        if (status == null) {
            status = new FileStatus[0];
        }
        return status;
    }

    /**
     * Checks whether a path matches the configured file name pattern.
     * An empty or {@code null} pattern matches every path.
     * @param path path to test (its full string form is matched)
     * @param pattern regular expression, or empty/null for "match all"
     * @return {@code true} if the path matches
     * @throws CleanerSystemException if the pattern is not a valid regex
     */
    private boolean isMatchPattern(Path path, String pattern) throws CleanerSystemException {
        if (pattern == null || pattern.equals("")) {
            return true;
        } else {
            String strFile = path.toString();
            try {
                Matcher m = Pattern.compile(pattern).matcher(strFile);
                return m.matches();
            } catch (PatternSyntaxException e) {
                throw new CleanerSystemException(
                        e, this.getClass(), MessageIdConst.HCLN_PATTERN_FAIL, pattern);
            }
        }
    }

    /**
     * Checks whether a file has passed its keep period.
     * @param lastModifiedTime last modification time (epoch millis)
     * @param keepDate number of days to keep
     * @param now reference time
     * @return {@code true} if the file is expired
     */
    private boolean isExpired(long lastModifiedTime, int keepDate, Date now) {
        long keepTime = (keepDate) * 24L * 60L * 60L * 1000L;
        long period = lastModifiedTime + keepTime;
        return now.getTime() > period;
    }

    /**
     * Checks whether the jobflow with the given execution ID is running.
     * @param executionId execution ID
     * @return {@code true} if running, {@code false} otherwise
     */
    protected boolean isRunningJobFlow(String executionId) {
        // TODO not implemented yet: always reports "not running".
        return false;
    }

    /**
     * Returns the configured number of days to keep HDFS files.
     * @return keep period in days
     */
    private int getHDFSFileKeepDate() {
        return Integer.parseInt(
                ConfigurationLoader.getProperty(Constants.PROP_KEY_HDFS_FILE_KEEP_DATE));
    }

    /**
     * Builds the list of HDFS clean targets from configuration.
     * Each target consists of a directory (with the user name substituted),
     * an execution-ID flag, and a file name pattern.
     * @param user user name substituted into the configured paths
     * @return clean target descriptors
     * @throws CleanerSystemException if a target has no configured pattern
     */
    private DFSCleanerBean[] getCleanLocalPath(String user) throws CleanerSystemException {
        // Collect the configured clean-directory keys.
        List<String> cleanDirList = ConfigurationLoader
                .getPropStartWithString(Constants.PROP_KEY_HDFS_FILE_CLEAN_DIR + ".");
        List<String> noEmptyDirList = ConfigurationLoader.getNoEmptyList(cleanDirList);
        List<DFSCleanerBean> list = new ArrayList<DFSCleanerBean>();
        int listSize = noEmptyDirList.size();
        for (int i = 0; i < listSize; i++) {
            DFSCleanerBean bean = new DFSCleanerBean();
            String dirKey = noEmptyDirList.get(i);

            // Resolve the target directory, substituting the user name.
            String strPath = ConfigurationLoader.getProperty(dirKey);
            String strCleanPath = strPath.replace(Constants.HDFS_PATH_REPLACE_STR_USER, user);

            // Strip a trailing execution-ID placeholder, remembering its presence.
            boolean isSetExecutionId = false;
            if (strCleanPath.endsWith(Constants.HDFS_PATH_REPLACE_STR_ID)) {
                isSetExecutionId = true;
                strCleanPath = strCleanPath.substring(
                        0, strCleanPath.indexOf(Constants.HDFS_PATH_REPLACE_STR_ID) - 1);
            }
            bean.setCleanDir(createPath(strCleanPath));
            bean.setExecutionId(isSetExecutionId);

            // Look up the pattern keyed by the same trailing number as the directory key.
            String number = dirKey.substring(dirKey.lastIndexOf(".") + 1, dirKey.length());
            String patternKey = Constants.PROP_KEY_HDFS_FILE_CLEAN_PATTERN + "." + number;
            String pattern = ConfigurationLoader.getProperty(patternKey);
            if (pattern == null || pattern.equals("")) {
                throw new CleanerSystemException(
                        this.getClass(),
                        MessageIdConst.HCLN_PATTERN_NOT_FOUND,
                        dirKey,
                        strPath,
                        patternKey);
            } else {
                bean.setPattern(pattern);
            }
            list.add(bean);
        }
        return list.toArray(new DFSCleanerBean[list.size()]);
    }

    /**
     * Builds the full HDFS path for a clean target: protocol/host prefix,
     * fixed base path, then the configured directory.
     * @param strCleanPath configured (relative) clean directory
     * @return the assembled HDFS path
     */
    protected Path createPath(String strCleanPath) {
        // StringBuilder is sufficient here: local, single-threaded use.
        StringBuilder path = new StringBuilder(
                ConfigurationLoader.getProperty(Constants.PROP_KEY_HDFS_PROTCOL_HOST));
        path.append(Constants.HDFSFIXED_PATH);
        path.append(strCleanPath);
        return new Path(path.toString());
    }
}