/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.opencloudengine.flamingo.mapreduce.util;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DFSClient;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.mapreduce.InputSplit;
import org.opencloudengine.flamingo.mapreduce.util.filter.BypassPathFilter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * HDFS utility methods: DFS client construction, file/directory existence
 * checks, stream handling, copy/move/delete/merge helpers.
 *
 * <p>All methods are static and stateless; thread-safety follows from the
 * underlying Hadoop {@link FileSystem} / {@link DFSClient} semantics.
 *
 * @author Edward KIM
 * @author Seo Ji Hye
 * @since 0.1
 */
public class HdfsUtils {

    /** SLF4J logger. */
    private static Logger logger = LoggerFactory.getLogger(HdfsUtils.class);

    /** Default Hadoop user/group information ("user,group"). */
    public static final String DEFAULT_UGI = "hadoop,hadoop";

    /** URL scheme prefix identifying an HDFS location. */
    public static final String HDFS_URL_PREFIX = "hdfs://";

    /**
     * Creates a {@link DFSClient} for the given HDFS URL.
     *
     * @param hdfsUrl HDFS URL of the form {@code hdfs://host:port}
     * @return a connected DFS client
     * @throws java.io.IOException      if the client cannot be created
     * @throws IllegalArgumentException if the URL is null or not an hdfs:// URL
     */
    public static DFSClient createDFSClient(String hdfsUrl) throws IOException {
        if (hdfsUrl == null || !hdfsUrl.startsWith(HDFS_URL_PREFIX)) {
            throw new IllegalArgumentException("Invalid HDFS URL [" + hdfsUrl + "]");
        }
        String url = StringUtils.replace(hdfsUrl, HDFS_URL_PREFIX, "");
        String[] parts = url.split(":");
        return createDFSClient(parts[0], Integer.valueOf(parts[1]));
    }

    /**
     * Creates a {@link DFSClient} for the given Namenode address.
     *
     * @param namenodeIp   Namenode IP or hostname
     * @param namenodePort Namenode port
     * @return a connected DFS client
     * @throws java.io.IOException if the client cannot be created
     */
    public static DFSClient createDFSClient(String namenodeIp, int namenodePort) throws IOException {
        Configuration config = new Configuration();
        InetSocketAddress address = new InetSocketAddress(namenodeIp, namenodePort);
        return new DFSClient(address, config);
    }

    /**
     * Deletes a path if it exists.
     *
     * @param client    DFS client
     * @param path      path to delete
     * @param recursive whether to delete directories recursively
     * @return {@code true} if the path existed and was deleted
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean remove(DFSClient client, String path, boolean recursive) throws IOException {
        if (client.exists(path)) {
            logger.info("Deleting path [{}]. Recursive [{}]", path, recursive);
            return client.delete(path, recursive);
        }
        logger.info("Path [{}] does not exist; nothing to delete.", path);
        return false;
    }

    /**
     * Lists the plain files (non-directories) directly under a path.
     *
     * @param fs   file system
     * @param path directory to list
     * @return fully qualified file names under {@code path}
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static List<String> listFiles(FileSystem fs, String path) throws IOException {
        List<String> list = new ArrayList<String>();
        FileStatus[] statuses = fs.listStatus(new Path(path));
        if (statuses != null) {
            for (FileStatus status : statuses) {
                if (!status.isDir()) {
                    list.add(path + "/" + status.getPath().getName());
                }
            }
        }
        return list;
    }

    /**
     * Opens an output stream for a file via the DFS client.
     *
     * @param client    DFS client
     * @param filename  file to create
     * @param overwrite whether to overwrite an existing file
     * @return output stream (caller must close)
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static OutputStream getOutputStream(DFSClient client, String filename, boolean overwrite) throws IOException {
        return client.create(filename, overwrite);
    }

    /**
     * Opens an input stream for a file via the DFS client.
     *
     * @param client   DFS client
     * @param filename file to open
     * @return input stream (caller must close)
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static InputStream getInputStream(DFSClient client, String filename) throws IOException {
        return client.open(filename);
    }

    /**
     * Closes an output stream.
     * NOTE: method name keeps the historical typo ("Ouput") for caller compatibility.
     *
     * @param outputStream stream to close
     * @throws java.io.IOException if closing fails
     */
    public static void closeOuputStream(OutputStream outputStream) throws IOException {
        outputStream.close();
    }

    /**
     * Closes an input stream.
     *
     * @param inputStream stream to close
     * @throws java.io.IOException if closing fails
     */
    public static void closeInputStream(InputStream inputStream) throws IOException {
        inputStream.close();
    }

    /**
     * Extracts the file name from an {@link InputSplit}, whose string form is
     * {@code file + ":" + start + "+" + length}.
     *
     * @param inputSplit input split
     * @return the file portion of the split description
     */
    public static String getFilename(InputSplit inputSplit) {
        String filename = org.opencloudengine.flamingo.mapreduce.util.FileUtils.getFilename(inputSplit.toString());
        int start = filename.indexOf(":");
        return filename.substring(0, start);
    }

    /**
     * Returns a {@link FileSystem} bound to the given HDFS URL.
     *
     * @param hdfsUrl HDFS URL
     * @return file system
     * @throws java.io.IOException if the file system cannot be obtained
     */
    public static FileSystem getFileSystem(String hdfsUrl) throws IOException {
        Configuration configuration = new Configuration();
        // "fs.default.name" is the pre-2.x key; kept for compatibility with the
        // Hadoop version this project targets (DFSClient/isDir() are 1.x APIs).
        configuration.set("fs.default.name", hdfsUrl);
        return FileSystem.get(configuration);
    }

    /**
     * Tests whether a path exists.
     *
     * @param client DFS client
     * @param path   path to test
     * @return {@code true} if the path exists
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean exists(DFSClient client, String path) throws IOException {
        return client.exists(path);
    }

    /**
     * Tests whether a path is a plain file.
     *
     * @param client DFS client
     * @param path   path to test
     * @return {@code true} if the path exists and is not a directory
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean isFile(DFSClient client, String path) throws IOException {
        HdfsFileStatus status = client.getFileInfo(path);
        // getFileInfo returns null for a nonexistent path; the original code NPE'd here.
        return status != null && !status.isDir();
    }

    /**
     * Tests whether a path is a directory.
     *
     * @param fs   {@link org.apache.hadoop.fs.FileSystem}
     * @param path path to test
     * @return {@code true} if the path exists and is a directory
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean isDirectory(FileSystem fs, String path) throws IOException {
        try {
            FileStatus status = fs.getFileStatus(new Path(path));
            return status.isDir();
        } catch (FileNotFoundException ex) {
            return false;
        }
    }

    /**
     * Writes string content to a file, overwriting any existing file.
     *
     * @param client  DFS client
     * @param path    destination file
     * @param content content to write (encoded with the platform default
     *                charset, as before — NOTE(review): consider UTF-8)
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static void saveFile(DFSClient client, String path, String content) throws IOException {
        OutputStream outputStream = getOutputStream(client, path, true);
        try {
            org.opencloudengine.flamingo.mapreduce.util.FileUtils.copy(content.getBytes(), outputStream);
        } finally {
            outputStream.close();
        }
    }

    /**
     * Returns the file status for a path.
     *
     * @param client DFS client
     * @param path   path to inspect
     * @return file status, or {@code null} if the path does not exist
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static HdfsFileStatus getFileInfo(DFSClient client, String path) throws IOException {
        return client.getFileInfo(path);
    }

    /**
     * Uploads a local file to HDFS.
     *
     * @param hdfsUrl          HDFS URL
     * @param filename         file name to create under {@code hdfsPath}
     * @param hdfsPath         destination HDFS directory
     * @param downloadFilePath local source file path
     * @throws java.io.IOException on upload failure
     */
    public static void uploadToHdfs(String hdfsUrl, String filename, String hdfsPath, String downloadFilePath) throws IOException {
        String hdfsFullPath = hdfsPath + "/" + filename;
        File inputFile = new File(downloadFilePath);
        DFSClient dfsClient = HdfsUtils.createDFSClient(hdfsUrl);
        try {
            copyFromLocalFileToHdfsFile(inputFile, dfsClient, hdfsFullPath);
        } finally {
            // Close the client even if the copy fails (original leaked it on error).
            dfsClient.close();
        }
    }

    /**
     * Copies a local file to an HDFS file.
     *
     * @param inputFile local source file
     * @param client    DFS client
     * @param hdfsPath  destination HDFS file
     * @throws java.io.IOException on copy failure
     */
    public static void copyFromLocalFileToHdfsFile(File inputFile, DFSClient client, String hdfsPath) throws IOException {
        // Both streams are closed here; the original version leaked them.
        OutputStream outputStream = HdfsUtils.getOutputStream(client, hdfsPath, true);
        try {
            InputStream inputStream = new FileInputStream(inputFile);
            try {
                org.opencloudengine.flamingo.mapreduce.util.FileUtils.copy(inputStream, outputStream);
            } finally {
                inputStream.close();
            }
        } finally {
            outputStream.close();
        }
    }

    /**
     * Moves every file under {@code path} into {@code targetDirectory},
     * prefixing each file name with {@code prefixToAppend + "_"}.
     *
     * @param conf            Hadoop configuration
     * @param path            source directory
     * @param prefixToAppend  prefix prepended to each moved file name
     * @param targetDirectory destination directory
     * @throws java.io.IOException on HDFS I/O failure
     * @throws RuntimeException    if a destination file already exists
     */
    public static void moveFileToDirectory(Configuration conf, String path, String prefixToAppend, String targetDirectory) throws IOException {
        FileSystem fileSystem = FileSystem.get(conf);
        FileStatus[] statuses = fileSystem.listStatus(new Path(path));
        for (FileStatus fileStatus : statuses) {
            String filename = prefixToAppend + "_" + fileStatus.getPath().getName();
            if (!isExist(conf, targetDirectory + "/" + filename)) {
                fileSystem.rename(fileStatus.getPath(), new Path(targetDirectory + "/" + filename));
            } else {
                throw new RuntimeException("\t Warn: '" + fileStatus.getPath() + "' cannot moved. Already exists.");
            }
        }
    }

    /**
     * Moves a list of "delay" files into a target directory, renaming each
     * according to its prefix convention ({@code <prefix>-<rest>} where the
     * prefix contains the literal text "delay").
     *
     * @param conf            Hadoop configuration
     * @param delayFiles      source file paths
     * @param targetDirectory destination directory (created if absent)
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static void moveFilesToDirectory(Configuration conf, List<String> delayFiles, String targetDirectory) throws IOException {
        for (String path : delayFiles) {
            String filename = FileUtils.getFilename(path);
            String delayedFilePrefix = filename.split("-")[0];
            String outputHead = delayedFilePrefix.replaceAll("delay", "");
            String outputMiddle = delayedFilePrefix.substring(0, 5); // todo
            String outputTail = filename.replaceAll(delayedFilePrefix, "");

            String target = targetDirectory + "/" + outputHead + "_" + outputMiddle + outputTail;
            logger.info("Acceleration Dir {}", target);

            makeDirectoryIfNotExists(targetDirectory, conf);
            FileSystem fileSystem = FileSystem.get(conf);
            fileSystem.rename(new Path(path), new Path(target));
            logger.info("Moved: '{}' --> '{}'", path, targetDirectory);
        }
    }

    /**
     * Moves several source directories' files into a target directory,
     * continuing past per-file failures.
     *
     * @param conf            Hadoop configuration
     * @param paths           source paths
     * @param prefixToAppend  prefix prepended to each moved file name
     * @param targetDirectory destination directory
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static void moveFilesToDirectory(Configuration conf, List<String> paths, String prefixToAppend, String targetDirectory) throws IOException {
        for (String file : paths) {
            try {
                HdfsUtils.moveFileToDirectory(conf, file, prefixToAppend, targetDirectory);
                logger.info("Moved: '{}' --> '{}'", file, targetDirectory);
            } catch (Exception ex) {
                // Best-effort move: log with the full stack trace and continue
                // (original printed only ex.getMessage() to stderr).
                logger.error("Cannot move '" + file + "' to '" + targetDirectory + "'", ex);
            }
        }
    }

    /**
     * Creates a directory if it does not already exist.
     *
     * @param directory directory to create
     * @param hdfsUrl   HDFS URL
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static void makeDirectoryIfNotExists(String directory, String hdfsUrl) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", hdfsUrl);
        conf.set("hadoop.job.ugi", DEFAULT_UGI);
        FileSystem fileSystem = FileSystem.get(conf);
        if (!isDirectory(fileSystem, directory)) {
            logger.info("Creating HDFS directory [{}].", directory);
            fileSystem.mkdirs(new Path(directory));
        }
    }

    /**
     * Collects all plain files under each of the given HDFS directories.
     *
     * @param hdfsUrl         HDFS URL
     * @param hdfsDirectories directories to scan
     * @return every file found, across all directories
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static String[] getHdfsFiles(String hdfsUrl, List<String> hdfsDirectories) throws IOException {
        List<String> filesInDirectories = new ArrayList<String>();
        FileSystem fs = HdfsUtils.getFileSystem(hdfsUrl);
        for (String hdfsDirectory : hdfsDirectories) {
            filesInDirectories.addAll(HdfsUtils.listFiles(fs, hdfsDirectory));
        }
        return StringUtils.toStringArray(filesInDirectories);
    }

    /**
     * Returns the files under {@code path} whose names end with {@code ext},
     * creating the directory first if it does not exist.
     *
     * @param hdfsUrl HDFS URL
     * @param ext     file name suffix to match (e.g. {@code .dat})
     * @param path    directory to scan
     * @return matching file names
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static String[] getHdfsFiles(String hdfsUrl, String ext, String path) throws IOException {
        ArrayList<String> files = new ArrayList<String>();
        makeDirectoryIfNotExists(path, hdfsUrl);
        // The original ignored 'ext' and always returned an empty array,
        // contradicting its documented contract; implement the filter.
        FileSystem fs = HdfsUtils.getFileSystem(hdfsUrl);
        for (String file : listFiles(fs, path)) {
            if (ext == null || file.endsWith(ext)) {
                files.add(file);
            }
        }
        return StringUtils.toStringArray(files);
    }

    /**
     * Tests whether a path exists and is a plain file.
     *
     * @param hdfsUrl HDFS URL
     * @param path    path to test
     * @return {@code true} if the path exists and is not a directory
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean isExist(String hdfsUrl, String path) throws IOException {
        DFSClient client = HdfsUtils.createDFSClient(hdfsUrl);
        try {
            HdfsFileStatus status = client.getFileInfo(path);
            return status != null && !status.isDir();
        } finally {
            client.close();
        }
    }

    /**
     * Creates a directory if it does not already exist.
     *
     * @param directory directory to create
     * @param conf      Hadoop configuration
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static void makeDirectoryIfNotExists(String directory, Configuration conf) throws IOException {
        FileSystem fileSystem = FileSystem.get(conf);
        if (!isExist(conf, directory) && !isDirectory(fileSystem, directory)) {
            fileSystem.mkdirs(new Path(directory));
        }
    }

    /**
     * Tests whether a path exists, using the default configuration.
     *
     * @param path path to test
     * @return {@code true} if the path exists
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean isExist(String path) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // The original called getFileStatus(), which throws FileNotFoundException
        // for a missing path instead of returning null — it could never return false.
        return fs.exists(new Path(path));
    }

    /**
     * Tests whether a path exists.
     *
     * @param conf Hadoop job configuration
     * @param path path to test
     * @return {@code true} if the path exists
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static boolean isExist(Configuration conf, String path) throws IOException {
        FileSystem fs = FileSystem.get(conf);
        return fs.exists(new Path(path));
    }

    /**
     * Recursively deletes every path matching a glob pattern on the given HDFS.
     *
     * @param hdfsUrl       HDFS URL
     * @param hdfsDirectory glob pattern of paths to delete
     * @throws java.io.IOException on delete failure
     */
    public static void deleteFromHdfs(String hdfsUrl, String hdfsDirectory) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", hdfsUrl);
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] statuses = fs.globStatus(new Path(hdfsDirectory));
        for (FileStatus fileStatus : statuses) {
            fs.delete(fileStatus.getPath(), true);
        }
    }

    /**
     * Recursively deletes every path matching a glob pattern, using the
     * default configuration.
     *
     * @param hdfsDirectory glob pattern of paths to delete
     * @throws java.io.IOException on delete failure
     */
    public static void deleteFromHdfs(String hdfsDirectory) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] statuses = fs.globStatus(new Path(hdfsDirectory));
        for (FileStatus fileStatus : statuses) {
            fs.delete(fileStatus.getPath(), true);
        }
    }

    /**
     * Merges all files in a directory into a single file at the same path.
     * The directory is replaced by the merged file; no-op for a plain file.
     *
     * @param hdfsUrl HDFS URL
     * @param path    directory whose files are merged
     * @throws java.io.IOException on merge failure
     */
    public static void merge(String hdfsUrl, String path) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", hdfsUrl);
        FileSystem fileSystem = FileSystem.get(conf);
        Path source = new Path(path);
        if (!fileSystem.getFileStatus(source).isDir()) {
            // Only directories are merged.
            return;
        }
        Path target = new Path(path + "_temporary");
        FileUtil.copyMerge(fileSystem, source, fileSystem, target, true, conf, null);
        // Remove the (now empty) source directory, then move the merged
        // temporary file into its place. The original issued a redundant
        // second delete of the temp path after the rename.
        fileSystem.delete(source, true);
        fileSystem.rename(target, source);
    }

    /**
     * Returns the first plain file under a path after sorting with
     * {@link SortableFileStatus}, or {@code null} when the directory has no files.
     *
     * @param conf       Hadoop configuration
     * @param path       directory to scan
     * @param pathFilter optional filter ({@code null} means accept all)
     * @return the selected file's path, or {@code null} if none
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static String getLatestFile(Configuration conf, String path, PathFilter pathFilter) throws IOException {
        List<SortableFileStatus> files = new ArrayList<SortableFileStatus>();
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] statuses = fs.listStatus(new Path(path), pathFilter != null ? pathFilter : new BypassPathFilter());
        if (statuses != null) {
            for (FileStatus fileStatus : statuses) {
                if (!fileStatus.isDir()) {
                    files.add(new SortableFileStatus(fileStatus));
                }
            }
        }
        if (files.isEmpty()) {
            // Original threw IndexOutOfBoundsException on an empty directory.
            return null;
        }
        Collections.sort(files);
        FileStatus fileStatus = files.get(0).fileStatus;
        return fileStatus.getPath().toUri().getPath();
    }

    /**
     * Recursively deletes a path.
     *
     * @param configuration Hadoop configuration
     * @param path          path to delete
     * @throws java.io.IOException on delete failure
     */
    public static void delete(Configuration configuration, String path) throws IOException {
        FileSystem fileSystem = FileSystem.get(configuration);
        fileSystem.delete(new Path(path), true);
    }

    /**
     * Returns the plain files under a path whose names start with a prefix.
     *
     * @param conf       Hadoop configuration
     * @param path       directory to scan
     * @param prefix     file name prefix to match
     * @param pathFilter optional filter ({@code null} means accept all)
     * @return matching file paths
     * @throws java.io.IOException on HDFS I/O failure
     */
    public static List<String> getPrefixFiles(Configuration conf, String path, String prefix, PathFilter pathFilter) throws IOException {
        List<String> files = new ArrayList<String>();
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] statuses = fs.listStatus(new Path(path), pathFilter != null ? pathFilter : new BypassPathFilter());
        if (statuses != null) {
            for (FileStatus fileStatus : statuses) {
                if (!fileStatus.isDir() && fileStatus.getPath().getName().startsWith(prefix)) {
                    files.add(fileStatus.getPath().toUri().getPath());
                }
            }
        }
        return files;
    }
}