Java tutorial
/* * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 * (the "License"). You may not use this work except in compliance with the License, which is * available at www.apache.org/licenses/LICENSE-2.0 * * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, * either express or implied, as more fully set forth in the License. * * See the NOTICE file distributed with this work for information regarding copyright ownership. */ package alluxio.hadoop; import alluxio.AlluxioURI; import alluxio.Configuration; import alluxio.Constants; import alluxio.PropertyKey; import alluxio.client.ClientContext; import alluxio.client.block.BlockStoreContext; import alluxio.client.file.FileOutStream; import alluxio.client.file.FileSystem; import alluxio.client.file.FileSystemContext; import alluxio.client.file.FileSystemMasterClient; import alluxio.client.file.URIStatus; import alluxio.client.file.options.CreateDirectoryOptions; import alluxio.client.file.options.CreateFileOptions; import alluxio.client.file.options.DeleteOptions; import alluxio.client.file.options.SetAttributeOptions; import alluxio.client.lineage.LineageContext; import alluxio.exception.AlluxioException; import alluxio.exception.ExceptionMessage; import alluxio.exception.FileDoesNotExistException; import alluxio.exception.InvalidPathException; import alluxio.exception.PreconditionMessage; import alluxio.util.CommonUtils; import alluxio.wire.FileBlockInfo; import com.google.common.base.Preconditions; import com.google.common.net.HostAndPort; import edu.umd.cs.findbugs.annotations.SuppressFBWarnings; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.util.Progressable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.util.ArrayList; import java.util.List; import javax.annotation.concurrent.GuardedBy; import javax.annotation.concurrent.NotThreadSafe; /** * Base class for Apache Hadoop based Alluxio {@link org.apache.hadoop.fs.FileSystem}. This class * really just delegates to {@link alluxio.client.file.FileSystem} for most operations. * * All implementing classes must define {@link #isZookeeperMode()} which states if fault tolerant is * used and {@link #getScheme()} for Hadoop's {@link java.util.ServiceLoader} support. */ @NotThreadSafe abstract class AbstractFileSystem extends org.apache.hadoop.fs.FileSystem { public static final String FIRST_COM_PATH = "alluxio_dep/"; private static final Logger LOG = LoggerFactory.getLogger(Constants.LOGGER_TYPE); // Always tell Hadoop that we have 3x replication. private static final int BLOCK_REPLICATION_CONSTANT = 3; /** Lock for initializing the contexts, currently only one set of contexts is supported. */ private static final Object INIT_LOCK = new Object(); /** Flag for if the contexts have been initialized. */ @GuardedBy("INIT_LOCK") private static volatile boolean sInitialized = false; @GuardedBy("INIT_LOCK") private static FileSystem sFileSystem = null; private URI mUri = null; private Path mWorkingDir = new Path(AlluxioURI.SEPARATOR); private Statistics mStatistics = null; private String mAlluxioHeader = null; @Override public FSDataOutputStream append(Path path, int bufferSize, Progressable progress) throws IOException { LOG.info("append({}, {}, {})", path, bufferSize, progress); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); try { if (!sFileSystem.exists(uri)) { return new FSDataOutputStream(sFileSystem.createFile(uri), mStatistics); } else { throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri)); } } catch (AlluxioException e) { throw new IOException(e); } } @Override public void close() throws IOException { super.close(); } /** * Attempts to create a file. Overwrite will not succeed if the path exists and is a folder. * * @param path path to create * @param permission permissions of the created file/folder * @param overwrite overwrite if file exists * @param bufferSize the size in bytes of the buffer to be used * @param replication under filesystem replication factor * @param blockSize block size in bytes * @param progress queryable progress * @return an {@link FSDataOutputStream} created at the indicated path of a file * @throws IOException if overwrite is not specified and the path already exists or if the path is * a folder */ @Override public FSDataOutputStream create(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { LOG.info("create({}, {}, {}, {}, {}, {}, {})", path, permission, overwrite, bufferSize, replication, blockSize, progress); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } // Check whether the file already exists, and delete it if overwrite is true AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); try { if (sFileSystem.exists(uri)) { if (!overwrite) { throw new IOException(ExceptionMessage.FILE_ALREADY_EXISTS.getMessage(uri)); } if (sFileSystem.getStatus(uri).isFolder()) { throw new IOException(ExceptionMessage.FILE_CREATE_IS_DIRECTORY.getMessage(uri)); } sFileSystem.delete(uri); } } catch (AlluxioException e) { throw new IOException(e); } // The file no longer exists at this point, so we can create it CreateFileOptions options = CreateFileOptions.defaults().setBlockSizeBytes(blockSize); try { FileOutStream outStream = sFileSystem.createFile(uri, options); return new FSDataOutputStream(outStream, mStatistics); } catch (AlluxioException e) { throw new IOException(e); } } /** * Opens an {@link FSDataOutputStream} at the indicated Path with write-progress reporting. * Same as {@link #create(Path, boolean, int, short, long, Progressable)}, except fails if parent * directory doesn't already exist. * * TODO(hy): We need to refactor this method after having a new internal API support (ALLUXIO-46). * * @param path the file name to open * @param overwrite if a file with this name already exists, then if true, the file will be * overwritten, and if false an error will be thrown. * @param bufferSize the size of the buffer to be used * @param replication required block replication for the file * @param blockSize the size in bytes of the buffer to be used * @param progress queryable progress * @throws IOException if 1) overwrite is not specified and the path already exists, 2) if the * path is a folder, or 3) the parent directory does not exist * @see #setPermission(Path, FsPermission) * @deprecated API only for 0.20-append */ @Override @Deprecated public FSDataOutputStream createNonRecursive(Path path, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException { AlluxioURI parentUri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path.getParent())); ensureExists(parentUri); return this.create(path, permission, overwrite, bufferSize, replication, blockSize, progress); } /** * Attempts to delete the file or directory with the specified path. * * @param path path to delete * @return true if one or more files/directories were deleted; false otherwise * @throws IOException if the path failed to be deleted due to some constraint * @deprecated Use {@link #delete(Path, boolean)} instead. */ @Override @Deprecated public boolean delete(Path path) throws IOException { return delete(path, true); } /** * Attempts to delete the file or directory with the specified path. * * @param path path to delete * @param recursive if true, will attempt to delete all children of the path * @return true if one or more files/directories were deleted; false otherwise * @throws IOException if the path failed to be deleted due to some constraint (ie. non empty * directory with recursive flag disabled) */ @Override public boolean delete(Path path, boolean recursive) throws IOException { LOG.info("delete({}, {})", path, recursive); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); DeleteOptions options = DeleteOptions.defaults().setRecursive(recursive); try { sFileSystem.delete(uri, options); return true; } catch (InvalidPathException | FileDoesNotExistException e) { LOG.info("delete failed: {}", e.getMessage()); return false; } catch (AlluxioException e) { throw new IOException(e); } } @Override public long getDefaultBlockSize() { return Configuration.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT); } @Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { if (file == null) { return null; } if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI path = new AlluxioURI(HadoopUtils.getPathWithoutScheme(file.getPath())); List<FileBlockInfo> blocks = getFileBlocks(path); List<BlockLocation> blockLocations = new ArrayList<>(); for (FileBlockInfo fileBlockInfo : blocks) { long offset = fileBlockInfo.getOffset(); long end = offset + fileBlockInfo.getBlockInfo().getLength(); // Check if there is any overlapping between [start, start+len] and [offset, end] if (end >= start && offset <= start + len) { ArrayList<String> names = new ArrayList<>(); ArrayList<String> hosts = new ArrayList<>(); // add the existing in-memory block locations for (alluxio.wire.BlockLocation location : fileBlockInfo.getBlockInfo().getLocations()) { HostAndPort address = HostAndPort.fromParts(location.getWorkerAddress().getHost(), location.getWorkerAddress().getDataPort()); names.add(address.toString()); hosts.add(address.getHostText()); } // add under file system locations for (String location : fileBlockInfo.getUfsLocations()) { names.add(location); hosts.add(HostAndPort.fromString(location).getHostText()); } blockLocations.add(new BlockLocation(CommonUtils.toStringArray(names), CommonUtils.toStringArray(hosts), offset, fileBlockInfo.getBlockInfo().getLength())); } } BlockLocation[] ret = new BlockLocation[blockLocations.size()]; blockLocations.toArray(ret); return ret; } /** * {@inheritDoc} * * If the file does not exist in Alluxio, query it from HDFS. */ @Override public FileStatus getFileStatus(Path path) throws IOException { LOG.info("getFileStatus({})", path); if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); URIStatus fileStatus; try { fileStatus = sFileSystem.getStatus(uri); } catch (FileDoesNotExistException e) { throw new FileNotFoundException(e.getMessage()); } catch (AlluxioException e) { throw new IOException(e); } return new FileStatus(fileStatus.getLength(), fileStatus.isFolder(), BLOCK_REPLICATION_CONSTANT, fileStatus.getBlockSizeBytes(), fileStatus.getCreationTimeMs(), fileStatus.getCreationTimeMs(), new FsPermission((short) fileStatus.getMode()), fileStatus.getOwner(), fileStatus.getGroup(), new Path(mAlluxioHeader + uri)); } /** * Changes owner or group of a path (i.e. a file or a directory). If username is null, the * original username remains unchanged. Same as groupname. If username and groupname are non-null, * both of them will be changed. * * @param path path to set owner or group * @param username username to be set * @param groupname groupname to be set * @throws IOException if changing owner or group of the path failed */ @Override public void setOwner(Path path, final String username, final String groupname) throws IOException { LOG.info("setOwner({},{},{})", path, username, groupname); AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); SetAttributeOptions options = SetAttributeOptions.defaults(); boolean ownerOrGroupChanged = false; if (username != null && !username.isEmpty()) { options.setOwner(username).setRecursive(false); ownerOrGroupChanged = true; } if (groupname != null && !groupname.isEmpty()) { options.setGroup(groupname).setRecursive(false); ownerOrGroupChanged = true; } if (ownerOrGroupChanged) { try { sFileSystem.setAttribute(uri, options); } catch (AlluxioException e) { throw new IOException(e); } } } /** * Changes permission of a path. * * @param path path to set permission * @param permission permission set to path * @throws IOException if the path failed to be changed permission */ public void setPermission(Path path, FsPermission permission) throws IOException { LOG.info("setMode({},{})", path, permission.toString()); AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); SetAttributeOptions options = SetAttributeOptions.defaults().setMode(permission.toShort()) .setRecursive(false); try { sFileSystem.setAttribute(uri, options); } catch (AlluxioException e) { throw new IOException(e); } } /** * Gets the URI scheme that maps to the {@link org.apache.hadoop.fs.FileSystem}. This was * introduced in Hadoop 2.x as a means to make loading new {@link org.apache.hadoop.fs.FileSystem} * s simpler. This doesn't exist in Hadoop 1.x, so cannot put {@literal @Override}. * * @return scheme hadoop should map to * * @see org.apache.hadoop.fs.FileSystem#createFileSystem(java.net.URI, * org.apache.hadoop.conf.Configuration) */ public abstract String getScheme(); @Override public URI getUri() { return mUri; } @Override public Path getWorkingDirectory() { LOG.info("getWorkingDirectory: {}", mWorkingDir); return mWorkingDir; } /** * {@inheritDoc} * * Sets up a lazy connection to Alluxio through mFileSystem. This method will override and * invalidate the current contexts. This must be called before client operations in order to * guarantee the integrity of the contexts, meaning users should not alternate between using the * Hadoop compatible API and native Alluxio API in the same process. */ @SuppressFBWarnings("ST_WRITE_TO_STATIC_FROM_INSTANCE_METHOD") @Override public void initialize(URI uri, org.apache.hadoop.conf.Configuration conf) throws IOException { Preconditions.checkNotNull(uri.getHost(), PreconditionMessage.URI_HOST_NULL); Preconditions.checkNotNull(uri.getPort(), PreconditionMessage.URI_PORT_NULL); super.initialize(uri, conf); LOG.info("initialize({}, {}). Connecting to Alluxio: {}", uri, conf, uri.toString()); HadoopUtils.addS3Credentials(conf); HadoopUtils.addSwiftCredentials(conf); setConf(conf); mAlluxioHeader = getScheme() + "://" + uri.getHost() + ":" + uri.getPort(); // Set the statistics member. Use mStatistics instead of the parent class's variable. mStatistics = statistics; mUri = URI.create(mAlluxioHeader); LOG.info("{} {}", mAlluxioHeader, mUri); if (sInitialized) { return; } synchronized (INIT_LOCK) { // If someone has initialized the object since the last check, return if (sInitialized) { return; } // Load Alluxio configuration if any and merge to the one in Alluxio file system. These // modifications to ClientContext are global, affecting all Alluxio clients in this JVM. // We assume here that all clients use the same configuration. ConfUtils.mergeHadoopConfiguration(conf); Configuration.set(PropertyKey.MASTER_HOSTNAME, uri.getHost()); Configuration.set(PropertyKey.MASTER_RPC_PORT, Integer.toString(uri.getPort())); Configuration.set(PropertyKey.ZOOKEEPER_ENABLED, Boolean.toString(isZookeeperMode())); // These must be reset to pick up the change to the master address. // TODO(andrew): We should reset key value system in this situation - see ALLUXIO-1706. ClientContext.init(); FileSystemContext.INSTANCE.reset(); BlockStoreContext.INSTANCE.reset(); LineageContext.INSTANCE.reset(); sFileSystem = FileSystem.Factory.get(); sInitialized = true; } } /** * Determines if zookeeper should be used for the {@link org.apache.hadoop.fs.FileSystem}. This * method should only be used for * {@link #initialize(java.net.URI, org.apache.hadoop.conf.Configuration)}. * * @return true if zookeeper should be used */ protected abstract boolean isZookeeperMode(); @Override public FileStatus[] listStatus(Path path) throws IOException { LOG.info("listStatus({})", path); if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); List<URIStatus> statuses; try { statuses = sFileSystem.listStatus(uri); } catch (FileDoesNotExistException e) { throw new FileNotFoundException(HadoopUtils.getPathWithoutScheme(path)); } catch (AlluxioException e) { throw new IOException(e); } FileStatus[] ret = new FileStatus[statuses.size()]; for (int k = 0; k < statuses.size(); k++) { URIStatus status = statuses.get(k); // TODO(hy): Replicate 3 with the number of disk replications. ret[k] = new FileStatus(status.getLength(), status.isFolder(), 3, status.getBlockSizeBytes(), status.getCreationTimeMs(), status.getCreationTimeMs(), null, null, null, new Path(mAlluxioHeader + status.getPath())); } return ret; } /** * Attempts to create a folder with the specified path. Parent directories will be created. * * @param path path to create * @param permission permissions to grant the created folder * @return true if the indicated folder is created successfully or already exists * @throws IOException if the folder cannot be created */ @Override public boolean mkdirs(Path path, FsPermission permission) throws IOException { LOG.info("mkdirs({}, {})", path, permission); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); CreateDirectoryOptions options = CreateDirectoryOptions.defaults().setRecursive(true).setAllowExists(true); try { sFileSystem.createDirectory(uri, options); return true; } catch (AlluxioException e) { throw new IOException(e); } } /** * Attempts to open the specified file for reading. * * @param path the file name to open * @param bufferSize the size in bytes of the buffer to be used * @return an {@link FSDataInputStream} at the indicated path of a file * @throws IOException if the file cannot be opened (e.g., the path is a folder) */ @Override public FSDataInputStream open(Path path, int bufferSize) throws IOException { LOG.info("open({}, {})", path, bufferSize); if (mStatistics != null) { mStatistics.incrementReadOps(1); } AlluxioURI uri = new AlluxioURI(HadoopUtils.getPathWithoutScheme(path)); return new FSDataInputStream(new HdfsFileInputStream(uri, getConf(), bufferSize, mStatistics)); } @Override public boolean rename(Path src, Path dst) throws IOException { LOG.info("rename({}, {})", src, dst); if (mStatistics != null) { mStatistics.incrementWriteOps(1); } AlluxioURI srcPath = new AlluxioURI(HadoopUtils.getPathWithoutScheme(src)); AlluxioURI dstPath = new AlluxioURI(HadoopUtils.getPathWithoutScheme(dst)); ensureExists(srcPath); URIStatus dstStatus; try { dstStatus = sFileSystem.getStatus(dstPath); } catch (IOException | AlluxioException e) { dstStatus = null; } // If the destination is an existing folder, try to move the src into the folder if (dstStatus != null && dstStatus.isFolder()) { dstPath = dstPath.join(srcPath.getName()); } try { sFileSystem.rename(srcPath, dstPath); return true; } catch (IOException | AlluxioException e) { LOG.error("Failed to rename {} to {}", src, dst, e); return false; } } @Override public void setWorkingDirectory(Path path) { LOG.info("setWorkingDirectory({})", path); if (path.isAbsolute()) { mWorkingDir = path; } else { mWorkingDir = new Path(mWorkingDir, path); } } /** * Convenience method which ensures the given path exists, wrapping any {@link AlluxioException} * in {@link IOException}. * * @param path the path to look up * @throws IOException if an Alluxio exception occurs */ private void ensureExists(AlluxioURI path) throws IOException { try { sFileSystem.getStatus(path); } catch (AlluxioException e) { throw new IOException(e); } } private List<FileBlockInfo> getFileBlocks(AlluxioURI path) throws IOException { FileSystemMasterClient master = FileSystemContext.INSTANCE.acquireMasterClient(); try { return master.getStatus(path).getFileBlockInfos(); } catch (AlluxioException e) { throw new IOException(e); } finally { FileSystemContext.INSTANCE.releaseMasterClient(master); } } }