com.cloudera.oryx.common.servcomp.Store.java Source code

Introduction

Here is the source code for com.cloudera.oryx.common.servcomp.Store.java

Source

/*
 * Copyright (c) 2013, Cloudera, Inc. All Rights Reserved.
 *
 * Cloudera, Inc. licenses this file to you under the Apache License,
 * Version 2.0 (the "License"). You may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for
 * the specific language governing permissions and limitations under the
 * License.
 */

package com.cloudera.oryx.common.servcomp;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URI;
import java.util.Collections;
import java.util.List;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipInputStream;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.cloudera.oryx.common.io.IOUtils;

/**
 * Interface to backing store -- for now, HDFS. This allows the Hadoop-compatible
 * binaries to write most of their code in terms of abstract operations.
 *
 * @author Sean Owen
 */
public final class Store {

    private static final Logger log = LoggerFactory.getLogger(Store.class);

    private static final Store INSTANCE = new Store();

    private final FileSystem fs;

    private Store() {
        try {
            Configuration conf = OryxConfiguration.get();
            if (Namespaces.isLocalData()) {
                fs = FileSystem.getLocal(conf);
            } else {
                UserGroupInformation.setConfiguration(conf);
                fs = FileSystem.get(URI.create(Namespaces.get().getPrefix()), conf);
            }
        } catch (IOException ioe) {
            log.error("Unable to configure Store", ioe);
            throw new IllegalStateException(ioe);
        }
    }

    /**
     * @return singleton {@code Store} instance
     */
    public static Store get() {
        return INSTANCE;
    }

    /**
     * Detects if a file or directory exists in the remote file system.
     *
     * @param key file to test
     * @param isFile if true, test for a file, otherwise for a directory
     * @return {@code true} iff the path exists and is of the requested type (file or directory)
     */
    public boolean exists(String key, boolean isFile) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);
        return fs.exists(path) && (fs.isFile(path) == isFile);
    }

    /**
     * Gets the size in bytes of a remote file.
     *
     * @param key file to test
     * @return size of file in bytes
     * @throws java.io.FileNotFoundException if there is no file at the key
     */
    public long getSize(String key) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);
        return fs.getFileStatus(path).getLen();
    }

    /**
     * Gets the total size of all files at or under a path, recursively.
     *
     * @param key path to compute size of
     * @return total size, in bytes, of all files under the requested path
     */
    public long getSizeRecursive(String key) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);

        if (!fs.exists(path)) {
            return 0L;
        }

        long size = 0L;
        RemoteIterator<? extends FileStatus> it = fs.listFiles(path, true);
        while (it.hasNext()) {
            FileStatus status = it.next();
            // listFiles() should only show files
            Preconditions.checkState(status.isFile());
            size += status.getLen();
        }

        return size;
    }

    /**
     * @param key file to read
     * @return a byte stream delivering the file's contents
     * @throws IOException if an error occurs, such as when the file doesn't exist
     */
    public InputStream streamFrom(String key) throws IOException {
        Preconditions.checkNotNull(key);
        InputStream in = fs.open(Namespaces.toPath(key));
        if (key.endsWith(".gz")) {
            in = new GZIPInputStream(in);
        } else if (key.endsWith(".zip")) {
            in = new ZipInputStream(in);
        }
        return in;
    }

    /**
     * @param key file to write
     * @return a byte stream to send data to
     * @throws IOException if an error occurs, or if the file already exists
     */
    public OutputStream streamTo(String key) throws IOException {
        return fs.create(Namespaces.toPath(key));
    }

    /**
     * @param key text file to read
     * @return a character stream delivering the file's contents
     * @throws IOException if an error occurs, such as when the file doesn't exist
     */
    public BufferedReader readFrom(String key) throws IOException {
        return new BufferedReader(new InputStreamReader(streamFrom(key), Charsets.UTF_8), 1 << 20); // ~1MB
    }

    /**
     * Ensures that the parent directory of the given path exists, creating it if necessary.
     */
    private void makeParentDirs(Path path) throws IOException {
        Preconditions.checkNotNull(path);
        Path parent = path.getParent();
        if (fs.exists(parent)) {
            // Can't be a file
            return;
        }
        boolean success;
        try {
            success = fs.mkdirs(parent);
        } catch (AccessControlException ace) {
            log.error("Permissions problem; is {} writable in HDFS?", parent);
            throw ace;
        }
        if (!success && !fs.exists(parent)) {
            throw new IOException("Can't make " + parent);
        }
    }

    /**
     * @param key location of file to download from distributed storage
     * @param file local {@link File} to store data into -- can be directory or explicit file
     * @throws IOException if an error occurs while downloading
     */
    public void download(String key, File file) throws IOException {
        Preconditions.checkNotNull(key);
        Preconditions.checkNotNull(file);
        Path path = Namespaces.toPath(key);
        Path filePath = new Path(file.getAbsolutePath());
        fs.copyToLocalFile(false, path, filePath);
    }

    /**
     * @param dirKey location of directory whose contents will be downloaded
     * @param dir local {@link File} to store files/directories under
     * @throws IOException if an error occurs while downloading
     */
    public void downloadDirectory(String dirKey, File dir) throws IOException {
        Preconditions.checkNotNull(dirKey);
        Preconditions.checkNotNull(dir);
        Preconditions.checkArgument(dir.exists() && dir.isDirectory(), "Not a directory: %s", dir);

        Path dirPath = Namespaces.toPath(dirKey);
        if (!fs.exists(dirPath)) {
            return;
        }
        Preconditions.checkArgument(fs.getFileStatus(dirPath).isDirectory(), "Not a directory: %s", dirPath);

        boolean complete = false;
        try {
            for (FileStatus status : fs.listStatus(dirPath)) {
                String name = status.getPath().getName();
                String fromKey = dirKey + '/' + name;
                File toLocal = new File(dir, name);
                if (status.isFile()) {
                    download(fromKey, toLocal);
                } else {
                    boolean success = toLocal.mkdir();
                    if (!success && !toLocal.exists()) {
                        throw new IOException("Can't make " + toLocal);
                    }
                    downloadDirectory(fromKey, toLocal);
                }
            }
            complete = true;
        } finally {
            if (!complete) {
                log.warn("Failed to download {} so deleting {}", dirKey, dir);
                IOUtils.deleteRecursively(dir);
            }
        }
    }

    /**
     * Uploads a local file to a remote file.
     *
     * @param key file to write to
     * @param file local file whose bytes will be uploaded
     * @param overwrite if true, overwrite existing file data if it already exists
     * @throws IOException if the data can't be written, or if the file already exists and overwrite is false
     */
    public void upload(String key, File file, boolean overwrite) throws IOException {
        Preconditions.checkNotNull(key);
        Preconditions.checkNotNull(file);
        Preconditions.checkArgument(file.exists(), "Doesn't exist: %s", file);
        Preconditions.checkArgument(file.isFile(), "Not a file: %s", file);
        if (!overwrite && exists(key, true)) {
            throw new IOException(key + " already exists");
        }

        Path path = Namespaces.toPath(key);
        makeParentDirs(path);
        Path filePath = new Path(file.getAbsolutePath());
        try {
            fs.copyFromLocalFile(false, overwrite, filePath, path);
        } catch (AccessControlException ace) {
            log.error("Permissions problem; is {} writable in HDFS?", path);
            throw ace;
        }
        if (!fs.exists(path)) {
            throw new IOException("Couldn't upload " + filePath + " to " + path);
        }
    }

    /**
     * Uploads the contents of a directory recursively.
     *
     * @param dirKey location under which to store the contents found under {@code dir}
     * @param dir directory whose <em>contents</em> will be uploaded
     * @param overwrite if true, overwrite existing files
     * @throws IOException if the data can't be written, or if a file already exists and overwrite is false
     */
    public void uploadDirectory(String dirKey, File dir, boolean overwrite) throws IOException {
        Preconditions.checkNotNull(dirKey);
        Preconditions.checkNotNull(dir);
        Preconditions.checkArgument(dir.exists() && dir.isDirectory(), "Not a directory: %s", dir);
        File[] contents = dir.listFiles(IOUtils.NOT_HIDDEN);
        if (contents != null) {
            boolean complete = false;
            try {
                for (File content : contents) {
                    String toKey = dirKey + '/' + content.getName();
                    if (content.isFile()) {
                        upload(toKey, content, overwrite);
                    } else {
                        uploadDirectory(toKey, content, overwrite);
                    }
                }
                complete = true;
            } finally {
                if (!complete) {
                    log.warn("Failed to upload {} so deleting {}", dir, dirKey);
                    recursiveDelete(dirKey);
                }
            }
        }
    }

    /**
     * Creates a 0-length file.
     *
     * @param key file to create
     */
    public void touch(String key) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);
        makeParentDirs(path);
        boolean success;
        try {
            success = fs.createNewFile(path);
        } catch (AccessControlException ace) {
            log.error("Permissions problem; is {} writable in HDFS?", path);
            throw ace;
        }
        if (!success) {
            throw new IOException("Can't create " + path);
        }
    }

    /**
     * Makes a directory. If the file system has no notion of directories, it creates a 0-length file instead.
     *
     * @param key directory to create
     */
    public void mkdir(String key) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);
        if (fs.exists(path)) {
            if (fs.isDirectory(path)) {
                // OK
                return;
            }
            throw new IOException("Already exists as file: " + path);
        }
        boolean success;
        try {
            success = fs.mkdirs(path);
        } catch (AccessControlException ace) {
            log.error("Permissions problem; is {} writable in HDFS?", path);
            throw ace;
        }
        if (!success && !fs.exists(path)) {
            throw new IOException("Can't mkdirs for " + path);
        }
    }

    /**
     * Deletes the file at the given location.
     *
     * @param key file to delete
     */
    public void delete(String key) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);
        if (!fs.isFile(path)) {
            throw new IOException("Not a file: " + path);
        }
        boolean success;
        try {
            success = fs.delete(path, false);
        } catch (AccessControlException ace) {
            log.error("Permissions problem; is {} writable in HDFS?", path);
            throw ace;
        }
        if (!success) {
            throw new IOException("Can't delete " + path);
        }
    }

    /**
     * Recursively deletes a file/directory. If the file system does not have a notion of directories, this deletes
     * all keys that begin with the given prefix.
     *
     * @param keyPrefix file/directory ("prefix") to delete
     */
    public void recursiveDelete(String keyPrefix) throws IOException {
        Preconditions.checkNotNull(keyPrefix);
        Path path = Namespaces.toPath(keyPrefix);
        if (!fs.exists(path)) {
            return;
        }

        boolean success;
        try {
            log.info("Deleting recursively: {}", path);
            success = fs.delete(path, true);
        } catch (AccessControlException ace) {
            log.error("Permissions problem; is {} writable in HDFS?", path);
            throw ace;
        }
        if (!success) {
            throw new IOException("Can't delete " + path);
        }
    }

    /**
     * Lists the contents of a directory. For file systems without a notion of directories, this lists keys that
     * begin with the given prefix, but excludes "directories" (keys with the same prefix that are followed by
     * more path elements). Results are returned in lexicographically sorted order.
     *
     * @param prefix directory to list
     * @param files if true, only list files, not directories
     * @return list of keys representing directory contents
     */
    public List<String> list(String prefix, boolean files) throws IOException {
        Preconditions.checkNotNull(prefix);
        Path path = Namespaces.toPath(prefix);
        if (!fs.exists(path)) {
            return Collections.emptyList();
        }

        Preconditions.checkArgument(fs.getFileStatus(path).isDirectory(), "Not a directory: %s", path);
        FileStatus[] statuses = fs.listStatus(path, new FilesOrDirsPathFilter(fs, files));
        String prefixString = Namespaces.get().getPrefix();

        List<String> result = Lists.newArrayListWithCapacity(statuses.length);
        for (FileStatus fileStatus : statuses) {
            String listPath = fileStatus.getPath().toString();
            Preconditions.checkState(listPath.startsWith(prefixString), "%s doesn't start with %s", listPath,
                    prefixString);
            if (!listPath.endsWith("_SUCCESS")) {
                listPath = listPath.substring(prefixString.length());
                if (fileStatus.isDirectory() && !listPath.endsWith("/")) {
                    listPath += "/";
                }
                result.add(listPath);
            }
        }
        Collections.sort(result);
        return result;
    }

    /**
     * @param key file to test
     * @return last-modified time of file, in milliseconds since the epoch
     * @throws java.io.FileNotFoundException if the file does not exist
     */
    public long getLastModified(String key) throws IOException {
        Preconditions.checkNotNull(key);
        Path path = Namespaces.toPath(key);
        FileStatus status = fs.getFileStatus(path);
        return status.getModificationTime();
    }

}
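
Example Usage

The Store above is used as a process-wide singleton obtained via Store.get(). The sketch below is a minimal, hypothetical usage example: the key names and local file paths are made up for illustration, and keys are assumed to be resolved against the configured Namespaces prefix. It shows a typical upload, list, read, and cleanup round trip against the backing store.

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;

import com.cloudera.oryx.common.servcomp.Store;

// Hypothetical usage sketch: the keys and local paths below are illustrative only.
public final class StoreUsageExample {

    public static void main(String[] args) throws IOException {
        Store store = Store.get(); // process-wide singleton

        String key = "example/data/input.csv";       // assumed key under the configured prefix
        File localFile = new File("/tmp/input.csv");  // assumed local file to upload

        // Upload the local file, overwriting any existing data at the key
        store.upload(key, localFile, true);

        // Confirm the file landed, then check its size and last-modified time
        if (store.exists(key, true)) {
            System.out.println("Size: " + store.getSize(key) + " bytes");
            System.out.println("Last modified: " + store.getLastModified(key));
        }

        // List files (not directories) directly under the parent "directory"
        for (String child : store.list("example/data", true)) {
            System.out.println("Found: " + child);
        }

        // Read the file back as UTF-8 text; .gz and .zip keys are decompressed transparently
        try (BufferedReader reader = store.readFrom(key)) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }

        // Remove everything under the example prefix
        store.recursiveDelete("example/data");
    }
}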