co.cask.cdap.common.io.Locations.java Source code

Java tutorial

Introduction

Here is the source code for co.cask.cdap.common.io.Locations.java

Source

/*
 * Copyright © 2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package co.cask.cdap.common.io;

import co.cask.cdap.common.lang.FunctionWithException;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterators;
import com.google.common.io.Closeables;
import com.google.common.io.InputSupplier;
import com.google.common.io.OutputSupplier;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.twill.filesystem.FileContextLocationFactory;
import org.apache.twill.filesystem.HDFSLocationFactory;
import org.apache.twill.filesystem.LocalLocationFactory;
import org.apache.twill.filesystem.Location;
import org.apache.twill.filesystem.LocationFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Method;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.PrivilegedExceptionAction;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import javax.annotation.Nullable;

/**
 * Utility class to help interaction with {@link Location}.
 */
public final class Locations {

    /** Logger used by the static helpers of this class. */
    private static final Logger LOG = LoggerFactory.getLogger(Locations.class);

    /** Factory used to turn a local {@link File} into a {@link Location}. */
    private static final LocalLocationFactory LOCAL_LOCATION_FACTORY = new LocalLocationFactory();

    /**
     * Converts a Hadoop {@link FileStatus} into a {@link LocationStatus} carrying the URI of the path,
     * the length and the directory flag reported by the status.
     */
    private static final FunctionWithException<FileStatus, LocationStatus, IOException> FILE_STATUS_TO_LOCATION_STATUS = new FunctionWithException<FileStatus, LocationStatus, IOException>() {
        @Override
        public LocationStatus apply(FileStatus fileStatus) throws IOException {
            URI uri = fileStatus.getPath().toUri();
            long length = fileStatus.getLen();
            boolean directory = fileStatus.isDirectory();
            return new LocationStatus(uri, length, directory);
        }
    };

    /**
     * Converts a {@link Location} into a {@link LocationStatus} by querying the location for its URI,
     * length and directory flag.
     */
    private static final FunctionWithException<Location, LocationStatus, IOException> LOCATION_TO_LOCATION_STATUS = new FunctionWithException<Location, LocationStatus, IOException>() {
        @Override
        public LocationStatus apply(Location source) throws IOException {
            URI uri = source.toURI();
            long length = source.length();
            boolean directory = source.isDirectory();
            return new LocationStatus(uri, length, directory);
        }
    };

    /**
     * Orders {@link Location}s by comparing their URIs with {@link URI#compareTo(URI)}.
     */
    public static final Comparator<Location> LOCATION_COMPARATOR = new Comparator<Location>() {
        @Override
        public int compare(Location left, Location right) {
            URI leftUri = left.toURI();
            URI rightUri = right.toURI();
            return leftUri.compareTo(rightUri);
        }
    };

    /**
     * Creates a new {@link InputSupplier} that provides a {@link SeekableInputStream} for the given path.
     * The path is opened on the given file system each time {@link InputSupplier#getInput()} is called.
     *
     * @param fs The {@link org.apache.hadoop.fs.FileSystem} for the given path.
     * @param path The path to create {@link co.cask.cdap.common.io.SeekableInputStream} from when requested.
     * @return An {@link InputSupplier}.
     */
    public static InputSupplier<? extends SeekableInputStream> newInputSupplier(final FileSystem fs,
            final Path path) {
        return new InputSupplier<SeekableInputStream>() {
            @Override
            public SeekableInputStream getInput() throws IOException {
                FSDataInputStream opened = fs.open(path);
                try {
                    StreamSizeProvider sizeProvider = createDFSStreamSizeProvider(fs, path, opened);
                    return new DFSSeekableInputStream(opened, sizeProvider);
                } catch (Throwable failure) {
                    // The stream was opened but cannot be handed to the caller; close it before failing.
                    Closeables.closeQuietly(opened);
                    if (failure instanceof IOException) {
                        throw (IOException) failure;
                    }
                    throw new IOException(failure);
                }
            }
        };
    }

    /**
     * Creates a new {@link InputSupplier} that provides a {@link SeekableInputStream} from the given location.
     * <p>
     * Each call to {@link InputSupplier#getInput()} opens the location. A {@link FileInputStream} is wrapped
     * directly; a {@link FSDataInputStream} is wrapped together with a {@link StreamSizeProvider} derived from
     * the {@link FileSystem} of the location's {@link LocationFactory} when one can be obtained, or from
     * {@link Location#length()} otherwise. Any other kind of stream results in an {@link IOException}.
     * If creation fails, the opened stream is closed before the exception is thrown.
     *
     * @param location Location for the input stream.
     * @return An {@link InputSupplier}.
     */
    public static InputSupplier<? extends SeekableInputStream> newInputSupplier(final Location location) {
        return new InputSupplier<SeekableInputStream>() {
            @Override
            public SeekableInputStream getInput() throws IOException {
                InputStream input = location.getInputStream();
                try {
                    if (input instanceof FileInputStream) {
                        return new FileSeekableInputStream((FileInputStream) input);
                    }
                    if (input instanceof FSDataInputStream) {
                        FSDataInputStream dataInput = (FSDataInputStream) input;
                        LocationFactory locationFactory = location.getLocationFactory();

                        FileSystem fs = null;
                        if (locationFactory instanceof HDFSLocationFactory) {
                            fs = ((HDFSLocationFactory) locationFactory).getFileSystem();
                        } else if (locationFactory instanceof FileContextLocationFactory) {
                            final FileContextLocationFactory lf = (FileContextLocationFactory) locationFactory;
                            // Obtain the FileSystem as the user associated with the FileContext
                            fs = lf.getFileContext().getUgi().doAs(new PrivilegedExceptionAction<FileSystem>() {
                                @Override
                                public FileSystem run() throws IOException {
                                    return FileSystem.get(lf.getConfiguration());
                                }
                            });
                        }

                        if (fs != null) {
                            return new DFSSeekableInputStream(dataInput,
                                    createDFSStreamSizeProvider(fs, new Path(location.toURI()), dataInput));
                        }
                        // This shouldn't happen
                        return new DFSSeekableInputStream(dataInput, new StreamSizeProvider() {
                            @Override
                            public long size() throws IOException {
                                // Assumption is if the FS is not a HDFS fs, the location length tells the stream size
                                return location.length();
                            }
                        });
                    }

                    throw new IOException("Failed to create SeekableInputStream from location " + location);
                } catch (Throwable t) {
                    Closeables.closeQuietly(input);
                    if (t instanceof InterruptedException) {
                        // The interruption is about to be reported as an IOException; restore the interrupt
                        // status so that code further up the stack can still observe it.
                        Thread.currentThread().interrupt();
                    }
                    Throwables.propagateIfInstanceOf(t, IOException.class);
                    throw new IOException(t);
                }
            }
        };
    }

    /**
     * Runs the {@code processor} over the {@code startLocation} and the locations found underneath it.
     * The start location itself is processed first. If it is a directory, every location directly contained in
     * it is processed as well; directories found below the start location are only expanded when
     * {@code recursive} is {@code true}. If the {@code startLocation} is not a directory, only that location is
     * processed.
     *
     * @param startLocation location to start the processing from
     * @param recursive {@code true} if directories found under {@code startLocation} should be expanded as well.
     *                  A {@code startLocation} that is a directory is always expanded, regardless of this flag
     * @param processor used to process locations. As soon as {@link Processor#process} returns {@code false},
     *                  no further location is processed and the current result of the processor is returned
     * @param <R> Type of the return value
     * @return the value of {@link Processor#getResult()} once processing has stopped
     * @throws IOException if the locations could not be read
     */
    public static <R> R processLocations(Location startLocation, boolean recursive,
            Processor<LocationStatus, R> processor) throws IOException {
        LocationFactory factory = startLocation.getLocationFactory();
        LinkedList<LocationStatus> pending = new LinkedList<>();
        pending.addFirst(getLocationStatus(startLocation));

        // Stays true until the first directory (which can only be the start location) has been expanded.
        boolean startNotExpanded = true;
        while (!pending.isEmpty()) {
            LocationStatus current = pending.removeFirst();
            if (!processor.process(current)) {
                break;
            }
            if (!current.isDir()) {
                continue;
            }
            if (!startNotExpanded && !recursive) {
                continue;
            }
            startNotExpanded = false;

            // Children go to the head of the list, so they are visited before previously queued entries.
            RemoteIterator<LocationStatus> children = listLocationStatus(factory.create(current.getUri()));
            while (children.hasNext()) {
                pending.addFirst(children.next());
            }
        }
        return processor.getResult();
    }

    /**
     * Tries to create a hardlink to the given {@link Location} if it is on the local file system. If creation
     * of the hardlink failed or if the Location is not local, it will copy the location to the given target path.
     *
     * @param location location to hardlink or copy from
     * @param targetPath the target file path
     * @return the target path
     * @throws IOException if copying failed
     */
    public static File linkOrCopy(Location location, File targetPath) throws IOException {
        URI uri = location.toURI();
        if ("file".equals(uri.getScheme())) {
            try {
                Files.createLink(targetPath.toPath(), Paths.get(uri));
                return targetPath;
            } catch (Exception e) {
                // Hard linking is best effort only, so fall back to copy. The failure is still logged so that
                // the reason for the copy can be diagnosed.
                LOG.debug("Failed to create hard link from {} to {}. Falling back to copy.", uri, targetPath, e);
            }
        }

        try (InputStream is = location.getInputStream()) {
            Files.copy(is, targetPath.toPath());
        }

        return targetPath;
    }

    /**
     * Returns a {@link LocationStatus} describing the status of the given {@link Location}.
     * For locations created by a {@link HDFSLocationFactory} or a {@link FileContextLocationFactory}, the
     * status is built from the {@link FileStatus} returned by {@code getFileLinkStatus}; for any other
     * factory it is built by querying the location itself.
     *
     * @throws IOException if the status cannot be retrieved
     */
    private static LocationStatus getLocationStatus(Location location) throws IOException {
        LocationFactory factory = location.getLocationFactory();

        FileStatus fileStatus;
        if (factory instanceof HDFSLocationFactory) {
            FileSystem fileSystem = ((HDFSLocationFactory) factory).getFileSystem();
            fileStatus = fileSystem.getFileLinkStatus(new Path(location.toURI()));
        } else if (factory instanceof FileContextLocationFactory) {
            FileContext fileContext = ((FileContextLocationFactory) factory).getFileContext();
            fileStatus = fileContext.getFileLinkStatus(new Path(location.toURI()));
        } else {
            return LOCATION_TO_LOCATION_STATUS.apply(location);
        }
        return FILE_STATUS_TO_LOCATION_STATUS.apply(fileStatus);
    }

    /**
     * Returns a {@link RemoteIterator} over the {@link LocationStatus} of the entries under the directory
     * represented by the given {@link Location}. The listing is done through the {@link FileSystem} or
     * {@link FileContext} of the location's factory when available, and through {@link Location#list()}
     * otherwise.
     *
     * @throws IOException if the directory cannot be listed
     */
    private static RemoteIterator<LocationStatus> listLocationStatus(Location location) throws IOException {
        LocationFactory factory = location.getLocationFactory();

        if (factory instanceof HDFSLocationFactory) {
            FileSystem fileSystem = ((HDFSLocationFactory) factory).getFileSystem();
            Path directory = new Path(location.toURI());
            Iterator<FileStatus> statuses = Iterators.forArray(fileSystem.listStatus(directory));
            RemoteIterator<FileStatus> remoteStatuses = asRemoteIterator(statuses);
            return transform(remoteStatuses, FILE_STATUS_TO_LOCATION_STATUS);
        }

        if (factory instanceof FileContextLocationFactory) {
            FileContext fileContext = ((FileContextLocationFactory) factory).getFileContext();
            Path directory = new Path(location.toURI());
            RemoteIterator<FileStatus> remoteStatuses = fileContext.listStatus(directory);
            return transform(remoteStatuses, FILE_STATUS_TO_LOCATION_STATUS);
        }

        Iterator<Location> children = location.list().iterator();
        RemoteIterator<Location> remoteChildren = asRemoteIterator(children);
        return transform(remoteChildren, LOCATION_TO_LOCATION_STATUS);
    }

    /**
     * Adapts an {@link Iterator} to the {@link RemoteIterator} interface. Both methods of the returned
     * iterator delegate directly to the given iterator.
     */
    private static <E> RemoteIterator<E> asRemoteIterator(final Iterator<? extends E> itor) {
        RemoteIterator<E> adapter = new RemoteIterator<E>() {
            @Override
            public E next() throws IOException {
                E element = itor.next();
                return element;
            }

            @Override
            public boolean hasNext() throws IOException {
                return itor.hasNext();
            }
        };
        return adapter;
    }

    /**
     * Returns a view of the given {@link RemoteIterator} in which every element is converted with the given
     * {@link FunctionWithException}. The conversion is applied each time {@code next()} is called.
     */
    private static <F, T> RemoteIterator<T> transform(final RemoteIterator<F> itor,
            final FunctionWithException<F, T, IOException> transform) {
        RemoteIterator<T> transformed = new RemoteIterator<T>() {
            @Override
            public T next() throws IOException {
                F element = itor.next();
                return transform.apply(element);
            }

            @Override
            public boolean hasNext() throws IOException {
                return itor.hasNext();
            }
        };
        return transformed;
    }

    /**
     * Creates a new {@link OutputSupplier} that provides an {@link OutputStream} for the given location.
     * Each call to {@link OutputSupplier#getOutput()} asks the location for an output stream.
     *
     * @param location Location for the output.
     * @return An {@link OutputSupplier}.
     */
    public static OutputSupplier<? extends OutputStream> newOutputSupplier(final Location location) {
        OutputSupplier<OutputStream> supplier = new OutputSupplier<OutputStream>() {
            @Override
            public OutputStream getOutput() throws IOException {
                OutputStream output = location.getOutputStream();
                return output;
            }
        };
        return supplier;
    }

    /**
     * Creates a {@link Location} instance which represents the parent of the given location.
     * <p>
     * The parent URI is computed by appending {@code "/.."} to the location URI and normalizing the result.
     * A trailing slash is removed from the parent URI, except when the parent is the root path {@code "/"},
     * which is kept as is.
     *
     * @param location location to extract the parent from.
     * @return an instance representing the parent location or {@code null} if there is no parent.
     */
    @Nullable
    public static Location getParent(Location location) {
        URI source = location.toURI();

        // If it is root, return null
        if ("/".equals(source.getPath())) {
            return null;
        }

        URI resolvedParent = URI.create(source.toString() + "/..").normalize();
        // NOTE: if there is a trailing slash at the end, rename(), getName() and other operations on file
        // does not work in MapR. so we remove the trailing slash (if any) at the end.
        // The root path is the exception: removing its slash would leave an empty path, and for a URI without
        // authority (e.g. "file:/") it would leave "file:", which URI.create rejects with an
        // IllegalArgumentException.
        String parent = resolvedParent.toString();
        if (parent.endsWith("/") && !"/".equals(resolvedParent.getPath())) {
            resolvedParent = URI.create(parent.substring(0, parent.length() - 1));
        }
        return location.getLocationFactory().create(resolvedParent);
    }

    /**
     * Creates the directory represented by the location, unless it already exists as a directory.
     *
     * @param location the location for the directory.
     * @throws IOException If the location is not a directory and the directory cannot be created
     */
    public static void mkdirsIfNotExists(Location location) throws IOException {
        if (location.isDirectory()) {
            return;
        }
        if (location.mkdirs()) {
            return;
        }
        // mkdirs() may have failed because the directory got created concurrently, so check once more.
        if (location.isDirectory()) {
            return;
        }
        throw new IOException("Failed to create directory at " + location);
    }

    /**
     * Deletes the given location non-recursively. An {@link IOException} raised by the deletion is logged
     * instead of being thrown.
     *
     * @param location the location to delete
     */
    public static void deleteQuietly(Location location) {
        boolean recursive = false;
        deleteQuietly(location, recursive);
    }

    /**
     * Deletes the given location. An {@link IOException} raised by the deletion is logged instead of being
     * thrown.
     *
     * @param location the location to delete
     * @param recursive passed to {@link Location#delete(boolean)}
     */
    public static void deleteQuietly(Location location, boolean recursive) {
        try {
            location.delete(recursive);
        } catch (IOException ioe) {
            LOG.error("IOException while deleting location {}", location, ioe);
        }
    }

    /**
     * Deletes the content of the given location, but keeps the location itself. Every child is deleted
     * recursively and quietly; if the location cannot be listed, the failure is logged and nothing is deleted.
     *
     * @param location the location whose content should be deleted
     */
    public static void deleteContent(Location location) {
        Iterator<Location> children;
        try {
            children = location.list().iterator();
        } catch (IOException ioe) {
            LOG.error("IOException while deleting content of {}", location, ioe);
            return;
        }

        while (children.hasNext()) {
            deleteQuietly(children.next(), true);
        }
    }

    /**
     * Converts the given file into a local {@link Location}, based on the URI of its absolute form.
     *
     * @param file the local file to convert
     * @return a {@link Location} created by the local location factory
     */
    public static Location toLocation(File file) {
        File absoluteFile = file.getAbsoluteFile();
        URI absoluteUri = absoluteFile.toURI();
        return LOCAL_LOCATION_FACTORY.create(absoluteUri);
    }

    /**
     * Creates a {@link StreamSizeProvider} for determining the size of the given {@link FSDataInputStream}.
     * <p>
     * The returned provider first asks the file system: on a {@link DistributedFileSystem} the file length is
     * used only if the file is closed, on any other file system the file length is always used. When that
     * yields no size, it falls back to reflectively calling {@code getFileLength} on the stream wrapped by
     * {@code input}. If the fallback fails too, a warning is logged and the negative size is returned.
     *
     * @param fs the file system that the path belongs to
     * @param path the path of the file being read
     * @param input the stream opened on the path
     */
    private static StreamSizeProvider createDFSStreamSizeProvider(final FileSystem fs, final Path path,
            FSDataInputStream input) {
        // First strategy: rely on the file status, unless the file is still open on a DistributedFileSystem,
        // in which case -1 signals that the size is not known this way.
        final StreamSizeProvider statusSizeProvider = new StreamSizeProvider() {
            @Override
            public long size() throws IOException {
                boolean openOnDfs = fs instanceof DistributedFileSystem
                        && !((DistributedFileSystem) fs).isFileClosed(path);
                if (openOnDfs) {
                    return -1L;
                }
                return fs.getFileStatus(path).getLen();
            }
        };

        // Second strategy: DFSInputStream#getFileLength, looked up lazily through reflection.
        // Reflection is used to avoid ClassLoading error if the DFSInputStream class is moved or method get renamed
        final InputStream wrappedStream = input.getWrappedStream();
        final Supplier<Method> fileLengthMethod = Suppliers.memoize(new Supplier<Method>() {
            @Override
            public Method get() {
                try {
                    // This is a hack to get to the underlying DFSInputStream
                    // Need to revisit it when need to support different distributed file system
                    String expectedName = "org.apache.hadoop.hdfs.DFSInputStream";
                    Class<? extends InputStream> streamClass = wrappedStream.getClass();
                    String actualName = streamClass.getName();
                    if (!expectedName.equals(actualName)) {
                        throw new Exception(
                                "Expected wrapper class be " + expectedName + ", but got " + streamClass.getName());
                    }

                    Method method = streamClass.getMethod("getFileLength");
                    if (!method.isAccessible()) {
                        method.setAccessible(true);
                    }
                    return method;
                } catch (Exception e) {
                    throw Throwables.propagate(e);
                }
            }
        });

        return new StreamSizeProvider() {
            @Override
            public long size() throws IOException {
                long knownSize = statusSizeProvider.size();
                if (knownSize < 0) {
                    try {
                        // The file status could not tell the size, ask the wrapped stream instead
                        return (Long) fileLengthMethod.get().invoke(wrappedStream);
                    } catch (Throwable t) {
                        LOG.warn("Unable to get actual file length from DFS input.", t);
                    }
                }
                return knownSize;
            }
        };
    }

    private Locations() {
        // Utility class with static members only; not meant to be instantiated.
    }
}