alluxio.job.move.MoveDefinition.java Source code

Java tutorial

Introduction

Here is the source code for alluxio.job.move.MoveDefinition.java

Source

/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.job.move;

import alluxio.AlluxioURI;
import alluxio.Configuration;
import alluxio.PropertyKey;
import alluxio.client.WriteType;
import alluxio.client.block.AlluxioBlockStore;
import alluxio.client.block.BlockWorkerInfo;
import alluxio.client.file.BaseFileSystem;
import alluxio.client.file.FileInStream;
import alluxio.client.file.FileOutStream;
import alluxio.client.file.FileSystem;
import alluxio.client.file.FileSystemContext;
import alluxio.client.file.URIStatus;
import alluxio.client.file.options.CreateDirectoryOptions;
import alluxio.client.file.options.CreateFileOptions;
import alluxio.client.file.options.DeleteOptions;
import alluxio.exception.ExceptionMessage;
import alluxio.exception.FileAlreadyExistsException;
import alluxio.exception.FileDoesNotExistException;
import alluxio.job.AbstractVoidJobDefinition;
import alluxio.job.JobMasterContext;
import alluxio.job.JobWorkerContext;
import alluxio.job.util.JobUtils;
import alluxio.job.util.SerializableVoid;
import alluxio.util.io.PathUtils;
import alluxio.wire.WorkerInfo;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.compress.utils.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Stack;
import java.util.concurrent.ConcurrentMap;

/**
 * A job that moves a source path to a destination path.
 *
 * If the destination exists, the source and destination must either both be files or both be
 * directories, and the overwrite configuration option must be set. If the destination does not
 * exist, its parent must be a directory and the destination will be created by the move command.
 *
 * If moving a directory to an existing directory causes files to conflict, the moved files will
 * replace the existing files.
 *
 * Unlike Unix mv, the source will not be nested inside the destination when the destination is a
 * directory. This makes it so that the move job is idempotent when the overwrite flag is set.
 *
 * Suppose we have this directory structure, where a to e are directories and f1 to f3 are files:
 *
 *  a
 *   e
 *    f2
 *   f1
 *  b
 *      d
 *      e
 *          f3
 *
 * Moving a to b will result in
 *
 *  b
 *      d
 *      e
 *     |    f2
 *      f1
 */
public final class MoveDefinition extends AbstractVoidJobDefinition<MoveConfig, ArrayList<MoveCommand>> {
    private static final Logger LOG = LoggerFactory.getLogger(MoveDefinition.class);
    private final FileSystemContext mFileSystemContext;
    private final FileSystem mFileSystem;
    private final Random mRandom = new Random();

    /**
     * Constructs a new {@link MoveDefinition}.
     */
    public MoveDefinition() {
        mFileSystemContext = FileSystemContext.get();
        mFileSystem = BaseFileSystem.get(FileSystemContext.get());
    }

    /**
     * Constructs a new {@link MoveDefinition} with FileSystem context and instance.
     *
     * @param context file system context
     * @param fileSystem file system client
     */
    public MoveDefinition(FileSystemContext context, FileSystem fileSystem) {
        mFileSystemContext = context;
        mFileSystem = fileSystem;
    }

    private void checkMoveValid(MoveConfig config) throws Exception {
        AlluxioURI source = new AlluxioURI(config.getSource());
        AlluxioURI destination = new AlluxioURI(config.getDestination());
        // The source cannot be a prefix of the destination - that would be moving a path inside itself.
        if (PathUtils.hasPrefix(destination.toString(), source.toString())) {
            throw new RuntimeException(
                    ExceptionMessage.MOVE_CANNOT_BE_TO_SUBDIRECTORY.getMessage(source, config.getDestination()));
        }

        // This will throw an appropriate exception if the source does not exist.
        boolean sourceIsDirectory = mFileSystem.getStatus(source).isFolder();
        try {
            URIStatus destinationStatus = mFileSystem.getStatus(destination);
            // Handle the case where the destination exists.
            boolean destinationIsDirectory = destinationStatus.isFolder();
            if (sourceIsDirectory && !destinationIsDirectory) {
                throw new RuntimeException(ExceptionMessage.MOVE_DIRECTORY_TO_FILE.getMessage(source, destination));
            } else if (!sourceIsDirectory && destinationIsDirectory) {
                throw new RuntimeException(ExceptionMessage.MOVE_FILE_TO_DIRECTORY.getMessage(source, destination));
            }
            if (!config.isOverwrite()) {
                throw new FileAlreadyExistsException(ExceptionMessage.MOVE_NEED_OVERWRITE.getMessage(destination));
            }
        } catch (FileDoesNotExistException e) {
            // Handle the case where the destination does not exist.
            // This will throw an appropriate exception if the destination's parent does not exist.
            URIStatus destinationParentStatus = mFileSystem.getStatus(destination.getParent());
            if (!destinationParentStatus.isFolder()) {
                throw new RuntimeException(ExceptionMessage.MOVE_TO_FILE_AS_DIRECTORY.getMessage(destination,
                        destination.getParent()));
            }
        }
    }

    /**
     * {@inheritDoc}
     *
     * Assigns each worker to move whichever files it has the most blocks for. If no worker has blocks
     * for a file, a random worker is chosen.
     */
    @Override
    public Map<WorkerInfo, ArrayList<MoveCommand>> selectExecutors(MoveConfig config,
            List<WorkerInfo> jobWorkerInfoList, JobMasterContext jobMasterContext) throws Exception {
        AlluxioURI source = new AlluxioURI(config.getSource());
        AlluxioURI destination = new AlluxioURI(config.getDestination());
        if (source.equals(destination)) {
            return new HashMap<WorkerInfo, ArrayList<MoveCommand>>();
        }
        checkMoveValid(config);

        List<BlockWorkerInfo> alluxioWorkerInfoList = AlluxioBlockStore.create(mFileSystemContext).getAllWorkers();
        Preconditions.checkState(!jobWorkerInfoList.isEmpty(), "No workers are available");

        List<URIStatus> allPathStatuses = getPathStatuses(source);
        ConcurrentMap<WorkerInfo, ArrayList<MoveCommand>> assignments = Maps.newConcurrentMap();
        ConcurrentMap<String, WorkerInfo> hostnameToWorker = Maps.newConcurrentMap();
        for (WorkerInfo workerInfo : jobWorkerInfoList) {
            hostnameToWorker.put(workerInfo.getAddress().getHost(), workerInfo);
        }
        List<String> keys = new ArrayList<>();
        keys.addAll(hostnameToWorker.keySet());
        // Assign each file to the worker with the most block locality.
        for (URIStatus status : allPathStatuses) {
            if (status.isFolder()) {
                moveDirectory(status.getPath(), source.getPath(), destination.getPath());
            } else {
                WorkerInfo bestJobWorker = getBestJobWorker(status, alluxioWorkerInfoList, jobWorkerInfoList,
                        hostnameToWorker);
                String destinationPath = computeTargetPath(status.getPath(), source.getPath(),
                        destination.getPath());
                assignments.putIfAbsent(bestJobWorker, Lists.<MoveCommand>newArrayList());
                assignments.get(bestJobWorker).add(new MoveCommand(status.getPath(), destinationPath));
            }
        }
        return assignments;
    }

    private WorkerInfo getBestJobWorker(URIStatus status, List<BlockWorkerInfo> alluxioWorkerInfoList,
            List<WorkerInfo> jobWorkerInfoList, Map<String, WorkerInfo> hostnameToJobWorker) {
        BlockWorkerInfo bestWorker = JobUtils.getWorkerWithMostBlocks(alluxioWorkerInfoList,
                status.getFileBlockInfos());
        if (bestWorker == null) {
            // Nobody has blocks, choose a random worker.
            bestWorker = alluxioWorkerInfoList.get(mRandom.nextInt(jobWorkerInfoList.size()));
        }
        // Map the best Alluxio worker to a job worker.
        WorkerInfo worker = hostnameToJobWorker.get(bestWorker.getNetAddress().getHost());
        if (worker == null) {
            return jobWorkerInfoList.get(new Random().nextInt(jobWorkerInfoList.size()));
        }
        return worker;
    }

    /**
     * Computes the path that the given path should end up at when source is moved to destination.
     *
     * @param path a path to move which must be a descendent path of the source path, e.g. /src/file
     * @param source the base source path being moved, e.g. /src
     * @param destination the path to move to, e.g. /dst/src
     * @return the path which file should be moved to, e.g. /dst/src/file
     */
    private static String computeTargetPath(String path, String source, String destination) throws Exception {
        String relativePath = PathUtils.subtractPaths(path, source);
        return PathUtils.concatPath(destination, relativePath);
    }

    /**
     * @param path the path of the directory to move; it must be a subpath of source
     * @param source the base source path being moved
     * @param destination the destination path
     */
    private void moveDirectory(String path, String source, String destination) throws Exception {
        String newDir = computeTargetPath(path, source, destination);
        mFileSystem.createDirectory(new AlluxioURI(newDir), CreateDirectoryOptions.defaults());
    }

    /**
     * Returns {@link URIStatus} for all paths under the specified path, including the path itself.
     *
     * The statuses will be listed in the order they are visited by depth-first search.
     *
     * @param path the target path
     * @return a list of the {@link URIStatus} for all paths under the given path
     * @throws Exception if an exception occurs
     */
    private List<URIStatus> getPathStatuses(AlluxioURI path) throws Exception {
        // Depth-first search to to find all files under path.
        Stack<AlluxioURI> pathsToConsider = new Stack<>();
        pathsToConsider.add(path);
        List<URIStatus> allStatuses = Lists.newArrayList();
        while (!pathsToConsider.isEmpty()) {
            AlluxioURI nextPath = pathsToConsider.pop();
            URIStatus status = mFileSystem.getStatus(nextPath);
            allStatuses.add(status);
            if (status.isFolder()) {
                List<URIStatus> childStatuses = mFileSystem.listStatus(nextPath);
                for (URIStatus childStatus : childStatuses) {
                    if (childStatus.isFolder()) {
                        pathsToConsider.push(new AlluxioURI(childStatus.getPath()));
                    } else {
                        allStatuses.add(childStatus);
                    }
                }
            }
        }
        return ImmutableList.copyOf(allStatuses);
    }

    /**
     * {@inheritDoc}
     *
     * Moves the file specified in the config to the configured path. If the destination path is a
     * directory, the file is moved inside that directory.
     */
    @Override
    public SerializableVoid runTask(MoveConfig config, ArrayList<MoveCommand> commands,
            JobWorkerContext jobWorkerContext) throws Exception {
        WriteType writeType = config.getWriteType() == null
                ? Configuration.getEnum(PropertyKey.USER_FILE_WRITE_TYPE_DEFAULT, WriteType.class)
                : WriteType.valueOf(config.getWriteType());
        for (MoveCommand command : commands) {
            move(command, writeType, mFileSystem);
        }
        // Try to delete the source directory if it is empty.
        if (!hasFiles(new AlluxioURI(config.getSource()), mFileSystem)) {
            try {
                LOG.debug("Deleting {}", config.getSource());
                mFileSystem.delete(new AlluxioURI(config.getSource()), DeleteOptions.defaults().setRecursive(true));
            } catch (FileDoesNotExistException e) {
                // It's already deleted, possibly by another worker.
            }
        }
        return null;
    }

    /**
     * @param command the move command to execute
     * @param writeType the write type to use for the moved file
     * @param fileSystem the Alluxio file system
     */
    private static void move(MoveCommand command, WriteType writeType, FileSystem fileSystem) throws Exception {
        String source = command.getSource();
        String destination = command.getDestination();
        LOG.debug("Moving {} to {}", source, destination);

        try (FileOutStream out = fileSystem.createFile(new AlluxioURI(destination),
                CreateFileOptions.defaults().setWriteType(writeType))) {
            try (FileInStream in = fileSystem.openFile(new AlluxioURI(source))) {
                IOUtils.copy(in, out);
            } catch (Throwable t) {
                try {
                    out.cancel();
                } catch (Throwable t2) {
                    t.addSuppressed(t2);
                }
                throw t;
            }
        }
        fileSystem.delete(new AlluxioURI(source));
    }

    /**
     * @param source an Alluxio URI
     * @param fileSystem the Alluxio file system
     * @return whether the URI is a file or a directory which contains files (including recursively)
     * @throws Exception if an unexpected exception occurs
     */
    private static boolean hasFiles(AlluxioURI source, FileSystem fileSystem) throws Exception {
        Stack<AlluxioURI> dirsToCheck = new Stack<>();
        dirsToCheck.add(source);
        while (!dirsToCheck.isEmpty()) {
            try {
                for (URIStatus status : fileSystem.listStatus(dirsToCheck.pop())) {
                    if (!status.isFolder()) {
                        return true;
                    }
                    dirsToCheck.push(new AlluxioURI(status.getPath()));
                }
            } catch (FileDoesNotExistException e) {
                // This probably means another worker has deleted the directory already, so we can probably
                // return false here. To be safe though, we will fall through and complete the search.
            }
        }
        return false;
    }

    @Override
    public Class<MoveConfig> getJobConfigClass() {
        return MoveConfig.class;
    }
}