com.facebook.presto.hive.util.AsyncWalker.java Source code

Java tutorial

Introduction

Here is the source code for com.facebook.presto.hive.util.AsyncWalker.java

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.hive.util;

import com.facebook.presto.hive.DirectoryLister;
import com.facebook.presto.hive.NamenodeStats;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import io.airlift.stats.TimeStat;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicLong;

import static com.facebook.presto.hadoop.HadoopFileStatus.isDirectory;
import static com.google.common.base.Preconditions.checkNotNull;

/**
 * Walks a directory tree asynchronously and reports every visible file to a callback.
 *
 * <p>Each directory is listed in its own task submitted to the supplied {@link Executor}.
 * A shared counter tracks the outstanding tasks; the future returned by
 * {@link #beginWalk(Path, FileStatusCallback)} completes successfully when the last task
 * finishes, or fails with the first error encountered.
 *
 * <p>Entries whose name starts with {@code _} or {@code .} are treated as hidden and skipped.
 */
public class AsyncWalker {
    private final FileSystem fileSystem;
    private final Executor executor;
    private final DirectoryLister directoryLister;
    private final NamenodeStats namenodeStats;
    private final boolean recursive;

    /**
     * Creates a walker.
     *
     * @param fileSystem file system passed to the directory lister; must not be null
     * @param executor executor on which every directory listing task runs; must not be null
     * @param directoryLister strategy used to list a directory; must not be null
     * @param namenodeStats statistics sink for listing / iteration timings and failures; must not be null
     * @param recursive whether sub-directories are walked as well; when false they are ignored
     */
    public AsyncWalker(FileSystem fileSystem, Executor executor, DirectoryLister directoryLister,
            NamenodeStats namenodeStats, boolean recursive) {
        this.fileSystem = checkNotNull(fileSystem, "fileSystem is null");
        this.executor = checkNotNull(executor, "executor is null");
        this.directoryLister = checkNotNull(directoryLister, "directoryLister is null");
        this.namenodeStats = checkNotNull(namenodeStats, "namenodeStats is null");
        this.recursive = recursive;
    }

    /**
     * Starts walking {@code path} on the executor.
     *
     * @param path directory to list
     * @param callback invoked for every non-hidden, non-directory entry together with its block locations
     * @return a future that is set to {@code null} once all walk tasks have finished, or that
     *         carries the first exception raised while submitting or running a task
     */
    public ListenableFuture<Void> beginWalk(Path path, FileStatusCallback callback) {
        SettableFuture<Void> future = SettableFuture.create();
        recursiveWalk(path, callback, new AtomicLong(), future);
        return future;
    }

    /**
     * Registers one more outstanding task and submits the listing of {@code path} to the executor.
     */
    private void recursiveWalk(final Path path, final FileStatusCallback callback, final AtomicLong taskCount,
            final SettableFuture<Void> future) {
        // Count the task before it is submitted so the counter cannot reach zero
        // while this task is still waiting to run.
        taskCount.incrementAndGet();
        try {
            executor.execute(new Runnable() {
                @Override
                public void run() {
                    doWalk(path, callback, taskCount, future);
                }
            });
        } catch (Throwable t) {
            // The task never ran, so it will never decrement the counter;
            // failing the future here is what terminates the walk.
            future.setException(t);
        }
    }

    /**
     * Lists a single directory: files go to the callback, sub-directories are scheduled as
     * further tasks when the walker is recursive. Always decrements the task counter on exit
     * and completes the future when this was the last outstanding task.
     */
    private void doWalk(Path path, FileStatusCallback callback, AtomicLong taskCount, SettableFuture<Void> future) {
        try (SetThreadName ignored = new SetThreadName("HiveHdfsWalker")) {
            RemoteIterator<LocatedFileStatus> iterator = getLocatedFileStatusRemoteIterator(path);

            // Check the future before every fetch (including the first one) so that a walk
            // which has already failed or been cancelled stops without scanning further entries.
            while (!future.isDone() && iterator.hasNext()) {
                LocatedFileStatus status = getLocatedFileStatus(iterator);

                // ignore hidden files. Hive ignores files starting with _ and . as well.
                String fileName = status.getPath().getName();
                if (fileName.startsWith("_") || fileName.startsWith(".")) {
                    continue;
                }
                if (!isDirectory(status)) {
                    callback.process(status, status.getBlockLocations());
                } else if (recursive) {
                    recursiveWalk(status.getPath(), callback, taskCount, future);
                }
            }
        } catch (FileNotFoundException e) {
            // Same exception type and message as before, but keep the original as the cause
            // so the stack trace of the failing listing call is not lost.
            FileNotFoundException missing = new FileNotFoundException("Partition location does not exist: " + path);
            missing.initCause(e);
            future.setException(missing);
        } catch (Throwable t) {
            future.setException(t);
        } finally {
            // Last task out completes the future; this is a no-op if it already failed.
            if (taskCount.decrementAndGet() == 0) {
                future.set(null);
            }
        }
    }

    /**
     * Lists {@code path} through the directory lister, timing the call and recording any failure
     * in the "list located status" statistics.
     *
     * @throws IOException if the directory lister fails
     */
    private RemoteIterator<LocatedFileStatus> getLocatedFileStatusRemoteIterator(Path path) throws IOException {
        try (TimeStat.BlockTimer ignored = namenodeStats.getListLocatedStatus().time()) {
            return directoryLister.list(fileSystem, path);
        } catch (IOException | RuntimeException e) {
            namenodeStats.getListLocatedStatus().recordException(e);
            throw e;
        }
    }

    /**
     * Fetches the next entry from {@code iterator}, timing the call and recording any failure
     * in the "remote iterator next" statistics.
     *
     * @throws IOException if the iterator fails to produce the next entry
     */
    private LocatedFileStatus getLocatedFileStatus(RemoteIterator<LocatedFileStatus> iterator) throws IOException {
        try (TimeStat.BlockTimer ignored = namenodeStats.getRemoteIteratorNext().time()) {
            return iterator.next();
        } catch (IOException | RuntimeException e) {
            namenodeStats.getRemoteIteratorNext().recordException(e);
            throw e;
        }
    }
}