List of usage examples for org.apache.hadoop.fs.FileStatus.getPath()
public Path getPath()
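FileStatus.getPath() returns the Path of the file or directory that the status describes. The examples below use it to build split locations, filter directory listings by name, and recurse into subdirectories. As a minimal, self-contained sketch of the call (the /tmp directory is just a placeholder), listing a directory and printing each entry's path looks like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetPathExample
{
    public static void main(String[] args)
            throws Exception
    {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // list the directory and print each entry's full path and its final name component
        for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
            System.out.println(status.getPath() + " (name: " + status.getPath().getName() + ")");
        }
    }
}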
From source file:com.facebook.presto.hive.HiveSplitIterable.java
License:Apache License
private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable)
        throws IOException
{
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxSplitSize.toBytes()));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(clientId,
                        table.getDbName(),
                        table.getTableName(),
                        partitionName,
                        false,
                        file.getPath().toString(),
                        blockLocation.getOffset() + chunkOffset,
                        chunkLength,
                        schema,
                        partitionKeys,
                        addresses));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    }
    else {
        // not splittable, use the hosts from the first block
        builder.add(new HiveSplit(clientId,
                table.getDbName(),
                table.getTableName(),
                partitionName,
                false,
                file.getPath().toString(),
                start,
                length,
                schema,
                partitionKeys,
                toHostAddress(blockLocations[0].getHosts())));
    }
    return builder.build();
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session)
{
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit).getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName,
                            fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(), split.getLength()),
                            split.getStart(),
                            split.getLength(),
                            schema,
                            partitionKeys,
                            false,
                            session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName,
                            file,
                            blockLocations,
                            0,
                            file.getLen(),
                            schema,
                            partitionKeys,
                            splittable,
                            session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            }
            catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback()
                    {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations)
                        {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration),
                                        file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName,
                                        file,
                                        blockLocations,
                                        0,
                                        file.getLen(),
                                        schema,
                                        partitionKeys,
                                        splittable,
                                        session));
                            }
                            catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>()
            {
                @Override
                public void onSuccess(Void result)
                {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t)
                {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>()
        {
            @Override
            public void onSuccess(List<Void> result)
            {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t)
            {
                hiveSplitSource.fail(t);
            }
        });
    }
    catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private static Optional<FileStatus> getBucketFile(HiveBucket bucket, FileSystem fs, Path path)
{
    FileStatus[] statuses = listStatus(fs, path);

    // a bucketed partition must contain exactly one file per bucket
    if (statuses.length != bucket.getBucketCount()) {
        return Optional.absent();
    }

    Map<String, FileStatus> map = new HashMap<>();
    List<String> paths = new ArrayList<>();
    for (FileStatus status : statuses) {
        // every entry must be a plain file
        if (!isFile(status)) {
            return Optional.absent();
        }
        String pathString = status.getPath().toString();
        map.put(pathString, status);
        paths.add(pathString);
    }

    // Hive sorts the paths as strings lexicographically
    Collections.sort(paths);

    String pathString = paths.get(bucket.getBucketNumber());
    return Optional.of(map.get(pathString));
}
From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java
License:Apache License
private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session)
        throws IOException
{
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();
            if (remainingInitialSplits > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId,
                        table.getDbName(),
                        table.getTableName(),
                        partitionName,
                        file.getPath().toString(),
                        blockLocation.getOffset() + chunkOffset,
                        chunkLength,
                        schema,
                        partitionKeys,
                        addresses,
                        session));

                chunkOffset += chunkLength;
                remainingInitialSplits--;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    }
    else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId,
                table.getDbName(),
                table.getTableName(),
                partitionName,
                file.getPath().toString(),
                start,
                length,
                schema,
                partitionKeys,
                addresses,
                session));
    }
    return builder.build();
}
From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java
License:Apache License
private List<ArrayDeque<String>> listPartitions(Path director, List<Column> partitionColumns)
{
    if (partitionColumns.isEmpty()) {
        return ImmutableList.of();
    }

    try {
        // partition directories are named <columnName>=<value>
        String directoryPrefix = partitionColumns.get(0).getName() + '=';

        List<ArrayDeque<String>> partitionValues = new ArrayList<>();
        for (FileStatus fileStatus : metadataFileSystem.listStatus(director)) {
            if (!fileStatus.isDirectory()) {
                continue;
            }
            if (!fileStatus.getPath().getName().startsWith(directoryPrefix)) {
                continue;
            }

            // recurse into the child directory for the remaining partition columns
            List<ArrayDeque<String>> childPartitionValues;
            if (partitionColumns.size() == 1) {
                childPartitionValues = ImmutableList.of(new ArrayDeque<>());
            }
            else {
                childPartitionValues = listPartitions(fileStatus.getPath(),
                        partitionColumns.subList(1, partitionColumns.size()));
            }

            // prepend this level's value to every child partition tuple
            String value = fileStatus.getPath().getName().substring(directoryPrefix.length());
            for (ArrayDeque<String> childPartition : childPartitionValues) {
                childPartition.addFirst(value);
                partitionValues.add(childPartition);
            }
        }
        return partitionValues;
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, "Error listing partition directories", e);
    }
}
From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java
License:Apache License
private List<Path> getChildSchemaDirectories(Path metadataDirectory)
{
    try {
        if (!metadataFileSystem.isDirectory(metadataDirectory)) {
            return ImmutableList.of();
        }

        ImmutableList.Builder<Path> childSchemaDirectories = ImmutableList.builder();
        for (FileStatus child : metadataFileSystem.listStatus(metadataDirectory)) {
            if (!child.isDirectory()) {
                continue;
            }
            Path childPath = child.getPath();
            // skip hidden directories
            if (childPath.getName().startsWith(".")) {
                continue;
            }
            // only directories containing a schema file count as schema directories
            if (metadataFileSystem.isFile(new Path(childPath, PRESTO_SCHEMA_FILE_NAME))) {
                childSchemaDirectories.add(childPath);
            }
        }
        return childSchemaDirectories.build();
    }
    catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}
From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java
License:Apache License
private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory,
        List<String> filePrefixes, boolean deleteEmptyDirectories)
{
    FileStatus[] allFiles;
    try {
        allFiles = fileSystem.listStatus(directory);
    }
    catch (IOException e) {
        // the listing failed, so nothing under this directory can be reported as deleted
        ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder();
        notDeletedItems.add(directory.toString() + "/**");
        return new RecursiveDeleteResult(false, notDeletedItems.build());
    }

    boolean allDescendentsDeleted = true;
    ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
    for (FileStatus fileStatus : allFiles) {
        if (HadoopFileStatus.isFile(fileStatus)) {
            Path filePath = fileStatus.getPath();
            String fileName = filePath.getName();
            // only files whose names start with one of the given prefixes are eligible for deletion
            boolean eligible = false;
            for (String filePrefix : filePrefixes) {
                if (fileName.startsWith(filePrefix)) {
                    eligible = true;
                    break;
                }
            }
            if (eligible) {
                if (!deleteIfExists(fileSystem, filePath, false)) {
                    allDescendentsDeleted = false;
                    notDeletedEligibleItems.add(filePath.toString());
                }
            }
            else {
                allDescendentsDeleted = false;
            }
        }
        else if (HadoopFileStatus.isDirectory(fileStatus)) {
            RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(),
                    filePrefixes, deleteEmptyDirectories);
            if (!subResult.isDirectoryNoLongerExists()) {
                allDescendentsDeleted = false;
            }
            if (!subResult.getNotDeletedEligibleItems().isEmpty()) {
                notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems());
            }
        }
        else {
            // entries that are neither files nor directories are never deleted
            allDescendentsDeleted = false;
            notDeletedEligibleItems.add(fileStatus.getPath().toString());
        }
    }

    if (allDescendentsDeleted && deleteEmptyDirectories) {
        verify(notDeletedEligibleItems.build().isEmpty());
        if (!deleteIfExists(fileSystem, directory, false)) {
            return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
        }
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, notDeletedEligibleItems.build());
}
From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java
License:Apache License
@Override
public boolean rename(Path src, Path dst)
        throws IOException
{
    boolean srcDirectory;
    try {
        srcDirectory = directory(src);
    }
    catch (FileNotFoundException e) {
        return false;
    }

    try {
        if (!directory(dst)) {
            // cannot copy a file to an existing file
            return keysEqual(src, dst);
        }
        // move source under destination directory
        dst = new Path(dst, src.getName());
    }
    catch (FileNotFoundException e) {
        // destination does not exist
    }

    if (keysEqual(src, dst)) {
        return true;
    }

    if (srcDirectory) {
        for (FileStatus file : listStatus(src)) {
            rename(file.getPath(), new Path(dst, file.getPath().getName()));
        }
        deleteObject(keyFromPath(src) + DIRECTORY_SUFFIX);
    }
    else {
        s3.copyObject(getBucketName(uri), keyFromPath(src), getBucketName(uri), keyFromPath(dst));
        delete(src, true);
    }

    return true;
}
From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java
License:Apache License
@Override
public boolean delete(Path path, boolean recursive)
        throws IOException
{
    try {
        if (!directory(path)) {
            return deleteObject(keyFromPath(path));
        }
    }
    catch (FileNotFoundException e) {
        return false;
    }

    if (!recursive) {
        throw new IOException("Directory " + path + " is not empty");
    }

    // delete all children, then remove the directory marker object
    for (FileStatus file : listStatus(path)) {
        delete(file.getPath(), true);
    }
    deleteObject(keyFromPath(path) + DIRECTORY_SUFFIX);

    return true;
}
From source file:com.flipkart.fdp.migration.distcp.codec.GenericHadoopCodec.java
License:Apache License
public List<FileTuple> getInputPaths(Collection<String> paths, Collection<String> excludeList)
        throws Exception
{
    System.out.println("A total of " + paths.size() + " paths to scan...");

    List<FileTuple> fileList = new ArrayList<FileTuple>();
    List<String> inputPaths = new ArrayList<String>();

    // Process regular expression based paths
    for (String path : paths) {
        System.out.println("Processing path: " + path);
        FileStatus[] stats = fs.globStatus(new Path(path));
        if (stats == null || stats.length <= 0) {
            continue;
        }
        for (FileStatus fstat : stats) {
            if (fstat.isFile()) {
                fileList.add(new FileTuple(MirrorUtils.getSimplePath(fstat.getPath()), fstat.getLen(),
                        fstat.getModificationTime()));
            }
            else {
                inputPaths.add(MirrorUtils.getSimplePath(fstat.getPath()));
            }
        }
    }

    if (inputPaths.size() > 0) {
        for (String path : inputPaths) {
            List<FileTuple> fstat = getFileStatusRecursive(new Path(path), excludeList);
            fileList.addAll(fstat);
        }
    }
    return fileList;
}