Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 
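
As a quick orientation before the real-world listings below, here is a minimal sketch of the typical call pattern. The directory "/tmp/example" and the default Configuration are illustrative placeholders, not taken from the sources that follow.

public static void printPaths() throws IOException {
    // placeholder configuration; a real application would use its own Hadoop configuration
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    for (FileStatus status : fs.listStatus(new Path("/tmp/example"))) {
        // getPath() returns the qualified Path of the entry; getName() is just its final component
        System.out.println(status.getPath() + " -> " + status.getPath().getName());
    }
}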

Usage

From source file:com.facebook.presto.hive.HiveSplitIterable.java

License:Apache License

private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable)
        throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxSplitSize.toBytes()));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(clientId, table.getDbName(), table.getTableName(), partitionName,
                        false, file.getPath().toString(), blockLocation.getOffset() + chunkOffset, chunkLength,
                        schema, partitionKeys, addresses));

                chunkOffset += chunkLength;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block
        builder.add(new HiveSplit(clientId, table.getDbName(), table.getTableName(), partitionName, false,
                file.getPath().toString(), start, length, schema, partitionKeys,
                toHostAddress(blockLocations[0].getHosts())));
    }
    return builder.build();
}

From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java

License:Apache License

private void loadPartitionSplits(final HiveSplitSource hiveSplitSource, SuspendingExecutor suspendingExecutor,
        final ConnectorSession session) {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);

            FileSystem fs = path.getFileSystem(configuration);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, path);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false, session));
                }
                continue;
            }

            // TODO: this is currently serial across all partitions and should be done in suspendingExecutor
            if (bucket.isPresent()) {
                Optional<FileStatus> bucketFile = getBucketFile(bucket.get(), fs, path);
                if (bucketFile.isPresent()) {
                    FileStatus file = bucketFile.get();
                    BlockLocation[] blockLocations = fs.getFileBlockLocations(file, 0, file.getLen());
                    boolean splittable = isSplittable(inputFormat, fs, file.getPath());

                    hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                            file.getLen(), schema, partitionKeys, splittable, session));
                    continue;
                }
            }

            // Acquire semaphore so that we only have a fixed number of outstanding partitions being processed asynchronously
            // NOTE: there must not be any calls that throw in the space between acquiring the semaphore and setting the Future
            // callback to release it. Otherwise, we will need a try-finally block around this section.
            try {
                semaphore.acquire();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                return;
            }

            ListenableFuture<Void> partitionFuture = createAsyncWalker(fs, suspendingExecutor).beginWalk(path,
                    new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file, blockLocations,
                                        0, file.getLen(), schema, partitionKeys, splittable, session));
                            } catch (IOException e) {
                                hiveSplitSource.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    semaphore.release();
                }
            });

            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitSource.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitSource.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitSource.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}

From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java

License:Apache License

private static Optional<FileStatus> getBucketFile(HiveBucket bucket, FileSystem fs, Path path) {
    FileStatus[] statuses = listStatus(fs, path);

    if (statuses.length != bucket.getBucketCount()) {
        return Optional.absent();
    }

    Map<String, FileStatus> map = new HashMap<>();
    List<String> paths = new ArrayList<>();
    for (FileStatus status : statuses) {
        if (!isFile(status)) {
            return Optional.absent();
        }
        String pathString = status.getPath().toString();
        map.put(pathString, status);
        paths.add(pathString);
    }

    // Hive sorts the paths as strings lexicographically
    Collections.sort(paths);

    String pathString = paths.get(bucket.getBucketNumber());
    return Optional.of(map.get(pathString));
}

From source file:com.facebook.presto.hive.HiveSplitSourceProvider.java

License:Apache License

private List<HiveSplit> createHiveSplits(String partitionName, FileStatus file, BlockLocation[] blockLocations,
        long start, long length, Properties schema, List<HivePartitionKey> partitionKeys, boolean splittable,
        ConnectorSession session) throws IOException {
    ImmutableList.Builder<HiveSplit> builder = ImmutableList.builder();
    if (splittable) {
        for (BlockLocation blockLocation : blockLocations) {
            // get the addresses for the block
            List<HostAddress> addresses = toHostAddress(blockLocation.getHosts());

            long maxBytes = maxSplitSize.toBytes();

            if (remainingInitialSplits > 0) {
                maxBytes = maxInitialSplitSize.toBytes();
            }

            // divide the block into uniform chunks that are smaller than the max split size
            int chunks = Math.max(1, (int) (blockLocation.getLength() / maxBytes));
            // when block does not divide evenly into chunks, make the chunk size slightly bigger than necessary
            long targetChunkSize = (long) Math.ceil(blockLocation.getLength() * 1.0 / chunks);

            long chunkOffset = 0;
            while (chunkOffset < blockLocation.getLength()) {
                // adjust the actual chunk size to account for the overrun when chunks are slightly bigger than necessary (see above)
                long chunkLength = Math.min(targetChunkSize, blockLocation.getLength() - chunkOffset);

                builder.add(new HiveSplit(connectorId, table.getDbName(), table.getTableName(), partitionName,
                        file.getPath().toString(), blockLocation.getOffset() + chunkOffset, chunkLength, schema,
                        partitionKeys, addresses, session));

                chunkOffset += chunkLength;
                remainingInitialSplits--;
            }
            checkState(chunkOffset == blockLocation.getLength(), "Error splitting blocks");
        }
    } else {
        // not splittable, use the hosts from the first block if it exists
        List<HostAddress> addresses = ImmutableList.of();
        if (blockLocations.length > 0) {
            addresses = toHostAddress(blockLocations[0].getHosts());
        }

        builder.add(new HiveSplit(connectorId, table.getDbName(), table.getTableName(), partitionName,
                file.getPath().toString(), start, length, schema, partitionKeys, addresses, session));
    }
    return builder.build();
}

From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java

License:Apache License

private List<ArrayDeque<String>> listPartitions(Path director, List<Column> partitionColumns) {
    if (partitionColumns.isEmpty()) {
        return ImmutableList.of();
    }

    try {
        String directoryPrefix = partitionColumns.get(0).getName() + '=';

        List<ArrayDeque<String>> partitionValues = new ArrayList<>();
        // scan for child directories named <partitionColumn>=<value>
        for (FileStatus fileStatus : metadataFileSystem.listStatus(director)) {
            if (!fileStatus.isDirectory()) {
                continue;
            }
            if (!fileStatus.getPath().getName().startsWith(directoryPrefix)) {
                continue;
            }

            List<ArrayDeque<String>> childPartitionValues;
            if (partitionColumns.size() == 1) {
                childPartitionValues = ImmutableList.of(new ArrayDeque<>());
            } else {
                childPartitionValues = listPartitions(fileStatus.getPath(),
                        partitionColumns.subList(1, partitionColumns.size()));
            }

            String value = fileStatus.getPath().getName().substring(directoryPrefix.length());
            for (ArrayDeque<String> childPartition : childPartitionValues) {
                childPartition.addFirst(value);
                partitionValues.add(childPartition);
            }
        }
        return partitionValues;
    } catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, "Error listing partition directories", e);
    }
}

From source file:com.facebook.presto.hive.metastore.file.FileHiveMetastore.java

License:Apache License

private List<Path> getChildSchemaDirectories(Path metadataDirectory) {
    try {
        if (!metadataFileSystem.isDirectory(metadataDirectory)) {
            return ImmutableList.of();
        }

        ImmutableList.Builder<Path> childSchemaDirectories = ImmutableList.builder();
        for (FileStatus child : metadataFileSystem.listStatus(metadataDirectory)) {
            if (!child.isDirectory()) {
                continue;
            }
            Path childPath = child.getPath();
            // skip hidden directories
            if (childPath.getName().startsWith(".")) {
                continue;
            }
            if (metadataFileSystem.isFile(new Path(childPath, PRESTO_SCHEMA_FILE_NAME))) {
                childSchemaDirectories.add(childPath);
            }
        }
        return childSchemaDirectories.build();
    } catch (IOException e) {
        throw new PrestoException(HIVE_METASTORE_ERROR, e);
    }
}

From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java

License:Apache License

private static RecursiveDeleteResult doRecursiveDeleteFiles(FileSystem fileSystem, Path directory,
        List<String> filePrefixes, boolean deleteEmptyDirectories) {
    FileStatus[] allFiles;
    try {
        allFiles = fileSystem.listStatus(directory);
    } catch (IOException e) {
        ImmutableList.Builder<String> notDeletedItems = ImmutableList.builder();
        notDeletedItems.add(directory.toString() + "/**");
        return new RecursiveDeleteResult(false, notDeletedItems.build());
    }

    boolean allDescendentsDeleted = true;
    ImmutableList.Builder<String> notDeletedEligibleItems = ImmutableList.builder();
    for (FileStatus fileStatus : allFiles) {
        if (HadoopFileStatus.isFile(fileStatus)) {
            Path filePath = fileStatus.getPath();
            String fileName = filePath.getName();
            boolean eligible = false;
            // a file is eligible for deletion only if its name starts with one of the given prefixes
            for (String filePrefix : filePrefixes) {
                if (fileName.startsWith(filePrefix)) {
                    eligible = true;
                    break;
                }
            }
            if (eligible) {
                if (!deleteIfExists(fileSystem, filePath, false)) {
                    allDescendentsDeleted = false;
                    notDeletedEligibleItems.add(filePath.toString());
                }
            } else {
                allDescendentsDeleted = false;
            }
        } else if (HadoopFileStatus.isDirectory(fileStatus)) {
            RecursiveDeleteResult subResult = doRecursiveDeleteFiles(fileSystem, fileStatus.getPath(),
                    filePrefixes, deleteEmptyDirectories);
            if (!subResult.isDirectoryNoLongerExists()) {
                allDescendentsDeleted = false;
            }
            if (!subResult.getNotDeletedEligibleItems().isEmpty()) {
                notDeletedEligibleItems.addAll(subResult.getNotDeletedEligibleItems());
            }
        } else {
            allDescendentsDeleted = false;
            notDeletedEligibleItems.add(fileStatus.getPath().toString());
        }
    }
    if (allDescendentsDeleted && deleteEmptyDirectories) {
        verify(notDeletedEligibleItems.build().isEmpty());
        if (!deleteIfExists(fileSystem, directory, false)) {
            return new RecursiveDeleteResult(false, ImmutableList.of(directory.toString() + "/"));
        }
        return new RecursiveDeleteResult(true, ImmutableList.of());
    }
    return new RecursiveDeleteResult(false, notDeletedEligibleItems.build());
}

From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java

License:Apache License

@Override
public boolean rename(Path src, Path dst) throws IOException {
    boolean srcDirectory;
    try {
        srcDirectory = directory(src);
    } catch (FileNotFoundException e) {
        return false;
    }

    try {
        if (!directory(dst)) {
            // cannot copy a file to an existing file
            return keysEqual(src, dst);
        }
        // move source under destination directory
        dst = new Path(dst, src.getName());
    } catch (FileNotFoundException e) {
        // destination does not exist
    }

    if (keysEqual(src, dst)) {
        return true;
    }

    if (srcDirectory) {
        for (FileStatus file : listStatus(src)) {
            rename(file.getPath(), new Path(dst, file.getPath().getName()));
        }
        deleteObject(keyFromPath(src) + DIRECTORY_SUFFIX);
    } else {
        s3.copyObject(getBucketName(uri), keyFromPath(src), getBucketName(uri), keyFromPath(dst));
        delete(src, true);
    }

    return true;
}

From source file:com.facebook.presto.hive.s3.PrestoS3FileSystem.java

License:Apache License

@Override
public boolean delete(Path path, boolean recursive) throws IOException {
    try {
        if (!directory(path)) {
            return deleteObject(keyFromPath(path));
        }
    } catch (FileNotFoundException e) {
        return false;
    }

    if (!recursive) {
        throw new IOException("Directory " + path + " is not empty");
    }

    // recursively delete all children, then remove the directory marker object
    for (FileStatus file : listStatus(path)) {
        delete(file.getPath(), true);
    }
    deleteObject(keyFromPath(path) + DIRECTORY_SUFFIX);

    return true;
}

From source file:com.flipkart.fdp.migration.distcp.codec.GenericHadoopCodec.java

License:Apache License

public List<FileTuple> getInputPaths(Collection<String> paths, Collection<String> excludeList)
        throws Exception {

    System.out.println("A total of " + paths.size() + " paths to scan...");

    List<FileTuple> fileList = new ArrayList<FileTuple>();
    List<String> inputPaths = new ArrayList<String>();

    // Process regular expression based paths
    for (String path : paths) {

        System.out.println("Processing path: " + path);
        FileStatus[] stats = fs.globStatus(new Path(path));
        if (stats == null || stats.length <= 0)
            continue;

        for (FileStatus fstat : stats) {
            // add files directly; directories are queued for a recursive listing below
            if (fstat.isFile()) {
                fileList.add(new FileTuple(MirrorUtils.getSimplePath(fstat.getPath()), fstat.getLen(),
                        fstat.getModificationTime()));
            } else {
                inputPaths.add(MirrorUtils.getSimplePath(fstat.getPath()));
            }
        }
    }

    if (inputPaths.size() > 0) {

        for (String path : inputPaths) {

            List<FileTuple> fstat = getFileStatusRecursive(new Path(path), excludeList);
            fileList.addAll(fstat);
        }
    }
    return fileList;
}