Example usage for org.apache.hadoop.fs FileStatus getPath

List of usage examples for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usage of org.apache.hadoop.fs.FileStatus.getPath().

Prototype

public Path getPath() 
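
FileStatus.getPath() returns the fully qualified Path of the file or directory that the FileStatus entry describes; the returned Path is typically passed back into further FileSystem calls such as open() or listStatus(). Below is a minimal, self-contained sketch, not taken from the source files listed under Usage; the class name ListPaths and the directory /tmp/data are placeholders. It lists a directory and prints each entry's path and size.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListPaths {
    public static void main(String[] args) throws IOException {
        // placeholder directory for this sketch
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(new Configuration());
        for (FileStatus status : fs.listStatus(dir)) {
            // getPath() yields the entry's fully qualified Path
            System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
        }
    }
}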

Usage

From source file: com.elex.dmp.lda.CVB0Driver.java

License: Apache License

private static int getNumTerms(Configuration conf, Path dictionaryPath) throws IOException {
    FileSystem fs = dictionaryPath.getFileSystem(conf);
    Text key = new Text();
    IntWritable value = new IntWritable();
    int maxTermId = -1;
    for (FileStatus stat : fs.globStatus(dictionaryPath)) {
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, stat.getPath(), conf);
        while (reader.next(key, value)) {
            maxTermId = Math.max(maxTermId, value.get());
        }
    }
    return maxTermId + 1;
}

From source file: com.elex.dmp.lda.InMemoryCollapsedVariationalBayes0.java

License: Apache License

private static Matrix loadVectors(String vectorPathString, Configuration conf) throws IOException {
    Path vectorPath = new Path(vectorPathString);
    FileSystem fs = vectorPath.getFileSystem(conf);
    List<Path> subPaths = Lists.newArrayList();
    if (fs.isFile(vectorPath)) {
        subPaths.add(vectorPath);
    } else {
        for (FileStatus fileStatus : fs.listStatus(vectorPath, PathFilters.logsCRCFilter())) {
            subPaths.add(fileStatus.getPath());
        }
    }
    List<Vector> vectorList = Lists.newArrayList();
    for (Path subPath : subPaths) {
        for (Pair<IntWritable, VectorWritable> record : new SequenceFileIterable<IntWritable, VectorWritable>(
                subPath, true, conf)) {
            vectorList.add(record.getSecond().get());
        }
    }
    int numRows = vectorList.size();
    int numCols = vectorList.get(0).size();
    return new SparseRowMatrix(numRows, numCols, vectorList.toArray(new Vector[vectorList.size()]), true,
            vectorList.get(0).isSequentialAccess());
}

From source file: com.example.hadoop.hdfs.test.HdfsClient.java

License: Open Source License

public static void ls(String folder) throws FileNotFoundException, IOException {
    Path path = new Path(folder);
    FileStatus[] list = fs.listStatus(path);
    System.out.println("ls: " + folder);
    System.out.println("==========================================================");
    for (FileStatus f : list) {
        System.out.printf("name: %s, folder: %s, size: %d\n", f.getPath(), f.isDir(), f.getLen());
    }
    System.out.println("==========================================================");
}

From source file: com.facebook.hive.orc.OrcInputFormat.java

License: Open Source License

@Override
public boolean validateInput(FileSystem fs, HiveConf conf, ArrayList<FileStatus> files) throws IOException {
    if (files.size() <= 0) {
        return false;
    }
    for (FileStatus file : files) {
        try {
            OrcFile.createReader(fs, file.getPath(), conf);
        } catch (IOException e) {
            return false;
        }
    }
    return true;
}

From source file: com.facebook.hiveio.common.FileSystems.java

License: Apache License

/**
 * Move a file or directory from source to destination, recursively copying
 * subdirectories.
 *
 * @param fs FileSystem
 * @param file path to copy (file or directory)
 * @param src path to source directory
 * @param dest path to destination directory
 * @throws IOException I/O problems
 */
public static void move(FileSystem fs, Path file, Path src, Path dest) throws IOException {
    Path destFilePath = pathInDestination(file, src, dest);
    if (fs.isFile(file)) {
        if (fs.exists(destFilePath)) {
            if (!fs.delete(destFilePath, true)) {
                throw new IllegalArgumentException("Could not remove existing file " + destFilePath);
            }
        }
        if (!fs.rename(file, destFilePath)) {
            throw new IllegalArgumentException("Could not move " + file + " to " + destFilePath);
        }
    } else if (fs.getFileStatus(file).isDir()) {
        FileStatus[] statuses = fs.listStatus(file);
        fs.mkdirs(destFilePath);
        if (statuses != null) {
            for (FileStatus status : statuses) {
                move(fs, status.getPath(), src, dest);
            }
        }
    }
}

From source file: com.facebook.presto.hive.AbstractTestHiveClient.java

License: Apache License

protected Set<String> listAllDataFiles(Path path) throws IOException {
    Set<String> result = new HashSet<>();
    FileSystem fileSystem = hdfsEnvironment.getFileSystem("user", path);
    if (fileSystem.exists(path)) {
        for (FileStatus fileStatus : fileSystem.listStatus(path)) {
            if (HadoopFileStatus.isFile(fileStatus)) {
                result.add(fileStatus.getPath().toString());
            } else if (HadoopFileStatus.isDirectory(fileStatus)) {
                result.addAll(listAllDataFiles(fileStatus.getPath()));
            }
        }
    }
    return result;
}

From source file: com.facebook.presto.hive.AbstractTestHiveClient.java

License: Apache License

private List<String> listDirectory(String user, Path path) throws IOException {
    FileSystem fileSystem = hdfsEnvironment.getFileSystem(user, path);
    ImmutableList.Builder<String> result = ImmutableList.builder();
    for (FileStatus fileStatus : fileSystem.listStatus(path)) {
        result.add(fileStatus.getPath().getName());
    }
    return result.build();
}

From source file: com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License: Apache License

private void loadPartition(HivePartitionMetadata partition) throws IOException {
    String partitionName = partition.getHivePartition().getPartitionId();
    Properties schema = getPartitionSchema(table, partition.getPartition());
    List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition.getPartition());
    TupleDomain<HiveColumnHandle> effectivePredicate = partition.getHivePartition().getEffectivePredicate();

    Path path = new Path(getPartitionLocation(table, partition.getPartition()));
    Configuration configuration = hdfsEnvironment.getConfiguration(path);
    InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
    FileSystem fs = hdfsEnvironment.getFileSystem(session.getUser(), path);

    if (inputFormat instanceof SymlinkTextInputFormat) {
        if (bucketHandle.isPresent()) {
            throw new PrestoException(StandardErrorCode.NOT_SUPPORTED,
                    "Bucketed table in SymlinkTextInputFormat is not yet supported");
        }

        // TODO: This should use an iterator like the HiveFileIterator
        for (Path targetPath : getTargetPathsFromSymlink(fs, path)) {
            // The input should be in TextInputFormat.
            TextInputFormat targetInputFormat = new TextInputFormat();
            // get the configuration for the target path -- it may be a different hdfs instance
            Configuration targetConfiguration = hdfsEnvironment.getConfiguration(targetPath);
            JobConf targetJob = new JobConf(targetConfiguration);
            targetJob.setInputFormat(TextInputFormat.class);
            targetInputFormat.configure(targetJob);
            FileInputFormat.setInputPaths(targetJob, targetPath);
            InputSplit[] targetSplits = targetInputFormat.getSplits(targetJob, 0);

            for (InputSplit inputSplit : targetSplits) {
                FileSplit split = (FileSplit) inputSplit;
                FileSystem targetFilesystem = hdfsEnvironment.getFileSystem(session.getUser(), split.getPath());
                FileStatus file = targetFilesystem.getFileStatus(split.getPath());
                hiveSplitSource.addToQueue(createHiveSplits(partitionName, file.getPath().toString(),
                        targetFilesystem.getFileBlockLocations(file, split.getStart(), split.getLength()),
                        split.getStart(), split.getLength(), schema, partitionKeys, false, session,
                        OptionalInt.empty(), effectivePredicate, partition.getColumnCoercions()));
                if (stopped) {
                    return;
                }
            }
        }
        return;
    }

    // If only one bucket could match: load that one file
    HiveFileIterator iterator = new HiveFileIterator(path, fs, directoryLister, namenodeStats, partitionName,
            inputFormat, schema, partitionKeys, effectivePredicate, partition.getColumnCoercions());
    if (!buckets.isEmpty()) {
        int bucketCount = buckets.get(0).getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (HiveBucket bucket : buckets) {
            int bucketNumber = bucket.getBucketNumber();
            LocatedFileStatus file = list.get(bucketNumber);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketNumber),
                    effectivePredicate, partition.getColumnCoercions()));
        }

        return;
    }

    // If table is bucketed: list the directory, sort, tag with bucket id
    if (bucketHandle.isPresent()) {
        // HiveFileIterator skips hidden files automatically.
        int bucketCount = bucketHandle.get().getBucketCount();
        List<LocatedFileStatus> list = listAndSortBucketFiles(iterator, bucketCount);

        for (int bucketIndex = 0; bucketIndex < bucketCount; bucketIndex++) {
            LocatedFileStatus file = list.get(bucketIndex);
            boolean splittable = isSplittable(iterator.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            hiveSplitSource.addToQueue(createHiveSplits(iterator.getPartitionName(), file.getPath().toString(),
                    file.getBlockLocations(), 0, file.getLen(), iterator.getSchema(),
                    iterator.getPartitionKeys(), splittable, session, OptionalInt.of(bucketIndex),
                    iterator.getEffectivePredicate(), partition.getColumnCoercions()));
        }

        return;
    }

    fileIterators.addLast(iterator);
}

From source file: com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License: Apache License

private static List<Path> getTargetPathsFromSymlink(FileSystem fileSystem, Path symlinkDir) {
    try {
        FileStatus[] symlinks = fileSystem.listStatus(symlinkDir, HIDDEN_FILES_PATH_FILTER);
        List<Path> targets = new ArrayList<>();

        for (FileStatus symlink : symlinks) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fileSystem.open(symlink.getPath()), StandardCharsets.UTF_8))) {
                CharStreams.readLines(reader).stream().map(Path::new).forEach(targets::add);
            }
        }
        return targets;
    } catch (IOException e) {
        throw new PrestoException(HIVE_BAD_DATA, "Error parsing symlinks from: " + symlinkDir, e);
    }
}

From source file: com.facebook.presto.hive.HiveSplitIterable.java

License: Apache License

private void loadPartitionSplits(final HiveSplitQueue hiveSplitQueue, SuspendingExecutor suspendingExecutor)
        throws InterruptedException {
    final Semaphore semaphore = new Semaphore(maxPartitionBatchSize);
    try (ThreadContextClassLoader ignored = new ThreadContextClassLoader(classLoader)) {
        ImmutableList.Builder<ListenableFuture<Void>> futureBuilder = ImmutableList.builder();

        Iterator<String> nameIterator = partitionNames.iterator();
        for (Partition partition : partitions) {
            checkState(nameIterator.hasNext(), "different number of partitions and partition names!");
            semaphore.acquire();
            final String partitionName = nameIterator.next();
            final Properties schema = getPartitionSchema(table, partition);
            final List<HivePartitionKey> partitionKeys = getPartitionKeys(table, partition);

            Path path = new Path(getPartitionLocation(table, partition));
            final Configuration configuration = hdfsEnvironment.getConfiguration(path);
            final InputFormat<?, ?> inputFormat = getInputFormat(configuration, schema, false);
            Path partitionPath = hdfsEnvironment.getFileSystemWrapper().wrap(path);

            FileSystem fs = partitionPath.getFileSystem(configuration);
            final LastSplitMarkingQueue markerQueue = new LastSplitMarkingQueue(hiveSplitQueue);

            if (inputFormat instanceof SymlinkTextInputFormat) {
                JobConf jobConf = new JobConf(configuration);
                FileInputFormat.setInputPaths(jobConf, partitionPath);
                InputSplit[] splits = inputFormat.getSplits(jobConf, 0);
                for (InputSplit rawSplit : splits) {
                    FileSplit split = ((SymlinkTextInputFormat.SymlinkTextInputSplit) rawSplit)
                            .getTargetSplit();

                    // get the filesystem for the target path -- it may be a different hdfs instance
                    FileSystem targetFilesystem = split.getPath().getFileSystem(configuration);
                    FileStatus fileStatus = targetFilesystem.getFileStatus(split.getPath());
                    markerQueue.addToQueue(createHiveSplits(partitionName, fileStatus,
                            targetFilesystem.getFileBlockLocations(fileStatus, split.getStart(),
                                    split.getLength()),
                            split.getStart(), split.getLength(), schema, partitionKeys, false));
                }
                markerQueue.finish();
                continue;
            }

            ListenableFuture<Void> partitionFuture = new AsyncRecursiveWalker(fs, suspendingExecutor)
                    .beginWalk(partitionPath, new FileStatusCallback() {
                        @Override
                        public void process(FileStatus file, BlockLocation[] blockLocations) {
                            if (bucket.isPresent()
                                    && !fileMatchesBucket(file.getPath().getName(), bucket.get())) {
                                return;
                            }

                            try {
                                boolean splittable = isSplittable(inputFormat,
                                        file.getPath().getFileSystem(configuration), file.getPath());

                                markerQueue.addToQueue(createHiveSplits(partitionName, file, blockLocations, 0,
                                        file.getLen(), schema, partitionKeys, splittable));
                            } catch (IOException e) {
                                hiveSplitQueue.fail(e);
                            }
                        }
                    });

            // release the semaphore when the partition finishes
            Futures.addCallback(partitionFuture, new FutureCallback<Void>() {
                @Override
                public void onSuccess(Void result) {
                    markerQueue.finish();
                    semaphore.release();
                }

                @Override
                public void onFailure(Throwable t) {
                    markerQueue.finish();
                    semaphore.release();
                }
            });
            futureBuilder.add(partitionFuture);
        }

        // when all partitions finish, mark the queue as finished
        Futures.addCallback(Futures.allAsList(futureBuilder.build()), new FutureCallback<List<Void>>() {
            @Override
            public void onSuccess(List<Void> result) {
                hiveSplitQueue.finished();
            }

            @Override
            public void onFailure(Throwable t) {
                hiveSplitQueue.fail(t);
            }
        });
    } catch (Throwable e) {
        hiveSplitQueue.fail(e);
        Throwables.propagateIfInstanceOf(e, Error.class);
    }
}