Example usage for org.apache.hadoop.fs.LocatedFileStatus.getPath()

Introduction

On this page you can find example usages of the getPath() method of org.apache.hadoop.fs.LocatedFileStatus.

Prototype

public Path getPath()

Source Link

Usage

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

/**
 * Collects the recording info of every sub-directory found directly under the
 * recordings directory of the given application.
 *
 * <p>Each sub-directory name is passed as the operator id to
 * {@code getRecordingInfo(appId, opId)}. An {@link IOException} while reading
 * the file system is logged and whatever was collected so far is returned.
 *
 * @param appId the application id used to resolve the recordings directory
 * @return the collected recording infos; empty when the directory is unknown,
 *         is not a directory, or holds no usable sub-directories
 */
public List<RecordingInfo> getRecordingInfo(String appId) {
    List<RecordingInfo> recordings = new ArrayList<RecordingInfo>();
    String recordingsDir = getRecordingsDirectory(appId);
    if (recordingsDir != null) {
        Path root = new Path(recordingsDir);
        try {
            if (stramAgent.getFileSystem().getFileStatus(root).isDirectory()) {
                RemoteIterator<LocatedFileStatus> children = stramAgent.getFileSystem().listLocatedStatus(root);
                while (children.hasNext()) {
                    LocatedFileStatus child = children.next();
                    if (!child.isDirectory()) {
                        continue;
                    }
                    try {
                        recordings.addAll(getRecordingInfo(appId, child.getPath().getName()));
                    } catch (NumberFormatException ignored) {
                        // Skip this entry; presumably its name is not a valid operator id.
                    }
                }
            }
        } catch (IOException ex) {
            LOG.warn("Got exception when getting recording info", ex);
        }
    }
    return recordings;
}

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

/**
 * Collects the recording info of every sub-directory found directly under the
 * recordings directory of the given application and operator.
 *
 * <p>Each sub-directory name is passed as the id to the four-argument
 * {@code getRecordingInfoHelper}; {@code null} results are dropped. An
 * {@link IOException} while reading the file system is logged and whatever was
 * collected so far is returned.
 *
 * @param appId      the application id
 * @param opId       the operator id
 * @param containers forwarded unchanged to the per-recording helper
 * @return the collected recording infos, possibly empty, never {@code null}
 */
private List<RecordingInfo> getRecordingInfoHelper(String appId, String opId, Set<String> containers) {
    List<RecordingInfo> recordings = new ArrayList<RecordingInfo>();
    String recordingsDir = getRecordingsDirectory(appId, opId);
    if (recordingsDir != null) {
        Path root = new Path(recordingsDir);
        try {
            if (stramAgent.getFileSystem().getFileStatus(root).isDirectory()) {
                RemoteIterator<LocatedFileStatus> children = stramAgent.getFileSystem().listLocatedStatus(root);
                while (children.hasNext()) {
                    LocatedFileStatus child = children.next();
                    if (!child.isDirectory()) {
                        continue;
                    }
                    try {
                        RecordingInfo info = getRecordingInfoHelper(appId, opId, child.getPath().getName(),
                                containers);
                        if (info != null) {
                            recordings.add(info);
                        }
                    } catch (NumberFormatException ignored) {
                        // Skip this entry; presumably its name is not a valid recording id.
                    }
                }
            }
        } catch (IOException ex) {
            LOG.warn("Got exception when getting recording info", ex);
        }
    }
    return recordings;
}

From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License:Apache License

/**
 * Performs one unit of split-loading work.
 *
 * <p>If a file iterator is pending, its files are turned into splits and
 * queued on {@code hiveSplitSource}; otherwise the next pending partition, if
 * any, is loaded via {@code loadPartition}.
 *
 * @return {@code COMPLETED_FUTURE}, or the not-yet-done future returned by
 *         {@code hiveSplitSource.addToQueue}, in which case the current
 *         iterator has been put back at the head of {@code fileIterators}
 * @throws IOException declared for the file-system work done while loading
 */
private CompletableFuture<?> loadSplits() throws IOException {
    HiveFileIterator files = fileIterators.poll();
    if (files == null) {
        // No directory listing in progress: fall back to the next partition.
        HivePartitionMetadata partition = partitions.poll();
        if (partition == null) {
            return COMPLETED_FUTURE;
        }
        loadPartition(partition);
        return COMPLETED_FUTURE;
    }

    while (files.hasNext() && !stopped) {
        LocatedFileStatus file = files.next();
        if (isDirectory(file)) {
            if (recursiveDirWalkerEnabled) {
                // Queue the sub-directory as its own iterator, reusing every
                // setting of the iterator that discovered it.
                HiveFileIterator fileIterator = new HiveFileIterator(file.getPath(), files.getFileSystem(),
                        files.getDirectoryLister(), files.getNamenodeStats(), files.getPartitionName(),
                        files.getInputFormat(), files.getSchema(), files.getPartitionKeys(),
                        files.getEffectivePredicate(), files.getColumnCoercions());
                fileIterators.add(fileIterator);
            }
        } else {
            boolean splittable = isSplittable(files.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            // The splits cover the whole file: offset 0, length file.getLen().
            CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(files.getPartitionName(),
                    file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), files.getSchema(),
                    files.getPartitionKeys(), splittable, session, OptionalInt.empty(),
                    files.getEffectivePredicate(), files.getColumnCoercions()));
            if (!future.isDone()) {
                // Put the iterator back at the head so its remaining files are
                // processed first on the next call, and hand the pending future
                // to the caller (presumably back-pressure from the split queue
                // - confirm against hiveSplitSource).
                fileIterators.addFirst(files);
                return future;
            }
        }
    }

    // No need to put the iterator back, since it's either empty or we've stopped
    return COMPLETED_FUTURE;
}

From source file:com.facebook.presto.hive.util.AsyncWalker.java

License:Apache License

/**
 * Lists {@code path} and reports every non-hidden file to {@code callback},
 * handing non-hidden sub-directories to {@code recursiveWalk} when
 * {@code recursive} is set.
 *
 * <p>Failures are reported through {@code future} rather than thrown. On exit
 * the task count is decremented; the task that brings it to zero calls
 * {@code future.set(null)}.
 *
 * @param path      directory to list
 * @param callback  receives each visible file together with its block locations
 * @param taskCount number of outstanding walk tasks, decremented on exit
 * @param future    completion/failure signal shared by all walk tasks
 */
private void doWalk(Path path, FileStatusCallback callback, AtomicLong taskCount, SettableFuture<Void> future) {
    try (SetThreadName ignored = new SetThreadName("HiveHdfsWalker")) {
        RemoteIterator<LocatedFileStatus> iterator = getLocatedFileStatusRemoteIterator(path);

        while (iterator.hasNext()) {
            LocatedFileStatus status = getLocatedFileStatus(iterator);

            // ignore hidden files. Hive ignores files starting with _ and . as well.
            String fileName = status.getPath().getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }
            if (!isDirectory(status)) {
                callback.process(status, status.getBlockLocations());
            } else if (recursive) {
                recursiveWalk(status.getPath(), callback, taskCount, future);
            }
            // Stop listing as soon as the shared future has been completed or failed.
            if (future.isDone()) {
                return;
            }
        }
    } catch (FileNotFoundException e) {
        // Report a clearer message but keep the original exception as the cause
        // so the underlying file-system failure stays diagnosable.
        FileNotFoundException missing = new FileNotFoundException("Partition location does not exist: " + path);
        missing.initCause(e);
        future.setException(missing);
    } catch (Throwable t) {
        future.setException(t);
    } finally {
        if (taskCount.decrementAndGet() == 0) {
            future.set(null);
        }
    }
}

From source file:com.facebook.presto.hive.util.HiveFileIterator.java

License:Apache License

/**
 * Returns the next non-hidden entry of the directory listing, opening the
 * listing on first use.
 *
 * @return the next visible file status, or the {@code endOfData()} marker once
 *         the listing is exhausted
 * @throws PrestoException with {@code HIVE_FILE_NOT_FOUND} when the location
 *         does not exist, or {@code HIVE_FILESYSTEM_ERROR} for any other I/O
 *         failure; the underlying exception is attached as the cause in both cases
 */
@Override
protected LocatedFileStatus computeNext() {
    try {
        // The remote listing is created lazily on the first call.
        if (remoteIterator == null) {
            remoteIterator = getLocatedFileStatusRemoteIterator(path);
        }

        while (remoteIterator.hasNext()) {
            LocatedFileStatus status = getLocatedFileStatus(remoteIterator);

            // ignore hidden files. Hive ignores files starting with _ and . as well.
            String fileName = status.getPath().getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }
            return status;
        }
        return endOfData();
    } catch (FileNotFoundException e) {
        // Keep the cause, consistent with the IOException branch below.
        throw new PrestoException(HIVE_FILE_NOT_FOUND, "Partition location does not exist: " + path, e);
    } catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed to list directory: " + path, e);
    }
}

From source file:com.facebook.presto.hive.util.InternalHiveSplitFactory.java

License:Apache License

/**
 * Builds a split that spans the whole file described by {@code status}.
 *
 * @param status       the file, with its block locations
 * @param bucketNumber forwarded unchanged to the split
 * @param splittable   caller's hint; the file is only treated as splittable
 *                     when this is {@code true} and {@code isSplittable} agrees
 * @return the result of the range-based {@code createInternalHiveSplit} overload
 */
private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber,
        boolean splittable) {
    // Only consult the input format when the caller allows splitting at all.
    boolean canSplit = false;
    if (splittable) {
        canSplit = isSplittable(inputFormat, fileSystem, status.getPath());
    }
    long fileLength = status.getLen();
    return createInternalHiveSplit(status.getPath(), status.getBlockLocations(), 0, fileLength, fileLength,
            bucketNumber, canSplit);
}

From source file:com.github.sakserv.storm.KafkaHdfsTopologyTest.java

License:Apache License

/**
 * Validate that the files in HDFS contain the expected data from Kafka.
 *
 * <p>Recursively lists the configured bolt output location, logs and counts
 * every line of every file, and asserts that the total equals the configured
 * Kafka test message count.
 *
 * @throws Exception if listing or reading the files fails
 */
private void validateHdfsResults() throws Exception {
    LOG.info("HDFS: VALIDATING");

    // Get the filesystem handle and a list of files written by the test
    FileSystem hdfsFsHandle = hdfsLocalCluster.getHdfsFileSystemHandle();
    RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles(
            new Path(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_OUTPUT_LOCATION_KEY)), true);

    // Loop through the files and count up the lines
    int count = 0;
    while (listFiles.hasNext()) {
        LocatedFileStatus file = listFiles.next();

        LOG.info("HDFS READ: Found File: " + file);

        // Close each reader (and the HDFS stream it wraps) before moving on,
        // so a large output directory does not leak one open stream per file.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(hdfsFsHandle.open(file.getPath())))) {
            String line;
            while ((line = br.readLine()) != null) {
                LOG.info("HDFS READ: Found Line: " + line);
                count++;
            }
        }
    }
    hdfsFsHandle.close();

    // Validate the number of lines matches the number of kafka messages
    assertEquals(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_TEST_MESSAGE_COUNT_KEY)), count);
}

From source file:com.github.sakserv.storm.KafkaHiveHdfsTopologyTest.java

License:Apache License

/**
 * Prints every line of every file found recursively under
 * {@code /tmp/kafka_data} in HDFS, then closes the file system handle.
 *
 * @throws IOException if listing or reading the files fails
 */
public void validateHdfsResults() throws IOException {
    System.out.println("HDFS: VALIDATING");
    FileSystem hdfsFsHandle = hdfsCluster.getHdfsFileSystemHandle();
    RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles(new Path("/tmp/kafka_data"), true);
    while (listFiles.hasNext()) {
        LocatedFileStatus file = listFiles.next();

        System.out.println("HDFS READ: Found File: " + file);

        // Close each reader (and the HDFS stream it wraps) before moving on,
        // so the loop does not leak one open stream per file.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(hdfsFsHandle.open(file.getPath())))) {
            String line;
            while ((line = br.readLine()) != null) {
                System.out.println("HDFS READ: Found Line: " + line);
            }
        }
    }
    hdfsFsHandle.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

/**
 * Lists the direct children of {@code path} (non-recursively) and prints the
 * path of each entry.
 *
 * @throws Exception if the file system cannot be obtained or the listing fails
 */
void browseDir() throws Exception {
    System.out.println("reading enumarate dir, path " + path);
    Configuration conf = new Configuration();

    // try-with-resources closes the file system even when the listing throws.
    try (FileSystem fs = FileSystem.get(conf)) {
        //benchmark
        System.out.println("starting benchmark...");
        RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
        while (iter.hasNext()) {
            LocatedFileStatus status = iter.next();
            System.out.println(status.getPath());
        }
    }
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/**
 * Expands each input path (glob patterns allowed) into file statuses.
 *
 * <p>A glob match that is a plain file is added as-is. A match that is a
 * directory is listed; each child accepted by {@code inputFilter} is either
 * added directly or, when {@code recursive} is set and the child is a
 * directory, expanded through {@code addInputPathRecursively}.
 *
 * @param job         supplies the configuration used to resolve each file system
 * @param dirs        input paths or glob patterns
 * @param inputFilter filter applied to the glob and to listed children
 * @param recursive   whether accepted sub-directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} bundling one error per
 *         input path that does not exist or matches no files, or any I/O error
 *         raised by the file system
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();

    for (Path p : dirs) {
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);

        // Problems are collected rather than thrown so every bad input is reported at once.
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
            continue;
        }
        if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
            continue;
        }

        for (FileStatus match : matches) {
            if (!match.isDirectory()) {
                result.add(match);
                continue;
            }
            RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(match.getPath());
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!inputFilter.accept(child.getPath())) {
                    continue;
                }
                if (recursive && child.isDirectory()) {
                    addInputPathRecursively(result, fs, child.getPath(), inputFilter);
                } else {
                    result.add(child);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}