Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

On this page you can find usage examples for org.apache.hadoop.fs LocatedFileStatus getPath.

Prototype

public Path getPath() 
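
This method is declared on FileStatus, which LocatedFileStatus extends; it returns the path of the listed file or directory.

Quick Example

A minimal, self-contained sketch of the call. The "/tmp/data" directory is a placeholder chosen for illustration, not taken from the examples below:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class GetPathExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // listFiles yields LocatedFileStatus entries; getPath() returns the
        // fully qualified Path of each listed file.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/data"), false);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            System.out.println(status.getPath() + " -> " + status.getPath().getName());
        }
        fs.close();
    }
}

getPath().getName() is a common follow-up call, used throughout the examples below to extract the last path component (the file or directory name).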

Usage

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

public List<RecordingInfo> getRecordingInfo(String appId) {
    List<RecordingInfo> result = new ArrayList<RecordingInfo>();
    String dir = getRecordingsDirectory(appId);
    if (dir == null) {
        return result;
    }
    Path path = new Path(dir);
    try {
        FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path);

        if (!fileStatus.isDirectory()) {
            return result;
        }
        RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path);
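        // Each child directory under the recordings directory is named after an operator id.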
        while (ri.hasNext()) {
            LocatedFileStatus lfs = ri.next();
            if (lfs.isDirectory()) {
                try {
                    String opId = lfs.getPath().getName();
                    result.addAll(getRecordingInfo(appId, opId));
                } catch (NumberFormatException ex) {
                    // ignore
                }
            }
        }
    } catch (IOException ex) {
        LOG.warn("Got exception when getting recording info", ex);
        return result;
    }

    return result;
}

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

private List<RecordingInfo> getRecordingInfoHelper(String appId, String opId, Set<String> containers) {
    List<RecordingInfo> result = new ArrayList<RecordingInfo>();
    String dir = getRecordingsDirectory(appId, opId);
    if (dir == null) {
        return result;
    }
    Path path = new Path(dir);
    try {
        FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path);

        if (!fileStatus.isDirectory()) {
            return result;
        }
        RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path);
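        // Each child directory name is a recording id; resolve it via getRecordingInfoHelper.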
        while (ri.hasNext()) {
            LocatedFileStatus lfs = ri.next();
            if (lfs.isDirectory()) {
                try {
                    String id = lfs.getPath().getName();
                    RecordingInfo recordingInfo = getRecordingInfoHelper(appId, opId, id, containers);
                    if (recordingInfo != null) {
                        result.add(recordingInfo);
                    }
                } catch (NumberFormatException ex) {
                    // ignore
                }
            }
        }
    } catch (IOException ex) {
        LOG.warn("Got exception when getting recording info", ex);
        return result;
    }

    return result;
}

From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java

License:Apache License

private CompletableFuture<?> loadSplits() throws IOException {
    HiveFileIterator files = fileIterators.poll();
    if (files == null) {
        HivePartitionMetadata partition = partitions.poll();
        if (partition == null) {
            return COMPLETED_FUTURE;
        }
        loadPartition(partition);
        return COMPLETED_FUTURE;
    }

    while (files.hasNext() && !stopped) {
        LocatedFileStatus file = files.next();
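        // Directories are queued as fresh iterators (when recursion is enabled); regular files become Hive splits.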
        if (isDirectory(file)) {
            if (recursiveDirWalkerEnabled) {
                HiveFileIterator fileIterator = new HiveFileIterator(file.getPath(), files.getFileSystem(),
                        files.getDirectoryLister(), files.getNamenodeStats(), files.getPartitionName(),
                        files.getInputFormat(), files.getSchema(), files.getPartitionKeys(),
                        files.getEffectivePredicate(), files.getColumnCoercions());
                fileIterators.add(fileIterator);
            }
        } else {
            boolean splittable = isSplittable(files.getInputFormat(),
                    hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath());

            CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(files.getPartitionName(),
                    file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), files.getSchema(),
                    files.getPartitionKeys(), splittable, session, OptionalInt.empty(),
                    files.getEffectivePredicate(), files.getColumnCoercions()));
            if (!future.isDone()) {
                fileIterators.addFirst(files);
                return future;
            }
        }
    }

    // No need to put the iterator back, since it's either empty or we've stopped
    return COMPLETED_FUTURE;
}

From source file:com.facebook.presto.hive.util.AsyncWalker.java

License:Apache License

private void doWalk(Path path, FileStatusCallback callback, AtomicLong taskCount, SettableFuture<Void> future) {
    try (SetThreadName ignored = new SetThreadName("HiveHdfsWalker")) {
        RemoteIterator<LocatedFileStatus> iterator = getLocatedFileStatusRemoteIterator(path);

        while (iterator.hasNext()) {
            LocatedFileStatus status = getLocatedFileStatus(iterator);

            // ignore hidden files. Hive ignores files starting with _ and . as well.
            String fileName = status.getPath().getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }
            if (!isDirectory(status)) {
                callback.process(status, status.getBlockLocations());
            } else if (recursive) {
                recursiveWalk(status.getPath(), callback, taskCount, future);
            }
            if (future.isDone()) {
                return;
            }
        }
    } catch (FileNotFoundException e) {
        future.setException(new FileNotFoundException("Partition location does not exist: " + path));
    } catch (Throwable t) {
        future.setException(t);
    } finally {
        if (taskCount.decrementAndGet() == 0) {
            future.set(null);
        }
    }
}

From source file:com.facebook.presto.hive.util.HiveFileIterator.java

License:Apache License

@Override
protected LocatedFileStatus computeNext() {
    try {
        if (remoteIterator == null) {
            remoteIterator = getLocatedFileStatusRemoteIterator(path);
        }

        while (remoteIterator.hasNext()) {
            LocatedFileStatus status = getLocatedFileStatus(remoteIterator);

            // ignore hidden files. Hive ignores files starting with _ and . as well.
            String fileName = status.getPath().getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }
            return status;
        }
        return endOfData();
    } catch (FileNotFoundException e) {
        throw new PrestoException(HIVE_FILE_NOT_FOUND, "Partition location does not exist: " + path);
    } catch (IOException e) {
        throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed to list directory: " + path, e);
    }
}

From source file:com.facebook.presto.hive.util.InternalHiveSplitFactory.java

License:Apache License

private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber,
        boolean splittable) {
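    // status.getPath() identifies the file for both the splittable check and the split itself.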
    splittable = splittable && isSplittable(inputFormat, fileSystem, status.getPath());
    return createInternalHiveSplit(status.getPath(), status.getBlockLocations(), 0, status.getLen(),
            status.getLen(), bucketNumber, splittable);
}

From source file:com.github.sakserv.storm.KafkaHdfsTopologyTest.java

License:Apache License

/**
 * Validate that the files in HDFS contain the expected data from Kafka
 * @throws Exception
 */
private void validateHdfsResults() throws Exception {
    LOG.info("HDFS: VALIDATING");

    // Get the filesystem handle and a list of files written by the test
    FileSystem hdfsFsHandle = hdfsLocalCluster.getHdfsFileSystemHandle();
    RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles(
            new Path(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_OUTPUT_LOCATION_KEY)), true);

    // Loop through the files and count up the lines
    int count = 0;
    while (listFiles.hasNext()) {
        LocatedFileStatus file = listFiles.next();

        LOG.info("HDFS READ: Found File: " + file);

        BufferedReader br = new BufferedReader(new InputStreamReader(hdfsFsHandle.open(file.getPath())));
        String line = br.readLine();
        while (line != null) {
            LOG.info("HDFS READ: Found Line: " + line);
            line = br.readLine();
            count++;
        }
    }
    hdfsFsHandle.close();

    // Validate the number of lines matches the number of kafka messages
    assertEquals(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_TEST_MESSAGE_COUNT_KEY)), count);
}

From source file:com.github.sakserv.storm.KafkaHiveHdfsTopologyTest.java

License:Apache License

public void validateHdfsResults() throws IOException {
    System.out.println("HDFS: VALIDATING");
    FileSystem hdfsFsHandle = hdfsCluster.getHdfsFileSystemHandle();
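    // Recursively list every file under /tmp/kafka_data and print its contents.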
    RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles(new Path("/tmp/kafka_data"), true);
    while (listFiles.hasNext()) {
        LocatedFileStatus file = listFiles.next();

        System.out.println("HDFS READ: Found File: " + file);

        BufferedReader br = new BufferedReader(new InputStreamReader(hdfsFsHandle.open(file.getPath())));
        String line = br.readLine();
        while (line != null) {
            System.out.println("HDFS READ: Found Line: " + line);
            line = br.readLine();
        }
    }
    hdfsFsHandle.close();
}

From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java

License:Apache License

void browseDir() throws Exception {
    System.out.println("reading enumarate dir, path " + path);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    // benchmark
    System.out.println("starting benchmark...");
    RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false);
    while (iter.hasNext()) {
        LocatedFileStatus status = iter.next();
        System.out.println(status.getPath());
    }
    fs.close();
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
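    // Expand each input pattern; missing or empty matches are recorded as errors and thrown together at the end.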
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}