List of usage examples for the getPath() method of org.apache.hadoop.fs.LocatedFileStatus
public Path getPath()
From source file:com.datatorrent.stram.client.RecordingsAgent.java
License:Apache License
public List<RecordingInfo> getRecordingInfo(String appId) { List<RecordingInfo> result = new ArrayList<RecordingInfo>(); String dir = getRecordingsDirectory(appId); if (dir == null) { return result; }/*w w w . ja va 2 s. co m*/ Path path = new Path(dir); try { FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path); if (!fileStatus.isDirectory()) { return result; } RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path); while (ri.hasNext()) { LocatedFileStatus lfs = ri.next(); if (lfs.isDirectory()) { try { String opId = lfs.getPath().getName(); result.addAll(getRecordingInfo(appId, opId)); } catch (NumberFormatException ex) { // ignore } } } } catch (IOException ex) { LOG.warn("Got exception when getting recording info", ex); return result; } return result; }
From source file:com.datatorrent.stram.client.RecordingsAgent.java
License:Apache License
private List<RecordingInfo> getRecordingInfoHelper(String appId, String opId, Set<String> containers) { List<RecordingInfo> result = new ArrayList<RecordingInfo>(); String dir = getRecordingsDirectory(appId, opId); if (dir == null) { return result; }/*from w w w . j a va2 s. c o m*/ Path path = new Path(dir); try { FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path); if (!fileStatus.isDirectory()) { return result; } RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path); while (ri.hasNext()) { LocatedFileStatus lfs = ri.next(); if (lfs.isDirectory()) { try { String id = lfs.getPath().getName(); RecordingInfo recordingInfo = getRecordingInfoHelper(appId, opId, id, containers); if (recordingInfo != null) { result.add(recordingInfo); } } catch (NumberFormatException ex) { // ignore } } } } catch (IOException ex) { LOG.warn("Got exception when getting recording info", ex); return result; } return result; }
From source file:com.facebook.presto.hive.BackgroundHiveSplitLoader.java
License:Apache License
private CompletableFuture<?> loadSplits() throws IOException { HiveFileIterator files = fileIterators.poll(); if (files == null) { HivePartitionMetadata partition = partitions.poll(); if (partition == null) { return COMPLETED_FUTURE; }/* w w w . j av a2 s .c o m*/ loadPartition(partition); return COMPLETED_FUTURE; } while (files.hasNext() && !stopped) { LocatedFileStatus file = files.next(); if (isDirectory(file)) { if (recursiveDirWalkerEnabled) { HiveFileIterator fileIterator = new HiveFileIterator(file.getPath(), files.getFileSystem(), files.getDirectoryLister(), files.getNamenodeStats(), files.getPartitionName(), files.getInputFormat(), files.getSchema(), files.getPartitionKeys(), files.getEffectivePredicate(), files.getColumnCoercions()); fileIterators.add(fileIterator); } } else { boolean splittable = isSplittable(files.getInputFormat(), hdfsEnvironment.getFileSystem(session.getUser(), file.getPath()), file.getPath()); CompletableFuture<?> future = hiveSplitSource.addToQueue(createHiveSplits(files.getPartitionName(), file.getPath().toString(), file.getBlockLocations(), 0, file.getLen(), files.getSchema(), files.getPartitionKeys(), splittable, session, OptionalInt.empty(), files.getEffectivePredicate(), files.getColumnCoercions())); if (!future.isDone()) { fileIterators.addFirst(files); return future; } } } // No need to put the iterator back, since it's either empty or we've stopped return COMPLETED_FUTURE; }
From source file:com.facebook.presto.hive.util.AsyncWalker.java
License:Apache License
private void doWalk(Path path, FileStatusCallback callback, AtomicLong taskCount, SettableFuture<Void> future) { try (SetThreadName ignored = new SetThreadName("HiveHdfsWalker")) { RemoteIterator<LocatedFileStatus> iterator = getLocatedFileStatusRemoteIterator(path); while (iterator.hasNext()) { LocatedFileStatus status = getLocatedFileStatus(iterator); // ignore hidden files. Hive ignores files starting with _ and . as well. String fileName = status.getPath().getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; }//from w w w . j a v a 2 s . com if (!isDirectory(status)) { callback.process(status, status.getBlockLocations()); } else if (recursive) { recursiveWalk(status.getPath(), callback, taskCount, future); } if (future.isDone()) { return; } } } catch (FileNotFoundException e) { future.setException(new FileNotFoundException("Partition location does not exist: " + path)); } catch (Throwable t) { future.setException(t); } finally { if (taskCount.decrementAndGet() == 0) { future.set(null); } } }
From source file:com.facebook.presto.hive.util.HiveFileIterator.java
License:Apache License
@Override protected LocatedFileStatus computeNext() { try {// w ww. j av a 2 s . c o m if (remoteIterator == null) { remoteIterator = getLocatedFileStatusRemoteIterator(path); } while (remoteIterator.hasNext()) { LocatedFileStatus status = getLocatedFileStatus(remoteIterator); // ignore hidden files. Hive ignores files starting with _ and . as well. String fileName = status.getPath().getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } return status; } return endOfData(); } catch (FileNotFoundException e) { throw new PrestoException(HIVE_FILE_NOT_FOUND, "Partition location does not exist: " + path); } catch (IOException e) { throw new PrestoException(HIVE_FILESYSTEM_ERROR, "Failed to list directory: " + path, e); } }
From source file:com.facebook.presto.hive.util.InternalHiveSplitFactory.java
License:Apache License
/**
 * Creates a split covering the given file by delegating to the
 * path-based {@code createInternalHiveSplit} overload.
 *
 * <p>The file is treated as splittable only when the caller allows it and
 * {@code isSplittable} agrees for this file; {@code isSplittable} is not
 * consulted when {@code splittable} is already {@code false}. The delegate is
 * called with {@code 0} followed by the file length twice.
 *
 * @param status       the file to create a split for
 * @param bucketNumber forwarded unchanged to the delegate
 * @param splittable   whether the caller permits splitting
 * @return whatever the delegate returns
 */
private Optional<InternalHiveSplit> createInternalHiveSplit(LocatedFileStatus status, OptionalInt bucketNumber, boolean splittable) {
    boolean canSplit = false;
    if (splittable) {
        canSplit = isSplittable(inputFormat, fileSystem, status.getPath());
    }

    return createInternalHiveSplit(
            status.getPath(),
            status.getBlockLocations(),
            0,
            status.getLen(),
            status.getLen(),
            bucketNumber,
            canSplit);
}
From source file:com.github.sakserv.storm.KafkaHdfsTopologyTest.java
License:Apache License
/** * Validate that the files in HDFS contain the expected data from Kafka * @throws Exception//w w w . j a v a 2s. c om */ private void validateHdfsResults() throws Exception { LOG.info("HDFS: VALIDATING"); // Get the filesystem handle and a list of files written by the test FileSystem hdfsFsHandle = hdfsLocalCluster.getHdfsFileSystemHandle(); RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles( new Path(propertyParser.getProperty(ConfigVars.STORM_HDFS_BOLT_OUTPUT_LOCATION_KEY)), true); // Loop through the files and count up the lines int count = 0; while (listFiles.hasNext()) { LocatedFileStatus file = listFiles.next(); LOG.info("HDFS READ: Found File: " + file); BufferedReader br = new BufferedReader(new InputStreamReader(hdfsFsHandle.open(file.getPath()))); String line = br.readLine(); while (line != null) { LOG.info("HDFS READ: Found Line: " + line); line = br.readLine(); count++; } } hdfsFsHandle.close(); // Validate the number of lines matches the number of kafka messages assertEquals(Integer.parseInt(propertyParser.getProperty(ConfigVars.KAFKA_TEST_MESSAGE_COUNT_KEY)), count); }
From source file:com.github.sakserv.storm.KafkaHiveHdfsTopologyTest.java
License:Apache License
public void validateHdfsResults() throws IOException { System.out.println("HDFS: VALIDATING"); FileSystem hdfsFsHandle = hdfsCluster.getHdfsFileSystemHandle(); RemoteIterator<LocatedFileStatus> listFiles = hdfsFsHandle.listFiles(new Path("/tmp/kafka_data"), true); while (listFiles.hasNext()) { LocatedFileStatus file = listFiles.next(); System.out.println("HDFS READ: Found File: " + file); BufferedReader br = new BufferedReader(new InputStreamReader(hdfsFsHandle.open(file.getPath()))); String line = br.readLine(); while (line != null) { System.out.println("HDFS READ: Found Line: " + line); line = br.readLine();// w w w . ja v a2 s . c om } } hdfsFsHandle.close(); }
From source file:com.ibm.crail.hdfs.tools.HdfsIOBenchmark.java
License:Apache License
void browseDir() throws Exception { System.out.println("reading enumarate dir, path " + path); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(conf); //benchmark/*from w ww.j a v a 2 s .c om*/ System.out.println("starting benchmark..."); RemoteIterator<LocatedFileStatus> iter = fs.listFiles(path, false); while (iter.hasNext()) { LocatedFileStatus status = iter.next(); System.out.println(status.getPath()); } fs.close(); }
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Expands the input paths into file statuses on the calling thread.
 *
 * <p>Each entry of {@code dirs} is glob-expanded with {@code inputFilter}.
 * A glob that returns {@code null} or no matches is recorded as an error.
 * Matched plain files are added as-is. For matched directories, each child
 * accepted by {@code inputFilter} is either added directly or, when
 * {@code recursive} is set and the child is itself a directory, expanded via
 * {@code addInputPathRecursively}.
 *
 * @param job         supplies the configuration used to resolve file systems
 * @param dirs        input paths or glob patterns
 * @param inputFilter filter applied to globbing and to directory children
 * @param recursive   whether child directories are expanded recursively
 * @return the collected file statuses
 * @throws InvalidInputException if any input path was missing or matched nothing
 * @throws IOException           if a file system operation fails
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> collected = new ArrayList<FileStatus>();
    List<IOException> problems = new ArrayList<IOException>();

    for (Path inputPath : dirs) {
        FileSystem fs = inputPath.getFileSystem(job.getConfiguration());
        FileStatus[] globbed = fs.globStatus(inputPath, inputFilter);

        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + inputPath));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + inputPath + " matches 0 files"));
            continue;
        }

        for (FileStatus match : globbed) {
            if (!match.isDirectory()) {
                collected.add(match);
                continue;
            }

            RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(match.getPath());
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!inputFilter.accept(child.getPath())) {
                    continue;
                }
                if (recursive && child.isDirectory()) {
                    addInputPathRecursively(collected, fs, child.getPath(), inputFilter);
                } else {
                    collected.add(child);
                }
            }
        }
    }

    if (!problems.isEmpty()) {
        throw new InvalidInputException(problems);
    }
    return collected;
}