List of usage examples for org.apache.hadoop.fs LocatedFileStatus isDirectory
public boolean isDirectory()
From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job) throws IOException { Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path returnPath = null;/*from w ww . j av a 2 s .c o m*/ if (workingFolder == null) { workingFolder = ""; } Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/"); Path inputPath = null; Path outputPath = null; String nextRunPath = "run_1"; if (fs.exists(partialSolDir)) { RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir); String lastRunPath = null; Path lastPath = null; while (dirsFound.hasNext()) { LocatedFileStatus dir = dirsFound.next(); if (dir.isDirectory()) { if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) { lastPath = dir.getPath(); lastRunPath = lastPath.getName(); } } } if (lastRunPath != null) { String[] runParts = lastRunPath.split("_"); int lastRun = Integer.parseInt(runParts[1]); nextRunPath = runParts[0] + "_" + (++lastRun); inputPath = lastPath; } } if (inputPath == null) { inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed"); if (!fs.exists(inputPath)) { FSDataOutputStream seedFile = fs.create(inputPath, true); seedFile.writeBytes(queensSize + ":"); seedFile.close(); } } else { returnPath = inputPath; } // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(TextInputFormat.class); if (isFinal) { outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final"); } else { outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath); } // Output FileOutputFormat.setOutputPath(job, outputPath); job.setOutputFormatClass(TextOutputFormat.class); return returnPath; }
From source file:com.datatorrent.stram.client.RecordingsAgent.java
License:Apache License
public List<RecordingInfo> getRecordingInfo(String appId) { List<RecordingInfo> result = new ArrayList<RecordingInfo>(); String dir = getRecordingsDirectory(appId); if (dir == null) { return result; }/* www. j a v a 2 s . c om*/ Path path = new Path(dir); try { FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path); if (!fileStatus.isDirectory()) { return result; } RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path); while (ri.hasNext()) { LocatedFileStatus lfs = ri.next(); if (lfs.isDirectory()) { try { String opId = lfs.getPath().getName(); result.addAll(getRecordingInfo(appId, opId)); } catch (NumberFormatException ex) { // ignore } } } } catch (IOException ex) { LOG.warn("Got exception when getting recording info", ex); return result; } return result; }
From source file:com.datatorrent.stram.client.RecordingsAgent.java
License:Apache License
private List<RecordingInfo> getRecordingInfoHelper(String appId, String opId, Set<String> containers) { List<RecordingInfo> result = new ArrayList<RecordingInfo>(); String dir = getRecordingsDirectory(appId, opId); if (dir == null) { return result; }//from w w w . j a va 2 s .c o m Path path = new Path(dir); try { FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path); if (!fileStatus.isDirectory()) { return result; } RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path); while (ri.hasNext()) { LocatedFileStatus lfs = ri.next(); if (lfs.isDirectory()) { try { String id = lfs.getPath().getName(); RecordingInfo recordingInfo = getRecordingInfoHelper(appId, opId, id, containers); if (recordingInfo != null) { result.add(recordingInfo); } } catch (NumberFormatException ex) { // ignore } } } } catch (IOException ex) { LOG.warn("Got exception when getting recording info", ex); return result; } return result; }
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/**
 * Lists the input files for the given paths on the calling thread.
 *
 * <p>Each path is glob-expanded with {@code inputFilter}. Plain-file matches are added
 * as-is; for directory matches the accepted children are added, descending into child
 * directories only when {@code recursive} is set. Paths that do not exist or match
 * nothing are collected and reported together once all paths were processed.
 *
 * @param job         job context supplying the configuration
 * @param dirs        input paths (may contain glob patterns)
 * @param inputFilter filter applied to the glob and to directory children
 * @param recursive   whether child directories are traversed
 * @return the file statuses found
 * @throws IOException an {@code InvalidInputException} wrapping all per-path errors, or
 *                     any error raised by the file system
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> collected = new ArrayList<FileStatus>();
    List<IOException> problems = new ArrayList<IOException>();

    for (Path dir : dirs) {
        FileSystem fs = dir.getFileSystem(job.getConfiguration());
        FileStatus[] globbed = fs.globStatus(dir, inputFilter);

        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + dir));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + dir + " matches 0 files"));
            continue;
        }

        for (FileStatus match : globbed) {
            if (!match.isDirectory()) {
                collected.add(match);
                continue;
            }
            RemoteIterator<LocatedFileStatus> children = fs.listLocatedStatus(match.getPath());
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!inputFilter.accept(child.getPath())) {
                    continue;
                }
                if (recursive && child.isDirectory()) {
                    addInputPathRecursively(collected, fs, child.getPath(), inputFilter);
                } else {
                    collected.add(child);
                }
            }
        }
    }

    if (!problems.isEmpty()) {
        throw new InvalidInputException(problems);
    }
    return collected;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java
License:Apache License
/** * Add files in the input path recursively into the results. * @param result/*from w w w. j a va2 s . c om*/ * The List to store all files. * @param fs * The FileSystem. * @param path * The input path. * @param inputFilter * The input filter that can be used to filter files/dirs. * @throws IOException */ protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path, PathFilter inputFilter) throws IOException { RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } }
From source file:com.uber.hoodie.utilities.sources.DFSSource.java
License:Apache License
/**
 * Selects the next batch of files under the configured root input path and hands them
 * to {@code DFSSource.fromFiles}.
 *
 * <p>Files are listed recursively, entries whose name starts with one of
 * {@code IGNORE_FILEPREFIX_LIST} are dropped, and the rest are ordered by modification
 * time. Files not newer than the checkpoint are skipped; files are then taken until the
 * accumulated length reaches {@code maxInputBytes} (the file that crosses the limit is
 * still included).
 *
 * @param lastCheckpointStr modification time of the last processed file, as a decimal
 *                          string, if any
 * @param maxInputBytes     size at which file selection stops
 * @return the data read (empty when no file qualifies) paired with the new checkpoint:
 *         the modification time of the last selected file, or the previous checkpoint
 *         ({@code Long.MIN_VALUE} when absent) if nothing was selected
 * @throws HoodieIOException if listing the files fails
 */
@Override
public Pair<Optional<JavaRDD<GenericRecord>>, String> fetchNewData(Optional<String> lastCheckpointStr,
        long maxInputBytes) {
    try {
        // obtain all eligible files under root folder.
        List<FileStatus> eligibleFiles = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> fitr = fs
                .listFiles(new Path(config.getString(Config.ROOT_INPUT_PATH_PROP)), true);
        while (fitr.hasNext()) {
            LocatedFileStatus fileStatus = fitr.next();
            // anyMatch stops at the first matching prefix instead of counting all of them.
            if (fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream()
                    .anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) {
                continue;
            }
            eligibleFiles.add(fileStatus);
        }

        // sort them by modification time (primitive comparison, no boxing).
        eligibleFiles.sort((FileStatus f1, FileStatus f2) -> Long.compare(f1.getModificationTime(),
                f2.getModificationTime()));

        // Filter based on checkpoint & input size, if needed
        long currentBytes = 0;
        long maxModificationTime = Long.MIN_VALUE;
        List<FileStatus> filteredFiles = new ArrayList<>();
        for (FileStatus f : eligibleFiles) {
            if (lastCheckpointStr.isPresent()
                    && f.getModificationTime() <= Long.parseLong(lastCheckpointStr.get())) {
                // skip processed files
                continue;
            }

            maxModificationTime = f.getModificationTime();
            currentBytes += f.getLen();
            filteredFiles.add(f);
            if (currentBytes >= maxInputBytes) {
                // we have enough data, we are done
                break;
            }
        }

        // no data to read
        if (filteredFiles.isEmpty()) {
            return new ImmutablePair<>(Optional.empty(),
                    lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
        }

        // read the files out.
        String pathStr = filteredFiles.stream().map(f -> f.getPath().toString())
                .collect(Collectors.joining(","));
        String schemaStr = schemaProvider.getSourceSchema().toString();
        final AvroConvertor avroConvertor = new AvroConvertor(schemaStr);

        return new ImmutablePair<>(
                Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)),
                String.valueOf(maxModificationTime));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}
From source file:com.uber.hoodie.utilities.sources.helpers.DFSPathSelector.java
License:Apache License
public Pair<Optional<String>, String> getNextFilePathsAndMaxModificationTime(Optional<String> lastCheckpointStr, long sourceLimit) { try {//www. j a va 2 s. c om // obtain all eligible files under root folder. List<FileStatus> eligibleFiles = new ArrayList<>(); RemoteIterator<LocatedFileStatus> fitr = fs .listFiles(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), true); while (fitr.hasNext()) { LocatedFileStatus fileStatus = fitr.next(); if (fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream() .anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) { continue; } eligibleFiles.add(fileStatus); } // sort them by modification time. eligibleFiles.sort(Comparator.comparingLong(FileStatus::getModificationTime)); // Filter based on checkpoint & input size, if needed long currentBytes = 0; long maxModificationTime = Long.MIN_VALUE; List<FileStatus> filteredFiles = new ArrayList<>(); for (FileStatus f : eligibleFiles) { if (lastCheckpointStr.isPresent() && f.getModificationTime() <= Long.valueOf(lastCheckpointStr.get())) { // skip processed files continue; } if (currentBytes + f.getLen() >= sourceLimit) { // we have enough data, we are done break; } maxModificationTime = f.getModificationTime(); currentBytes += f.getLen(); filteredFiles.add(f); } // no data to read if (filteredFiles.size() == 0) { return new ImmutablePair<>(Optional.empty(), lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE))); } // read the files out. String pathStr = filteredFiles.stream().map(f -> f.getPath().toString()) .collect(Collectors.joining(",")); return new ImmutablePair<>(Optional.ofNullable(pathStr), String.valueOf(maxModificationTime)); } catch (IOException ioe) { throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe); } }
From source file:io.prestosql.plugin.hive.util.HiveFileIterator.java
License:Apache License
@Override protected LocatedFileStatus computeNext() { while (true) { while (remoteIterator.hasNext()) { LocatedFileStatus status = getLocatedFileStatus(remoteIterator); // Ignore hidden files and directories. Hive ignores files starting with _ and . as well. String fileName = status.getPath().getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; }/*from w w w.j av a 2 s.c om*/ if (status.isDirectory()) { switch (nestedDirectoryPolicy) { case IGNORED: continue; case RECURSE: paths.add(status.getPath()); continue; case FAIL: throw new NestedDirectoryNotAllowedException(); } } return status; } if (paths.isEmpty()) { return endOfData(); } remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst()); } }
From source file:org.apache.accumulo.test.GarbageCollectWALIT.java
License:Apache License
/**
 * Counts the non-directory entries found recursively under {@code <accumuloDir>/wal}
 * on the cluster's file system.
 *
 * @param cluster cluster whose file system and Accumulo directory are used
 * @return number of non-directory entries found
 * @throws Exception if the listing fails
 */
private int countWALsInFS(MiniAccumuloClusterImpl cluster) throws Exception {
    FileSystem fs = cluster.getFileSystem();
    Path walRoot = new Path(cluster.getConfig().getAccumuloDir() + "/wal");
    RemoteIterator<LocatedFileStatus> entries = fs.listFiles(walRoot, true);

    int walCount = 0;
    while (entries.hasNext()) {
        LocatedFileStatus entry = entries.next();
        if (entry.isDirectory()) {
            continue;
        }
        walCount++;
    }
    return walCount;
}
From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java
License:Apache License
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result) throws IOException { String[] partitionsToConsider = getValidPartitions(job); if (partitionsToConsider.length == 0) { throw new IOException("No partitions/data found"); }//from w ww. j av a 2 s . co m PathFilter inputFilter = getDataFileFilter(job); CarbonTablePath tablePath = getTablePath(job.getConfiguration()); // get tokens for all the required FileSystem for table path TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration()); //get all data files of valid partitions and segments for (int i = 0; i < partitionsToConsider.length; ++i) { String partition = partitionsToConsider[i]; for (int j = 0; j < segmentsToConsider.length; ++j) { String segmentId = segmentsToConsider[j]; Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId)); FileSystem fs = segmentPath.getFileSystem(job.getConfiguration()); RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath); while (iter.hasNext()) { LocatedFileStatus stat = iter.next(); if (inputFilter.accept(stat.getPath())) { if (stat.isDirectory()) { addInputPathRecursively(result, fs, stat.getPath(), inputFilter); } else { result.add(stat); } } } } } }