List of usage examples for org.apache.hadoop.fs FileStatus isFile
public boolean isFile()
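For context, isFile() returns true when the FileStatus describes a regular file rather than a directory or a symlink. Below is a minimal, hypothetical sketch of the pattern most examples on this page follow: list a directory and act only on the regular files. The directory path "/tmp/data" and the default Configuration are illustrative assumptions, not taken from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical directory; replace with a real path on your cluster or local file system.
        Path dir = new Path("/tmp/data");
        FileSystem fs = FileSystem.get(new Configuration());

        // List the directory and keep only regular files, skipping sub-directories and symlinks.
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isFile()) {
                System.out.println("file: " + status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}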
From source file:org.apache.beam.sdk.io.hdfs.HadoopFileSystem.java
License:Apache License
private Set<Metadata> matchRecursiveGlob(String directorySpec, String fileSpec) throws IOException {
    final org.apache.hadoop.fs.FileSystem fs = new Path(directorySpec).getFileSystem(configuration);
    Set<Metadata> metadata = new HashSet<>();
    if (directorySpec.contains("*")) {
        // An abstract directory with a wildcard is converted to concrete directories to search.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }
    } else {
        // A concrete directory is searched.
        FileStatus[] fileStatuses = fs.globStatus(new Path(directorySpec + "/" + fileSpec));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                metadata.add(toMetadata(fileStatus));
            }
        }

        // All sub-directories of a concrete directory are searched.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec + "/*"));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }

        // Handle additional instances of recursive globs.
        if (fileSpec.contains("**")) {
            int index = fileSpec.indexOf("**");
            metadata.addAll(matchRecursiveGlob(directorySpec + "/" + fileSpec.substring(0, index + 1),
                    fileSpec.substring(index + 1)));
        }
    }
    return metadata;
}
From source file:org.apache.crunch.kafka.offset.hdfs.HDFSOffsetReader.java
License:Apache License
private List<Long> getStoredOffsetPersistenceTimes(boolean newestFirst) throws IOException {
    List<Long> persistedTimes = new LinkedList<>();
    FileSystem fs = getFileSystem();
    try {
        FileStatus[] fileStatuses = fs.listStatus(baseOffsetStoragePath);
        for (FileStatus status : fileStatuses) {
            if (status.isFile()) {
                String fileName = status.getPath().getName();
                try {
                    persistedTimes.add(HDFSOffsetWriter.fileNameToPersistenceTime(fileName));
                } catch (IllegalArgumentException iae) {
                    LOG.info("Skipping file {} due to filename not being of the correct format.",
                            status.getPath(), iae);
                }
            } else {
                LOG.info("Skipping {} because it is not a file.", status.getPath());
            }
        }
    } catch (FileNotFoundException fnfe) {
        LOG.error("Unable to retrieve prior offsets.", fnfe);
    }

    // Natural order puts the oldest (smallest long) first; reversing it puts the newest first.
    if (newestFirst) {
        Collections.sort(persistedTimes, Collections.reverseOrder());
    } else {
        Collections.sort(persistedTimes);
    }
    return Collections.unmodifiableList(persistedTimes);
}
From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java
License:Apache License
/**
 * Get the parquet metadata for the parquet files in a directory.
 *
 * @param path the path of the directory
 * @return metadata object for an entire parquet directory structure
 * @throws IOException in case of problems during accessing files
 */
private ParquetTableMetadata_v3 getParquetTableMetadata(String path, FileSystem fs) throws IOException {
    Path p = new Path(path);
    FileStatus fileStatus = fs.getFileStatus(p);
    Stopwatch watch = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    List<FileStatus> fileStatuses = new ArrayList<>();
    if (fileStatus.isFile()) {
        fileStatuses.add(fileStatus);
    } else {
        fileStatuses.addAll(DrillFileSystemUtil.listFiles(fs, p, true));
    }
    if (watch != null) {
        logger.debug("Took {} ms to get file statuses", watch.elapsed(TimeUnit.MILLISECONDS));
        watch.reset();
        watch.start();
    }
    Map<FileStatus, FileSystem> fileStatusMap = fileStatuses.stream()
            .collect(java.util.stream.Collectors.toMap(Function.identity(), s -> fs, (oldFs, newFs) -> newFs,
                    LinkedHashMap::new));
    ParquetTableMetadata_v3 metadata_v3 = getParquetTableMetadata(fileStatusMap);
    if (watch != null) {
        logger.debug("Took {} ms to read file metadata", watch.elapsed(TimeUnit.MILLISECONDS));
        watch.stop();
    }
    return metadata_v3;
}
From source file:org.apache.drill.exec.util.FileSystemUtil.java
License:Apache License
/**
 * Checks if file status is applicable based on file system object {@link Scope}.
 *
 * @param status file status
 * @param scope file system objects scope
 * @return true if status is applicable, false otherwise
 */
private static boolean isStatusApplicable(FileStatus status, Scope scope) {
    switch (scope) {
    case DIRECTORIES:
        return status.isDirectory();
    case FILES:
        return status.isFile();
    case ALL:
        return true;
    default:
        return false;
    }
}
From source file:org.apache.giraph.yarn.GiraphYarnClient.java
License:Apache License
/**
 * Without Hadoop MR to check for us, make sure the output dir doesn't exist!
 */
private void verifyOutputDirDoesNotExist() {
    Path outDir = null;
    try {
        FileSystem fs = FileSystem.get(giraphConf);
        String errorMsg = "__ERROR_NO_OUTPUT_DIR_SET__";
        outDir = new Path(fs.getHomeDirectory(), giraphConf.get(OUTDIR, errorMsg));
        FileStatus outStatus = fs.getFileStatus(outDir);
        if (outStatus.isDirectory() || outStatus.isFile() || outStatus.isSymlink()) {
            throw new IllegalStateException("Path " + outDir + " already exists.");
        }
    } catch (IOException ioe) {
        LOG.info("Final output path is: " + outDir);
    }
}
From source file:org.apache.gobblin.compliance.restore.RestorableHivePartitionDataset.java
License:Apache License
private void fsMove(Path from, Path to) throws IOException {
    for (FileStatus fileStatus : this.datasetOwnerFs.listStatus(from)) {
        if (fileStatus.isFile()) {
            this.datasetOwnerFs.rename(fileStatus.getPath(), to);
        }
    }
}
From source file:org.apache.gobblin.compliance.retention.HivePartitionVersionRetentionReaper.java
License:Apache License
private void fsMove(Path from, Path to) throws IOException {
    if (PartitionUtils.isUnixTimeStamp(from.getName())) {
        this.versionOwnerFs.rename(from, to.getParent());
    } else {
        for (FileStatus fileStatus : this.versionOwnerFs.listStatus(from)) {
            if (fileStatus.isFile()) {
                this.versionOwnerFs.rename(fileStatus.getPath(), to);
            }
        }
    }
}
From source file:org.apache.gobblin.data.management.conversion.hive.validation.ValidationJob.java
License:Apache License
/***
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE",
        justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {
    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();
    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                if (this.hiveSettings.size() > 0) {
                    hiveJdbcConnector
                            .executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
                }
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                        "SET hive.auto.convert.join=false", query);

                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                            new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0l);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        log.warn("Execution failed for query set " + queries.toString(), e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }
    return rowCounts;
}
From source file:org.apache.hawq.pxf.plugins.hdfs.HdfsAnalyzer.java
License:Apache License
/**
 * Collects a number of basic statistics based on an estimate. Statistics
 * are: number of records, number of hdfs blocks and hdfs block size.
 *
 * @param datapath path is a data source URI that can appear as a file name,
 *            a directory name or a wildcard pattern
 * @return statistics in JSON format
 * @throws Exception if path is wrong, its metadata cannot be retrieved from
 *             file system, or if scanning the first block using the
 *             accessor failed
 */
@Override
public AnalyzerStats getEstimatedStats(String datapath) throws Exception {
    long blockSize = 0;
    long numberOfBlocks;
    long dataSize = 0;
    Path path = new Path(HdfsUtilities.absoluteDataPath(datapath));
    ArrayList<InputSplit> splits = getSplits(path);

    for (InputSplit split : splits) {
        FileSplit fsp = (FileSplit) split;
        dataSize += fsp.getLength();
        if (blockSize == 0) {
            Path filePath = fsp.getPath();
            FileStatus fileStatus = fs.getFileStatus(filePath);
            if (fileStatus.isFile()) {
                blockSize = fileStatus.getBlockSize();
            }
        }
    }

    // if no file is in path (only dirs), get default block size
    if (blockSize == 0) {
        blockSize = fs.getDefaultBlockSize(path);
    }
    numberOfBlocks = splits.size();

    /*
     * The estimate of the number of tuples in the table is based on the
     * actual number of tuples in the first block, multiplied by its
     * size compared to the size of the whole data to be read.
     * The calculation:
     * Ratio of tuples to size = number of tuples in first block / first block size.
     * Total of tuples = ratio * number of blocks * total block size.
     */
    long numberOfTuplesInBlock = getNumberOfTuplesInBlock(splits);
    long numberOfTuples = 0;
    if (!splits.isEmpty()) {
        long blockLength = splits.get(0).getLength();
        numberOfTuples = (long) Math.floor(((double) numberOfTuplesInBlock / blockLength) * dataSize);
    }
    AnalyzerStats stats = new AnalyzerStats(blockSize, numberOfBlocks, numberOfTuples);

    // print files size to log when in debug level
    Log.debug(AnalyzerStats.dataToString(stats, path.toString()));

    return stats;
}
From source file:org.apache.hoya.avro.RoleHistoryWriter.java
License:Apache License
/**
 * Find all history entries in a dir. The dir is created if it does
 * not already exist.
 *
 * The scan uses the match pattern {@link HoyaKeys#HISTORY_FILENAME_MATCH_PATTERN}
 * while dropping empty files and directories which match the pattern.
 * The list is then sorted with a comparator that sorts on filename,
 * relying on the filename of newer created files being later than the old ones.
 *
 * @param fs filesystem
 * @param dir dir to scan
 * @param includeEmptyFiles should empty files be included in the result?
 * @return a possibly empty list
 * @throws IOException IO problems
 * @throws FileNotFoundException if the target dir is actually a file
 */
public List<Path> findAllHistoryEntries(FileSystem fs, Path dir, boolean includeEmptyFiles) throws IOException {
    assert fs != null;
    assert dir != null;
    if (!fs.exists(dir)) {
        fs.mkdirs(dir);
    } else if (!fs.isDirectory(dir)) {
        throw new FileNotFoundException("Not a directory " + dir.toString());
    }

    PathFilter filter = new GlobFilter(HoyaKeys.HISTORY_FILENAME_GLOB_PATTERN);
    FileStatus[] stats = fs.listStatus(dir, filter);
    List<Path> paths = new ArrayList<Path>(stats.length);
    for (FileStatus stat : stats) {
        log.debug("Possible entry: {}", stat.toString());
        if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
            paths.add(stat.getPath());
        }
    }
    sortHistoryPaths(paths);
    return paths;
}