List of usage examples for org.apache.hadoop.fs FileStatus isFile
public boolean isFile()
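For context, isFile() returns true when the FileStatus describes a regular file rather than a directory or a symlink. Below is a minimal, hypothetical sketch of the pattern most examples on this page follow: list a directory and act only on the regular files. The directory path "/tmp/data" and the default Configuration are illustrative assumptions, not taken from any of the source files below.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical directory; replace with a real path on your cluster or local file system.
        Path dir = new Path("/tmp/data");
        FileSystem fs = FileSystem.get(new Configuration());

        // List the directory and keep only regular files, skipping sub-directories and symlinks.
        for (FileStatus status : fs.listStatus(dir)) {
            if (status.isFile()) {
                System.out.println("file: " + status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}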
From source file:org.apache.beam.sdk.io.hdfs.HadoopFileSystem.java
License:Apache License
private Set<Metadata> matchRecursiveGlob(String directorySpec, String fileSpec) throws IOException {
    final org.apache.hadoop.fs.FileSystem fs = new Path(directorySpec).getFileSystem(configuration);
    Set<Metadata> metadata = new HashSet<>();
    if (directorySpec.contains("*")) {
        // An abstract directory with a wildcard is converted to concrete directories to search.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }
    } else {
        // A concrete directory is searched.
        FileStatus[] fileStatuses = fs.globStatus(new Path(directorySpec + "/" + fileSpec));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                metadata.add(toMetadata(fileStatus));
            }
        }

        // All sub-directories of a concrete directory are searched.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec + "/*"));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }

        // Handle additional instances of recursive globs.
        if (fileSpec.contains("**")) {
            int index = fileSpec.indexOf("**");
            metadata.addAll(matchRecursiveGlob(directorySpec + "/" + fileSpec.substring(0, index + 1),
                    fileSpec.substring(index + 1)));
        }
    }
    return metadata;
}
From source file:org.apache.crunch.kafka.offset.hdfs.HDFSOffsetReader.java
License:Apache License
private List<Long> getStoredOffsetPersistenceTimes(boolean newestFirst) throws IOException {
    List<Long> persistedTimes = new LinkedList<>();
    FileSystem fs = getFileSystem();
    try {
        FileStatus[] fileStatuses = fs.listStatus(baseOffsetStoragePath);
        for (FileStatus status : fileStatuses) {
            if (status.isFile()) {
                String fileName = status.getPath().getName();
                try {
                    persistedTimes.add(HDFSOffsetWriter.fileNameToPersistenceTime(fileName));
                } catch (IllegalArgumentException iae) {
                    LOG.info("Skipping file {} due to filename not being of the correct format.",
                            status.getPath(), iae);
                }
            } else {
                LOG.info("Skipping {} because it is not a file.", status.getPath());
            }
        }
    } catch (FileNotFoundException fnfe) {
        LOG.error("Unable to retrieve prior offsets.", fnfe);
    }

    // Natural order puts the oldest (smallest long) first; reversing it puts the newest first.
    if (newestFirst) {
        Collections.sort(persistedTimes, Collections.reverseOrder());
    } else {
        Collections.sort(persistedTimes);
    }
    return Collections.unmodifiableList(persistedTimes);
}
From source file:org.apache.drill.exec.store.parquet.metadata.Metadata.java
License:Apache License
/**
 * Get the parquet metadata for the parquet files in a directory.
 *
 * @param path the path of the directory
 * @return metadata object for an entire parquet directory structure
 * @throws IOException in case of problems during accessing files
 */
private ParquetTableMetadata_v3 getParquetTableMetadata(String path, FileSystem fs) throws IOException {
    Path p = new Path(path);
    FileStatus fileStatus = fs.getFileStatus(p);
    Stopwatch watch = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    List<FileStatus> fileStatuses = new ArrayList<>();
    if (fileStatus.isFile()) {
        fileStatuses.add(fileStatus);
    } else {
        fileStatuses.addAll(DrillFileSystemUtil.listFiles(fs, p, true));
    }
    if (watch != null) {
        logger.debug("Took {} ms to get file statuses", watch.elapsed(TimeUnit.MILLISECONDS));
        watch.reset();
        watch.start();
    }
    Map<FileStatus, FileSystem> fileStatusMap = fileStatuses.stream()
            .collect(java.util.stream.Collectors.toMap(Function.identity(), s -> fs, (oldFs, newFs) -> newFs,
                    LinkedHashMap::new));
    ParquetTableMetadata_v3 metadata_v3 = getParquetTableMetadata(fileStatusMap);
    if (watch != null) {
        logger.debug("Took {} ms to read file metadata", watch.elapsed(TimeUnit.MILLISECONDS));
        watch.stop();
    }
    return metadata_v3;
}
From source file:org.apache.drill.exec.util.FileSystemUtil.java
License:Apache License
/**
 * Checks if file status is applicable based on file system object {@link Scope}.
 *
 * @param status file status
 * @param scope file system objects scope
 * @return true if status is applicable, false otherwise
 */
private static boolean isStatusApplicable(FileStatus status, Scope scope) {
    switch (scope) {
    case DIRECTORIES:
        return status.isDirectory();
    case FILES:
        return status.isFile();
    case ALL:
        return true;
    default:
        return false;
    }
}
From source file:org.apache.giraph.yarn.GiraphYarnClient.java
License:Apache License
/**
 * Without Hadoop MR to check for us, make sure the output dir doesn't exist!
 */
private void verifyOutputDirDoesNotExist() {
    Path outDir = null;
    try {
        FileSystem fs = FileSystem.get(giraphConf);
        String errorMsg = "__ERROR_NO_OUTPUT_DIR_SET__";
        outDir = new Path(fs.getHomeDirectory(), giraphConf.get(OUTDIR, errorMsg));
        FileStatus outStatus = fs.getFileStatus(outDir);
        if (outStatus.isDirectory() || outStatus.isFile() || outStatus.isSymlink()) {
            throw new IllegalStateException("Path " + outDir + " already exists.");
        }
    } catch (IOException ioe) {
        LOG.info("Final output path is: " + outDir);
    }
}
From source file:org.apache.gobblin.compliance.restore.RestorableHivePartitionDataset.java
License:Apache License
private void fsMove(Path from, Path to) throws IOException {
    for (FileStatus fileStatus : this.datasetOwnerFs.listStatus(from)) {
        if (fileStatus.isFile()) {
            this.datasetOwnerFs.rename(fileStatus.getPath(), to);
        }
    }
}
From source file:org.apache.gobblin.compliance.retention.HivePartitionVersionRetentionReaper.java
License:Apache License
private void fsMove(Path from, Path to) throws IOException {
    if (PartitionUtils.isUnixTimeStamp(from.getName())) {
        this.versionOwnerFs.rename(from, to.getParent());
    } else {
        for (FileStatus fileStatus : this.versionOwnerFs.listStatus(from)) {
            if (fileStatus.isFile()) {
                this.versionOwnerFs.rename(fileStatus.getPath(), to);
            }
        }
    }
}
From source file:org.apache.gobblin.data.management.conversion.hive.validation.ValidationJob.java
License:Apache License
/***
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE",
        justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {
    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();
    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                if (this.hiveSettings.size() > 0) {
                    hiveJdbcConnector
                            .executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
                }
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                        "SET hive.auto.convert.join=false", query);

                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                            new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0l);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        log.warn("Execution failed for query set " + queries.toString(), e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }
    return rowCounts;
}
From source file:org.apache.hawq.pxf.plugins.hdfs.HdfsAnalyzer.java
License:Apache License
/**
 * Collects a number of basic statistics based on an estimate. Statistics
 * are: number of records, number of hdfs blocks and hdfs block size.
 *
 * @param datapath path is a data source URI that can appear as a file name,
 *            a directory name or a wildcard pattern
 * @return statistics in JSON format
 * @throws Exception if path is wrong, its metadata cannot be retrieved from
 *             file system, or if scanning the first block using the
 *             accessor failed
 */
@Override
public AnalyzerStats getEstimatedStats(String datapath) throws Exception {
    long blockSize = 0;
    long numberOfBlocks;
    long dataSize = 0;
    Path path = new Path(HdfsUtilities.absoluteDataPath(datapath));
    ArrayList<InputSplit> splits = getSplits(path);

    for (InputSplit split : splits) {
        FileSplit fsp = (FileSplit) split;
        dataSize += fsp.getLength();
        if (blockSize == 0) {
            Path filePath = fsp.getPath();
            FileStatus fileStatus = fs.getFileStatus(filePath);
            if (fileStatus.isFile()) {
                blockSize = fileStatus.getBlockSize();
            }
        }
    }

    // if no file is in path (only dirs), get default block size
    if (blockSize == 0) {
        blockSize = fs.getDefaultBlockSize(path);
    }
    numberOfBlocks = splits.size();

    /*
     * The estimate of the number of tuples in the table is based on the
     * actual number of tuples in the first block, multiplied by its
     * size compared to the size of the whole data to be read.
     * The calculation:
     * Ratio of tuples to size = number of tuples in first block / first block size.
     * Total of tuples = ratio * number of blocks * total block size.
     */
    long numberOfTuplesInBlock = getNumberOfTuplesInBlock(splits);
    long numberOfTuples = 0;
    if (!splits.isEmpty()) {
        long blockLength = splits.get(0).getLength();
        numberOfTuples = (long) Math.floor(((double) numberOfTuplesInBlock / blockLength) * dataSize);
    }
    AnalyzerStats stats = new AnalyzerStats(blockSize, numberOfBlocks, numberOfTuples);

    // print files size to log when in debug level
    Log.debug(AnalyzerStats.dataToString(stats, path.toString()));

    return stats;
}
From source file:org.apache.hoya.avro.RoleHistoryWriter.java
License:Apache License
/**
 * Find all history entries in a dir. The dir is created if it does
 * not already exist.
 *
 * The scan uses the match pattern {@link HoyaKeys#HISTORY_FILENAME_MATCH_PATTERN}
 * while dropping empty files and directories which match the pattern.
 * The list is then sorted with a comparator that sorts on filename,
 * relying on the filename of newer created files being later than the old ones.
 *
 * @param fs filesystem
 * @param dir dir to scan
 * @param includeEmptyFiles should empty files be included in the result?
 * @return a possibly empty list
 * @throws IOException IO problems
 * @throws FileNotFoundException if the target dir is actually a file
 */
public List<Path> findAllHistoryEntries(FileSystem fs, Path dir, boolean includeEmptyFiles) throws IOException {
    assert fs != null;
    assert dir != null;
    if (!fs.exists(dir)) {
        fs.mkdirs(dir);
    } else if (!fs.isDirectory(dir)) {
        throw new FileNotFoundException("Not a directory " + dir.toString());
    }

    PathFilter filter = new GlobFilter(HoyaKeys.HISTORY_FILENAME_GLOB_PATTERN);
    FileStatus[] stats = fs.listStatus(dir, filter);
    List<Path> paths = new ArrayList<Path>(stats.length);
    for (FileStatus stat : stats) {
        log.debug("Possible entry: {}", stat.toString());
        if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
            paths.add(stat.getPath());
        }
    }
    sortHistoryPaths(paths);
    return paths;
}