Example usage for org.apache.hadoop.fs FileStatus isFile

List of usage examples for org.apache.hadoop.fs FileStatus isFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileStatus#isFile.

Prototype

public boolean isFile() 

Document

Is this a file?
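
Before the project-specific examples, here is a minimal, self-contained sketch (not taken from any of the sources below) of how isFile() is typically used to separate plain files from directories when listing a path. The path /tmp/example and the class name IsFileExample are placeholders for illustration only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileExample {
    public static void main(String[] args) throws Exception {
        // Obtain the default (configured) file system.
        FileSystem fs = FileSystem.get(new Configuration());
        // List the entries under a directory and report which ones are plain files.
        for (FileStatus status : fs.listStatus(new Path("/tmp/example"))) {
            if (status.isFile()) {
                System.out.println("File: " + status.getPath());
            } else if (status.isDirectory()) {
                System.out.println("Directory: " + status.getPath());
            }
        }
    }
}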

Usage

From source file: org.apache.beam.sdk.io.hdfs.HadoopFileSystem.java

License: Apache License

private Set<Metadata> matchRecursiveGlob(String directorySpec, String fileSpec) throws IOException {
    final org.apache.hadoop.fs.FileSystem fs = new Path(directorySpec).getFileSystem(configuration);
    Set<Metadata> metadata = new HashSet<>();
    if (directorySpec.contains("*")) {
        // An abstract directory with a wildcard is converted to concrete directories to search.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }
    } else {
        // A concrete directory is searched.
        FileStatus[] fileStatuses = fs.globStatus(new Path(directorySpec + "/" + fileSpec));
        for (FileStatus fileStatus : fileStatuses) {
            if (fileStatus.isFile()) {
                metadata.add(toMetadata(fileStatus));
            }
        }

        // All sub-directories of a concrete directory are searched.
        FileStatus[] directoryStatuses = fs.globStatus(new Path(directorySpec + "/*"));
        for (FileStatus directoryStatus : directoryStatuses) {
            if (directoryStatus.isDirectory()) {
                metadata.addAll(matchRecursiveGlob(directoryStatus.getPath().toUri().toString(), fileSpec));
            }
        }

        // Handle additional instances of recursive globs.
        if (fileSpec.contains("**")) {
            int index = fileSpec.indexOf("**");
            metadata.addAll(matchRecursiveGlob(directorySpec + "/" + fileSpec.substring(0, index + 1),
                    fileSpec.substring(index + 1)));
        }
    }
    return metadata;
}

From source file: org.apache.crunch.kafka.offset.hdfs.HDFSOffsetReader.java

License: Apache License

private List<Long> getStoredOffsetPersistenceTimes(boolean newestFirst) throws IOException {
    List<Long> persistedTimes = new LinkedList<>();
    FileSystem fs = getFileSystem();
    try {
        FileStatus[] fileStatuses = fs.listStatus(baseOffsetStoragePath);
        for (FileStatus status : fileStatuses) {
            if (status.isFile()) {
                String fileName = status.getPath().getName();
                try {
                    persistedTimes.add(HDFSOffsetWriter.fileNameToPersistenceTime(fileName));
                } catch (IllegalArgumentException iae) {
                    LOG.info("Skipping file {} due to filename not being of the correct format.",
                            status.getPath(), iae);
                }
            } else {
                LOG.info("Skippping {} because it is not a file.", status.getPath());
            }
        }
    } catch (FileNotFoundException fnfe) {
        LOG.error("Unable to retrieve prior offsets.", fnfe);
    }

    //natural order should put oldest (smallest long) first. This will put newest first.
    if (newestFirst) {
        Collections.sort(persistedTimes, Collections.reverseOrder());
    } else {
        Collections.sort(persistedTimes);
    }
    return Collections.unmodifiableList(persistedTimes);
}

From source file: org.apache.drill.exec.store.parquet.metadata.Metadata.java

License: Apache License

/**
 * Get the parquet metadata for the parquet files in a directory.
 *
 * @param path the path of the directory
 * @return metadata object for an entire parquet directory structure
 * @throws IOException in case of problems during accessing files
 */
private ParquetTableMetadata_v3 getParquetTableMetadata(String path, FileSystem fs) throws IOException {
    Path p = new Path(path);
    FileStatus fileStatus = fs.getFileStatus(p);
    Stopwatch watch = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;
    List<FileStatus> fileStatuses = new ArrayList<>();
    if (fileStatus.isFile()) {
        fileStatuses.add(fileStatus);
    } else {
        fileStatuses.addAll(DrillFileSystemUtil.listFiles(fs, p, true));
    }
    if (watch != null) {
        logger.debug("Took {} ms to get file statuses", watch.elapsed(TimeUnit.MILLISECONDS));
        watch.reset();
        watch.start();
    }

    Map<FileStatus, FileSystem> fileStatusMap = fileStatuses.stream().collect(java.util.stream.Collectors
            .toMap(Function.identity(), s -> fs, (oldFs, newFs) -> newFs, LinkedHashMap::new));

    ParquetTableMetadata_v3 metadata_v3 = getParquetTableMetadata(fileStatusMap);
    if (watch != null) {
        logger.debug("Took {} ms to read file metadata", watch.elapsed(TimeUnit.MILLISECONDS));
        watch.stop();
    }
    return metadata_v3;
}

From source file: org.apache.drill.exec.util.FileSystemUtil.java

License: Apache License

/**
 * Checks if file status is applicable based on file system object {@link Scope}.
 *
 * @param status file status
 * @param scope file system objects scope
 * @return true if status is applicable, false otherwise
 */
private static boolean isStatusApplicable(FileStatus status, Scope scope) {
    switch (scope) {
    case DIRECTORIES:
        return status.isDirectory();
    case FILES:
        return status.isFile();
    case ALL:
        return true;
    default:
        return false;
    }
}

From source file: org.apache.giraph.yarn.GiraphYarnClient.java

License: Apache License

/**
 * Without Hadoop MR to check for us, make sure the output dir doesn't exist!
 */
private void verifyOutputDirDoesNotExist() {
    Path outDir = null;
    try {
        FileSystem fs = FileSystem.get(giraphConf);
        String errorMsg = "__ERROR_NO_OUTPUT_DIR_SET__";
        outDir = new Path(fs.getHomeDirectory(), giraphConf.get(OUTDIR, errorMsg));
        FileStatus outStatus = fs.getFileStatus(outDir);
        if (outStatus.isDirectory() || outStatus.isFile() || outStatus.isSymlink()) {
            throw new IllegalStateException("Path " + outDir + " already exists.");
        }
    } catch (IOException ioe) {
        LOG.info("Final output path is: " + outDir);
    }
}

From source file: org.apache.gobblin.compliance.restore.RestorableHivePartitionDataset.java

License: Apache License

private void fsMove(Path from, Path to) throws IOException {
    for (FileStatus fileStatus : this.datasetOwnerFs.listStatus(from)) {
        if (fileStatus.isFile()) {
            this.datasetOwnerFs.rename(fileStatus.getPath(), to);
        }
    }
}

From source file: org.apache.gobblin.compliance.retention.HivePartitionVersionRetentionReaper.java

License: Apache License

private void fsMove(Path from, Path to) throws IOException {
    if (PartitionUtils.isUnixTimeStamp(from.getName())) {
        this.versionOwnerFs.rename(from, to.getParent());
    } else {
        for (FileStatus fileStatus : this.versionOwnerFs.listStatus(from)) {
            if (fileStatus.isFile()) {
                this.versionOwnerFs.rename(fileStatus.getPath(), to);
            }
        }
    }
}

From source file: org.apache.gobblin.data.management.conversion.hive.validation.ValidationJob.java

License: Apache License

/***
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {

    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();

    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                if (this.hiveSettings.size() > 0) {
                    hiveJdbcConnector
                            .executeStatements(this.hiveSettings.toArray(new String[this.hiveSettings.size()]));
                }
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                        "SET hive.auto.convert.join=false", query);
                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                            new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0L);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        log.warn("Execution failed for query set " + queries.toString(), e);
    } finally {
        try {
            closer.close();
        } catch (Exception e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }

    return rowCounts;
}

From source file: org.apache.hawq.pxf.plugins.hdfs.HdfsAnalyzer.java

License: Apache License

/**
 * Collects a number of basic statistics based on an estimate. Statistics
 * are: number of records, number of hdfs blocks and hdfs block size.
 *
 * @param datapath path is a data source URI that can appear as a file name,
 *            a directory name or a wildcard pattern
 * @return statistics in JSON format
 * @throws Exception if path is wrong, its metadata cannot be retrieved from
 *             file system, or if scanning the first block using the
 *             accessor failed
 */
@Override
public AnalyzerStats getEstimatedStats(String datapath) throws Exception {
    long blockSize = 0;
    long numberOfBlocks;
    long dataSize = 0;
    Path path = new Path(HdfsUtilities.absoluteDataPath(datapath));

    ArrayList<InputSplit> splits = getSplits(path);

    for (InputSplit split : splits) {
        FileSplit fsp = (FileSplit) split;
        dataSize += fsp.getLength();
        if (blockSize == 0) {
            Path filePath = fsp.getPath();
            FileStatus fileStatus = fs.getFileStatus(filePath);
            if (fileStatus.isFile()) {
                blockSize = fileStatus.getBlockSize();
            }
        }
    }

    // if no file is in path (only dirs), get default block size
    if (blockSize == 0) {
        blockSize = fs.getDefaultBlockSize(path);
    }
    numberOfBlocks = splits.size();

    /*
     * The estimate of the number of tuples in table is based on the
     * actual number of tuples in the first block, multiplied by its
     * size compared to the size of the whole data to be read.
     * The calculation:
     * Ratio of tuples to size = number of tuples in first block / first block size.
     * Total of tuples = ratio * number of blocks * total block size.
     */
    long numberOfTuplesInBlock = getNumberOfTuplesInBlock(splits);
    long numberOfTuples = 0;
    if (!splits.isEmpty()) {
        long blockLength = splits.get(0).getLength();
        numberOfTuples = (long) Math.floor((((double) numberOfTuplesInBlock / blockLength) * (dataSize)));
    }
    AnalyzerStats stats = new AnalyzerStats(blockSize, numberOfBlocks, numberOfTuples);

    // print files size to log when in debug level
    Log.debug(AnalyzerStats.dataToString(stats, path.toString()));

    return stats;
}

From source file: org.apache.hoya.avro.RoleHistoryWriter.java

License: Apache License

/**
 * Find all history entries in a dir. The dir is created if it is
 * not already defined.
 * 
 * The scan uses the match pattern {@link HoyaKeys#HISTORY_FILENAME_MATCH_PATTERN}
 * while dropping empty files and directories which match the pattern.
 * The list is then sorted with a comparator that sorts on filename,
 * relying on the filename of newer created files being later than the old ones.
 * 
 * 
 *
 * @param fs filesystem
 * @param dir dir to scan
 * @param includeEmptyFiles should empty files be included in the result?
 * @return a possibly empty list
 * @throws IOException IO problems
 * @throws FileNotFoundException if the target dir is actually a path
 */
public List<Path> findAllHistoryEntries(FileSystem fs, Path dir, boolean includeEmptyFiles) throws IOException {
    assert fs != null;
    assert dir != null;
    if (!fs.exists(dir)) {
        fs.mkdirs(dir);
    } else if (!fs.isDirectory(dir)) {
        throw new FileNotFoundException("Not a directory " + dir.toString());
    }

    PathFilter filter = new GlobFilter(HoyaKeys.HISTORY_FILENAME_GLOB_PATTERN);
    FileStatus[] stats = fs.listStatus(dir, filter);
    List<Path> paths = new ArrayList<Path>(stats.length);
    for (FileStatus stat : stats) {
        log.debug("Possible entry: {}", stat.toString());
        if (stat.isFile() && (includeEmptyFiles || stat.getLen() > 0)) {
            paths.add(stat.getPath());
        }
    }
    sortHistoryPaths(paths);
    return paths;
}