Example usage for org.apache.hadoop.fs FileStatus isDirectory

List of usage examples for org.apache.hadoop.fs FileStatus isDirectory

Introduction

On this page you can find example usages of the isDirectory method of org.apache.hadoop.fs.FileStatus.

Prototype

public boolean isDirectory() 

Source Link

Document

Is this a directory?

Usage

From source file:org.apache.tajo.QueryTestCaseBase.java

License:Apache License

/**
 * Recursively gathers the paths of every non-directory entry located under {@code path}.
 *
 * @param fs   file system used to list entries
 * @param path location whose entries are listed
 * @return the collected paths; empty when the listing is {@code null} or has no entries
 * @throws Exception propagated from the file system listing
 */
private List<Path> listFiles(FileSystem fs, Path path) throws Exception {
    final List<Path> collected = new ArrayList<Path>();
    final FileStatus[] entries = fs.listStatus(path);
    if (entries == null) {
        return collected;
    }

    for (int i = 0; i < entries.length; i++) {
        final FileStatus entry = entries[i];
        if (!entry.isDirectory()) {
            collected.add(entry.getPath());
            continue;
        }
        // Directories contribute the files found beneath them, not themselves.
        collected.addAll(listFiles(fs, entry.getPath()));
    }
    return collected;
}

From source file:org.apache.tajo.storage.AbstractStorageManager.java

License:Apache License

/**
 * List input directories./*from  w ww  . j av  a 2 s  .c  om*/
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(Path... dirs) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);

    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];

        FileSystem fs = p.getFileSystem(conf);
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:org.apache.tajo.storage.FileStorageManager.java

License:Apache License

/**
 *
 * @param fs/*from   w ww .j  av  a 2s  .  c o  m*/
 * @param path The table path
 * @param result The final result files to be used
 * @param startFileIndex
 * @param numResultFiles
 * @param currentFileIndex
 * @param partitioned A flag to indicate if this table is partitioned
 * @param currentDepth Current visiting depth of partition directories
 * @param maxDepth The partition depth of this table
 * @throws IOException
 */
private void getNonZeroLengthDataFiles(FileSystem fs, Path path, List<FileStatus> result, int startFileIndex,
        int numResultFiles, AtomicInteger currentFileIndex, boolean partitioned, int currentDepth, int maxDepth)
        throws IOException {
    // Intermediate directory
    if (fs.isDirectory(path)) {

        FileStatus[] files = fs.listStatus(path, StorageManager.hiddenFileFilter);

        if (files != null && files.length > 0) {

            for (FileStatus eachFile : files) {

                // checking if the enough number of files are found
                if (result.size() >= numResultFiles) {
                    return;
                }
                if (eachFile.isDirectory()) {

                    getNonZeroLengthDataFiles(fs, eachFile.getPath(), result, startFileIndex, numResultFiles,
                            currentFileIndex, partitioned, currentDepth + 1, // increment a visiting depth
                            maxDepth);

                    // if partitioned table, we should ignore files located in the intermediate directory.
                    // we can ensure that this file is in leaf directory if currentDepth == maxDepth.
                } else if (eachFile.isFile() && eachFile.getLen() > 0
                        && (!partitioned || currentDepth == maxDepth)) {
                    if (currentFileIndex.get() >= startFileIndex) {
                        result.add(eachFile);
                    }
                    currentFileIndex.incrementAndGet();
                }
            }
        }

        // Files located in leaf directory
    } else {
        FileStatus fileStatus = fs.getFileStatus(path);
        if (fileStatus != null && fileStatus.getLen() > 0) {
            if (currentFileIndex.get() >= startFileIndex) {
                result.add(fileStatus);
            }
            currentFileIndex.incrementAndGet();
            if (result.size() >= numResultFiles) {
                return;
            }
        }
    }
}

From source file:org.apache.tajo.storage.FileTablespace.java

License:Apache License

/**
 * List input directories./*from w w  w . j a  v  a2s .  c  om*/
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected List<FileStatus> listStatus(Path... dirs) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    if (dirs.length == 0) {
        throw new IOException("No input paths specified in job");
    }

    List<IOException> errors = new ArrayList<IOException>();

    // creates a MultiPathFilter with the hiddenFileFilter and the
    // user provided one (if any).
    List<PathFilter> filters = new ArrayList<PathFilter>();
    filters.add(hiddenFileFilter);

    PathFilter inputFilter = new MultiPathFilter(filters);

    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];

        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
                if (globStat.isDirectory()) {
                    for (FileStatus stat : fs.listStatus(globStat.getPath(), inputFilter)) {
                        result.add(stat);
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    LOG.info("Total input paths to process : " + result.size());
    return result;
}

From source file:org.apache.tajo.storage.FileTablespace.java

License:Apache License

/**
 * Attach the sequence number to the output file name and than move the file into the final result path.
 *
 * @param fs FileSystem/*from   w  w  w . j a v  a  2s. c o m*/
 * @param stagingResultDir The staging result dir
 * @param fileStatus The file status
 * @param finalOutputPath Final output path
 * @param nf Number format
 * @param fileSeq The sequence number
 * @throws java.io.IOException
 */
private void moveResultFromStageToFinal(FileSystem fs, Path stagingResultDir, FileStatus fileStatus,
        Path finalOutputPath, NumberFormat nf, int fileSeq, boolean changeFileSeq) throws IOException {
    if (fileStatus.isDirectory()) {
        String subPath = extractSubPath(stagingResultDir, fileStatus.getPath());
        if (subPath != null) {
            Path finalSubPath = new Path(finalOutputPath, subPath);
            if (!fs.exists(finalSubPath)) {
                fs.mkdirs(finalSubPath);
            }
            int maxSeq = StorageUtil.getMaxFileSequence(fs, finalSubPath, false);
            for (FileStatus eachFile : fs.listStatus(fileStatus.getPath())) {
                if (eachFile.getPath().getName().startsWith("_")) {
                    continue;
                }
                moveResultFromStageToFinal(fs, stagingResultDir, eachFile, finalOutputPath, nf, ++maxSeq,
                        changeFileSeq);
            }
        } else {
            throw new IOException("Wrong staging dir:" + stagingResultDir + "," + fileStatus.getPath());
        }
    } else {
        String subPath = extractSubPath(stagingResultDir, fileStatus.getPath());
        if (subPath != null) {
            Path finalSubPath = new Path(finalOutputPath, subPath);
            if (changeFileSeq) {
                finalSubPath = new Path(finalSubPath.getParent(),
                        replaceFileNameSeq(finalSubPath, fileSeq, nf));
            }
            if (!fs.exists(finalSubPath.getParent())) {
                fs.mkdirs(finalSubPath.getParent());
            }
            if (fs.exists(finalSubPath)) {
                throw new IOException("Already exists data file:" + finalSubPath);
            }
            boolean success = fs.rename(fileStatus.getPath(), finalSubPath);
            if (success) {
                LOG.info("Moving staging file[" + fileStatus.getPath() + "] + " + "to final output["
                        + finalSubPath + "]");
            } else {
                LOG.error("Can't move staging file[" + fileStatus.getPath() + "] + " + "to final output["
                        + finalSubPath + "]");
            }
        }
    }
}

From source file:org.apache.tajo.storage.FileTablespace.java

License:Apache License

/**
 * Recursively visits the sub-directories of a staging path. For each one it creates
 * the matching directory under {@code oldTableDir} and then either records a
 * staging-to-output rename in {@code renameDirs} or creates the matching output
 * directory, depending on the result of {@code isLeafDirectory}.
 *
 * <p>The staging prefix is substituted literally. The paths are not interpreted as
 * regular expressions, so characters such as '.', '$' or '+' in a path are safe.
 *
 * @param fs file system holding the staging and output directories
 * @param stagingPath directory whose sub-directories are visited
 * @param outputPath final output directory that replaces the staging prefix
 * @param stagingParentPathString staging prefix replaced in each visited path
 * @param renameDirs receives staging directory to output directory mappings
 * @param oldTableDir directory that replaces the staging prefix when creating the recovery directories
 * @throws java.io.IOException propagated from the file system calls
 */
private void visitPartitionedDirectory(FileSystem fs, Path stagingPath, Path outputPath,
        String stagingParentPathString, Map<Path, Path> renameDirs, Path oldTableDir) throws IOException {
    FileStatus[] files = fs.listStatus(stagingPath);
    if (files == null) {
        // Nothing to visit; avoids a NullPointerException on file systems that return null.
        return;
    }

    for (FileStatus eachFile : files) {
        if (!eachFile.isDirectory()) {
            continue;
        }
        Path oldPath = eachFile.getPath();
        String oldPathString = oldPath.toString();

        // Make recover directory.
        // String.replace substitutes literally; replaceAll would treat the staging path as a
        // regex and the replacement as a pattern with '$' and '\' escapes.
        String recoverPathString = oldPathString.replace(stagingParentPathString, oldTableDir.toString());
        Path recoveryPath = new Path(recoverPathString);
        if (!fs.exists(recoveryPath)) {
            fs.mkdirs(recoveryPath);
        }

        // Children are visited before this directory's own rename/creation is decided.
        visitPartitionedDirectory(fs, oldPath, outputPath, stagingParentPathString, renameDirs,
                oldTableDir);

        // Find last order partition for renaming
        String newPathString = oldPathString.replace(stagingParentPathString, outputPath.toString());
        Path newPath = new Path(newPathString);
        if (!isLeafDirectory(fs, oldPath)) {
            renameDirs.put(oldPath, newPath);
        } else {
            if (!fs.exists(newPath)) {
                fs.mkdirs(newPath);
            }
        }
    }
}

From source file:org.apache.tajo.storage.s3.S3TableSpace.java

License:Apache License

/**
 * Calculate the total size of all objects in the indicated bucket
 *
 * @param path to use/* w w w . j  a  va2  s .  co  m*/
 * @return calculated size
 * @throws IOException
 */
@Override
public long calculateSize(Path path) throws IOException {
    long totalBucketSize = 0L;

    if (s3Enabled) {
        String key = pathToKey(path);

        final FileStatus fileStatus = fs.getFileStatus(path);

        if (fileStatus.isDirectory()) {
            if (!key.isEmpty()) {
                key = key + "/";
            }

            ListObjectsRequest request = new ListObjectsRequest();
            request.setBucketName(uri.getHost());
            request.setPrefix(key);
            request.setMaxKeys(maxKeys);

            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: doing listObjects for directory " + key);
            }

            ObjectListing objects = s3.listObjects(request);

            while (true) {
                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, fs.getWorkingDirectory());

                    // Skip over keys that are ourselves and old S3N _$folder$ files
                    if (keyPath.equals(path) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Ignoring: " + keyPath);
                        }
                        continue;
                    }

                    if (!objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                        totalBucketSize += summary.getSize();
                    }
                }

                if (objects.isTruncated()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("listStatus: list truncated - getting next batch");
                    }
                    objects = s3.listNextBatchOfObjects(objects);
                } else {
                    break;
                }
            }
        } else {
            return fileStatus.getLen();
        }
    } else {
        totalBucketSize = fs.getContentSummary(path).getLength();
    }

    return totalBucketSize;
}

From source file:org.apache.tajo.storage.StorageUtil.java

License:Apache License

/**
 * Written files can be one of two forms: "part-[0-9]*-[0-9]*" or "part-[0-9]*-[0-9]*-[0-9]*".
 *
 * This method finds the maximum sequence number from existing data files through the above patterns.
 * If it cannot find any matched file or the maximum number, it will return -1.
 *
 * @param fs file system used to inspect {@code path}
 * @param path directory to scan; -1 is returned when it is not a directory
 * @param recursive whether sub-directories are scanned as well
 * @return The maximum sequence number
 * @throws java.io.IOException propagated from the file system calls
 */
public static int getMaxFileSequence(FileSystem fs, Path path, boolean recursive) throws IOException {
    if (!fs.isDirectory(path)) {
        return -1;
    }

    final FileStatus[] entries = fs.listStatus(path);
    int highest = -1;
    if (entries == null) {
        return highest;
    }

    for (FileStatus entry : entries) {
        final int candidate;
        if (recursive && entry.isDirectory()) {
            // In the case of partition table, return largest value within all partition dirs.
            candidate = getMaxFileSequence(fs, entry.getPath(), recursive);
        } else {
            final String name = entry.getPath().getName();
            if (!name.matches(fileNamePatternV08) && !name.matches(fileNamePatternV09)) {
                continue;
            }
            candidate = getSequence(name);
        }
        highest = Math.max(highest, candidate);
    }

    return highest;
}

From source file:org.apache.tajo.util.history.HistoryReader.java

License:Apache License

/**
 * Reads the query history files found below {@code historyParentPath} and returns the
 * recorded queries, ordered by query id string in descending order.
 *
 * @param keyword if not {@code null}, only queries whose SQL text contains this
 *                (case-sensitive) substring are returned; {@code null} returns all queries
 * @return the matching queries; empty when the history directory does not exist, cannot be
 *         checked, or contains no entries
 * @throws IOException propagated from the file system listing calls
 */
public List<QueryInfo> getQueries(String keyword) throws IOException {
    List<QueryInfo> queryInfos = new ArrayList<QueryInfo>();

    FileSystem fs = HistoryWriter.getNonCrcFileSystem(historyParentPath, tajoConf);
    try {
        if (!fs.exists(historyParentPath)) {
            return queryInfos;
        }
    } catch (Throwable e) {
        // Best effort: any failure while probing the history directory yields an empty result.
        return queryInfos;
    }

    FileStatus[] files = fs.listStatus(historyParentPath);
    if (files == null || files.length == 0) {
        return queryInfos;
    }

    for (FileStatus eachDateFile : files) {
        // Only directories that contain a QUERY_LIST child are considered.
        Path queryListPath = new Path(eachDateFile.getPath(), HistoryWriter.QUERY_LIST);
        if (eachDateFile.isFile() || !fs.exists(queryListPath)) {
            continue;
        }

        FileStatus[] dateFiles = fs.listStatus(queryListPath);
        if (dateFiles == null || dateFiles.length == 0) {
            continue;
        }

        for (FileStatus eachFile : dateFiles) {
            Path path = eachFile.getPath();
            // Skip sub-directories and anything that is not named like a history file.
            if (eachFile.isDirectory() || !path.getName().endsWith(HistoryWriter.HISTORY_FILE_POSTFIX)) {
                continue;
            }

            FSDataInputStream in = null;
            try {
                in = fs.open(path);

                // Each record is an int length followed by that many bytes of JSON text.
                // The buffer is reused across records and only grows when a record is larger.
                byte[] buf = new byte[100 * 1024];
                while (true) {
                    int length = in.readInt();
                    if (length > buf.length) {
                        buf = new byte[length];
                    }
                    in.readFully(buf, 0, length);
                    String queryInfoJson = new String(buf, 0, length, Bytes.UTF8_CHARSET);
                    QueryInfo queryInfo = QueryInfo.fromJson(queryInfoJson);
                    if (keyword != null) {
                        if (queryInfo.getSql().indexOf(keyword) >= 0) {
                            queryInfos.add(queryInfo);
                        }
                    } else {
                        queryInfos.add(queryInfo);
                    }
                }
            } catch (EOFException e) {
                // Expected: running out of data is what ends the read loop above.
            } catch (Throwable e) {
                // A broken file is reported and skipped; records already read from it are kept.
                LOG.warn("Reading error:" + path + ", " + e.getMessage());
            } finally {
                IOUtils.cleanup(LOG, in);
            }
        }
    }

    // compare(query1, query2) compares query2 against query1, giving descending order.
    Collections.sort(queryInfos, new Comparator<QueryInfo>() {
        @Override
        public int compare(QueryInfo query1, QueryInfo query2) {
            return query2.getQueryIdStr().toString().compareTo(query1.getQueryIdStr().toString());
        }
    });

    return queryInfos;
}

From source file:org.apache.tez.client.TezClientUtils.java

License:Apache License

/**
 * Registers the files resolved from each configured URI as local resources, keyed by
 * file name, and then obtains file system tokens for all registered paths.
 * Directories are skipped, and a later file with the same name replaces an earlier one
 * after a warning is logged.
 *
 * @param conf configuration used to resolve the URIs and obtain tokens
 * @param configUris URIs to resolve; nothing happens when {@code null} or empty
 * @param tezJarResources map that receives the resources, keyed by file name
 * @param credentials credentials that receive the obtained tokens
 * @throws IOException propagated from the file system and token calls
 */
private static void addLocalResources(Configuration conf, String[] configUris,
        Map<String, LocalResource> tezJarResources, Credentials credentials) throws IOException {
    if (configUris == null || configUris.length == 0) {
        return;
    }

    List<Path> configuredPaths = Lists.newArrayListWithCapacity(configUris.length);
    for (int u = 0; u < configUris.length; u++) {
        final String configUri = configUris[u];
        final boolean ancestorsHavePermission = checkAncestorPermissionsForAllUsers(conf, configUri,
                FsAction.EXECUTE);

        for (FileStatus fStatus : getLRFileStatus(configUri, conf)) {
            // Skip directories - no recursive search support.
            if (fStatus.isDirectory()) {
                continue;
            }

            // PUBLIC requires both the ancestor permission check to pass and the file
            // itself to grant READ to others; everything else is PRIVATE.
            final LocalResourceVisibility lrVisibility =
                    (ancestorsHavePermission && fStatus.getPermission().getOtherAction().implies(FsAction.READ))
                            ? LocalResourceVisibility.PUBLIC
                            : LocalResourceVisibility.PRIVATE;

            final Path resourcePath = fStatus.getPath();
            final String rsrcName = resourcePath.getName();
            if (tezJarResources.containsKey(rsrcName)) {
                LOG.warn("Duplicate resource found" + ", resourceName=" + rsrcName + ", existingPath="
                        + tezJarResources.get(rsrcName).getResource().toString() + ", newPath="
                        + resourcePath);
            }

            final LocalResource resource = LocalResource.newInstance(
                    ConverterUtils.getYarnUrlFromPath(resourcePath), LocalResourceType.FILE, lrVisibility,
                    fStatus.getLen(), fStatus.getModificationTime());
            tezJarResources.put(rsrcName, resource);
            configuredPaths.add(resourcePath);
        }
    }

    // Obtain credentials.
    if (configuredPaths.isEmpty()) {
        return;
    }
    TokenCache.obtainTokensForFileSystems(credentials,
            configuredPaths.toArray(new Path[configuredPaths.size()]), conf);
}