Example usage for org.apache.hadoop.fs LocatedFileStatus isDirectory

Introduction

This page collects example usages of the isDirectory() method of org.apache.hadoop.fs.LocatedFileStatus.

Prototype

public boolean isDirectory()

Source Link

Document

Is this a directory?

Usage

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

/**
 * Configures the input and output locations of {@code job} for one run over the
 * {@code <workingFolder>/nqueens/board-<queensSize>} directory tree.
 *
 * <p>Input: the sub-directory of {@code partial/} with the highest run number or, when no
 * such directory exists, the {@code seed} file (created with the content
 * {@code "<queensSize>:"} if it is missing). Output: {@code final} when {@code isFinal} is
 * set, otherwise {@code partial/<prefix>_<n + 1>} ({@code partial/run_1} on the first run).
 *
 * <p>Run directories are ordered by their numeric suffix rather than by name, so that
 * {@code run_10} follows {@code run_9}. Directories whose name is not of the form
 * {@code <prefix>_<non-negative integer>} are skipped instead of aborting the setup.
 *
 * @param queensSize board size, used in the directory names and in the seed content
 * @param workingFolder base folder; {@code null} is treated as the empty string
 * @param isFinal whether the output goes to the {@code final} directory
 * @param job the job whose input/output paths and formats are set
 * @return the previous run's directory that was used as input, or {@code null} when the
 *         seed file was used
 * @throws IOException if a file system operation fails
 */
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    String boardDir = workingFolder + "/nqueens/board-" + queensSize;
    Path partialSolDir = new Path(boardDir + "/partial/");
    Path inputPath = null;
    Path outputPath;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        Path lastPath = null;
        int lastRun = -1;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();
            if (!dir.isDirectory()) {
                continue;
            }
            // Compare run numbers numerically: a plain string comparison would
            // rank "run_9" above "run_10".
            int run = parseRunNumber(dir.getPath().getName());
            if (run > lastRun) {
                lastRun = run;
                lastPath = dir.getPath();
            }
        }
        if (lastPath != null) {
            String lastRunPath = lastPath.getName();
            String prefix = lastRunPath.substring(0, lastRunPath.indexOf('_'));
            nextRunPath = prefix + "_" + (lastRun + 1);
            inputPath = lastPath;
        }
    }

    if (inputPath == null) {
        inputPath = new Path(boardDir + "/seed");
        if (!fs.exists(inputPath)) {
            // try-with-resources closes the stream even when the write fails
            try (FSDataOutputStream seedFile = fs.create(inputPath, true)) {
                seedFile.writeBytes(queensSize + ":");
            }
        }
    } else {
        returnPath = inputPath;
    }

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(boardDir + "/final");
    } else {
        outputPath = new Path(boardDir + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

/**
 * Extracts the run number from a directory name of the form {@code <prefix>_<number>}.
 *
 * @param dirName the directory name to inspect
 * @return the non-negative run number, or {@code -1} if the name does not match the form
 */
private int parseRunNumber(String dirName) {
    int separator = dirName.indexOf('_');
    if (separator < 0) {
        return -1;
    }
    try {
        int run = Integer.parseInt(dirName.substring(separator + 1));
        return run >= 0 ? run : -1;
    } catch (NumberFormatException ignored) {
        // not a run directory; the caller skips it
        return -1;
    }
}

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

/**
 * Gathers the recording info of every sub-directory found directly under the
 * recordings directory of the given application. Each sub-directory name is passed
 * on as the operator id to {@code getRecordingInfo(appId, opId)}.
 *
 * <p>An empty list is returned when the recordings directory is unknown or is not a
 * directory. An {@link IOException} is logged as a warning and whatever was collected
 * up to that point is returned.
 *
 * @param appId the application id
 * @return the collected recording infos, never {@code null}
 */
public List<RecordingInfo> getRecordingInfo(String appId) {
    final List<RecordingInfo> infos = new ArrayList<RecordingInfo>();
    final String recordingsDir = getRecordingsDirectory(appId);
    if (recordingsDir != null) {
        final Path root = new Path(recordingsDir);
        try {
            if (stramAgent.getFileSystem().getFileStatus(root).isDirectory()) {
                RemoteIterator<LocatedFileStatus> children = stramAgent.getFileSystem()
                        .listLocatedStatus(root);
                while (children.hasNext()) {
                    LocatedFileStatus child = children.next();
                    if (!child.isDirectory()) {
                        continue;
                    }
                    try {
                        infos.addAll(getRecordingInfo(appId, child.getPath().getName()));
                    } catch (NumberFormatException ex) {
                        // ignore
                    }
                }
            }
        } catch (IOException ex) {
            LOG.warn("Got exception when getting recording info", ex);
        }
    }
    return infos;
}

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

/**
 * Gathers the recording info of every sub-directory found directly under the
 * recordings directory of the given application and operator. Each sub-directory name
 * is passed on as the id to the four-argument {@code getRecordingInfoHelper}; its
 * {@code null} results are dropped.
 *
 * <p>An empty list is returned when the recordings directory is unknown or is not a
 * directory. An {@link IOException} is logged as a warning and whatever was collected
 * up to that point is returned.
 *
 * @param appId the application id
 * @param opId the operator id
 * @param containers forwarded unchanged to the four-argument helper
 * @return the collected recording infos, never {@code null}
 */
private List<RecordingInfo> getRecordingInfoHelper(String appId, String opId, Set<String> containers) {
    final List<RecordingInfo> infos = new ArrayList<RecordingInfo>();
    final String recordingsDir = getRecordingsDirectory(appId, opId);
    if (recordingsDir != null) {
        final Path root = new Path(recordingsDir);
        try {
            if (stramAgent.getFileSystem().getFileStatus(root).isDirectory()) {
                RemoteIterator<LocatedFileStatus> children = stramAgent.getFileSystem()
                        .listLocatedStatus(root);
                while (children.hasNext()) {
                    LocatedFileStatus child = children.next();
                    if (!child.isDirectory()) {
                        continue;
                    }
                    try {
                        RecordingInfo info = getRecordingInfoHelper(appId, opId, child.getPath().getName(),
                                containers);
                        if (info != null) {
                            infos.add(info);
                        }
                    } catch (NumberFormatException ex) {
                        // ignore
                    }
                }
            }
        } catch (IOException ex) {
            LOG.warn("Got exception when getting recording info", ex);
        }
    }
    return infos;
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/**
 * Expands every input path (glob) into the file statuses it denotes.
 *
 * <p>A glob match that is a plain file is added as is. For a match that is a directory,
 * its direct children accepted by {@code inputFilter} are added; when {@code recursive}
 * is set, accepted child directories are expanded through
 * {@code addInputPathRecursively} instead of being added themselves.
 *
 * @param job supplies the configuration used to resolve each path's file system
 * @param dirs the input paths or glob patterns
 * @param inputFilter filter applied to the glob and to the directory children
 * @param recursive whether child directories are descended into
 * @return the collected file statuses
 * @throws IOException on file system errors; an {@code InvalidInputException} carrying
 *         one entry per offending path is thrown if any path does not exist or matches
 *         no files
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    final List<FileStatus> collected = new ArrayList<FileStatus>();
    final List<IOException> problems = new ArrayList<IOException>();

    for (Path inputDir : dirs) {
        FileSystem fileSystem = inputDir.getFileSystem(job.getConfiguration());
        FileStatus[] globbed = fileSystem.globStatus(inputDir, inputFilter);
        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + inputDir));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + inputDir + " matches 0 files"));
            continue;
        }

        for (FileStatus match : globbed) {
            if (!match.isDirectory()) {
                collected.add(match);
                continue;
            }
            RemoteIterator<LocatedFileStatus> children = fileSystem.listLocatedStatus(match.getPath());
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!inputFilter.accept(child.getPath())) {
                    continue;
                }
                if (recursive && child.isDirectory()) {
                    addInputPathRecursively(collected, fileSystem, child.getPath(), inputFilter);
                } else {
                    collected.add(child);
                }
            }
        }
    }

    if (problems.isEmpty()) {
        return collected;
    }
    throw new InvalidInputException(problems);
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/**
 * Add files in the input path recursively into the results.
 * @param result/*from w w w.  j a va2 s  .  c  om*/
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs. 
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
    while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
            if (stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
                result.add(stat);
            }
        }
    }
}

From source file:com.uber.hoodie.utilities.sources.DFSSource.java

License:Apache License

/**
 * Reads the next batch of files found (recursively) under the configured root input path.
 *
 * <p>Files whose name starts with one of {@code IGNORE_FILEPREFIX_LIST} are skipped. The
 * remaining files are ordered by modification time; files not newer than the checkpoint
 * are skipped, and files are then taken until the accumulated length reaches
 * {@code maxInputBytes} (the file that crosses the limit is still included).
 *
 * @param lastCheckpointStr modification time of the newest file already processed, as a
 *        decimal string, if any
 * @param maxInputBytes byte budget after which no further files are taken
 * @return the records read (empty when there is nothing new) paired with the new
 *         checkpoint: the modification time of the newest selected file, or the previous
 *         checkpoint ({@code Long.MIN_VALUE} if there was none) when nothing was selected
 * @throws HoodieIOException if listing the source files fails
 */
@Override
public Pair<Optional<JavaRDD<GenericRecord>>, String> fetchNewData(Optional<String> lastCheckpointStr,
        long maxInputBytes) {

    try {
        // obtain all eligible files under root folder.
        List<FileStatus> eligibleFiles = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> fitr = fs
                .listFiles(new Path(config.getString(Config.ROOT_INPUT_PATH_PROP)), true);
        while (fitr.hasNext()) {
            LocatedFileStatus fileStatus = fitr.next();
            // anyMatch stops at the first matching prefix instead of counting all of them
            if (fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream()
                    .anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) {
                continue;
            }
            eligibleFiles.add(fileStatus);
        }
        // sort them by modification time (primitive comparison, no boxing).
        eligibleFiles.sort((f1, f2) -> Long.compare(f1.getModificationTime(), f2.getModificationTime()));

        // Parse the checkpoint once rather than once per file. It is only parsed when there
        // is a file to compare it against, so a malformed checkpoint fails exactly when the
        // comparison would first have been evaluated.
        final boolean hasCheckpoint = lastCheckpointStr.isPresent();
        final long checkpointTime = hasCheckpoint && !eligibleFiles.isEmpty()
                ? Long.parseLong(lastCheckpointStr.get())
                : Long.MIN_VALUE;

        // Filter based on checkpoint & input size, if needed
        long currentBytes = 0;
        long maxModificationTime = Long.MIN_VALUE;
        List<FileStatus> filteredFiles = new ArrayList<>();
        for (FileStatus f : eligibleFiles) {
            if (hasCheckpoint && f.getModificationTime() <= checkpointTime) {
                // skip processed files
                continue;
            }

            maxModificationTime = f.getModificationTime();
            currentBytes += f.getLen();
            filteredFiles.add(f);
            if (currentBytes >= maxInputBytes) {
                // we have enough data, we are done
                break;
            }
        }

        // no data to read
        if (filteredFiles.isEmpty()) {
            return new ImmutablePair<>(Optional.empty(),
                    lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
        }

        // read the files out.
        String pathStr = filteredFiles.stream().map(f -> f.getPath().toString())
                .collect(Collectors.joining(","));
        String schemaStr = schemaProvider.getSourceSchema().toString();
        final AvroConvertor avroConvertor = new AvroConvertor(schemaStr);

        return new ImmutablePair<>(
                Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)),
                String.valueOf(maxModificationTime));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}

From source file:com.uber.hoodie.utilities.sources.helpers.DFSPathSelector.java

License:Apache License

/**
 * Picks the next batch of files found (recursively) under the configured root input path.
 *
 * <p>Files whose name starts with one of {@code IGNORE_FILEPREFIX_LIST} are skipped. The
 * remaining files are ordered by modification time; files not newer than the checkpoint
 * are skipped, and selection stops before the first file that would bring the
 * accumulated length to {@code sourceLimit} or beyond.
 *
 * @param lastCheckpointStr modification time of the newest file already processed, as a
 *        decimal string, if any
 * @param sourceLimit byte budget for the batch
 * @return the comma-separated paths of the selected files (empty when nothing was
 *         selected) paired with the new checkpoint: the modification time of the newest
 *         selected file, or the previous checkpoint ({@code Long.MIN_VALUE} if there was
 *         none) when nothing was selected
 * @throws HoodieIOException if listing the source files fails
 */
public Pair<Optional<String>, String> getNextFilePathsAndMaxModificationTime(Optional<String> lastCheckpointStr,
        long sourceLimit) {

    try {
        // collect every candidate file below the root folder
        final List<FileStatus> candidates = new ArrayList<>();
        final RemoteIterator<LocatedFileStatus> listing = fs
                .listFiles(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), true);
        while (listing.hasNext()) {
            final LocatedFileStatus entry = listing.next();
            final boolean skip = entry.isDirectory() || IGNORE_FILEPREFIX_LIST.stream()
                    .anyMatch(prefix -> entry.getPath().getName().startsWith(prefix));
            if (!skip) {
                candidates.add(entry);
            }
        }

        // oldest first
        candidates.sort(Comparator.comparingLong(FileStatus::getModificationTime));

        // apply the checkpoint and the size budget
        final List<FileStatus> selected = new ArrayList<>();
        long newestSelected = Long.MIN_VALUE;
        long selectedBytes = 0;
        for (FileStatus candidate : candidates) {
            final boolean alreadyProcessed = lastCheckpointStr.isPresent()
                    && candidate.getModificationTime() <= Long.valueOf(lastCheckpointStr.get());
            if (alreadyProcessed) {
                continue;
            }
            if (selectedBytes + candidate.getLen() >= sourceLimit) {
                // budget exhausted
                break;
            }
            selected.add(candidate);
            selectedBytes += candidate.getLen();
            newestSelected = candidate.getModificationTime();
        }

        // nothing new: hand back the old checkpoint
        if (selected.isEmpty()) {
            return new ImmutablePair<>(Optional.empty(),
                    lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
        }

        final String joinedPaths = selected.stream().map(status -> status.getPath().toString())
                .collect(Collectors.joining(","));
        return new ImmutablePair<>(Optional.ofNullable(joinedPaths), String.valueOf(newestSelected));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}

From source file:io.prestosql.plugin.hive.util.HiveFileIterator.java

License:Apache License

/**
 * Produces the next visible file status, moving on to the queued directories in
 * {@code paths} once the current remote iterator is exhausted.
 *
 * <p>Entries whose name starts with {@code _} or {@code .} are skipped. A directory is
 * handled according to {@code nestedDirectoryPolicy}: skipped ({@code IGNORED}), queued
 * for later listing ({@code RECURSE}), or rejected with a
 * {@code NestedDirectoryNotAllowedException} ({@code FAIL}).
 *
 * @return the next file status, or the result of {@code endOfData()} when the current
 *         iterator is exhausted and no directories remain queued
 */
@Override
protected LocatedFileStatus computeNext() {
    for (;;) {
        if (!remoteIterator.hasNext()) {
            // current listing is drained: finish, or switch to the next queued directory
            if (paths.isEmpty()) {
                return endOfData();
            }
            remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst());
            continue;
        }

        LocatedFileStatus entry = getLocatedFileStatus(remoteIterator);

        // Ignore hidden files and directories. Hive ignores files starting with _ and . as well.
        String entryName = entry.getPath().getName();
        if (entryName.startsWith("_") || entryName.startsWith(".")) {
            continue;
        }

        if (!entry.isDirectory()) {
            return entry;
        }

        switch (nestedDirectoryPolicy) {
        case IGNORED:
            break;
        case RECURSE:
            paths.add(entry.getPath());
            break;
        case FAIL:
            throw new NestedDirectoryNotAllowedException();
        default:
            // any other policy value: the directory is returned like a regular entry
            return entry;
        }
    }
}

From source file:org.apache.accumulo.test.GarbageCollectWALIT.java

License:Apache License

/**
 * Counts the non-directory entries found recursively under the {@code wal} directory
 * of the cluster's Accumulo directory.
 *
 * @param cluster the cluster whose file system and configuration are used
 * @return the number of non-directory entries found
 * @throws Exception if listing the directory fails
 */
private int countWALsInFS(MiniAccumuloClusterImpl cluster) throws Exception {
    FileSystem fileSystem = cluster.getFileSystem();
    Path walRoot = new Path(cluster.getConfig().getAccumuloDir() + "/wal");
    RemoteIterator<LocatedFileStatus> entries = fileSystem.listFiles(walRoot, true);

    int walCount = 0;
    while (entries.hasNext()) {
        if (entries.next().isDirectory()) {
            continue;
        }
        walCount++;
    }
    return walCount;
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

/**
 * Appends to {@code result} the data files of every (valid partition, segment)
 * combination.
 *
 * <p>For each segment directory, the direct children accepted by the job's data file
 * filter are considered: accepted directories are expanded through
 * {@code addInputPathRecursively}, accepted files are added directly.
 *
 * @param job supplies the configuration, credentials, valid partitions and file filter
 * @param segmentsToConsider ids of the segments to scan
 * @param result list that receives the file statuses found
 * @throws IOException if there are no valid partitions or a file system operation fails
 */
private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    final String[] validPartitions = getValidPartitions(job);
    if (validPartitions.length == 0) {
        throw new IOException("No partitions/data found");
    }

    final PathFilter dataFileFilter = getDataFileFilter(job);
    final CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());

    // get all data files of valid partitions and segments
    for (String partition : validPartitions) {
        for (String segmentId : segmentsToConsider) {
            Path segmentDir = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem segmentFs = segmentDir.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> children = segmentFs.listLocatedStatus(segmentDir);
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!inputFilter(dataFileFilter, child)) {
                    continue;
                }
                if (child.isDirectory()) {
                    addInputPathRecursively(result, segmentFs, child.getPath(), dataFileFilter);
                } else {
                    result.add(child);
                }
            }
        }
    }
}

/** Tells whether {@code filter} accepts the path of {@code status}. */
private static boolean inputFilter(PathFilter filter, FileStatus status) {
    return filter.accept(status.getPath());
}