Example usage for org.apache.hadoop.fs LocatedFileStatus isDirectory

List of usage examples for org.apache.hadoop.fs LocatedFileStatus isDirectory

Introduction

On this page you can find example usage for org.apache.hadoop.fs LocatedFileStatus isDirectory.

Prototype

public boolean isDirectory() 

Document

Is this a directory?
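
Before the per-project examples, here is a minimal, self-contained sketch of the common pattern: list a directory with FileSystem#listLocatedStatus and branch on isDirectory(). The path "/tmp", the class name, and the default Configuration are illustrative placeholders, not taken from any project below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class ListDirectoriesSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // List the immediate children of a directory (placeholder path).
        RemoteIterator<LocatedFileStatus> it = fs.listLocatedStatus(new Path("/tmp"));
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            // isDirectory() distinguishes subdirectories from plain files.
            System.out.println((status.isDirectory() ? "dir:  " : "file: ") + status.getPath());
        }
    }
}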

Usage

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
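        // Scan existing partial runs to find the latest run_N directory to resume from.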
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }

    }
    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

public List<RecordingInfo> getRecordingInfo(String appId) {
    List<RecordingInfo> result = new ArrayList<RecordingInfo>();
    String dir = getRecordingsDirectory(appId);
    if (dir == null) {
        return result;
    }
    Path path = new Path(dir);
    try {
        FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path);

        if (!fileStatus.isDirectory()) {
            return result;
        }
        RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path);
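        // Each child directory is named after an operator id; gather its recordings.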
        while (ri.hasNext()) {
            LocatedFileStatus lfs = ri.next();
            if (lfs.isDirectory()) {
                try {
                    String opId = lfs.getPath().getName();
                    result.addAll(getRecordingInfo(appId, opId));
                } catch (NumberFormatException ex) {
                    // ignore
                }
            }
        }
    } catch (IOException ex) {
        LOG.warn("Got exception when getting recording info", ex);
        return result;
    }

    return result;
}

From source file:com.datatorrent.stram.client.RecordingsAgent.java

License:Apache License

private List<RecordingInfo> getRecordingInfoHelper(String appId, String opId, Set<String> containers) {
    List<RecordingInfo> result = new ArrayList<RecordingInfo>();
    String dir = getRecordingsDirectory(appId, opId);
    if (dir == null) {
        return result;
    }
    Path path = new Path(dir);
    try {
        FileStatus fileStatus = stramAgent.getFileSystem().getFileStatus(path);

        if (!fileStatus.isDirectory()) {
            return result;
        }
        RemoteIterator<LocatedFileStatus> ri = stramAgent.getFileSystem().listLocatedStatus(path);
        while (ri.hasNext()) {
            LocatedFileStatus lfs = ri.next();
            if (lfs.isDirectory()) {
                try {
                    String id = lfs.getPath().getName();
                    RecordingInfo recordingInfo = getRecordingInfoHelper(appId, opId, id, containers);
                    if (recordingInfo != null) {
                        result.add(recordingInfo);
                    }
                } catch (NumberFormatException ex) {
                    // ignore
                }
            }
        }
    } catch (IOException ex) {
        LOG.warn("Got exception when getting recording info", ex);
        return result;
    }

    return result;
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> result = new ArrayList<FileStatus>();
    List<IOException> errors = new ArrayList<IOException>();
    for (int i = 0; i < dirs.length; ++i) {
        Path p = dirs[i];
        FileSystem fs = p.getFileSystem(job.getConfiguration());
        FileStatus[] matches = fs.globStatus(p, inputFilter);
        if (matches == null) {
            errors.add(new IOException("Input path does not exist: " + p));
        } else if (matches.length == 0) {
            errors.add(new IOException("Input Pattern " + p + " matches 0 files"));
        } else {
            for (FileStatus globStat : matches) {
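                // A matched directory is expanded one level; recursion goes deeper only if 'recursive' is set.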
                if (globStat.isDirectory()) {
                    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(globStat.getPath());
                    while (iter.hasNext()) {
                        LocatedFileStatus stat = iter.next();
                        if (inputFilter.accept(stat.getPath())) {
                            if (recursive && stat.isDirectory()) {
                                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                            } else {
                                result.add(stat);
                            }
                        }
                    }
                } else {
                    result.add(globStat);
                }
            }
        }
    }

    if (!errors.isEmpty()) {
        throw new InvalidInputException(errors);
    }
    return result;
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/**
 * Add files in the input path recursively into the results.
 * @param result
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs. 
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
    while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
            if (stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
                result.add(stat);
            }
        }
    }
}

From source file:com.uber.hoodie.utilities.sources.DFSSource.java

License:Apache License

@Override
public Pair<Optional<JavaRDD<GenericRecord>>, String> fetchNewData(Optional<String> lastCheckpointStr,
        long maxInputBytes) {

    try {
        // obtain all eligible files under root folder.
        List<FileStatus> eligibleFiles = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> fitr = fs
                .listFiles(new Path(config.getString(Config.ROOT_INPUT_PATH_PROP)), true);
        while (fitr.hasNext()) {
            LocatedFileStatus fileStatus = fitr.next();
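            // Skip directories and files whose names start with an ignored prefix.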
            if (fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream()
                    .filter(pfx -> fileStatus.getPath().getName().startsWith(pfx)).count() > 0) {
                continue;
            }
            eligibleFiles.add(fileStatus);
        }
        // sort them by modification time.
        eligibleFiles.sort((FileStatus f1, FileStatus f2) -> Long.valueOf(f1.getModificationTime())
                .compareTo(Long.valueOf(f2.getModificationTime())));

        // Filter based on checkpoint & input size, if needed
        long currentBytes = 0;
        long maxModificationTime = Long.MIN_VALUE;
        List<FileStatus> filteredFiles = new ArrayList<>();
        for (FileStatus f : eligibleFiles) {
            if (lastCheckpointStr.isPresent()
                    && f.getModificationTime() <= Long.valueOf(lastCheckpointStr.get())) {
                // skip processed files
                continue;
            }

            maxModificationTime = f.getModificationTime();
            currentBytes += f.getLen();
            filteredFiles.add(f);
            if (currentBytes >= maxInputBytes) {
                // we have enough data, we are done
                break;
            }
        }

        // no data to read
        if (filteredFiles.size() == 0) {
            return new ImmutablePair<>(Optional.empty(),
                    lastCheckpointStr.isPresent() ? lastCheckpointStr.get() : String.valueOf(Long.MIN_VALUE));
        }

        // read the files out.
        String pathStr = filteredFiles.stream().map(f -> f.getPath().toString())
                .collect(Collectors.joining(","));
        String schemaStr = schemaProvider.getSourceSchema().toString();
        final AvroConvertor avroConvertor = new AvroConvertor(schemaStr);

        return new ImmutablePair<>(
                Optional.of(DFSSource.fromFiles(dataFormat, avroConvertor, pathStr, sparkContext)),
                String.valueOf(maxModificationTime));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}

From source file:com.uber.hoodie.utilities.sources.helpers.DFSPathSelector.java

License:Apache License

public Pair<Optional<String>, String> getNextFilePathsAndMaxModificationTime(Optional<String> lastCheckpointStr,
        long sourceLimit) {

    try {
        // obtain all eligible files under root folder.
        List<FileStatus> eligibleFiles = new ArrayList<>();
        RemoteIterator<LocatedFileStatus> fitr = fs
                .listFiles(new Path(props.getString(Config.ROOT_INPUT_PATH_PROP)), true);
        while (fitr.hasNext()) {
            LocatedFileStatus fileStatus = fitr.next();
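            // As above: directories and ignorable prefixes are filtered out.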
            if (fileStatus.isDirectory() || IGNORE_FILEPREFIX_LIST.stream()
                    .anyMatch(pfx -> fileStatus.getPath().getName().startsWith(pfx))) {
                continue;
            }
            eligibleFiles.add(fileStatus);
        }
        // sort them by modification time.
        eligibleFiles.sort(Comparator.comparingLong(FileStatus::getModificationTime));

        // Filter based on checkpoint & input size, if needed
        long currentBytes = 0;
        long maxModificationTime = Long.MIN_VALUE;
        List<FileStatus> filteredFiles = new ArrayList<>();
        for (FileStatus f : eligibleFiles) {
            if (lastCheckpointStr.isPresent()
                    && f.getModificationTime() <= Long.valueOf(lastCheckpointStr.get())) {
                // skip processed files
                continue;
            }

            if (currentBytes + f.getLen() >= sourceLimit) {
                // we have enough data, we are done
                break;
            }

            maxModificationTime = f.getModificationTime();
            currentBytes += f.getLen();
            filteredFiles.add(f);
        }

        // no data to read
        if (filteredFiles.size() == 0) {
            return new ImmutablePair<>(Optional.empty(),
                    lastCheckpointStr.orElseGet(() -> String.valueOf(Long.MIN_VALUE)));
        }

        // read the files out.
        String pathStr = filteredFiles.stream().map(f -> f.getPath().toString())
                .collect(Collectors.joining(","));

        return new ImmutablePair<>(Optional.ofNullable(pathStr), String.valueOf(maxModificationTime));
    } catch (IOException ioe) {
        throw new HoodieIOException("Unable to read from source from checkpoint: " + lastCheckpointStr, ioe);
    }
}

From source file:io.prestosql.plugin.hive.util.HiveFileIterator.java

License:Apache License

@Override
protected LocatedFileStatus computeNext() {
    while (true) {
        while (remoteIterator.hasNext()) {
            LocatedFileStatus status = getLocatedFileStatus(remoteIterator);

            // Ignore hidden files and directories. Hive ignores files starting with _ and . as well.
            String fileName = status.getPath().getName();
            if (fileName.startsWith("_") || fileName.startsWith(".")) {
                continue;
            }

            if (status.isDirectory()) {
                switch (nestedDirectoryPolicy) {
                case IGNORED:
                    continue;
                case RECURSE:
                    paths.add(status.getPath());
                    continue;
                case FAIL:
                    throw new NestedDirectoryNotAllowedException();
                }
            }

            return status;
        }

        if (paths.isEmpty()) {
            return endOfData();
        }
        remoteIterator = getLocatedFileStatusRemoteIterator(paths.removeFirst());
    }
}

From source file:org.apache.accumulo.test.GarbageCollectWALIT.java

License:Apache License

private int countWALsInFS(MiniAccumuloClusterImpl cluster) throws Exception {
    FileSystem fs = cluster.getFileSystem();
    RemoteIterator<LocatedFileStatus> iterator = fs
            .listFiles(new Path(cluster.getConfig().getAccumuloDir() + "/wal"), true);
    int result = 0;
    while (iterator.hasNext()) {
        LocatedFileStatus next = iterator.next();
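        // Only regular files count as WALs; directories are skipped.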
        if (!next.isDirectory()) {
            result++;
        }
    }
    return result;
}

From source file:org.apache.carbondata.hadoop.CarbonInputFormat.java

License:Apache License

private void getFileStatusOfSegments(JobContext job, String[] segmentsToConsider, List<FileStatus> result)
        throws IOException {
    String[] partitionsToConsider = getValidPartitions(job);
    if (partitionsToConsider.length == 0) {
        throw new IOException("No partitions/data found");
    }

    PathFilter inputFilter = getDataFileFilter(job);
    CarbonTablePath tablePath = getTablePath(job.getConfiguration());

    // get tokens for all the required FileSystem for table path
    TokenCache.obtainTokensForNamenodes(job.getCredentials(), new Path[] { tablePath }, job.getConfiguration());

    //get all data files of valid partitions and segments
    for (int i = 0; i < partitionsToConsider.length; ++i) {
        String partition = partitionsToConsider[i];

        for (int j = 0; j < segmentsToConsider.length; ++j) {
            String segmentId = segmentsToConsider[j];
            Path segmentPath = new Path(tablePath.getCarbonDataDirectoryPath(partition, segmentId));
            FileSystem fs = segmentPath.getFileSystem(job.getConfiguration());

            RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(segmentPath);
            while (iter.hasNext()) {
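                // Directories under a segment are walked recursively; files go straight into the result.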
                LocatedFileStatus stat = iter.next();
                if (inputFilter.accept(stat.getPath())) {
                    if (stat.isDirectory()) {
                        addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
                    } else {
                        result.add(stat);
                    }
                }
            }
        }
    }
}