Example usage for org.apache.hadoop.fs FileSystem listLocatedStatus

List of usage examples for org.apache.hadoop.fs.FileSystem.listLocatedStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.listLocatedStatus.

Prototype

public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
        throws FileNotFoundException, IOException 

Source Link

Document

List the statuses of the files/directories in the given path if the path is a directory.

Usage

From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * Configures the input and output paths of the job from the NQueens working folder.
 *
 * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4
 *
 * <p>The entry of the partial folder with the highest numeric step (the number after
 * {@code solution_<queensSize>-}) becomes the job input. When no such entry exists, the
 * seed file is used as input and is created on demand. The output is the next partial step
 * (previous step + 4), or the {@code final} folder once that step reaches the board size.
 *
 * <p>If the {@code final} folder already exists, the JVM exits with status 0.
 *
 * @param queensSize the board size
 * @param job the job whose input/output paths and formats are set
 * @throws IOException if the file system cannot be read or the seed file cannot be written
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    String boardDir = "/nqueens/board-" + queensSize;

    if (fs.isDirectory(new Path(boardDir + "/final"))) {
        System.exit(0); // already processed earlier; do not process it again
    }

    String stepPrefix = "solution_" + queensSize + "-";
    Path partialSolDir = new Path(boardDir + "/partial/");
    Path inputPath = null;
    int lastStep = -1;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);

        while (dirsFound.hasNext()) {
            Path candidate = dirsFound.next().getPath();
            // Compare steps numerically: as plain strings "solution_N-8" sorts after
            // "solution_N-12", which would select a stale step as the latest one.
            int step = parseSolutionStep(candidate.getName(), stepPrefix);
            if (step > lastStep) {
                lastStep = step;
                inputPath = candidate;
            }
        }
    }

    if (inputPath == null) {
        inputPath = new Path(boardDir + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            try {
                seedFile.writeBytes(queensSize + "#");
            } finally {
                // Close even when the write fails so the stream is not leaked.
                seedFile.close();
            }
        }
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    Path outputPath;
    if (lastStep < 0) {
        // No previous partial solution: this run produces the first step.
        outputPath = new Path(boardDir + "/partial/" + stepPrefix + "4");
    } else {
        int currentSolutionSet = lastStep + 4;
        if (currentSolutionSet >= queensSize) {
            outputPath = new Path(boardDir + "/final");
        } else {
            outputPath = new Path(boardDir + "/partial/" + stepPrefix + currentSolutionSet);
        }
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
}

/**
 * Extracts the numeric step from a partial-solution entry name.
 *
 * @param name the entry name, expected to look like {@code <stepPrefix><step>}
 * @param stepPrefix the expected prefix, matched case-insensitively
 * @return the non-negative step, or -1 when the name does not match or has no valid number
 */
private static int parseSolutionStep(String name, String stepPrefix) {
    if (!name.regionMatches(true, 0, stepPrefix, 0, stepPrefix.length())) {
        return -1;
    }
    String digits = name.substring(stepPrefix.length());
    int nextDash = digits.indexOf('-');
    if (nextDash >= 0) {
        digits = digits.substring(0, nextDash);
    }
    try {
        int step = Integer.parseInt(digits);
        return step < 0 ? -1 : step;
    } catch (NumberFormatException e) {
        return -1;
    }
}

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

/**
 * Configures the input and output paths of the job from the NQueens working folder.
 *
 * <p>The sub-directory of {@code <workingFolder>/nqueens/board-<queensSize>/partial/} with the
 * highest numeric run (names shaped like {@code run_<n>}) becomes the job input. When there is
 * none, the seed file is used as input and is created on demand. The output is the
 * {@code final} folder when {@code isFinal} is set, otherwise the next run folder.
 *
 * @param queensSize the board size
 * @param workingFolder the root prefix for all paths; {@code null} is treated as empty
 * @param isFinal whether this run writes to the {@code final} folder
 * @param job the job whose input/output paths and formats are set
 * @return the previous run folder used as input, or {@code null} when the seed file was used
 * @throws IOException if the file system cannot be read or the seed file cannot be written
 */
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }
    String boardDir = workingFolder + "/nqueens/board-" + queensSize;

    Path partialSolDir = new Path(boardDir + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        int lastRun = -1;
        String lastRunPrefix = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();
            if (!dir.isDirectory()) {
                continue;
            }
            String dirName = dir.getPath().getName();
            // Compare runs numerically: as plain strings "run_9" sorts after "run_10",
            // which would make the eleventh run reuse run_9 and write to run_10 again.
            int run = parseRunNumber(dirName);
            if (run > lastRun) {
                lastRun = run;
                lastRunPrefix = dirName.substring(0, dirName.indexOf('_'));
                inputPath = dir.getPath();
            }
        }
        if (inputPath != null) {
            nextRunPath = lastRunPrefix + "_" + (lastRun + 1);
        }
    }
    if (inputPath == null) {
        inputPath = new Path(boardDir + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            try {
                seedFile.writeBytes(queensSize + ":");
            } finally {
                // Close even when the write fails so the stream is not leaked.
                seedFile.close();
            }
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(boardDir + "/final");
    } else {
        outputPath = new Path(boardDir + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

/**
 * Extracts the run number from a run directory name.
 *
 * @param dirName the directory name, expected to look like {@code <prefix>_<n>}
 * @return the non-negative run number, or -1 when the name has no valid number after the
 *         first underscore
 */
private static int parseRunNumber(String dirName) {
    int firstSep = dirName.indexOf('_');
    if (firstSep < 0) {
        return -1;
    }
    int secondSep = dirName.indexOf('_', firstSep + 1);
    String digits = secondSep < 0 ? dirName.substring(firstSep + 1)
            : dirName.substring(firstSep + 1, secondSep);
    try {
        int run = Integer.parseInt(digits);
        return run < 0 ? -1 : run;
    } catch (NumberFormatException e) {
        return -1;
    }
}

From source file:com.cloudera.impala.catalog.TestLoadHdfsMetadataPerf.java

License:Apache License

/**
 * Lists the given directory through fileSystem.listLocatedStatus and touches the hosts and
 * names of every block location of every entry. Any IOException is logged and fails the
 * final assertion.
 *
 * @param dirPath the directory to list
 */
private static void listLocatedStatus(String dirPath) {
    Path dir = new Path(dirPath);
    boolean failed = false;
    try {
        FileSystem fileSystem = dir.getFileSystem(LoadMetadataUtil.getConf());
        RemoteIterator<LocatedFileStatus> statuses = fileSystem.listLocatedStatus(dir);
        // NOTE(review): the existence check runs after the listing call; listLocatedStatus is
        // declared to throw FileNotFoundException, so this guard may never be false - confirm.
        if (fileSystem.exists(dir)) {
            while (statuses.hasNext()) {
                LocatedFileStatus entry = statuses.next();
                for (BlockLocation block : entry.getBlockLocations()) {
                    // Results are discarded; only the calls themselves are exercised.
                    block.getHosts();
                    block.getNames();
                }
            }
        }
    } catch (IOException e) {
        failed = true;
        LOG.error("Failed to list Located Status", e);
    }
    assertFalse(failed);
}

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Identical to loadFileDescriptors, except using the ListLocatedStatus HDFS API to load
 * file status./* ww  w  .  j av a 2 s  .  c o  m*/
 * TODO: Got AnalysisException error: Failed to load metadata for table
 * CAUSED BY: ClassCastException: DFSClient#getVolumeBlockLocations expected to be
 * passed HdfsBlockLocations
 * TODO: Use new HDFS API resolved by CDH-30342.
 */
public static List<FileDescriptor> loadViaListLocatedStatus(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    RemoteIterator<LocatedFileStatus> fileStatusItor = fs.listLocatedStatus(partDirPath);

    while (fileStatusItor.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
        }
        fileDescMap.get(partitionDir).add(fd);

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}

From source file:com.facebook.presto.hadoop.HadoopFileSystem.java

License:Apache License

/**
 * Delegates to {@code fs.listLocatedStatus(path)} and returns its result unchanged.
 *
 * @param fs the file system to query
 * @param path the path to list
 * @return the iterator produced by the file system for {@code path}
 * @throws IOException if the underlying listing call fails
 */
public static RemoteIterator<LocatedFileStatus> listLocatedStatus(FileSystem fs, Path path) throws IOException {
    return fs.listLocatedStatus(path);
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/**
 * Expands every input path pattern on the calling thread and collects the matching files.
 *
 * <p>Each entry of {@code dirs} is globbed with {@code inputFilter}. Matched files are added
 * directly; matched directories are listed one level deep, and with {@code recursive} set
 * their sub-directories are walked through {@code addInputPathRecursively}. Patterns that do
 * not exist or match nothing are collected and reported together at the end.
 *
 * @param job supplies the configuration used to resolve each file system
 * @param dirs the input paths or glob patterns
 * @param inputFilter the filter applied to globbing and to directory children
 * @param recursive whether sub-directories of matched directories are descended into
 * @return the collected file statuses
 * @throws IOException an {@code InvalidInputException} when any pattern is missing or empty,
 *         or whatever the file system calls raise
 */
private List<FileStatus> singleThreadedListStatus(JobContext job, Path[] dirs, PathFilter inputFilter,
        boolean recursive) throws IOException {
    List<FileStatus> collected = new ArrayList<FileStatus>();
    List<IOException> problems = new ArrayList<IOException>();

    for (Path pattern : dirs) {
        FileSystem fileSystem = pattern.getFileSystem(job.getConfiguration());
        FileStatus[] globbed = fileSystem.globStatus(pattern, inputFilter);

        if (globbed == null) {
            problems.add(new IOException("Input path does not exist: " + pattern));
            continue;
        }
        if (globbed.length == 0) {
            problems.add(new IOException("Input Pattern " + pattern + " matches 0 files"));
            continue;
        }

        for (FileStatus match : globbed) {
            if (!match.isDirectory()) {
                collected.add(match);
                continue;
            }
            RemoteIterator<LocatedFileStatus> children = fileSystem.listLocatedStatus(match.getPath());
            while (children.hasNext()) {
                LocatedFileStatus child = children.next();
                if (!inputFilter.accept(child.getPath())) {
                    continue;
                }
                if (recursive && child.isDirectory()) {
                    addInputPathRecursively(collected, fileSystem, child.getPath(), inputFilter);
                } else {
                    collected.add(child);
                }
            }
        }
    }

    if (problems.isEmpty()) {
        return collected;
    }
    throw new InvalidInputException(problems);
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.UpdatedFileInputFormat.java

License:Apache License

/**
 * Add files in the input path recursively into the results.
 * @param result/*from ww w  .  j  ava 2 s  .  c  om*/
 *          The List to store all files.
 * @param fs
 *          The FileSystem.
 * @param path
 *          The input path.
 * @param inputFilter
 *          The input filter that can be used to filter files/dirs. 
 * @throws IOException
 */
protected void addInputPathRecursively(List<FileStatus> result, FileSystem fs, Path path,
        PathFilter inputFilter) throws IOException {
    RemoteIterator<LocatedFileStatus> iter = fs.listLocatedStatus(path);
    while (iter.hasNext()) {
        LocatedFileStatus stat = iter.next();
        if (inputFilter.accept(stat.getPath())) {
            if (stat.isDirectory()) {
                addInputPathRecursively(result, fs, stat.getPath(), inputFilter);
            } else {
                result.add(stat);
            }
        }
    }
}

From source file:io.prestosql.plugin.hive.HadoopDirectoryLister.java

License:Apache License

/**
 * Lists {@code path} by delegating to {@code fs.listLocatedStatus(path)}; the iterator is
 * returned unchanged.
 *
 * @param fs the file system to query
 * @param path the path to list
 * @return the iterator produced by the file system for {@code path}
 * @throws IOException if the underlying listing call fails
 */
@Override
public RemoteIterator<LocatedFileStatus> list(FileSystem fs, Path path) throws IOException {
    return fs.listLocatedStatus(path);
}

From source file:org.apache.apex.malhar.lib.state.managed.IncrementalCheckpointManagerTest.java

License:Apache License

/**
 * Verifies that the bucket data directory has entries after transferring window files and
 * lists none after deleting time buckets less than or equal to 200.
 */
@Test
public void testPurge() throws IOException, InterruptedException {
    FileSystem fileSystem = FileSystem.newInstance(new Configuration());
    try {
        testTransferWindowFiles();
        RemoteIterator<LocatedFileStatus> iterator = fileSystem
                .listLocatedStatus(new Path(testMeta.applicationPath + "/bucket_data"));
        Assert.assertTrue(iterator.hasNext());

        testMeta.managedStateContext.getBucketsFileSystem().deleteTimeBucketsLessThanEqualTo(200);

        iterator = fileSystem.listLocatedStatus(new Path(testMeta.applicationPath + "/bucket_data"));
        Assert.assertFalse("All buckets should be deleted", iterator.hasNext());
    } finally {
        // newInstance returns a dedicated handle owned by this test, so release it here.
        fileSystem.close();
    }
}

From source file:org.apache.carbondata.core.indexstore.blockletindex.BlockletDataMapFactory.java

License:Apache License

/**
 * Builds one distributable per block index file of the segment.
 *
 * <p>Each index file path is resolved to a CarbonFile first; every file is then listed
 * through its file system, and the hosts of the first block location of the first listed
 * status become the distributable's locations.
 *
 * @param segment the segment whose index files are distributed
 * @return the distributables, one per index file
 * @throws RuntimeException wrapping any IOException raised while resolving or listing
 */
@Override
public List<DataMapDistributable> toDistributable(Segment segment) {
    List<DataMapDistributable> result = new ArrayList<>();
    try {
        Set<TableBlockIndexUniqueIdentifier> identifiers = getTableBlockIndexUniqueIdentifiers(segment);
        CarbonFile[] indexFiles = new CarbonFile[identifiers.size()];
        int slot = 0;
        for (TableBlockIndexUniqueIdentifier identifier : identifiers) {
            String directory = identifier.getIndexFilePath();
            String name = identifier.getIndexFileName();
            String fullPath = directory + CarbonCommonConstants.FILE_SEPARATOR + name;
            indexFiles[slot] = FileFactory.getCarbonFile(fullPath);
            slot++;
        }
        for (CarbonFile indexFile : indexFiles) {
            Path path = new Path(indexFile.getPath());
            FileSystem fs = path.getFileSystem(FileFactory.getConfiguration());
            // NOTE(review): next() is called without hasNext() and block location [0] is read
            // unchecked - presumably every index file has at least one block; confirm.
            LocatedFileStatus status = fs.listLocatedStatus(path).next();
            String[] hosts = status.getBlockLocations()[0].getHosts();
            BlockletDataMapDistributable distributable = new BlockletDataMapDistributable(path.toString());
            distributable.setLocations(hosts);
            result.add(distributable);
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return result;
}