Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath


On this page you can find example usages of org.apache.hadoop.fs.LocatedFileStatus.getPath().


public Path getPath() 

Source Link


From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * NQueens working folder structure: /nqueens/board-{x}/partial/solution_X-4
 * @param queensSize
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // ja foi processado anteriormente nao processa de novo

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);

        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
    int currentSolutionSet = 0;
    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
    // Input
    FileInputFormat.addInputPath(job, inputPath);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;

            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);


From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;/*from   ww  w .ja  v  a 2 s . c  om*/

    if (workingFolder == null) {
        workingFolder = "";

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;

    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
    } else {
        returnPath = inputPath;
    // Input
    FileInputFormat.addInputPath(job, inputPath);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);

    return returnPath;

From source file:com.alibaba.jstorm.hdfs.common.HdfsUtils.java

License:Apache License

/** list files sorted by modification time that have not been modified since 'olderThan'. if
 * 'olderThan' is <= 0 then the filtering is disabled */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
        throws IOException {
    ArrayList<LocatedFileStatus> fstats = new ArrayList<>();

    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(directory, false);
    while (itr.hasNext()) {
        LocatedFileStatus fileStatus = itr.next();
        if (olderThan > 0) {
            if (fileStatus.getModificationTime() <= olderThan)
                fstats.add(fileStatus);/*w w  w  . j  a  v  a 2  s  . c  om*/
        } else {
    Collections.sort(fstats, new ModifTimeComparator());

    ArrayList<Path> result = new ArrayList<>(fstats.size());
    for (LocatedFileStatus fstat : fstats) {
    return result;

From source file:com.alibaba.jstorm.hdfs.HdfsCache.java

License:Apache License

public Collection<String> listFile(String dstPath, boolean recursive) throws IOException {
    Collection<String> files = new HashSet<String>();
    Path path = new Path(dstPath);
    if (fs.exists(path)) {
        RemoteIterator<LocatedFileStatus> itr = fs.listFiles(path, recursive);
        while (itr.hasNext()) {
            LocatedFileStatus status = itr.next();
        }//from  w w  w . j  a v  a2 s  . co m
    return files;

From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java

License:Apache License

/**
 * Collects the files under {@code inDirStr} and appends them, via
 * {@code DataFileWriter.appendAllFrom}, to a single Avro output file under {@code outDirStr}.
 *
 * NOTE(review): this listing appears truncated - closing braces and several statements
 * (for example whatever followed the "Exiting." messages) are not visible. Confirm the
 * control flow against the original source before relying on it.
 *
 * @param inDirStr       input directory, listed recursively
 * @param outDirStr      output directory, or an output file name ending in ".avro"
 * @param handleExisting what to do when the output file exists; the visible code checks
 *                       for "overwrite" and "append"
 * @throws IOException if a file system or Avro operation fails
 */
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");

        // the last path segment is the requested file name
        outputFilename = outputParts[outputParts.length - 1];

        //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /)
        // NOTE(review): replaceAll removes every occurrence of the file name in the path, not only the last one
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        // drop one trailing "/" if present
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: "+hdfsBlockSize);

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
    // NOTE(review): hdfs is still null here if FileSystem.get threw and nothing exits first
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    // true only when the path neither exists nor is a directory
    if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");

    if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
        } else {

    // recursive listing; keep regular files except the "_SUCCESS" marker
    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");

    //Get Schema and Compression Codec from seed file since we need it for the writer
    // NOTE(review): get(0) throws on an empty list unless an earlier exit (not visible here) prevents it
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        // overwrite=false: an existing file is handled in the catch below
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
                    .println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                            + handleExisting + ". Exiting.");
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");

    //Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    //Set compression to that found in the first file
    // NOTE(review): no statement applying compCodecName to the writer is visible in this listing

    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);
    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {

        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());

        // second argument false: blocks are appended without recompression
        dfw.appendAllFrom(dfr, false);




From source file:com.awcoleman.StandaloneJava.AvroCounterByBlock.java

License:Apache License

/**
 * Counts Avro records in a file or directory by summing {@code DataFileStream.getBlockCount()},
 * then prints per-file and total counts to standard output.
 *
 * NOTE(review): this listing appears truncated - closing braces and several statements
 * (for example the single-file branch body and whatever advances the stream inside the
 * counting loop) are not visible. Confirm against the original source.
 *
 * @param inDirStr input file or directory; directories are listed recursively
 * @throws IOException if a file system or Avro operation fails
 */
public AvroCounterByBlock(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
    // NOTE(review): hdfs is still null here if FileSystem.get threw and nothing exits first
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            // keep regular files except the "_SUCCESS" marker
            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        dfs = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        // add the current block's count to both the grand total and this file's total
        while (dfs.hasNext()) {

            numAvroRecords = numAvroRecords + dfs.getBlockCount();
            thisFileRecords = thisFileRecords + dfs.getBlockCount();

            //System.out.println("Input file "+thisFileStatus.getPath()+" getBlockCount() is "+dfs.getBlockCount()+"." );


        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");


        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");


From source file:com.awcoleman.StandaloneJava.AvroCounterByRecord.java

License:Apache License

/**
 * Opens each Avro file found under {@code inDirStr} as a {@code DataFileStream} and prints
 * per-file and total record counts to standard output.
 *
 * NOTE(review): this listing appears truncated - closing braces and several statements are
 * not visible. In particular the body of the {@code while (avroStream.hasNext())} loop is
 * missing, so nothing visible here updates {@code thisFileRecords} or {@code numAvroRecords}.
 * Confirm against the original source.
 *
 * @param inDirStr input file or directory; directories are listed recursively
 * @throws IOException if a file system or Avro operation fails
 */
public AvroCounterByRecord(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
    // NOTE(review): hdfs is still null here if FileSystem.get threw and nothing exits first
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            // keep regular files except the "_SUCCESS" marker
            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;

        while (avroStream.hasNext()) {

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");


From source file:com.bark.hadoop.lab3.PageRank.java

// Drives a chain of MapReduce jobs (job1 .. job5) whose intermediate output lives under
// args[1] + "/tmp/<timestamp>/jobN", then copies selected part files to args[1] + "/results".
// args[0] is the input path of job 1; args[1] is the output base path.
// Returns 2 from any catch block shown below; otherwise the code derived from the last
// job5.waitForCompletion call (0 on success, 1 on failure).
// NOTE(review): this listing appears truncated - closing braces, block-comment delimiters
// and the statements under the "specify a mapper/reducer/output types" comments are not
// visible. Confirm against the original source before relying on the control flow.
public int run(String args[]) {
    // per-invocation scratch sub-path: "/tmp/" + current time in milliseconds
    String tmp = "/tmp/" + new Date().getTime();
    //        long timeStamp = new Date().getTime();
    try {
         * Job 1: Parse XML input and read title,links
        Configuration conf = new Configuration();
        conf.set("xmlinput.start", "<page>");
        conf.set("xmlinput.end", "</page>");

        Job job = Job.getInstance(conf);

        // specify a mapper

        // specify a reducer

        // specify output types

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job, new Path(args[0]));

        FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1")));

    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job1.");
        return 2;
     * Job 2: Adjacency outGraph
    try {
        Configuration conf2 = new Configuration();

        Job job2 = Job.getInstance(conf2);

        // specify a mapper

        // specify a reducer

        // specify output types

        // specify input and output DIRECTORIES
        // job 2 reads what job 1 wrote
        FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1")));

        FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2")));

    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job2.");
        return 2;
     * Job 3: PageCount
    try {
        Configuration conf3 = new Configuration();
         * Change output separator to "=" instead of default \t for this job
        conf3.set("mapreduce.output.textoutputformat.separator", "=");

        Job job3 = Job.getInstance(conf3);

        // specify a mapper

        // specify a reducer

        // specify output types

        // specify input and output DIRECTORIES
        // job 3 reads what job 2 wrote
        FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2")));

        FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3")));

    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job3.");
        return 2;
     * Job 4: PageRank
    // iterations are numbered 1 through 8
    for (int i = 1; i < 9; i++) {
        try {
            Configuration conf4 = new Configuration();
             * Read number of nodes from the output of job 3 : pageCount
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf4);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            // n is taken from the first run of digits found on a line of a job-3 output file;
            // files are only opened while n is still 0
            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    // NOTE(review): no close() for this stream/reader is visible in this listing
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
            conf4.setInt("N", n);

            Job job4 = Job.getInstance(conf4);

            // specify a mapper

            // specify a reducer

            // specify output types

            // specify input and output DIRECTORIES
            // the first iteration reads job 2's output; later ones read the previous iteration
            if (i == 1) {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2")));
            } else {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1))));

            FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i)));
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job4.");
            return 2;
     * Job 5: Sort iteration 1 and iteration 8
    int returnCode = 0;
    for (int i = 0; i < 2; i++) {
        try {
            Configuration conf5 = new Configuration();

             * Read number of nodes from the output of job 3 : pageCount
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf5);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            // same node-count lookup as in the job 4 loop
            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
            conf5.setInt("N", n);

            Job job5 = Job.getInstance(conf5);
             * one reducer only

            // specify a mapper

            // specify a reducer

            // specify output types

            // specify input and output DIRECTORIES
            // i = 0 selects iteration 1, i = 1 selects iteration 8
            int y = 7 * i + 1;
            FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y)));

            FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y)));

            // the value from the last loop pass is what run() returns at the end
            returnCode = job5.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job5.");
            return 2;
     * Copy necessary output files to args[1]        /**
     * Copy necessary output files to args[1]

     * Rename and copy OutLinkGraph
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.outlink.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;

     * Rename and copy total number of pages
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.n.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;

     * Rename and copy iteration 1
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter1.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;

     * Rename and copy iteration 8
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter8.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    return returnCode;

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Identical to loadFileDescriptors, except using the ListLocatedStatus HDFS API to load
 * file status.
 * TODO: Got AnalysisException error: Failed to load metadata for table
 * CAUSED BY: ClassCastException: DFSClient#getVolumeBlockLocations expected to be
 * passed HdfsBlockLocations
 * TODO: Use new HDFS API resolved by CDH-30342.
 */
public static List<FileDescriptor> loadViaListLocatedStatus(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    RemoteIterator<LocatedFileStatus> fileStatusItor = fs.listLocatedStatus(partDirPath);

    while (fileStatusItor.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());

        // Add to the list of FileDescriptors for this partition.

    return fileDescriptors;

From source file:com.datatorrent.stram.client.FSAgent.java

License:Apache License

public List<String> listFiles(String dir) throws IOException {
    List<String> files = new ArrayList<String>();
    Path path = new Path(dir);

    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (!fileStatus.isDirectory()) {
        throw new FileNotFoundException("Cannot read directory " + dir);
    }/*  w w  w.j  av a  2s  .c o m*/
    RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
    while (it.hasNext()) {
        LocatedFileStatus lfs = it.next();
    return files;