Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

On this page you can find example usage for org.apache.hadoop.fs LocatedFileStatus getPath.

Prototype

public Path getPath() 

Source Link

Usage
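
Before the project examples below, here is a minimal sketch of the call, not taken from any of the source files on this page. It assumes the caller supplies an already-configured FileSystem and an existing directory Path, with the usual org.apache.hadoop.fs imports in place:

public static void printPaths(FileSystem fs, Path dir) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        System.out.println(status.getPath());           // full Path of the located file
        System.out.println(status.getPath().getName()); // just the file name
    }
}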

From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // already processed previously, do not process again
    }

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);

        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }
    int currentSolutionSet = 0;
    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;

            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

}

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }

    }
    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

From source file:com.alibaba.jstorm.hdfs.common.HdfsUtils.java

License:Apache License

/** List files sorted by modification time that have not been modified since 'olderThan'. If
 * 'olderThan' is <= 0 then the filtering is disabled. */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
        throws IOException {
    ArrayList<LocatedFileStatus> fstats = new ArrayList<>();

    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(directory, false);
    while (itr.hasNext()) {
        LocatedFileStatus fileStatus = itr.next();
        if (olderThan > 0) {
            if (fileStatus.getModificationTime() <= olderThan)
                fstats.add(fileStatus);
        } else {
            fstats.add(fileStatus);
        }
    }
    Collections.sort(fstats, new ModifTimeComparator());

    ArrayList<Path> result = new ArrayList<>(fstats.size());
    for (LocatedFileStatus fstat : fstats) {
        result.add(fstat.getPath());
    }
    return result;
}
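
The ModifTimeComparator used above comes from the same project and is not shown on this page. A minimal sketch of what such a comparator presumably looks like, using java.util.Comparator and assuming ascending order by modification time as the method's comment implies:

public class ModifTimeComparator implements Comparator<FileStatus> {
    @Override
    public int compare(FileStatus o1, FileStatus o2) {
        // Oldest files first, per the "sorted by modification time" contract above
        return Long.compare(o1.getModificationTime(), o2.getModificationTime());
    }
}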

From source file:com.alibaba.jstorm.hdfs.HdfsCache.java

License:Apache License

public Collection<String> listFile(String dstPath, boolean recursive) throws IOException {
    Collection<String> files = new HashSet<String>();
    Path path = new Path(dstPath);
    if (fs.exists(path)) {
        RemoteIterator<LocatedFileStatus> itr = fs.listFiles(path, recursive);
        while (itr.hasNext()) {
            LocatedFileStatus status = itr.next();
            files.add(status.getPath().getName());
        }
    }
    return files;
}

From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java

License:Apache License

public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");

        outputFilename = outputParts[outputParts.length - 1];

        //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: "+hdfsBlockSize);

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!hdfs.exists(inDir) || !hdfs.isDirectory(inDir)) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!hdfs.exists(outDir) || !hdfs.isDirectory(outDir)) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out
                    .println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                            + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    //Append other files
    GenericDatumWriter<Object> gdw = new GenericDatumWriter<Object>(fileSchema);
    DataFileWriter<Object> dfwBase = new DataFileWriter<Object>(gdw);
    //Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));

    DataFileWriter<Object> dfw = dfwBase.create(fileSchema, hdfsdos);
    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader<Object> dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());

        dfw.appendAllFrom(dfr, false);

        dfr.close();
    }

    dfw.close();
    dfwBase.close();

}

From source file:com.awcoleman.StandaloneJava.AvroCounterByBlock.java

License:Apache License

public AvroCounterByBlock(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        dfs = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (dfs.hasNext()) {

            numAvroRecords = numAvroRecords + dfs.getBlockCount();
            thisFileRecords = thisFileRecords + dfs.getBlockCount();

            //System.out.println("Input file "+thisFileStatus.getPath()+" getBlockCount() is "+dfs.getBlockCount()+"." );

            dfs.nextBlock();
        }

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        dfs.close();
        inStream.close();

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");

}

From source file:com.awcoleman.StandaloneJava.AvroCounterByRecord.java

License:Apache License

public AvroCounterByRecord(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;

        while (avroStream.hasNext()) {
            numAvroRecords++;
            thisFileRecords++;
            avroStream.next();
        }
        avroStream.close();
        inStream.close();

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");

}

From source file:com.bark.hadoop.lab3.PageRank.java

@Override
public int run(String args[]) {
    String tmp = "/tmp/" + new Date().getTime();
    //        long timeStamp = new Date().getTime();
    try {
        /**
         * Job 1: Parse XML input and read title,links
         */
        Configuration conf = new Configuration();
        conf.set("xmlinput.start", "<page>");
        conf.set("xmlinput.end", "</page>");

        Job job = Job.getInstance(conf);
        job.setJarByClass(PageRank.class);

        // specify a mapper
        job.setMapperClass(RedLinkMapper.class);

        // specify a reducer
        job.setReducerClass(RedLinkReducer.class);

        // specify output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(XmlInputFormat.class);

        FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1")));
        job.setOutputFormatClass(TextOutputFormat.class);

        job.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job1.");
        return 2;
    }
    /**
     * Job 2: Adjacency outGraph
     */
    try {
        Configuration conf2 = new Configuration();

        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(PageRank.class);

        // specify a mapper
        job2.setMapperClass(AdjMapper.class);

        // specify a reducer
        job2.setReducerClass(AdjReducer.class);

        // specify output types
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1")));
        job2.setInputFormatClass(TextInputFormat.class);

        FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2")));
        job2.setOutputFormatClass(TextOutputFormat.class);

        job2.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job2.");
        return 2;
    }
    /**
     * Job 3: PageCount
     */
    try {
        Configuration conf3 = new Configuration();
        /**
         * Change output separator to "=" instead of default \t for this job
         */
        conf3.set("mapreduce.output.textoutputformat.separator", "=");

        Job job3 = Job.getInstance(conf3);
        job3.setJarByClass(PageRank.class);

        // specify a mapper
        job3.setMapperClass(PageCountMapper.class);

        // specify a reducer
        job3.setReducerClass(PageCountReducer.class);

        // specify output types
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(IntWritable.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2")));
        job3.setInputFormatClass(TextInputFormat.class);

        FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3")));
        job3.setOutputFormatClass(TextOutputFormat.class);

        job3.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job3.");
        return 2;
    }
    /**
     * Job 4: PageRank
     */
    for (int i = 1; i < 9; i++) {
        try {
            Configuration conf4 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf4);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = Integer.parseInt(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf4.setInt("N", n);

            Job job4 = Job.getInstance(conf4);
            job4.setJarByClass(PageRank.class);

            // specify a mapper
            job4.setMapperClass(PageRankMapper.class);

            // specify a reducer
            job4.setReducerClass(PageRankReducer.class);

            // specify output types
            job4.setOutputKeyClass(Text.class);
            job4.setOutputValueClass(Text.class);

            // specify input and output DIRECTORIES
            if (i == 1) {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2")));
            } else {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1))));
            }
            job4.setInputFormatClass(TextInputFormat.class);

            FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i)));
            job4.setOutputFormatClass(TextOutputFormat.class);
            job4.waitForCompletion(true);
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job4.");
            return 2;
        }
    }
    /**
     * Job 5: Sort iteration 1 and iteration 8
     */
    int returnCode = 0;
    for (int i = 0; i < 2; i++) {
        try {
            Configuration conf5 = new Configuration();

            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf5);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = Integer.parseInt(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf5.setInt("N", n);

            Job job5 = Job.getInstance(conf5);
            /**
             * one reducer only
             */
            job5.setNumReduceTasks(1);
            job5.setSortComparatorClass(MyWritableComparator.class);
            job5.setJarByClass(PageRank.class);

            // specify a mapper
            job5.setMapperClass(SortMapper.class);
            job5.setMapOutputKeyClass(DoubleWritable.class);
            job5.setMapOutputValueClass(Text.class);

            // specify a reducer
            job5.setReducerClass(SortReducer.class);

            // specify output types
            job5.setOutputKeyClass(Text.class);
            job5.setOutputValueClass(DoubleWritable.class);

            // specify input and output DIRECTORIES
            int y = 7 * i + 1;
            FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y)));
            job5.setInputFormatClass(TextInputFormat.class);

            FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y)));
            job5.setOutputFormatClass(TextOutputFormat.class);

            returnCode = job5.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job5.");
            return 2;
        }
    }
    /**
     * Copy necessary output files to args[1]
     */

    /**
     * Rename and copy OutLinkGraph
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.outlink.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy total number of pages
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.n.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 1
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter1.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 8
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter8.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }
    return returnCode;
}
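
MyWritableComparator and the mapper/reducer classes referenced above belong to the same project and are not shown on this page. For orientation only, a hypothetical sketch of a sort comparator that would order job 5's DoubleWritable map-output keys in descending order (highest PageRank first):

public static class MyWritableComparator extends WritableComparator {
    protected MyWritableComparator() {
        super(DoubleWritable.class, true); // create key instances so compare() receives deserialized values
    }

    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Reverse the natural order so the largest PageRank value sorts first
        return -((DoubleWritable) a).compareTo((DoubleWritable) b);
    }
}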

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Identical to loadFileDescriptors, except using the ListLocatedStatus HDFS API to load
 * file status.
 * TODO: Got AnalysisException error: Failed to load metadata for table
 * CAUSED BY: ClassCastException: DFSClient#getVolumeBlockLocations expected to be
 * passed HdfsBlockLocations
 * TODO: Use new HDFS API resolved by CDH-30342.
 */
public static List<FileDescriptor> loadViaListLocatedStatus(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    RemoteIterator<LocatedFileStatus> fileStatusItor = fs.listLocatedStatus(partDirPath);

    while (fileStatusItor.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
        }
        fileDescMap.get(partitionDir).add(fd);

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}

From source file:com.datatorrent.stram.client.FSAgent.java

License:Apache License

public List<String> listFiles(String dir) throws IOException {
    List<String> files = new ArrayList<String>();
    Path path = new Path(dir);

    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (!fileStatus.isDirectory()) {
        throw new FileNotFoundException("Cannot read directory " + dir);
    }
    RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
    while (it.hasNext()) {
        LocatedFileStatus lfs = it.next();
        files.add(lfs.getPath().getName());
    }
    return files;
}