Example usage for org.apache.hadoop.fs LocatedFileStatus getPath

List of usage examples for org.apache.hadoop.fs LocatedFileStatus getPath

Introduction

On this page you can find example usage for org.apache.hadoop.fs LocatedFileStatus getPath.

Prototype

public Path getPath() 

Source Link

Usage
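
Before the project examples below, here is a minimal sketch of the call, not taken from any of the source files on this page. It assumes the caller supplies an already-configured FileSystem and an existing directory Path, with the usual org.apache.hadoop.fs imports in place:

public static void printPaths(FileSystem fs, Path dir) throws IOException {
    RemoteIterator<LocatedFileStatus> it = fs.listFiles(dir, false);
    while (it.hasNext()) {
        LocatedFileStatus status = it.next();
        System.out.println(status.getPath());           // full Path of the located file
        System.out.println(status.getPath().getName()); // just the file name
    }
}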

From source file:br.com.lassal.nqueens.grid.job.GenerateSolutions.java

/**
 * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // already processed previously, do not process again
    }

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);

        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }
    int currentSolutionSet = 0;
    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;

            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

}

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();

            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }

    }
    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }
    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}

From source file:com.alibaba.jstorm.hdfs.common.HdfsUtils.java

License:Apache License

/** List files sorted by modification time that have not been modified since 'olderThan'. If
 * 'olderThan' is <= 0 then the filtering is disabled. */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
        throws IOException {
    ArrayList<LocatedFileStatus> fstats = new ArrayList<>();

    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(directory, false);
    while (itr.hasNext()) {
        LocatedFileStatus fileStatus = itr.next();
        if (olderThan > 0) {
            if (fileStatus.getModificationTime() <= olderThan)
                fstats.add(fileStatus);
        } else {
            fstats.add(fileStatus);
        }
    }
    Collections.sort(fstats, new ModifTimeComparator());

    ArrayList<Path> result = new ArrayList<>(fstats.size());
    for (LocatedFileStatus fstat : fstats) {
        result.add(fstat.getPath());
    }
    return result;
}
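
The ModifTimeComparator used above comes from the same project and is not shown on this page. A minimal sketch of what such a comparator presumably looks like, using java.util.Comparator and assuming ascending order by modification time as the method's comment implies:

public class ModifTimeComparator implements Comparator<FileStatus> {
    @Override
    public int compare(FileStatus o1, FileStatus o2) {
        // Oldest files first, per the "sorted by modification time" contract above
        return Long.compare(o1.getModificationTime(), o2.getModificationTime());
    }
}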

From source file:com.alibaba.jstorm.hdfs.HdfsCache.java

License:Apache License

public Collection<String> listFile(String dstPath, boolean recursive) throws IOException {
    Collection<String> files = new HashSet<String>();
    Path path = new Path(dstPath);
    if (fs.exists(path)) {
        RemoteIterator<LocatedFileStatus> itr = fs.listFiles(path, recursive);
        while (itr.hasNext()) {
            LocatedFileStatus status = itr.next();
            files.add(status.getPath().getName());
        }
    }
    return files;
}

From source file:com.awcoleman.StandaloneJava.AvroCombinerByBlock.java

License:Apache License

public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");

        outputFilename = outputParts[outputParts.length - 1];

        //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: "+hdfsBlockSize);

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!hdfs.exists(inDir) || !hdfs.isDirectory(inDir)) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!hdfs.exists(outDir) || !hdfs.isDirectory(outDir)) {
        if (hdfs.exists(outDir)) { //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();

        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) { //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out
                    .println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                            + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    //Append other files
    GenericDatumWriter<Object> gdw = new GenericDatumWriter<Object>(fileSchema);
    DataFileWriter<Object> dfwBase = new DataFileWriter<Object>(gdw);
    //Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));

    DataFileWriter<Object> dfw = dfwBase.create(fileSchema, hdfsdos);
    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader<Object> dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());

        dfw.appendAllFrom(dfr, false);

        dfr.close();
    }

    dfw.close();
    dfwBase.close();

}

From source file:com.awcoleman.StandaloneJava.AvroCounterByBlock.java

License:Apache License

public AvroCounterByBlock(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        dfs = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (dfs.hasNext()) {

            numAvroRecords = numAvroRecords + dfs.getBlockCount();
            thisFileRecords = thisFileRecords + dfs.getBlockCount();

            //System.out.println("Input file "+thisFileStatus.getPath()+" getBlockCount() is "+dfs.getBlockCount()+"." );

            dfs.nextBlock();
        }

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        dfs.close();
        inStream.close();

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");

}

From source file:com.awcoleman.StandaloneJava.AvroCounterByRecord.java

License:Apache License

public AvroCounterByRecord(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();

            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {

        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;

        while (avroStream.hasNext()) {
            numAvroRecords++;
            thisFileRecords++;
            avroStream.next();
        }
        avroStream.close();
        inStream.close();

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");

}

From source file:com.bark.hadoop.lab3.PageRank.java

@Override
public int run(String args[]) {
    String tmp = "/tmp/" + new Date().getTime();
    //        long timeStamp = new Date().getTime();
    try {
        /**
         * Job 1: Parse XML input and read title,links
         */
        Configuration conf = new Configuration();
        conf.set("xmlinput.start", "<page>");
        conf.set("xmlinput.end", "</page>");

        Job job = Job.getInstance(conf);
        job.setJarByClass(PageRank.class);

        // specify a mapper
        job.setMapperClass(RedLinkMapper.class);

        // specify a reducer
        job.setReducerClass(RedLinkReducer.class);

        // specify output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(XmlInputFormat.class);

        FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1")));
        job.setOutputFormatClass(TextOutputFormat.class);

        job.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job1.");
        return 2;
    }
    /**
     * Job 2: Adjacency outGraph
     */
    try {
        Configuration conf2 = new Configuration();

        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(PageRank.class);

        // specify a mapper
        job2.setMapperClass(AdjMapper.class);

        // specify a reducer
        job2.setReducerClass(AdjReducer.class);

        // specify output types
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1")));
        job2.setInputFormatClass(TextInputFormat.class);

        FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2")));
        job2.setOutputFormatClass(TextOutputFormat.class);

        job2.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job2.");
        return 2;
    }
    /**
     * Job 3: PageCount
     */
    try {
        Configuration conf3 = new Configuration();
        /**
         * Change output separator to "=" instead of default \t for this job
         */
        conf3.set("mapreduce.output.textoutputformat.separator", "=");

        Job job3 = Job.getInstance(conf3);
        job3.setJarByClass(PageRank.class);

        // specify a mapper
        job3.setMapperClass(PageCountMapper.class);

        // specify a reducer
        job3.setReducerClass(PageCountReducer.class);

        // specify output types
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(IntWritable.class);

        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2")));
        job3.setInputFormatClass(TextInputFormat.class);

        FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3")));
        job3.setOutputFormatClass(TextOutputFormat.class);

        job3.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job3.");
        return 2;
    }
    /**
     * Job 4: PageRank
     */
    for (int i = 1; i < 9; i++) {
        try {
            Configuration conf4 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf4);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = Integer.parseInt(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf4.setInt("N", n);

            Job job4 = Job.getInstance(conf4);
            job4.setJarByClass(PageRank.class);

            // specify a mapper
            job4.setMapperClass(PageRankMapper.class);

            // specify a reducer
            job4.setReducerClass(PageRankReducer.class);

            // specify output types
            job4.setOutputKeyClass(Text.class);
            job4.setOutputValueClass(Text.class);

            // specify input and output DIRECTORIES
            if (i == 1) {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2")));
            } else {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1))));
            }
            job4.setInputFormatClass(TextInputFormat.class);

            FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i)));
            job4.setOutputFormatClass(TextOutputFormat.class);
            job4.waitForCompletion(true);
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job4.");
            return 2;
        }
    }
    /**
     * Job 5: Sort iteration 1 and iteration 8
     */
    int returnCode = 0;
    for (int i = 0; i < 2; i++) {
        try {
            Configuration conf5 = new Configuration();

            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf5);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = Integer.parseInt(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf5.setInt("N", n);

            Job job5 = Job.getInstance(conf5);
            /**
             * one reducer only
             */
            job5.setNumReduceTasks(1);
            job5.setSortComparatorClass(MyWritableComparator.class);
            job5.setJarByClass(PageRank.class);

            // specify a mapper
            job5.setMapperClass(SortMapper.class);
            job5.setMapOutputKeyClass(DoubleWritable.class);
            job5.setMapOutputValueClass(Text.class);

            // specify a reducer
            job5.setReducerClass(SortReducer.class);

            // specify output types
            job5.setOutputKeyClass(Text.class);
            job5.setOutputValueClass(DoubleWritable.class);

            // specify input and output DIRECTORIES
            int y = 7 * i + 1;
            FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y)));
            job5.setInputFormatClass(TextInputFormat.class);

            FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y)));
            job5.setOutputFormatClass(TextOutputFormat.class);

            returnCode = job5.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job5.");
            return 2;
        }
    }
    /**
     * Copy necessary output files to args[1]
     */

    /**
     * Rename and copy OutLinkGraph
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.outlink.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy total number of pages
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.n.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 1
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter1.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    /**
     * Rename and copy iteration 8
     */
    try {
        Configuration conf = new Configuration();

        Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);

        Path output = new Path(args[1] + "/results/PageRank.iter8.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }
    return returnCode;
}
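
MyWritableComparator and the mapper/reducer classes referenced above belong to the same project and are not shown on this page. For orientation only, a hypothetical sketch of a sort comparator that would order job 5's DoubleWritable map-output keys in descending order (highest PageRank first):

public static class MyWritableComparator extends WritableComparator {
    protected MyWritableComparator() {
        super(DoubleWritable.class, true); // create key instances so compare() receives deserialized values
    }

    @SuppressWarnings("rawtypes")
    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // Reverse the natural order so the largest PageRank value sorts first
        return -((DoubleWritable) a).compareTo((DoubleWritable) b);
    }
}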

From source file:com.cloudera.impala.util.LoadMetadataUtil.java

License:Apache License

/**
 * Identical to loadFileDescriptors, except using the ListLocatedStatus HDFS API to load
 * file status.
 * TODO: Got AnalysisException error: Failed to load metadata for table
 * CAUSED BY: ClassCastException: DFSClient#getVolumeBlockLocations expected to be
 * passed HdfsBlockLocations
 * TODO: Use new HDFS API resolved by CDH-30342.
 */
public static List<FileDescriptor> loadViaListLocatedStatus(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    RemoteIterator<LocatedFileStatus> fileStatusItor = fs.listLocatedStatus(partDirPath);

    while (fileStatusItor.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
        }
        fileDescMap.get(partitionDir).add(fd);

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}

From source file:com.datatorrent.stram.client.FSAgent.java

License:Apache License

public List<String> listFiles(String dir) throws IOException {
    List<String> files = new ArrayList<String>();
    Path path = new Path(dir);

    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (!fileStatus.isDirectory()) {
        throw new FileNotFoundException("Cannot read directory " + dir);
    }
    RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
    while (it.hasNext()) {
        LocatedFileStatus lfs = it.next();
        files.add(lfs.getPath().getName());
    }
    return files;
}