List of usage examples for org.apache.hadoop.fs.LocatedFileStatus.getPath()
public Path getPath()
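getPath() returns the fully qualified Path of the file or directory described by a LocatedFileStatus, typically while iterating a directory listing. A minimal usage sketch before the full examples below (the directory name /tmp/input and the default Configuration are assumptions for illustration only):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

public class GetPathExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Recursively list files under a hypothetical input directory.
        RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/tmp/input"), true);
        while (it.hasNext()) {
            LocatedFileStatus status = it.next();
            Path p = status.getPath();                     // fully qualified path of the listed entry
            System.out.println(p.getName() + " -> " + p); // file name component and full path
        }
    }
}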
From source file: br.com.lassal.nqueens.grid.job.GenerateSolutions.java
/**
 * NQueens working folder structure /nqueens/board-{x}/partial/solution_X-4
 *
 * @param queensSize
 * @throws IOException
 */
private void setWorkingFolder(int queensSize, Job job) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    if (fs.isDirectory(new Path("/nqueens/board-" + queensSize + "/final"))) {
        System.exit(0); // already processed previously; do not process again
    }

    String lastSolution = null;
    Path partialSolDir = new Path("/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        while (dirsFound.hasNext()) {
            LocatedFileStatus path = dirsFound.next();
            if (lastSolution == null) {
                lastSolution = path.getPath().getName();
                inputPath = path.getPath();
            } else {
                String currentDir = path.getPath().getName();
                if (lastSolution.compareToIgnoreCase(currentDir) < 0) {
                    lastSolution = currentDir;
                    inputPath = path.getPath();
                }
            }
        }
    }

    int currentSolutionSet = 0;

    if (inputPath == null) {
        inputPath = new Path("/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + "#");
            seedFile.close();
        }
    }

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (lastSolution != null) {
        String[] solution = lastSolution.split("-");
        if (solution[0].equalsIgnoreCase("solution_" + queensSize)) {
            currentSolutionSet = Integer.parseInt(solution[1]) + 4;
            if (currentSolutionSet >= queensSize) {
                outputPath = new Path("/nqueens/board-" + queensSize + "/final");
            } else {
                outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-"
                        + currentSolutionSet);
            }
        }
    } else {
        outputPath = new Path("/nqueens/board-" + queensSize + "/partial/solution_" + queensSize + "-4");
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
}
From source file: br.com.lassal.nqueens.grid.job.NQueenCounter.java
private Path setWorkingFolder(int queensSize, String workingFolder, boolean isFinal, Job job)
        throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    Path returnPath = null;

    if (workingFolder == null) {
        workingFolder = "";
    }

    Path partialSolDir = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/");
    Path inputPath = null;
    Path outputPath = null;
    String nextRunPath = "run_1";

    if (fs.exists(partialSolDir)) {
        RemoteIterator<LocatedFileStatus> dirsFound = fs.listLocatedStatus(partialSolDir);
        String lastRunPath = null;
        Path lastPath = null;

        while (dirsFound.hasNext()) {
            LocatedFileStatus dir = dirsFound.next();
            if (dir.isDirectory()) {
                if (lastRunPath == null || dir.getPath().getName().compareTo(lastRunPath) > 0) {
                    lastPath = dir.getPath();
                    lastRunPath = lastPath.getName();
                }
            }
        }
        if (lastRunPath != null) {
            String[] runParts = lastRunPath.split("_");
            int lastRun = Integer.parseInt(runParts[1]);
            nextRunPath = runParts[0] + "_" + (++lastRun);
            inputPath = lastPath;
        }
    }

    if (inputPath == null) {
        inputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/seed");
        if (!fs.exists(inputPath)) {
            FSDataOutputStream seedFile = fs.create(inputPath, true);
            seedFile.writeBytes(queensSize + ":");
            seedFile.close();
        }
    } else {
        returnPath = inputPath;
    }

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    if (isFinal) {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/final");
    } else {
        outputPath = new Path(workingFolder + "/nqueens/board-" + queensSize + "/partial/" + nextRunPath);
    }

    // Output
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);

    return returnPath;
}
From source file: com.alibaba.jstorm.hdfs.common.HdfsUtils.java
License: Apache License
/**
 * List files sorted by modification time that have not been modified since 'olderThan'.
 * If 'olderThan' is <= 0 then the filtering is disabled.
 */
public static ArrayList<Path> listFilesByModificationTime(FileSystem fs, Path directory, long olderThan)
        throws IOException {
    ArrayList<LocatedFileStatus> fstats = new ArrayList<>();
    RemoteIterator<LocatedFileStatus> itr = fs.listFiles(directory, false);
    while (itr.hasNext()) {
        LocatedFileStatus fileStatus = itr.next();
        if (olderThan > 0) {
            if (fileStatus.getModificationTime() <= olderThan)
                fstats.add(fileStatus);
        } else {
            fstats.add(fileStatus);
        }
    }
    Collections.sort(fstats, new ModifTimeComparator());

    ArrayList<Path> result = new ArrayList<>(fstats.size());
    for (LocatedFileStatus fstat : fstats) {
        result.add(fstat.getPath());
    }
    return result;
}
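The example above sorts with a ModifTimeComparator that is not shown on this page. A minimal sketch of such a comparator, assuming it orders statuses by ascending modification time, could look like this:

import java.util.Comparator;
import org.apache.hadoop.fs.LocatedFileStatus;

// Hypothetical reconstruction of the comparator referenced above:
// orders file statuses from oldest to newest modification time.
public class ModifTimeComparator implements Comparator<LocatedFileStatus> {
    @Override
    public int compare(LocatedFileStatus a, LocatedFileStatus b) {
        return Long.compare(a.getModificationTime(), b.getModificationTime());
    }
}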
From source file: com.alibaba.jstorm.hdfs.HdfsCache.java
License: Apache License
public Collection<String> listFile(String dstPath, boolean recursive) throws IOException {
    Collection<String> files = new HashSet<String>();
    Path path = new Path(dstPath);
    if (fs.exists(path)) {
        RemoteIterator<LocatedFileStatus> itr = fs.listFiles(path, recursive);
        while (itr.hasNext()) {
            LocatedFileStatus status = itr.next();
            files.add(status.getPath().getName());
        }
    }
    return files;
}
From source file: com.awcoleman.StandaloneJava.AvroCombinerByBlock.java
License: Apache License
public AvroCombinerByBlock(String inDirStr, String outDirStr, String handleExisting) throws IOException {

    //handle both an output directory and an output filename (ending with .avro)
    String outputFilename = DEFAULTOUTPUTFILENAME;
    if (outDirStr.endsWith(".avro")) {
        isOutputNameSpecifiedAndAFile = true;
        //String[] outputParts = outDirStr.split(":?\\\\");
        String[] outputParts = outDirStr.split("/");
        outputFilename = outputParts[outputParts.length - 1];
        //remove outputFilename from outDirStr to get new outDirStr which is just directory (and trailing /)
        outDirStr = outDirStr.replaceAll(Pattern.quote(outputFilename), "");
        outDirStr = outDirStr.substring(0, outDirStr.length() - (outDirStr.endsWith("/") ? 1 : 0));
    }

    //Get block size - not needed
    //long hdfsBlockSize = getBlockSize();
    //System.out.println("HDFS FS block size: "+hdfsBlockSize);

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input and output dirs exist
    Path inDir = new Path(inDirStr);
    Path outDir = new Path(outDirStr);
    if (!(hdfs.exists(inDir) || hdfs.isDirectory(inDir))) {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    if (!(hdfs.exists(outDir) || hdfs.isDirectory(outDir))) {
        if (hdfs.exists(outDir)) {
            //outDir exists and is a symlink or file, must die
            System.out.println("Requested output directory name ( " + outDirStr
                    + " ) exists but is not a directory. Exiting.");
            System.exit(1);
        } else {
            hdfs.mkdirs(outDir);
        }
    }

    RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inDir, true);
    while (fileStatusListIterator.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusListIterator.next();
        if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
            inputFileList.add((FileStatus) fileStatus);
        }
    }

    if (inputFileList.size() <= 1 && !isOutputNameSpecifiedAndAFile) {
        //If an output file is specified assume we just want a rename.
        System.out.println("Only one or zero files found in input directory ( " + inDirStr + " ). Exiting.");
        System.exit(1);
    }

    //Get Schema and Compression Codec from seed file since we need it for the writer
    Path firstFile = inputFileList.get(0).getPath();
    FsInput fsin = new FsInput(firstFile, conf);
    DataFileReader<Object> dfrFirstFile = new DataFileReader<Object>(fsin, new GenericDatumReader<Object>());
    Schema fileSchema = dfrFirstFile.getSchema();
    String compCodecName = dfrFirstFile.getMetaString("avro.codec");
    //compCodecName should be null, deflate, snappy, or bzip2
    if (compCodecName == null) {
        compCodecName = "deflate"; //set to deflate even though original is no compression
    }
    dfrFirstFile.close();

    //Create Empty HDFS file in output dir
    String seedFileStr = outDirStr + "/" + outputFilename;
    Path seedFile = new Path(seedFileStr);
    FSDataOutputStream hdfsdos = null;
    try {
        hdfsdos = hdfs.create(seedFile, false);
    } catch (org.apache.hadoop.fs.FileAlreadyExistsException faee) {
        if (handleExisting.equals("overwrite")) {
            hdfs.delete(seedFile, false);
            hdfsdos = hdfs.create(seedFile, false);
        } else if (handleExisting.equals("append")) {
            hdfsdos = hdfs.append(seedFile);
        } else {
            System.out.println("File " + seedFileStr + " exists and will not overwrite. handleExisting is set to "
                    + handleExisting + ". Exiting.");
            System.exit(1);
        }
    }
    if (hdfsdos == null) {
        System.out.println("Unable to create or write to output file ( " + seedFileStr
                + " ). handleExisting is set to " + handleExisting + ". Exiting.");
        System.exit(1);
    }

    //Append other files
    GenericDatumWriter gdw = new GenericDatumWriter(fileSchema);
    DataFileWriter dfwBase = new DataFileWriter(gdw);
    //Set compression to that found in the first file
    dfwBase.setCodec(CodecFactory.fromString(compCodecName));
    DataFileWriter dfw = dfwBase.create(fileSchema, hdfsdos);

    for (FileStatus thisFileStatus : inputFileList) {
        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }
        FsInput fsin1 = new FsInput(thisFileStatus.getPath(), conf);
        DataFileReader dfr = new DataFileReader<Object>(fsin1, new GenericDatumReader<Object>());
        dfw.appendAllFrom(dfr, false);
        dfr.close();
    }

    dfw.close();
    dfwBase.close();
}
From source file: com.awcoleman.StandaloneJava.AvroCounterByBlock.java
License: Apache License
public AvroCounterByBlock(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();
            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {
        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> dfs = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        dfs = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (dfs.hasNext()) {
            numAvroRecords = numAvroRecords + dfs.getBlockCount();
            thisFileRecords = thisFileRecords + dfs.getBlockCount();
            //System.out.println("Input file "+thisFileStatus.getPath()+" getBlockCount() is "+dfs.getBlockCount()+"." );
            dfs.nextBlock();
        }

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        dfs.close();
        inStream.close();

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");
}
From source file: com.awcoleman.StandaloneJava.AvroCounterByRecord.java
License: Apache License
public AvroCounterByRecord(String inDirStr) throws IOException {

    long numAvroRecords = 0;

    //Get list of input files
    ArrayList<FileStatus> inputFileList = new ArrayList<FileStatus>();

    Configuration conf = new Configuration();
    conf.addResource(new Path("/etc/hadoop/conf/core-site.xml"));
    conf.set("dfs.replication", "1"); //see http://stackoverflow.com/questions/24548699/how-to-append-to-an-hdfs-file-on-an-extremely-small-cluster-3-nodes-or-less

    FileSystem hdfs = null;
    try {
        hdfs = FileSystem.get(conf);
    } catch (java.io.IOException ioe) {
        System.out.println("Error opening HDFS filesystem. Exiting. Error message: " + ioe.getMessage());
        System.exit(1);
    }
    if (hdfs.getStatus() == null) {
        System.out.println("Unable to contact HDFS filesystem. Exiting.");
        System.exit(1);
    }

    //Check if input dirs/file exists and get file list (even if list of single file)
    Path inPath = new Path(inDirStr);
    if (hdfs.exists(inPath) && hdfs.isFile(inPath)) { //single file
        inputFileList.add(hdfs.getFileStatus(inPath));
    } else if (hdfs.exists(inPath) && hdfs.isDirectory(inPath)) { //dir
        //Get list of input files
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = hdfs.listFiles(inPath, true);
        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus fileStatus = fileStatusListIterator.next();
            if (fileStatus.isFile() && !fileStatus.getPath().getName().equals("_SUCCESS")) {
                inputFileList.add((FileStatus) fileStatus);
            }
        }
    } else {
        System.out.println("Input directory ( " + inDirStr + " ) not found or is not directory. Exiting.");
        System.exit(1);
    }

    for (FileStatus thisFileStatus : inputFileList) {
        //_SUCCESS files are 0 bytes
        if (thisFileStatus.getLen() == 0) {
            continue;
        }

        DataFileStream<Object> avroStream = null;
        FSDataInputStream inStream = hdfs.open(thisFileStatus.getPath());
        GenericDatumReader<Object> reader = new GenericDatumReader<Object>();
        avroStream = new DataFileStream<Object>(inStream, reader);

        long thisFileRecords = 0;
        while (avroStream.hasNext()) {
            numAvroRecords++;
            thisFileRecords++;
            avroStream.next();
        }
        avroStream.close();
        inStream.close();

        System.out.println("Input file " + thisFileStatus.getPath() + " has " + thisFileRecords + " records.");

        //TODO test on dir with non-avro file and see what the exception is, catch that and log to output but don't die.
    }

    System.out.println("Input dir/file ( " + inDirStr + " ) has " + inputFileList.size() + " files and "
            + numAvroRecords + " total records.");
}
From source file: com.bark.hadoop.lab3.PageRank.java
@Override
public int run(String args[]) {
    String tmp = "/tmp/" + new Date().getTime();
    // long timeStamp = new Date().getTime();
    try {
        /**
         * Job 1: Parse XML input and read title,links
         */
        Configuration conf = new Configuration();
        conf.set("xmlinput.start", "<page>");
        conf.set("xmlinput.end", "</page>");

        Job job = Job.getInstance(conf);
        job.setJarByClass(PageRank.class);
        // specify a mapper
        job.setMapperClass(RedLinkMapper.class);
        // specify a reducer
        job.setReducerClass(RedLinkReducer.class);
        // specify output types
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job, new Path(args[0]));
        job.setInputFormatClass(XmlInputFormat.class);
        FileOutputFormat.setOutputPath(job, new Path((args[1] + tmp + "/job1")));
        job.setOutputFormatClass(TextOutputFormat.class);

        job.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job1.");
        return 2;
    }
    /**
     * Job 2: Adjacency outGraph
     */
    try {
        Configuration conf2 = new Configuration();

        Job job2 = Job.getInstance(conf2);
        job2.setJarByClass(PageRank.class);
        // specify a mapper
        job2.setMapperClass(AdjMapper.class);
        // specify a reducer
        job2.setReducerClass(AdjReducer.class);
        // specify output types
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);
        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job2, new Path((args[1] + tmp + "/job1")));
        job2.setInputFormatClass(TextInputFormat.class);
        FileOutputFormat.setOutputPath(job2, new Path((args[1] + tmp + "/job2")));
        job2.setOutputFormatClass(TextOutputFormat.class);

        job2.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job2.");
        return 2;
    }
    /**
     * Job 3: PageCount
     */
    try {
        Configuration conf3 = new Configuration();
        /**
         * Change output separator to "=" instead of default \t for this job
         */
        conf3.set("mapreduce.output.textoutputformat.separator", "=");

        Job job3 = Job.getInstance(conf3);
        job3.setJarByClass(PageRank.class);
        // specify a mapper
        job3.setMapperClass(PageCountMapper.class);
        // specify a reducer
        job3.setReducerClass(PageCountReducer.class);
        // specify output types
        job3.setOutputKeyClass(Text.class);
        job3.setOutputValueClass(IntWritable.class);
        // specify input and output DIRECTORIES
        FileInputFormat.addInputPath(job3, new Path((args[1] + tmp + "/job2")));
        job3.setInputFormatClass(TextInputFormat.class);
        FileOutputFormat.setOutputPath(job3, new Path((args[1] + tmp + "/job3")));
        job3.setOutputFormatClass(TextOutputFormat.class);

        job3.waitForCompletion(true);
    } catch (InterruptedException | ClassNotFoundException | IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error during mapreduce job3.");
        return 2;
    }
    /**
     * Job 4: PageRank
     */
    for (int i = 1; i < 9; i++) {
        try {
            Configuration conf4 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf4);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf4.setInt("N", n);

            Job job4 = Job.getInstance(conf4);
            job4.setJarByClass(PageRank.class);
            // specify a mapper
            job4.setMapperClass(PageRankMapper.class);
            // specify a reducer
            job4.setReducerClass(PageRankReducer.class);
            // specify output types
            job4.setOutputKeyClass(Text.class);
            job4.setOutputValueClass(Text.class);
            // specify input and output DIRECTORIES
            if (i == 1) {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job2")));
            } else {
                FileInputFormat.addInputPath(job4, new Path((args[1] + tmp + "/job4/" + (i - 1))));
            }
            job4.setInputFormatClass(TextInputFormat.class);
            FileOutputFormat.setOutputPath(job4, new Path((args[1] + tmp + "/job4/" + i)));
            job4.setOutputFormatClass(TextOutputFormat.class);

            job4.waitForCompletion(true);
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job4.");
            return 2;
        }
    }
    /**
     * Job 5: Sort iteration 1 and iteration 8
     */
    int returnCode = 0;
    for (int i = 0; i < 2; i++) {
        try {
            Configuration conf5 = new Configuration();
            /**
             * Read number of nodes from the output of job 3 : pageCount
             */
            Path path = new Path((args[1] + tmp + "/job3"));
            FileSystem fs = path.getFileSystem(conf5);
            RemoteIterator<LocatedFileStatus> ri = fs.listFiles(path, true);

            int n = 0;
            Pattern pt = Pattern.compile("(\\d+)");
            while (ri.hasNext()) {
                LocatedFileStatus lfs = ri.next();
                if (lfs.isFile() && n == 0) {
                    FSDataInputStream inputStream = fs.open(lfs.getPath());
                    BufferedReader br = new BufferedReader(new InputStreamReader(inputStream));
                    String s = null;
                    while ((s = br.readLine()) != null) {
                        Matcher mt = pt.matcher(s);
                        if (mt.find()) {
                            n = new Integer(mt.group(1));
                            break;
                        }
                    }
                }
            }
            /**
             * Done reading number of nodes, make it available to MapReduce
             * job key: N
             */
            conf5.setInt("N", n);

            Job job5 = Job.getInstance(conf5);
            /**
             * one reducer only
             */
            job5.setNumReduceTasks(1);
            job5.setSortComparatorClass(MyWritableComparator.class);
            job5.setJarByClass(PageRank.class);
            // specify a mapper
            job5.setMapperClass(SortMapper.class);
            job5.setMapOutputKeyClass(DoubleWritable.class);
            job5.setMapOutputValueClass(Text.class);
            // specify a reducer
            job5.setReducerClass(SortReducer.class);
            // specify output types
            job5.setOutputKeyClass(Text.class);
            job5.setOutputValueClass(DoubleWritable.class);
            // specify input and output DIRECTORIES
            int y = 7 * i + 1;
            FileInputFormat.addInputPath(job5, new Path((args[1] + tmp + "/job4/" + y)));
            job5.setInputFormatClass(TextInputFormat.class);
            FileOutputFormat.setOutputPath(job5, new Path((args[1] + tmp + "/job5/" + y)));
            job5.setOutputFormatClass(TextOutputFormat.class);

            returnCode = job5.waitForCompletion(true) ? 0 : 1;
        } catch (InterruptedException | ClassNotFoundException | IOException ex) {
            Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
            System.err.println("Error during mapreduce job5.");
            return 2;
        }
    }
    /**
     * Copy necessary output files to args[1]
     */
    /**
     * Rename and copy OutLinkGraph
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job2/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.outlink.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }
    /**
     * Rename and copy total number of pages
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job3/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.n.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }
    /**
     * Rename and copy iteration 1
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job5/1/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.iter1.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }
    /**
     * Rename and copy iteration 8
     */
    try {
        Configuration conf = new Configuration();
        Path outLinkGraph = new Path((args[1] + tmp + "/job5/8/part-r-00000"));
        FileSystem outLinkGraphFS = outLinkGraph.getFileSystem(conf);
        Path output = new Path(args[1] + "/results/PageRank.iter8.out");
        FileSystem outputFS = output.getFileSystem(conf);
        org.apache.hadoop.fs.FileUtil.copy(outLinkGraphFS, outLinkGraph, outputFS, output, false, true, conf);
    } catch (IOException ex) {
        Logger.getLogger(PageRank.class.getName()).log(Level.SEVERE, ex.toString(), ex);
        System.err.println("Error while copying results.");
        return 2;
    }

    return returnCode;
}
From source file: com.cloudera.impala.util.LoadMetadataUtil.java
License: Apache License
/**
 * Identical to loadFileDescriptors, except using the ListLocatedStatus HDFS API to load
 * file status.
 * TODO: Got AnalysisException error: Failed to load metadata for table
 * CAUSED BY: ClassCastException: DFSClient#getVolumeBlockLocations expected to be
 * passed HdfsBlockLocations
 * TODO: Use new HDFS API resolved by CDH-30342.
 */
public static List<FileDescriptor> loadViaListLocatedStatus(FileSystem fs, Path partDirPath,
        Map<String, List<FileDescriptor>> oldFileDescMap, HdfsFileFormat fileFormat,
        Map<FsKey, FileBlocksInfo> perFsFileBlocks, boolean isMarkedCached, String tblName,
        ListMap<TNetworkAddress> hostIndex, Map<String, List<FileDescriptor>> fileDescMap)
        throws FileNotFoundException, IOException {
    List<FileDescriptor> fileDescriptors = Lists.newArrayList();

    RemoteIterator<LocatedFileStatus> fileStatusItor = fs.listLocatedStatus(partDirPath);

    while (fileStatusItor.hasNext()) {
        LocatedFileStatus fileStatus = fileStatusItor.next();
        FileDescriptor fd = getFileDescriptor(fs, fileStatus, fileFormat, oldFileDescMap, isMarkedCached,
                perFsFileBlocks, tblName, hostIndex);

        if (fd == null)
            continue;

        // Add partition dir to fileDescMap if it does not exist.
        String partitionDir = fileStatus.getPath().getParent().toString();
        if (!fileDescMap.containsKey(partitionDir)) {
            fileDescMap.put(partitionDir, new ArrayList<FileDescriptor>());
        }
        fileDescMap.get(partitionDir).add(fd);

        // Add to the list of FileDescriptors for this partition.
        fileDescriptors.add(fd);
    }

    return fileDescriptors;
}
From source file: com.datatorrent.stram.client.FSAgent.java
License: Apache License
public List<String> listFiles(String dir) throws IOException {
    List<String> files = new ArrayList<String>();
    Path path = new Path(dir);

    FileStatus fileStatus = fileSystem.getFileStatus(path);
    if (!fileStatus.isDirectory()) {
        throw new FileNotFoundException("Cannot read directory " + dir);
    }
    RemoteIterator<LocatedFileStatus> it = fileSystem.listFiles(path, false);
    while (it.hasNext()) {
        LocatedFileStatus lfs = it.next();
        files.add(lfs.getPath().getName());
    }
    return files;
}