List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths
public static Path[] stat2Paths(FileStatus[] stats)
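stat2Paths converts an array of FileStatus objects, typically returned by FileSystem.listStatus, into the corresponding array of Path objects. Before the real-world examples below, here is a minimal self-contained sketch of the common idiom; the directory name "/tmp/data" and the class name Stat2PathsExample are placeholders for illustration, not part of any project listed here.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/data"); // placeholder directory
        FileSystem fs = dir.getFileSystem(conf);

        // listStatus returns FileStatus[]; stat2Paths extracts the Path from each entry
        FileStatus[] stats = fs.listStatus(dir);
        Path[] paths = FileUtil.stat2Paths(stats);

        for (Path p : paths) {
            System.out.println(p);
        }
    }
}

Most of the examples that follow apply exactly this pattern, sometimes passing a PathFilter to listStatus to restrict which entries are converted.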
From source file:org.mrgeo.hdfs.tile.FileSplit.java
License:Apache License
public void generateSplits(Path parent, Configuration conf) throws IOException {
    List<FileSplitInfo> list = new ArrayList<FileSplitInfo>();

    // get a Hadoop file system handle
    final FileSystem fs = getFileSystem(parent);

    // get the list of paths of the subdirectories of the parent
    final Path[] paths = FileUtil.stat2Paths(fs.listStatus(parent));
    Arrays.sort(paths);

    int partition = 0;

    // look inside each subdirectory for a data dir and keep track
    for (final Path p : paths) {
        Path mapfile = null;
        final FileStatus[] dirFiles = fs.listStatus(p);
        for (final FileStatus dirFile : dirFiles) {
            if (dirFile.getPath().getName().equals("data")) {
                mapfile = dirFile.getPath().getParent();
                break;
            }
        }

        if (mapfile != null) {
            RasterWritable val = new RasterWritable();
            MapFile.Reader reader = createMapFileReader(conf, mapfile);
            TileIdWritable firstKey = (TileIdWritable) reader.getClosest(new TileIdWritable(0), val);
            TileIdWritable lastKey = (TileIdWritable) reader.getClosest(new TileIdWritable(Long.MAX_VALUE), val,
                    true);
            if (firstKey != null && lastKey != null) {
                list.add(new FileSplit.FileSplitInfo(firstKey.get(), lastKey.get(), mapfile.getName(), partition));
            }
            partition++;
        }
    }
    splits = list.toArray(new FileSplit.FileSplitInfo[list.size()]);
}
From source file:org.mrgeo.hdfs.tile.HdfsMrsTileReader.java
License:Apache License
@Override
public long calculateTileCount() {
    int count = 0;
    try {
        final FileSystem fs = imagePath.getFileSystem(conf);
        final Path[] names = FileUtil.stat2Paths(fs.listStatus(imagePath));
        Arrays.sort(names);

        final DataOutputBuffer key = new DataOutputBuffer();
        for (final Path name : names) {
            final FileStatus[] dirFiles = fs.listStatus(name);
            for (final FileStatus dirFile : dirFiles) {
                if (dirFile.getPath().getName().equals("index")) {
                    SequenceFile.Reader index = new SequenceFile.Reader(fs, dirFile.getPath(), conf);
                    try {
                        while (index.nextRawKey(key) >= 0) {
                            count++;
                        }
                    } finally {
                        index.close();
                    }
                }
            }
        }
        return count;
    } catch (final IOException e) {
        throw new MrsTileException(e);
    }
}
From source file:org.mrgeo.hdfs.tile.SplitFile.java
License:Apache License
private String[] findPartitions(final String splitFileDir) throws IOException {
    Path path = new Path(splitFileDir);
    final ArrayList<String> partitions = new ArrayList<String>();

    // get a Hadoop file system handle
    final FileSystem fs = path.getFileSystem(conf);

    // get the list of paths of the subdirectories of the parent
    final Path[] paths = FileUtil.stat2Paths(fs.listStatus(path));
    Arrays.sort(paths);

    // look inside each subdirectory for a data dir and keep track
    for (final Path p : paths) {
        boolean isMapFileDir = false;
        final FileStatus[] dirFiles = fs.listStatus(p);
        for (final FileStatus dirFile : dirFiles) {
            if (dirFile.getPath().getName().equals("data")) {
                isMapFileDir = true;
                break;
            }
        }

        if (isMapFileDir) {
            // need to be relative to the path, so we can just use getName()
            partitions.add(p.getName());
        }
    }
    return partitions.toArray(new String[0]);
}
From source file:org.springframework.data.hadoop.test.HadoopClusterTests.java
License:Apache License
@Test
public void testConfiguredConfigurationWithJobRun() throws Exception {
    assertTrue(ctx.containsBean("hadoopConfiguration"));
    Configuration config = (Configuration) ctx.getBean("hadoopConfiguration");
    assertNotNull(config);

    Path inDir = new Path("testing/jobconf/input");
    Path outDir = new Path("testing/jobconf/output");

    FileSystem fs = FileSystem.get(config);
    fs.delete(inDir, true);
    fs.delete(outDir, true);
    fs.mkdirs(inDir);

    OutputStream os = fs.create(new Path(inDir, "text.txt"));
    Writer wr = new OutputStreamWriter(os);
    wr.write("b a\n");
    wr.close();

    JobRunner runner = (JobRunner) ctx.getBean("runner");
    runner.call();

    Path[] outputFiles = FileUtil
            .stat2Paths(fs.listStatus(outDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    assertEquals(1, outputFiles.length);

    InputStream in = fs.open(outputFiles[0]);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in));
    assertEquals("foo\t1", reader.readLine());
    assertNull(reader.readLine());
    reader.close();
}
From source file:org.springframework.data.hadoop.test.junit.AbstractMapReduceTests.java
License:Apache License
/**
 * Finds an array of output file {@link Path}s resulting from
 * a mapreduce job run.
 *
 * @param outputDirectory the path to the job's output directory
 * @return list of output files
 * @throws FileNotFoundException if the given path was not found
 * @throws IOException if a general access error occurred
 */
protected Path[] getOutputFilePaths(Path outputDirectory) throws FileNotFoundException, IOException {
    return FileUtil.stat2Paths(
            getFileSystem().listStatus(outputDirectory, new Utils.OutputFileUtils.OutputFilesFilter()));
}
From source file:org.trafodion.sql.HBaseAccess.HBulkLoadClient.java
License:Apache License
public boolean doBulkLoad(String prepLocation, String tableName, boolean quasiSecure, boolean snapshot)
        throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("HBulkLoadClient.doBulkLoad() - start");
    if (logger.isDebugEnabled())
        logger.debug("HBulkLoadClient.doBulkLoad() - Prep Location: " + prepLocation + ", Table Name:"
                + tableName + ", quasisecure : " + quasiSecure + ", snapshot: " + snapshot);

    HTable table = new HTable(config, tableName);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);

    Path prepPath = new Path(prepLocation);
    prepPath = prepPath.makeQualified(prepPath.toUri(), null);
    FileSystem prepFs = FileSystem.get(prepPath.toUri(), config);
    Path[] hFams = FileUtil.stat2Paths(prepFs.listStatus(prepPath));

    if (quasiSecure) {
        throw new Exception(
                "HBulkLoadClient.doBulkLoad() - cannot perform load. Trafodion on secure HBase mode is not implemented yet");
    } else {
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfiles permissions");
        for (Path hfam : hFams) {
            Path[] hfiles = FileUtil.stat2Paths(prepFs.listStatus(hfam));
            prepFs.setPermission(hfam, PERM_ALL_ACCESS);
            for (Path hfile : hfiles) {
                if (logger.isDebugEnabled())
                    logger.debug("HBulkLoadClient.doBulkLoad() - adjusting hfile permissions:" + hfile);
                prepFs.setPermission(hfile, PERM_ALL_ACCESS);
            }
            // create _tmp dir used as temp space for Hfile processing
            FileSystem.mkdirs(prepFs, new Path(hfam, "_tmp"), PERM_ALL_ACCESS);
        }
        if (logger.isDebugEnabled())
            logger.debug(
                    "HBulkLoadClient.doBulkLoad() - bulk load started. Loading directly from preparation directory");
        doSnapshotNBulkLoad(prepPath, tableName, table, loader, snapshot);
        if (logger.isDebugEnabled())
            logger.debug("HBulkLoadClient.doBulkLoad() - bulk load is done ");
    }
    return true;
}
From source file:org.trafodion.sql.HBaseAccess.SequenceFileWriter.java
License:Apache License
public boolean hdfsMergeFiles(String srcPathStr, String dstPathStr) throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - start");
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - source Path: " + srcPathStr
                + ", destination File:" + dstPathStr);

    try {
        Path srcPath = new Path(srcPathStr);
        srcPath = srcPath.makeQualified(srcPath.toUri(), null);
        FileSystem srcFs = FileSystem.get(srcPath.toUri(), conf);

        Path dstPath = new Path(dstPathStr);
        dstPath = dstPath.makeQualified(dstPath.toUri(), null);
        FileSystem dstFs = FileSystem.get(dstPath.toUri(), conf);

        if (dstFs.exists(dstPath)) {
            if (logger.isDebugEnabled())
                logger.debug("SequenceFileWriter.hdfsMergeFiles() - destination files exists");
            // for this prototype we just delete the file -- will change in next code drops
            dstFs.delete(dstPath, false);
            // The caller should already have checked existence of file -- throw exception
            //throw new FileAlreadyExistsException(dstPath.toString());
        }

        Path tmpSrcPath = new Path(srcPath, "tmp");
        FileSystem.mkdirs(srcFs, tmpSrcPath, srcFs.getFileStatus(srcPath).getPermission());
        logger.debug("SequenceFileWriter.hdfsMergeFiles() - tmp folder created.");

        Path[] files = FileUtil.stat2Paths(srcFs.listStatus(srcPath));
        for (Path f : files) {
            srcFs.rename(f, tmpSrcPath);
        }

        // copyMerge and use false for the delete option since it removes the whole directory
        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() - copyMerge");
        FileUtil.copyMerge(srcFs, tmpSrcPath, dstFs, dstPath, false, conf, null);

        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() - delete intermediate files");
        srcFs.delete(tmpSrcPath, true);
    } catch (IOException e) {
        if (logger.isDebugEnabled())
            logger.debug("SequenceFileWriter.hdfsMergeFiles() --exception:" + e);
        throw e;
    }
    return true;
}
From source file:org.trafodion.sql.HBaseAccess.SequenceFileWriter.java
License:Apache License
public boolean hdfsCleanUnloadPath(String uldPathStr /*, boolean checkExistence, String mergeFileStr*/)
        throws Exception {
    if (logger.isDebugEnabled())
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - start");
    logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - unload Path: " + uldPathStr);

    try {
        Path uldPath = new Path(uldPathStr);
        uldPath = uldPath.makeQualified(uldPath.toUri(), null);
        FileSystem srcFs = FileSystem.get(uldPath.toUri(), conf);
        if (!srcFs.exists(uldPath)) {
            // unload location does not exist. hdfscreate will create it later
            // nothing to do
            logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() -- unload location does not exist.");
            return true;
        }

        Path[] files = FileUtil.stat2Paths(srcFs.listStatus(uldPath));
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() - delete files");
        for (Path f : files) {
            srcFs.delete(f, false);
        }
    } catch (IOException e) {
        logger.debug("SequenceFileWriter.hdfsCleanUnloadPath() -exception:" + e);
        throw e;
    }
    return true;
}
From source file:org.unigram.likelike.lsh.TestLSHRecommendations.java
License:Apache License
private boolean dfsCheck(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.getLocal(conf);
    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outputPath, new OutputLogFilter()));

    //if (outputFiles != null) {
    //    TestCase.assertEquals(outputFiles.length, 1);
    //} else {
    //    TestCase.fail();
    //}

    BufferedReader reader = this.asBufferedReader(fs.open(outputFiles[0]));
    String line;
    MultiHashMap resultMap = new MultiHashMap();
    while ((line = reader.readLine()) != null) {
        String[] lineArray = line.split("\t");
        resultMap.put(Long.parseLong(lineArray[0]), // target
                Long.parseLong(lineArray[1])); // recommended
    }
    this.check(resultMap);
    return true;
}
From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {
    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());
    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete existing destination local file
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte[] buffer = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}