List of usage examples for org.apache.hadoop.fs.PathFilter
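PathFilter is a one-method callback, boolean accept(Path path), that FileSystem.listStatus() (and globStatus()) invokes for each candidate path so the caller can keep or drop it. Before the project-specific examples below, here is a minimal, self-contained sketch of the pattern they all share; the directory /tmp/data and the .tmp suffix are illustrative assumptions, not taken from any of the projects.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class PathFilterSketch {

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Illustrative directory; replace with a real HDFS or local path.
        Path dir = new Path("/tmp/data");
        FileSystem fs = dir.getFileSystem(conf);

        // listStatus() calls accept() once per child path and keeps only the matches.
        FileStatus[] matches = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                // keep everything except temporary files (illustrative rule)
                return !path.getName().endsWith(".tmp");
            }
        });

        for (FileStatus status : matches) {
            System.out.println(status.getPath());
        }
    }
}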
From source file:org.talend.components.simplefileio.runtime.utils.FileSystemUtil.java
License:Open Source License
/**
 * Return the files in this folder, but do not return hidden files (those starting with '_' or '.').
 * @param fs
 * @param folder
 */
public static FileStatus[] listSubFiles(FileSystem fs, Path folder) throws IOException {
    return fs.listStatus(folder, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    });
}
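Filtering out names that begin with '_' or '.' is the conventional way to skip Hadoop's bookkeeping outputs such as _SUCCESS and _logs; it mirrors the default hidden-file filter that FileInputFormat applies when listing job input.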
From source file:org.terrier.indexing.HadoopIndexerReducer.java
License:Mozilla Public License
protected LinkedList<MapData> loadRunData(Context context) throws IOException {
    // Load in Run Data
    ArrayList<String> mapTaskIDs = new ArrayList<String>();
    final LinkedList<MapData> runData = new LinkedList<MapData>();
    DataInputStream runDataIn;

    final String jobId = context.getTaskAttemptID().getJobID().toString().replaceAll("job", "task");

    final FileStatus[] files = FileSystem.get(context.getConfiguration())
            .listStatus(FileOutputFormat.getOutputPath(context), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    final String name = path.getName();
                    // 1. is this a run file
                    if (!(name.startsWith(jobId) && name.endsWith(".runs")))
                        return false;
                    return true;
                }
            });

    if (files == null || files.length == 0) {
        throw new IOException("No run status files found in " + FileOutputFormat.getOutputPath(context));
    }

    final int thisPartition = context.getTaskAttemptID().getTaskID().getId();
    final NewSplitEmittedTerm.SETPartitioner partitionChecker = new NewSplitEmittedTerm.SETPartitioner();
    partitionChecker.setConf(context.getConfiguration());

    MapData tempHRD;
    for (FileStatus file : files) {
        ExtensibleSinglePassIndexer.logger.info("Run data file " + file.getPath().toString() + " has length "
                + Files.length(file.getPath().toString()));
        runDataIn = new DataInputStream(Files.openFileStream(file.getPath().toString()));
        tempHRD = new MapData(runDataIn);
        // check to see if this file contained our split information
        if (mutipleIndices && partitionChecker.calculatePartition(tempHRD.getSplitnum(),
                context.getNumReduceTasks()) != thisPartition)
            continue;

        mapTaskIDs.add(tempHRD.getMap());
        runData.add(tempHRD);
        runDataIn.close();
    }

    // Sort by splitnum
    Collections.sort(runData);
    Collections.sort(mapTaskIDs, new IDComparator(runData));
    // A list of the index shards
    MapIndexPrefixes = mapTaskIDs.toArray(new String[0]);
    return runData;
}
From source file:org.wso2.carbon.hdfs.mgt.HDFSAdmin.java
License:Open Source License
/**
 * Mgt service that returns the file and folder list of the given HDFS path.
 *
 * @param fsObjectPath
 *            file system path for which the user needs information about files and folders
 * @return list with files and folders in the given path
 * @throws HDFSServerManagementException
 */
public FolderInformation[] getCurrentUserFSObjects(String fsObjectPath) throws HDFSServerManagementException {

    boolean isCurrentUserSuperTenant = false;
    // Checks if the current user has a role assigned. Else throws an error.
    try {
        checkCurrentTenantUserHasRole();
        isCurrentUserSuperTenant = hdfsAdminHelperInstance.isCurrentUserSuperTenant();
    } catch (HDFSServerManagementException e) {
        throw e;
    } catch (UserStoreException e) {
        handleException(" User store exception", e);
    }

    FileSystem hdfsFS = null;
    // The folder path is filtered so that only items under the /user/ directory are returned.
    if (fsObjectPath == null
            || (!isCurrentUserSuperTenant && fsObjectPath.equals(HDFSConstants.HDFS_ROOT_FOLDER))) {
        fsObjectPath = HDFSConstants.HDFS_USER_ROOT;
    }

    try {
        hdfsFS = hdfsAdminHelperInstance.getFSforUser();
    } catch (IOException e1) {
        String msg = "Error occurred while trying to get File system instance";
        handleException(msg, e1);
    }

    FileStatus[] fileStatusList = null;
    List<FolderInformation> folderInfo = new ArrayList<FolderInformation>();
    try {
        if (hdfsFS != null && hdfsFS.exists(new Path(fsObjectPath))) {
            if (hdfsAdminHelperInstance.isCurrentUserSuperTenant()) {
                fileStatusList = hdfsFS.listStatus(new Path(fsObjectPath));
            } else {
                fileStatusList = hdfsFS.listStatus(new Path(fsObjectPath), new PathFilter() {
                    // the filter to be applied when retrieving the file paths
                    @Override
                    public boolean accept(Path path) {
                        String filter = null;
                        CarbonContext carbonContext = CarbonContext.getThreadLocalCarbonContext();
                        if (hdfsAdminHelperInstance.isCurrentUserTenantAdmin()) {
                            filter = carbonContext.getTenantDomain();
                        } else {
                            filter = carbonContext.getTenantDomain() + HDFSConstants.UNDERSCORE
                                    + carbonContext.getUsername();
                        }
                        return path.toString().contains(filter);
                    }
                });
            }
            // List the statuses of the files/directories in the given path if the path is a directory.
            if (fileStatusList != null) {
                for (FileStatus fileStatus : fileStatusList) {
                    FolderInformation folder = new FolderInformation();
                    folder.setFolder(fileStatus.isDir());
                    folder.setName(fileStatus.getPath().getName());
                    folder.setFolderPath(fileStatus.getPath().toUri().getPath());
                    folder.setOwner(fileStatus.getOwner());
                    folder.setGroup(fileStatus.getGroup());
                    folder.setPermissions(fileStatus.getPermission().toString());
                    folderInfo.add(folder);
                }
                return folderInfo.toArray(new FolderInformation[folderInfo.size()]);
            }
        }
    } catch (Exception e) {
        String msg = "Error occurred while retrieving folder information";
        handleException(msg, e);
    }
    return null;
}
From source file:parquet.hadoop.PrintFooter.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage PrintFooter <path>");
        return;
    }
    Path path = new Path(new URI(args[0]));
    final Configuration configuration = new Configuration();

    final FileSystem fs = path.getFileSystem(configuration);
    FileStatus fileStatus = fs.getFileStatus(path);
    Path summary = new Path(fileStatus.getPath(), PARQUET_METADATA_FILE);
    if (fileStatus.isDir() && fs.exists(summary)) {
        System.out.println("reading summary file");
        FileStatus summaryStatus = fs.getFileStatus(summary);
        List<Footer> readSummaryFile = ParquetFileReader.readSummaryFile(configuration, summaryStatus);
        for (Footer footer : readSummaryFile) {
            add(footer.getParquetMetadata());
        }
    } else {
        List<FileStatus> statuses;
        if (fileStatus.isDir()) {
            System.out.println("listing files in " + fileStatus.getPath());
            statuses = Arrays.asList(fs.listStatus(fileStatus.getPath(), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return !path.getName().startsWith("_");
                }
            }));
        } else {
            statuses = new ArrayList<FileStatus>();
            statuses.add(fileStatus);
        }
        System.out.println("opening " + statuses.size() + " files");
        int i = 0;
        ExecutorService threadPool = Executors.newFixedThreadPool(5);
        try {
            long t0 = System.currentTimeMillis();
            Deque<Future<ParquetMetadata>> footers = new LinkedBlockingDeque<Future<ParquetMetadata>>();
            for (final FileStatus currentFile : statuses) {
                footers.add(threadPool.submit(new Callable<ParquetMetadata>() {
                    @Override
                    public ParquetMetadata call() throws Exception {
                        try {
                            ParquetMetadata footer = ParquetFileReader.readFooter(configuration, currentFile);
                            return footer;
                        } catch (Exception e) {
                            throw new ParquetDecodingException("could not read footer", e);
                        }
                    }
                }));
            }
            int previousPercent = 0;
            int n = 60;
            System.out.print("0% [");
            for (int j = 0; j < n; j++) {
                System.out.print(" ");
            }
            System.out.print("] 100%");
            for (int j = 0; j < n + 6; j++) {
                System.out.print('\b');
            }
            while (!footers.isEmpty()) {
                Future<ParquetMetadata> futureFooter = footers.removeFirst();
                if (!futureFooter.isDone()) {
                    footers.addLast(futureFooter);
                    continue;
                }
                ParquetMetadata footer = futureFooter.get();
                int currentPercent = (++i * n / statuses.size());
                while (currentPercent > previousPercent) {
                    System.out.print("*");
                    previousPercent++;
                }
                add(footer);
            }
            System.out.println("");
            long t1 = System.currentTimeMillis();
            System.out.println("read all footers in " + (t1 - t0) + " ms");
        } finally {
            threadPool.shutdownNow();
        }
    }
    Set<Entry<ColumnDescriptor, ColStats>> entries = stats.entrySet();
    long total = 0;
    long totalUnc = 0;
    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
        ColStats colStats = entry.getValue();
        total += colStats.allStats.total;
        totalUnc += colStats.uncStats.total;
    }
    for (Entry<ColumnDescriptor, ColStats> entry : entries) {
        ColStats colStats = entry.getValue();
        System.out.println(entry.getKey() + " " + percent(colStats.allStats.total, total) + "% of all space "
                + colStats);
    }
    System.out.println("number of blocks: " + blockCount);
    System.out.println("total data size: " + humanReadable(total) + " (raw " + humanReadable(totalUnc) + ")");
    System.out.println("total record: " + humanReadable(recordCount));
    System.out.println("average block size: " + humanReadable(total / blockCount) + " (raw "
            + humanReadable(totalUnc / blockCount) + ")");
    System.out.println("average record count: " + humanReadable(recordCount / blockCount));
}
From source file:parquet.hadoop.TestParquetFileWriter.java
License:Apache License
@Test
public void testMetaDataFile() throws Exception {
    File testDir = new File("target/test/TestParquetFileWriter/testMetaDataFileDir").getAbsoluteFile();
    Path testDirPath = new Path(testDir.toURI());
    Configuration configuration = new Configuration();

    final FileSystem fs = testDirPath.getFileSystem(configuration);
    fs.delete(testDirPath, true);
    fs.mkdirs(testDirPath);

    MessageType schema = MessageTypeParser.parseMessageType(
            "message m { required group a {required binary b;} required group c { required int64 d; }}");
    createFile(configuration, new Path(testDirPath, "part0"), schema);
    createFile(configuration, new Path(testDirPath, "part1"), schema);
    createFile(configuration, new Path(testDirPath, "part2"), schema);

    FileStatus outputStatus = fs.getFileStatus(testDirPath);
    List<Footer> footers = ParquetFileReader.readAllFootersInParallel(configuration, outputStatus);
    validateFooters(footers);
    ParquetFileWriter.writeMetadataFile(configuration, testDirPath, footers);

    footers = ParquetFileReader.readFooters(configuration, outputStatus);
    validateFooters(footers);

    footers = ParquetFileReader.readFooters(configuration, fs.getFileStatus(new Path(testDirPath, "part0")));
    assertEquals(1, footers.size());

    final FileStatus metadataFile = fs
            .getFileStatus(new Path(testDirPath, ParquetFileWriter.PARQUET_METADATA_FILE));
    final List<Footer> metadata = ParquetFileReader.readSummaryFile(configuration, metadataFile);
    validateFooters(metadata);

    footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration,
            Arrays.asList(fs.listStatus(testDirPath, new PathFilter() {
                @Override
                public boolean accept(Path p) {
                    return !p.getName().startsWith("_");
                }
            })));
    validateFooters(footers);

    fs.delete(metadataFile.getPath(), false);

    footers = ParquetFileReader.readAllFootersInParallelUsingSummaryFiles(configuration,
            Arrays.asList(fs.listStatus(testDirPath)));
    validateFooters(footers);
}
From source file:ph.fingra.hadoop.mapred.common.CopyToLocalFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem fs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem local = FileSystem.getLocal(getConf());

    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination local file if it already exists
    if (local.exists(dstPath)) {
        local.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = fs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create local output stream
        FSDataOutputStream out = local.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = fs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
From source file:ph.fingra.hadoop.mapred.common.CopyWithinHdfsFile.java
License:Apache License
public void dirToFile(String srcdir, String dstfile) throws IOException {

    FileSystem shfs = FileSystem.get(URI.create(srcdir), getConf());
    FileSystem thfs = FileSystem.get(URI.create(dstfile), getConf());

    Path srcPath = new Path(srcdir);
    Path dstPath = new Path(dstfile);

    // delete the destination file if it already exists
    if (thfs.exists(dstPath)) {
        thfs.delete(dstPath, true);
    }

    // get hdfs file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(ConstantVars.RESULT_FILE_PREFIX);
        }
    };

    FileStatus[] status = shfs.listStatus(srcPath, resultFileFilter);
    Path[] listedPaths = FileUtil.stat2Paths(status);

    if (listedPaths.length > 0) {
        // create hdfs output stream
        FSDataOutputStream out = thfs.create(dstPath);
        for (int i = 0; i < listedPaths.length; i++) {
            // create hdfs input stream
            FSDataInputStream in = shfs.open(listedPaths[i]);
            byte buffer[] = new byte[256];
            int bytesRead = 0;
            while ((bytesRead = in.read(buffer)) > 0) {
                out.write(buffer, 0, bytesRead);
            }
            in.close();
        }
        out.close();
    }
    return;
}
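Both dirToFile variants above concatenate the filtered files with a hand-rolled 256-byte copy loop. Hadoop ships org.apache.hadoop.io.IOUtils for exactly this task, so the inner loop could be reduced to a copyBytes call; the helper below is only a sketch of that simplification, not code from either project.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

// Hypothetical helper: concatenate the listed paths into one already-open output stream.
public final class CopyHelper {

    private CopyHelper() {
    }

    static void concatTo(FileSystem fs, Path[] listedPaths, FSDataOutputStream out) throws IOException {
        for (Path src : listedPaths) {
            FSDataInputStream in = fs.open(src);
            try {
                // copy with a 4 KB buffer; close=false so the output stream stays open for the caller
                IOUtils.copyBytes(in, out, 4096, false);
            } finally {
                in.close();
            }
        }
    }
}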
From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java
License:Apache License
public static int getDateMatchedFileCount(Path srcpath) throws IOException {

    int count = 0;
    Path parentPath = null;
    String date_ext = null;

    // directory path
    parentPath = srcpath.getParent();

    // date pattern
    Pattern p = Pattern.compile("([0-9]{4})\\-([0-9]{2})\\-([0-9]{2})");
    Matcher m = p.matcher(srcpath.getName());
    if (m.find()) {
        // suffix part like "yyyy-MM-dd.txt" in the file name
        date_ext = srcpath.getName().substring(m.start()/*, m.end()*/);
    }

    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    // get matched file list
    final String suffix = date_ext;
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(suffix);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(parentPath, resultFileFilter);
        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);
            if (listedPaths != null) {
                count = listedPaths.length;
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        ; // Hadoop 1.x throws InvalidInputException instead of FileNotFoundException here
    }

    return count;
}
From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java
License:Apache License
public static boolean deleteNBackupFile(String srcdir, String srcfile, int maxcount, String runday,
        final String dbfnameprefix) throws IOException {

    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    Path targetPath = null;
    Path rootPath = new Path(srcdir);
    Path sourcePath = new Path(srcfile);
    String target_day = "";
    String target_file = "";
    boolean success = false;

    // if srcfile does not exist, stop the backup and return true
    if (hdfs.exists(sourcePath) == false) {
        return true;
    }

    // name the backup file with yesterday's date
    target_day = DateTimeUtil.addDays(runday, -1, "yyyyMMdd");
    target_file = srcfile + "-" + target_day;
    //System.out.println("target_file - " + target_file);
    targetPath = new Path(target_file);

    // delete any backup file with the same name, then rename the source file to the backup file
    if (hdfs.exists(new Path(target_file))) {
        hdfs.delete(targetPath, true);
    }
    success = hdfs.rename(sourcePath, targetPath);

    // get the backup file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dbfnameprefix + "-");
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);
        Path[] listedPaths = FileUtil.stat2Paths(status);

        // delete files beyond the maximum number of backup files
        if (listedPaths.length > maxcount) {

            Comparator<Path> c = new Comparator<Path>() {
                public int compare(Path o1, Path o2) {
                    int ret = 0;
                    ret = o1.getName().compareTo(o2.getName());
                    return -(ret); // order by reverse of the period
                }
            };

            Arrays.sort(listedPaths, c);

            for (int i = maxcount; i < listedPaths.length; i++) {
                Path path = listedPaths[i];
                hdfs.delete(path, true);
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        ; // Hadoop 1.x throws InvalidInputException instead of FileNotFoundException here
    }

    return success;
}
From source file:ph.fingra.hadoop.mapred.common.HdfsFileUtil.java
License:Apache License
public static boolean deleteOriginFiles(FingraphConfig config, String year, String month, String day)
        throws IOException {

    Configuration conf = new Configuration();
    FileSystem hdfs = FileSystem.get(conf);

    String root_uri = config.getHadoop_user_path() + (config.getHadoop_user_path().endsWith("/") ? "" : "/")
            + config.getSetting().getHfs_input_path()
            + (config.getSetting().getHfs_input_path().endsWith("/") ? "" : "/");
    root_uri = root_uri.replaceAll("\\{yyyy\\}", year);
    root_uri = root_uri.replaceAll("\\{MM\\}", month);
    root_uri = root_uri.replaceAll("\\{dd\\}", day);

    String file_uri = config.getSetting().getOrigin_input_file();
    file_uri = file_uri.replaceAll("\\{yyyy\\}", year);
    file_uri = file_uri.replaceAll("\\{MM\\}", month);
    file_uri = file_uri.replaceAll("\\{dd\\}", day);
    file_uri = file_uri.replace("*", "[\\w]*");
    final String patt = "^" + file_uri + "$";
    //System.out.println(patt);

    Path rootPath = new Path(root_uri);
    boolean success = false;

    // get matched file list
    PathFilter resultFileFilter = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().matches(patt);
        }
    };

    try {
        FileStatus[] status = hdfs.listStatus(rootPath, resultFileFilter);
        if (status != null) {
            Path[] listedPaths = FileUtil.stat2Paths(status);
            if (listedPaths != null) {
                for (Path path : listedPaths) {
                    success = hdfs.delete(path, true);
                }
            }
        }
    } catch (FileNotFoundException ignore) {
    } catch (InvalidInputException ignore) {
        ; // Hadoop 1.x throws InvalidInputException instead of FileNotFoundException here
    }

    return success;
}
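The last example converts a shell-style wildcard in the configured file name into a regular expression by hand and matches it inside the PathFilter. Hadoop's FileSystem also understands glob patterns natively through globStatus(Path, PathFilter), so a variant of the same deletion could let the file system do the pattern matching. The sketch below assumes a fully resolved glob URI is passed in; the example path and pattern are illustrative, not taken from the project.

import java.io.FileNotFoundException;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public final class GlobDeleteSketch {

    private GlobDeleteSketch() {
    }

    // Hypothetical variant: delete every non-hidden file matching a glob such as
    // "/user/fingra/input/2014/01/01/origin_*.log" (path and pattern are illustrative).
    public static boolean deleteMatching(Configuration conf, String globUri) throws IOException {
        FileSystem hdfs = FileSystem.get(conf);
        boolean success = false;
        try {
            // globStatus expands the wildcard against the file system; the PathFilter then skips hidden files.
            FileStatus[] status = hdfs.globStatus(new Path(globUri), new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    String name = path.getName();
                    return !name.startsWith("_") && !name.startsWith(".");
                }
            });
            if (status != null) {
                for (FileStatus st : status) {
                    success = hdfs.delete(st.getPath(), true);
                }
            }
        } catch (FileNotFoundException ignore) {
            // no matching files: nothing to delete
        }
        return success;
    }
}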