List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:gobblin.util.JobLauncherUtils.java
License:Apache License
/** * Cleanup staging data of all tasks of a job. * * @param state a {@link State} instance storing job configuration properties * @param logger a {@link Logger} used for logging *///from w w w .java 2s . com public static void cleanJobStagingData(State state, Logger logger) throws IOException { Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR), "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR); Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR), "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR); String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI); FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state)); Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR)); logger.info("Cleaning up staging directory " + jobStagingPath); HadoopUtils.deletePath(fs, jobStagingPath, true); if (fs.exists(jobStagingPath.getParent()) && fs.listStatus(jobStagingPath.getParent()).length == 0) { logger.info("Deleting directory " + jobStagingPath.getParent()); HadoopUtils.deletePath(fs, jobStagingPath.getParent(), true); } Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR)); logger.info("Cleaning up output directory " + jobOutputPath); HadoopUtils.deletePath(fs, jobOutputPath, true); if (fs.exists(jobOutputPath.getParent()) && fs.listStatus(jobOutputPath.getParent()).length == 0) { logger.info("Deleting directory " + jobOutputPath.getParent()); HadoopUtils.deletePath(fs, jobOutputPath.getParent(), true); } if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)) { if (state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) { Path jobErrPath = new Path(ConfigurationKeys.ROW_LEVEL_ERR_FILE); log.info("Cleaning up err directory : " + jobErrPath); HadoopUtils.deleteIfExists(fs, jobErrPath, true); } } }
From source file:gobblin.util.PathUtils.java
License:Apache License
/** * Finds the deepest ancestor of input that is not a glob. *///from ww w. j a v a 2s . com public static Path deepestNonGlobPath(Path input) { Path commonRoot = input; while (commonRoot != null && isGlob(commonRoot)) { commonRoot = commonRoot.getParent(); } return commonRoot; }
From source file:gobblin.util.PathUtils.java
License:Apache License
/** * Deletes empty directories starting with startPath and all ancestors up to but not including limitPath. * @param fs {@link FileSystem} where paths are located. * @param limitPath only {@link Path}s that are strict descendants of this path will be deleted. * @param startPath first {@link Path} to delete. Afterwards empty ancestors will be deleted. * @throws IOException/* www . j a v a 2 s.co m*/ */ public static void deleteEmptyParentDirectories(FileSystem fs, Path limitPath, Path startPath) throws IOException { if (PathUtils.isAncestor(limitPath, startPath) && !PathUtils.getPathWithoutSchemeAndAuthority(limitPath) .equals(PathUtils.getPathWithoutSchemeAndAuthority(startPath)) && fs.listStatus(startPath).length == 0) { if (!fs.delete(startPath, false)) { log.warn("Failed to delete empty directory " + startPath); } else { log.info("Deleted empty directory " + startPath); } deleteEmptyParentDirectories(fs, limitPath, startPath.getParent()); } }
From source file:gobblin.util.PullFileLoader.java
License:Apache License
/** * Find and load all pull files under a base {@link Path} recursively. * @param path base {@link Path} where pull files should be found recursively. * @param sysProps A {@link Config} used as fallback. * @param loadGlobalProperties if true, will also load at most one *.properties file per directory from the * {@link #rootDirectory} to the pull file {@link Path} for each pull file. * @return The loaded {@link Config}s./*from w w w .j a va2 s . c om*/ */ public Collection<Config> loadPullFilesRecursively(Path path, Config sysProps, boolean loadGlobalProperties) { try { Config fallback = sysProps; if (loadGlobalProperties && PathUtils.isAncestor(this.rootDirectory, path.getParent())) { fallback = loadAncestorGlobalConfigs(path.getParent(), fallback); } return loadPullFilesRecursivelyHelper(path, fallback, loadGlobalProperties); } catch (IOException ioe) { return Lists.newArrayList(); } }
From source file:gobblin.util.PullFileLoader.java
License:Apache License
/** * Load at most one *.properties files from path and each ancestor of path up to and including {@link #rootDirectory}. * Higher directories will serve as fallback for lower directories, and sysProps will serve as fallback for all of them. * @throws IOException/*from w ww . j a v a 2 s . c om*/ */ private Config loadAncestorGlobalConfigs(Path path, Config sysProps) throws IOException { Config config = sysProps; if (!PathUtils.isAncestor(this.rootDirectory, path)) { log.warn(String.format( "Loaded path %s is not a descendant of root path %s. Cannot load global properties.", path, this.rootDirectory)); } else { List<Path> ancestorPaths = Lists.newArrayList(); while (PathUtils.isAncestor(this.rootDirectory, path)) { ancestorPaths.add(path); path = path.getParent(); } List<Path> reversedAncestors = Lists.reverse(ancestorPaths); for (Path ancestor : reversedAncestors) { config = findAndLoadGlobalConfigInDirectory(ancestor, config); } } return config; }
From source file:gobblin.util.recordcount.CompactionRecordCountProvider.java
License:Apache License
/**
 * This method currently supports converting the given {@link Path} from {@link IngestionRecordCountProvider}.
 * The converted {@link Path} will start with {@link #M_OUTPUT_FILE_PREFIX}.
 *
 * <p>When {@code src} is of this provider's own class, {@code path} is returned as is. Any other
 * source provider results in the exception produced by {@code getNotImplementedException}.
 *
 * @param path the {@link Path} to convert.
 * @param src the {@link RecordCountProvider} that produced {@code path}.
 * @return the converted {@link Path}, located in the same parent directory as {@code path}.
 */
@Override
public Path convertPath(Path path, RecordCountProvider src) {
  if (this.getClass().equals(src.getClass())) {
    return path;
  }
  if (!src.getClass().equals(IngestionRecordCountProvider.class)) {
    throw getNotImplementedException(src);
  }
  // Keep the directory, replace only the file name with one carrying the source's record count.
  return new Path(path.getParent(), constructFileName(M_OUTPUT_FILE_PREFIX, src.getRecordCount(path)));
}
From source file:gobblin.util.SchedulerUtils.java
License:Apache License
/** * Load job configurations from job configuration files affected by changes to the given common properties file. * From a general file system./*from w w w. ja v a2s . c o m*/ * @param sysProps Gobblin framework configuration properties * @param commonPropsPath the path of common properties file with changes * @param jobConfigPathDir the path for root job configuration file directory * @return a list of job configurations in the form of {@link java.util.Properties} */ public static List<Properties> loadGenericJobConfigs(Properties sysProps, Path commonPropsPath, Path jobConfigPathDir) throws ConfigurationException, IOException { PullFileLoader loader = new PullFileLoader(jobConfigPathDir, jobConfigPathDir.getFileSystem(new Configuration()), getJobConfigurationFileExtensions(sysProps), PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS); Config sysConfig = ConfigUtils.propertiesToConfig(sysProps); Collection<Config> configs = loader.loadPullFilesRecursively(commonPropsPath.getParent(), sysConfig, true); List<Properties> jobConfigs = Lists.newArrayList(); for (Config config : configs) { try { jobConfigs.add(resolveTemplate(ConfigUtils.configToProperties(config))); } catch (IOException ioe) { LOGGER.error("Could not parse job config at " + ConfigUtils.getString(config, ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY, "Unknown path"), ioe); } } return jobConfigs; }
From source file:gobblin.util.WriterUtils.java
License:Apache License
/** * Create the given dir as well as all missing ancestor dirs. All created dirs will have the given permission. * This should be used instead of {@link FileSystem#mkdirs(Path, FsPermission)}, since that method only sets * the permission for the given dir, and not recursively for the ancestor dirs. * * @param fs FileSystem// ww w .ja v a2 s . c om * @param path The dir to be created * @param perm The permission to be set * @throws IOException if failing to create dir or set permission. */ public static void mkdirsWithRecursivePermission(FileSystem fs, Path path, FsPermission perm) throws IOException { if (fs.exists(path)) { return; } if (path.getParent() != null && !fs.exists(path.getParent())) { mkdirsWithRecursivePermission(fs, path.getParent(), perm); } if (!fs.mkdirs(path, perm)) { throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm)); } // Double check permission, since fs.mkdirs() may not guarantee to set the permission correctly if (!fs.getFileStatus(path).getPermission().equals(perm)) { fs.setPermission(path, perm); } }
From source file:hibench.DataGenerator.java
License:Apache License
public void createHtmlPages(Path dummy, HtmlConf html) throws IOException { LOG.info("Creating Html Pages..."); Path fout = new Path(dummy.getParent(), "tmp"); JobConf job = new JobConf(WebDataGen.class); String jobname = "Create html pages to " + fout.getName(); job.setJobName(jobname);//from w w w . ja v a 2 s .c om html.setJobConf(job); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapperClass(CreateHtmlPagesMapper.class); job.setNumReduceTasks(0); FileInputFormat.setInputPaths(job, dummy); // first create result files under tmp folder FileOutputFormat.setOutputPath(job, fout); // begin from dummy file job.setInputFormat(NLineInputFormat.class); // use MultipleTextOutputFormat to produce three out files defined // in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE job.setOutputFormat(HtmlMultipleTextOutputFormat.class); LOG.info("Running Job: " + jobname); LOG.info("Dummy file: " + dummy); LOG.info("Multiple result Html files as <links, words, urls>"); JobClient.runJob(job); LOG.info("Finished Running Job: " + jobname); // Move result files under tmp into parent path // and remove the empty tmp path finally DataPaths.moveFilesToParent(fout); }
From source file:hibench.DataPaths.java
License:Apache License
public static void moveFilesToParent(Path src) throws IOException { FileSystem fs = src.getFileSystem(new Configuration()); Path parent = src.getParent(); FileStatus[] flist = fs.listStatus(src); for (FileStatus file : flist) { if (null != file) { fs.rename(file.getPath(), new Path(parent, file.getPath().getName())); }//from www. j a v a2s . c om } fs.delete(src, true); fs.close(); }