Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages for org.apache.hadoop.fs Path getParent.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
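
A minimal sketch of the contract (the paths are illustrative):

Path p = new Path("/user/hive/warehouse");
Path parent = p.getParent();   // /user/hive
Path root = new Path("/");
Path none = root.getParent();  // null: the root has no parent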

Usage

From source file:gobblin.util.JobLauncherUtils.java

License:Apache License

/**
 * Cleanup staging data of all tasks of a job.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 */
public static void cleanJobStagingData(State state, Logger logger) throws IOException {
    Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR),
            "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR);
    Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR),
            "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR);

    String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI,
            ConfigurationKeys.LOCAL_FS_URI);
    FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

    Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR));
    logger.info("Cleaning up staging directory " + jobStagingPath);
    HadoopUtils.deletePath(fs, jobStagingPath, true);

    if (fs.exists(jobStagingPath.getParent()) && fs.listStatus(jobStagingPath.getParent()).length == 0) {
        logger.info("Deleting directory " + jobStagingPath.getParent());
        HadoopUtils.deletePath(fs, jobStagingPath.getParent(), true);
    }

    Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
    logger.info("Cleaning up output directory " + jobOutputPath);
    HadoopUtils.deletePath(fs, jobOutputPath, true);

    if (fs.exists(jobOutputPath.getParent()) && fs.listStatus(jobOutputPath.getParent()).length == 0) {
        logger.info("Deleting directory " + jobOutputPath.getParent());
        HadoopUtils.deletePath(fs, jobOutputPath.getParent(), true);
    }

    if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)) {
        if (state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) {
            Path jobErrPath = new Path(ConfigurationKeys.ROW_LEVEL_ERR_FILE);
            logger.info("Cleaning up err directory: " + jobErrPath);
            HadoopUtils.deleteIfExists(fs, jobErrPath, true);
        }
    }
}
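
The two blocks above that inspect jobStagingPath.getParent() and jobOutputPath.getParent() follow the same idiom: after removing a directory, remove its parent as well if the parent is now empty. A standalone sketch of that idiom (the helper name is hypothetical):

static void deleteParentIfEmpty(FileSystem fs, Path child, Logger logger) throws IOException {
    Path parent = child.getParent();
    if (parent != null && fs.exists(parent) && fs.listStatus(parent).length == 0) {
        logger.info("Deleting directory " + parent);
        HadoopUtils.deletePath(fs, parent, true);
    }
}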

From source file:gobblin.util.PathUtils.java

License:Apache License

/**
 * Finds the deepest ancestor of input that is not a glob.
 */
public static Path deepestNonGlobPath(Path input) {
    Path commonRoot = input;

    while (commonRoot != null && isGlob(commonRoot)) {
        commonRoot = commonRoot.getParent();
    }
    return commonRoot;
}
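
A hypothetical call, assuming isGlob flags any path whose string form contains glob metacharacters such as '*' or '?':

Path input = new Path("/data/tracking/*/hourly/2016-*");
Path base = PathUtils.deepestNonGlobPath(input);
// base is /data/tracking, the deepest ancestor containing no glob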

From source file:gobblin.util.PathUtils.java

License:Apache License

/**
 * Deletes startPath if it is an empty directory, then deletes each empty ancestor in turn, stopping before limitPath.
 * @param fs {@link FileSystem} where paths are located.
 * @param limitPath only {@link Path}s that are strict descendants of this path will be deleted.
 * @param startPath first {@link Path} to delete. Afterwards empty ancestors will be deleted.
 * @throws IOException
 */
public static void deleteEmptyParentDirectories(FileSystem fs, Path limitPath, Path startPath)
        throws IOException {
    if (PathUtils.isAncestor(limitPath, startPath)
            && !PathUtils.getPathWithoutSchemeAndAuthority(limitPath)
                    .equals(PathUtils.getPathWithoutSchemeAndAuthority(startPath))
            && fs.listStatus(startPath).length == 0) {
        if (!fs.delete(startPath, false)) {
            log.warn("Failed to delete empty directory " + startPath);
        } else {
            log.info("Deleted empty directory " + startPath);
        }
        deleteEmptyParentDirectories(fs, limitPath, startPath.getParent());
    }
}
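
A hypothetical invocation: prune a dated partition directory and any ancestors that become empty, without ever touching the dataset root itself (the paths are illustrative):

FileSystem fs = FileSystem.get(new Configuration());
// Deletes /data/events/2016/01/02 if empty, then 2016/01 and 2016 in turn
// if they also become empty; /data/events is the limit and is never deleted.
PathUtils.deleteEmptyParentDirectories(fs,
        new Path("/data/events"),
        new Path("/data/events/2016/01/02"));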

From source file:gobblin.util.PullFileLoader.java

License:Apache License

/**
 * Find and load all pull files under a base {@link Path} recursively.
 * @param path base {@link Path} where pull files should be found recursively.
 * @param sysProps A {@link Config} used as fallback.
 * @param loadGlobalProperties if true, will also load at most one *.properties file per directory from the
 *          {@link #rootDirectory} to the pull file {@link Path} for each pull file.
 * @return The loaded {@link Config}s.
 */
public Collection<Config> loadPullFilesRecursively(Path path, Config sysProps, boolean loadGlobalProperties) {
    try {
        Config fallback = sysProps;
        if (loadGlobalProperties && PathUtils.isAncestor(this.rootDirectory, path.getParent())) {
            fallback = loadAncestorGlobalConfigs(path.getParent(), fallback);
        }
        return loadPullFilesRecursivelyHelper(path, fallback, loadGlobalProperties);
    } catch (IOException ioe) {
        return Lists.newArrayList();
    }
}

From source file:gobblin.util.PullFileLoader.java

License:Apache License

/**
 * Load at most one *.properties file from path and from each ancestor of path up to and including {@link #rootDirectory}.
 * Higher directories will serve as fallback for lower directories, and sysProps will serve as fallback for all of them.
 * @throws IOException
 */
private Config loadAncestorGlobalConfigs(Path path, Config sysProps) throws IOException {
    Config config = sysProps;

    if (!PathUtils.isAncestor(this.rootDirectory, path)) {
        log.warn(String.format(
                "Loaded path %s is not a descendant of root path %s. Cannot load global properties.", path,
                this.rootDirectory));
    } else {

        List<Path> ancestorPaths = Lists.newArrayList();
        while (PathUtils.isAncestor(this.rootDirectory, path)) {
            ancestorPaths.add(path);
            path = path.getParent();
        }

        List<Path> reversedAncestors = Lists.reverse(ancestorPaths);
        for (Path ancestor : reversedAncestors) {
            config = findAndLoadGlobalConfigInDirectory(ancestor, config);
        }
    }
    return config;
}
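
The fallback chain relies on Typesafe Config semantics: keys present in a config win over keys in its fallback. A small illustration of that ordering (the keys and values are made up):

Config parentDir = ConfigFactory.parseString("retries=3\nstaging.dir=/tmp/staging");
Config childDir = ConfigFactory.parseString("retries=5");
Config merged = childDir.withFallback(parentDir);
// merged.getInt("retries") == 5                       -- the deeper directory wins
// merged.getString("staging.dir") == "/tmp/staging"   -- inherited from the fallback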

From source file:gobblin.util.recordcount.CompactionRecordCountProvider.java

License:Apache License

/**
 * This method currently supports converting the given {@link Path} from {@link IngestionRecordCountProvider}.
 * The converted {@link Path} will start with {@link #M_OUTPUT_FILE_PREFIX}.
 */
@Override
public Path convertPath(Path path, RecordCountProvider src) {
    if (this.getClass().equals(src.getClass())) {
        return path;
    } else if (src.getClass().equals(IngestionRecordCountProvider.class)) {
        String newFileName = constructFileName(M_OUTPUT_FILE_PREFIX, src.getRecordCount(path));
        return new Path(path.getParent(), newFileName);
    } else {
        throw getNotImplementedException(src);
    }
}
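
The new Path(parent, child) constructor used above is the usual way to rename a file while keeping it in the same directory. A minimal sketch (the file names are illustrative):

Path original = new Path("/output/part.task_1.123.avro");
Path renamed = new Path(original.getParent(), "compacted." + original.getName());
// renamed is /output/compacted.part.task_1.123.avro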

From source file:gobblin.util.SchedulerUtils.java

License:Apache License

/**
 * Load job configurations from a general file system, reading the job configuration files affected by
 * changes to the given common properties file.
 * @param sysProps Gobblin framework configuration properties
 * @param commonPropsPath the path of common properties file with changes
 * @param jobConfigPathDir the path for root job configuration file directory
 * @return a list of job configurations in the form of {@link java.util.Properties}
 */
public static List<Properties> loadGenericJobConfigs(Properties sysProps, Path commonPropsPath,
        Path jobConfigPathDir) throws ConfigurationException, IOException {

    PullFileLoader loader = new PullFileLoader(jobConfigPathDir,
            jobConfigPathDir.getFileSystem(new Configuration()), getJobConfigurationFileExtensions(sysProps),
            PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
    Config sysConfig = ConfigUtils.propertiesToConfig(sysProps);
    Collection<Config> configs = loader.loadPullFilesRecursively(commonPropsPath.getParent(), sysConfig, true);

    List<Properties> jobConfigs = Lists.newArrayList();
    for (Config config : configs) {
        try {
            jobConfigs.add(resolveTemplate(ConfigUtils.configToProperties(config)));
        } catch (IOException ioe) {
            LOGGER.error("Could not parse job config at "
                    + ConfigUtils.getString(config, ConfigurationKeys.JOB_CONFIG_FILE_PATH_KEY, "Unknown path"),
                    ioe);
        }
    }

    return jobConfigs;
}
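
A hypothetical call, reloading every job configuration that falls under the directory of a changed common properties file (the paths are illustrative):

Properties sysProps = new Properties();  // framework configuration properties
// Throws ConfigurationException and IOException.
List<Properties> affected = SchedulerUtils.loadGenericJobConfigs(
        sysProps,
        new Path("/gobblin/job-conf/team-a/common.properties"),
        new Path("/gobblin/job-conf"));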

From source file:gobblin.util.WriterUtils.java

License:Apache License

/**
 * Create the given dir as well as all missing ancestor dirs. All created dirs will have the given permission.
 * This should be used instead of {@link FileSystem#mkdirs(Path, FsPermission)}, since that method only sets
 * the permission for the given dir, and not recursively for the ancestor dirs.
 *
 * @param fs FileSystem
 * @param path The dir to be created
 * @param perm The permission to be set
 * @throws IOException if failing to create dir or set permission.
 */
public static void mkdirsWithRecursivePermission(FileSystem fs, Path path, FsPermission perm)
        throws IOException {
    if (fs.exists(path)) {
        return;
    }
    if (path.getParent() != null && !fs.exists(path.getParent())) {
        mkdirsWithRecursivePermission(fs, path.getParent(), perm);
    }
    if (!fs.mkdirs(path, perm)) {
        throw new IOException(String.format("Unable to mkdir %s with permission %s", path, perm));
    }

    // Double-check the permission, since fs.mkdirs() does not guarantee that the permission is set correctly
    if (!fs.getFileStatus(path).getPermission().equals(perm)) {
        fs.setPermission(path, perm);
    }
}
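
A hypothetical call; unlike a single fs.mkdirs(path, perm), every missing ancestor created on the way to the leaf gets the same permission:

FileSystem fs = FileSystem.get(new Configuration());
FsPermission perm = new FsPermission((short) 0750);
// Creates /data, /data/output and /data/output/2016 as needed, each with 750.
WriterUtils.mkdirsWithRecursivePermission(fs, new Path("/data/output/2016"), perm);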

From source file:hibench.DataGenerator.java

License:Apache License

public void createHtmlPages(Path dummy, HtmlConf html) throws IOException {

    LOG.info("Creating Html Pages...");

    Path fout = new Path(dummy.getParent(), "tmp");

    JobConf job = new JobConf(WebDataGen.class);
    String jobname = "Create html pages to " + fout.getName();

    job.setJobName(jobname);

    html.setJobConf(job);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(CreateHtmlPagesMapper.class);

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, dummy);

    // first create result files under tmp folder
    FileOutputFormat.setOutputPath(job, fout);

    // begin from dummy file
    job.setInputFormat(NLineInputFormat.class);

    // use MultipleTextOutputFormat to produce the three output files defined
    // in PathConf, i.e., LINK, PAGE_ZLINK_TABLE, PAGE_ZWORD_TABLE
    job.setOutputFormat(HtmlMultipleTextOutputFormat.class);

    LOG.info("Running Job: " + jobname);
    LOG.info("Dummy file: " + dummy);
    LOG.info("Multiple result Html files as <links, words, urls>");
    JobClient.runJob(job);
    LOG.info("Finished Running Job: " + jobname);

    // Move the result files out of the tmp folder into its parent path,
    // and finally remove the now-empty tmp path
    DataPaths.moveFilesToParent(fout);
}

From source file:hibench.DataPaths.java

License:Apache License

public static void moveFilesToParent(Path src) throws IOException {
    FileSystem fs = src.getFileSystem(new Configuration());
    Path parent = src.getParent();

    FileStatus[] flist = fs.listStatus(src);
    for (FileStatus file : flist) {
        if (null != file) {
            fs.rename(file.getPath(), new Path(parent, file.getPath().getName()));
        }
    }
    fs.delete(src, true);
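    // Note: the FileSystem obtained from Path#getFileSystem is normally a shared,
    // cached instance; closing it below can affect other users of the same URI.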
    fs.close();
}
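
A hypothetical call matching the createHtmlPages flow above: results are first written under a tmp subdirectory, then promoted one level up and the empty tmp directory is removed (the path is illustrative):

DataPaths.moveFilesToParent(new Path("/web/data/tmp"));
// files formerly under /web/data/tmp now live directly in /web/data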