Example usage for org.apache.hadoop.fs Path getParent

List of usage examples for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of the getParent method of org.apache.hadoop.fs.Path.

Prototype

public Path getParent() 

Source Link

Document

Returns the parent of a path or null if at root.

Usage

From source file:com.uber.hoodie.hive.TestUtil.java

License:Apache License

/**
 * Writes one Avro data block of generated test records into a delta log file located in the
 * parent directory of the given parquet file, and returns that log file.
 *
 * @param parquetFilePath   path of the parquet data file the log file is written for
 * @param isLogSchemaSimple {@code true} to use the simple schema and test records,
 *                          {@code false} to use the evolved schema and test records
 * @return the log file reported by the writer, queried after the writer has been closed
 */
private static HoodieLogFile generateLogData(Path parquetFilePath, boolean isLogSchemaSimple)
        throws IOException, InterruptedException, URISyntaxException {
    Schema schema;
    if (isLogSchemaSimple) {
        schema = SchemaTestUtil.getSimpleSchema();
    } else {
        schema = SchemaTestUtil.getEvolvedSchema();
    }
    HoodieDataFile dataFile = new HoodieDataFile(fileSystem.getFileStatus(parquetFilePath));

    // The log file lives next to the parquet file and shares its file id and commit time.
    Writer logWriter = HoodieLogFormat.newWriterBuilder()
            .onParentPath(parquetFilePath.getParent())
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
            .withFileId(dataFile.getFileId())
            .overBaseCommit(dataFile.getCommitTime())
            .withFs(fileSystem)
            .build();

    List<IndexedRecord> records;
    if (isLogSchemaSimple) {
        records = SchemaTestUtil.generateTestRecords(0, 100);
    } else {
        records = SchemaTestUtil.generateEvolvedTestRecords(100, 100);
    }

    // Block header carries the commit time and the schema the records were written with.
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());

    logWriter.appendBlock(new HoodieAvroDataBlock(records, header));
    logWriter.close();
    return logWriter.getLogFile();
}

From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java

License:Apache License

/**
 * Copies a snapshot of the dataset under {@code baseDir} into {@code outputDir}.
 *
 * <p>The snapshot consists of the latest data files of every partition that are on or before
 * the latest completed commit, each partition's metadata file (when present), and the files in
 * the meta folder whose commit time is on or before that commit (plus the properties file).
 * A {@code _SUCCESS} marker is created under {@code outputDir} at the end.
 *
 * <p>If there is no completed commit, a warning is logged and the method returns without
 * copying anything or creating the marker. If there are no partitions, only the marker is
 * created.
 *
 * @param jsc                          Spark context used to parallelize the copy per partition
 * @param baseDir                      base path of the source dataset
 * @param outputDir                    target directory; deleted recursively first if it exists
 *                                     and there is at least one partition to copy
 * @param shouldAssumeDatePartitioning passed through to {@code FSUtils.getAllPartitionPaths}
 * @throws IOException if a file system operation fails
 */
public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
        final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
            tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.",
            latestCommitTimestamp));

    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));

        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(
                    String.format("The output path %s targetBasePath already exists, deleting", outputPath));
            fs.delete(outputPath, true);
        }

        // One Spark partition per dataset partition: first list the files to copy, then copy them.
        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition,
                    latestCommitTimestamp);
            dataFiles.forEach(
                    hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));

            // also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                    HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }

            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            // The file system is re-resolved inside the closure from the serializable configuration.
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());

            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()), false,
                    ifs.getConf());
        });

        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
                new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                    // The properties file is always copied; other files are filtered by commit time.
                    if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                        return true;
                    } else {
                        String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                        return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                                HoodieTimeline.LESSER_OR_EQUAL);
                    }
                });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                    + commitStatus.getPath().getName());
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                // Only logged: the copy below is still attempted.
                logger.error(String.format("The target output commit file (%s targetBasePath) already exists.",
                        targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partition to copy.");
    }

    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        // Fixed format specifier: "$s" was printed literally and the argument was ignored.
        logger.info(String.format("Creating _SUCCESS under targetBasePath: %s", outputDir));
        fs.createNewFile(successTagPath);
    }
}

From source file:com.wandisco.s3hdfs.rewrite.redirect.Redirect.java

License:Apache License

/**
 * Builds a new path for the given source bucket and object, reusing the root, user and version
 * components extracted from {@code uri}, and passes its full HDFS object path to
 * {@code ADD_WEBHDFS}.
 *
 * @param uri       path expected to look like {@code /root/user/bucket/object/version/.obj}
 * @param srcBucket bucket to place in the new path
 * @param srcObject object to place in the new path
 * @return the result of {@code ADD_WEBHDFS} applied to the new path's full HDFS object path
 */
String replaceSrcs(String uri, String srcBucket, String srcObject) throws IOException {
    // Layout: /root/user/bucket/object/version/.obj
    Path versionDir = new Path(uri).getParent();
    // version -> object -> bucket -> user
    Path userDir = versionDir.getParent().getParent().getParent();
    Path rootDir = userDir.getParent();

    String version = versionDir.getName();
    String user = userDir.getName();
    String root = rootDir.getName();

    S3HdfsPath rewritten = new S3HdfsPath(root, user, srcBucket, srcObject, version, null);
    return ADD_WEBHDFS(rewritten.getFullHdfsObjPath());
}

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

/**
 * Writes the configuration files for the application into a directory under the file system's
 * home directory and returns that directory.
 *
 * <p>Three files are written: the Storm configuration as YAML at
 * {@code appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING}, and {@code yarn-site.xml} and
 * {@code logback.xml} next to it. Every stream is closed even when writing fails.
 *
 * @param fs        file system to write to
 * @param appHome   application home, resolved against {@code fs.getHomeDirectory()}
 * @param stormConf Storm configuration; passed to {@code rmNulls} before being dumped
 * @param yarnConf  YARN configuration written as XML
 * @return the directory that contains the written files
 * @throws IOException if creating the directory or writing any file fails
 */
@SuppressWarnings("rawtypes")
static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf,
        YarnConfiguration yarnConf) throws IOException {
    Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING);
    Path dirDst = confDst.getParent();
    fs.mkdirs(dirDst);

    //storm.yaml
    try (FSDataOutputStream out = fs.create(confDst);
            OutputStreamWriter writer = new OutputStreamWriter(out)) {
        rmNulls(stormConf);
        new Yaml().dump(stormConf, writer);
    }

    //yarn-site.xml
    Path yarnSiteXml = new Path(dirDst, "yarn-site.xml");
    try (FSDataOutputStream out = fs.create(yarnSiteXml);
            OutputStreamWriter writer = new OutputStreamWriter(out)) {
        yarnConf.writeXml(writer);
    }

    //logback.xml
    Path logbackXml = new Path(dirDst, "logback.xml");
    try (FSDataOutputStream out = fs.create(logbackXml)) {
        CreateLogbackXML(out);
    }

    return dirDst;
}

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

/**
 * Walks the file tree under each given path and returns one wildcard entry of the form
 * {@code <directory><File.separator>*} for every directory in which a regular file matching
 * {@code glob:**.jar} is visited.
 *
 * <p>Once a jar is found in a directory, the walk skips the remaining entries of that
 * directory ({@code SKIP_SIBLINGS}), so entries that would have been visited after the jar,
 * including sub-directories, are not explored.
 *
 * @param pathStrs root paths to search, resolved against the default file system
 * @return the distinct wildcard entries in the order they were discovered
 * @throws IOException if walking a file tree fails
 */
private static List<String> findAllJarsInPaths(String... pathStrs) throws IOException {
    java.nio.file.FileSystem fs = FileSystems.getDefault();
    final PathMatcher matcher = fs.getPathMatcher("glob:**.jar");
    // LinkedHashSet keeps discovery order and collapses duplicate directory entries.
    final LinkedHashSet<String> pathSet = new LinkedHashSet<String>();
    for (String pathStr : pathStrs) {
        java.nio.file.Path start = fs.getPath(pathStr);
        Files.walkFileTree(start, new SimpleFileVisitor<java.nio.file.Path>() {
            @Override
            public FileVisitResult visitFile(java.nio.file.Path path, BasicFileAttributes attrs)
                    throws IOException {
                // The former "!pathSet.contains(path)" test compared a Path with a set of
                // Strings and was therefore always true; the set already prevents duplicates.
                if (attrs.isRegularFile() && matcher.matches(path)) {
                    pathSet.add(path.getParent() + File.separator + "*");
                    return FileVisitResult.SKIP_SIBLINGS;
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }
    return new ArrayList<String>(pathSet);
}

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

/**
 * Tells whether a cache file is visible to all users (public).
 *
 * <p>The file itself must be readable by others, and every ancestor directory, starting at
 * the file's parent, must be executable by others.
 *
 * @param fs  Hadoop file system
 * @param path  file path
 * @return true if the path is visible to all, false otherwise
 * @throws IOException
 */
static boolean isPublic(FileSystem fs, Path path) throws IOException {
    // Short-circuit: ancestors are only inspected when the leaf file is readable by others.
    return checkPermissionOfOther(fs, path, FsAction.READ)
            && ancestorsHaveExecutePermissions(fs, path.getParent());
}

From source file:com.yahoo.storm.yarn.Util.java

License:Open Source License

/**
 * Checks that the given path and each of its ancestors, up to the root, grant the 'execute'
 * permission to other users, i.e. that other users can traverse the directory hierarchy down
 * to the given path.
 *
 * @param fs   Hadoop file system
 * @param path path to start from; a {@code null} path yields {@code true}
 * @return false as soon as one level lacks the permission, true otherwise
 */
static boolean ancestorsHaveExecutePermissions(FileSystem fs, Path path) throws IOException {
    // getParent() returns null at the root, which ends the climb.
    for (Path dir = path; dir != null; dir = dir.getParent()) {
        boolean traversable = checkPermissionOfOther(fs, dir, FsAction.EXECUTE);
        if (!traversable) {
            return false;
        }
    }
    return true;
}

From source file:com.yss.util.YarnUtil.java

License:Open Source License

/**
 * Writes the configuration files for the application into a directory under the file system's
 * home directory and returns that directory.
 *
 * <p>Two files are written: the Storm configuration as YAML at
 * {@code appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING}, and {@code yarn-site.xml} next to
 * it. Every stream is closed even when writing fails.
 *
 * @param fs        file system to write to
 * @param appHome   application home, resolved against {@code fs.getHomeDirectory()}
 * @param stormConf Storm configuration; passed to {@code rmNulls} before being dumped
 * @param yarnConf  YARN configuration written as XML
 * @return the directory that contains the written files
 * @throws IOException if creating the directory or writing any file fails
 */
@SuppressWarnings("rawtypes")
public static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf,
        YarnConfiguration yarnConf) throws IOException {
    Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING);
    Path dirDst = confDst.getParent();
    fs.mkdirs(dirDst);

    //storm.yaml
    try (FSDataOutputStream out = fs.create(confDst);
            OutputStreamWriter writer = new OutputStreamWriter(out)) {
        rmNulls(stormConf);
        new Yaml().dump(stormConf, writer);
    }

    //yarn-site.xml
    Path yarnSiteXml = new Path(dirDst, "yarn-site.xml");
    try (FSDataOutputStream out = fs.create(yarnSiteXml);
            OutputStreamWriter writer = new OutputStreamWriter(out)) {
        yarnConf.writeXml(writer);
    }
    return dirDst;
}

From source file:com.yss.util.YarnUtil.java

License:Open Source License

/**
 * Returns a boolean to denote whether a cache file is visible to all(public)
 * or not/*from w  ww  .  jav a  2s.com*/
 * @param fs  Hadoop file system
 * @param path  file path
 * @return true if the path is visible to all, false otherwise
 * @throws IOException
 */
public static boolean isPublic(FileSystem fs, Path path) throws IOException {
    //the leaf level file should be readable by others
    if (!checkPermissionOfOther(fs, path, FsAction.READ)) {
        return false;
    }
    return ancestorsHaveExecutePermissions(fs, path.getParent());
}

From source file:com.zjy.mongo.splitter.BSONSplitter.java

License:Apache License

/**
 * Resolves the location of the ".splits" file that belongs to a BSON file.
 *
 * <p>The file is named {@code .<bson file name>.splits}. It is placed in the splits path
 * obtained from the configuration when there is one, and next to the BSON file otherwise.
 *
 * @param filePath the path to the BSON file.
 * @param conf the Hadoop configuration.
 * @return the path to the ".splits" file.
 */
public static Path getSplitsFilePath(final Path filePath, final Configuration conf) {
    final String configuredDir = MongoConfigUtil.getBSONSplitsPath(conf);
    final String fileName = "." + filePath.getName() + ".splits";
    if (configuredDir != null) {
        return new Path(configuredDir, fileName);
    }
    // No splits path configured: keep the splits file alongside the BSON file.
    return new Path(filePath.getParent(), fileName);
}