List of usage examples for org.apache.hadoop.fs.Path#getParent()
public Path getParent()
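Before the examples pulled from real projects below, here is a minimal sketch of the method's behavior: getParent() strips the last path component, and returns null once the root is reached. The Path class and these semantics are from the Hadoop API; the sample paths are made up for illustration.

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path p = new Path("/data/tables/events/part-00000.parquet"); // hypothetical path
        System.out.println(p.getParent());              // /data/tables/events
        System.out.println(p.getParent().getName());    // events
        System.out.println(new Path("/").getParent());  // null: the root has no parent
    }
}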
From source file:com.uber.hoodie.hive.TestUtil.java
License:Apache License
private static HoodieLogFile generateLogData(Path parquetFilePath, boolean isLogSchemaSimple)
        throws IOException, InterruptedException, URISyntaxException {
    Schema schema = (isLogSchemaSimple ? SchemaTestUtil.getSimpleSchema()
            : SchemaTestUtil.getEvolvedSchema());
    HoodieDataFile dataFile = new HoodieDataFile(fileSystem.getFileStatus(parquetFilePath));
    // Write a log file for this parquet file
    Writer logWriter = HoodieLogFormat.newWriterBuilder()
            .onParentPath(parquetFilePath.getParent())
            .withFileExtension(HoodieLogFile.DELTA_EXTENSION)
            .withFileId(dataFile.getFileId())
            .overBaseCommit(dataFile.getCommitTime())
            .withFs(fileSystem)
            .build();
    List<IndexedRecord> records = (isLogSchemaSimple ? SchemaTestUtil.generateTestRecords(0, 100)
            : SchemaTestUtil.generateEvolvedTestRecords(100, 100));
    Map<HoodieLogBlock.HeaderMetadataType, String> header = Maps.newHashMap();
    header.put(HoodieLogBlock.HeaderMetadataType.INSTANT_TIME, dataFile.getCommitTime());
    header.put(HoodieLogBlock.HeaderMetadataType.SCHEMA, schema.toString());
    HoodieAvroDataBlock dataBlock = new HoodieAvroDataBlock(records, header);
    logWriter.appendBlock(dataBlock);
    logWriter.close();
    return logWriter.getLogFile();
}
From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java
License:Apache License
public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
        final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
            tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.",
            latestCommitTimestamp));

    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));

        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(String.format("The output path %s already exists, deleting", outputPath));
            fs.delete(new Path(outputDir), true);
        }

        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition,
                    latestCommitTimestamp);
            dataFiles.forEach(
                    hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));

            // Also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                    HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }
            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());

            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()),
                    false, ifs.getConf());
        });

        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
                new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                    if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                        return true;
                    } else {
                        String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                        return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                                HoodieTimeline.LESSER_OR_EQUAL);
                    }
                });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                    + commitStatus.getPath().getName());
            // Create the metafolder in the target before copying into it
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                logger.error(String.format("The target output commit file (%s) already exists.",
                        targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partition to copy.");
    }

    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        logger.info(String.format("Creating _SUCCESS under targetBasePath: %s", outputDir));
        fs.createNewFile(successTagPath);
    }
}
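The snapshot code above relies on a common idiom: call fs.mkdirs(targetFilePath.getParent()) to create a file's parent directory before writing or copying into it. A minimal sketch of just that idiom, with a hypothetical target path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EnsureParentDemo {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path target = new Path("/backups/.hoodie/commit_001.commit"); // hypothetical target
        // Ensure the parent directory exists before creating the file itself
        if (!fs.exists(target.getParent())) {
            fs.mkdirs(target.getParent()); // also creates missing intermediate directories
        }
        fs.createNewFile(target);
    }
}

Note that mkdirs() is idempotent, so the exists() guard is an optimization to avoid an extra namenode round trip, not a correctness requirement.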
From source file:com.wandisco.s3hdfs.rewrite.redirect.Redirect.java
License:Apache License
String replaceSrcs(String uri, String srcBucket, String srcObject) throws IOException {
    // /root/user/bucket/object/version/.obj
    Path path = new Path(uri);
    String version = path.getParent().getName();
    String user = path.getParent().getParent().getParent().getParent().getName();
    String root = path.getParent().getParent().getParent().getParent().getParent().getName();
    S3HdfsPath newPath = new S3HdfsPath(root, user, srcBucket, srcObject, version, null);
    return ADD_WEBHDFS(newPath.getFullHdfsObjPath());
}
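The chained getParent() calls above walk a fixed six-level layout from the leaf upward. A small sketch, using only the Hadoop Path API and a sample URI matching the layout in the comment, shows which component each chain extracts:

import org.apache.hadoop.fs.Path;

public class PathWalkDemo {
    public static void main(String[] args) {
        // Sample URI following the /root/user/bucket/object/version/.obj layout
        Path path = new Path("/root/user/bucket/object/version/.obj");
        System.out.println(path.getParent().getName());                 // version (1 level up)
        System.out.println(path.getParent().getParent().getParent()
                .getParent().getName());                                // user (4 levels up)
        System.out.println(path.getParent().getParent().getParent()
                .getParent().getParent().getName());                    // root (5 levels up)
    }
}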
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
@SuppressWarnings("rawtypes") static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf, YarnConfiguration yarnConf) throws IOException { // dump stringwriter's content into FS conf/storm.yaml Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING); Path dirDst = confDst.getParent(); fs.mkdirs(dirDst);/* ww w . java 2 s . com*/ //storm.yaml FSDataOutputStream out = fs.create(confDst); Yaml yaml = new Yaml(); OutputStreamWriter writer = new OutputStreamWriter(out); rmNulls(stormConf); yaml.dump(stormConf, writer); writer.close(); out.close(); //yarn-site.xml Path yarn_site_xml = new Path(dirDst, "yarn-site.xml"); out = fs.create(yarn_site_xml); writer = new OutputStreamWriter(out); yarnConf.writeXml(writer); writer.close(); out.close(); //logback.xml Path logback_xml = new Path(dirDst, "logback.xml"); out = fs.create(logback_xml); CreateLogbackXML(out); out.close(); return dirDst; }
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
private static List<String> findAllJarsInPaths(String... pathStrs) throws IOException {
    java.nio.file.FileSystem fs = FileSystems.getDefault();
    final PathMatcher matcher = fs.getPathMatcher("glob:**.jar");
    final LinkedHashSet<String> pathSet = new LinkedHashSet<String>();

    for (String pathStr : pathStrs) {
        java.nio.file.Path start = fs.getPath(pathStr);
        Files.walkFileTree(start, new SimpleFileVisitor<java.nio.file.Path>() {
            @Override
            public FileVisitResult visitFile(java.nio.file.Path path, BasicFileAttributes attrs)
                    throws IOException {
                if (attrs.isRegularFile() && matcher.matches(path) && !pathSet.contains(path)) {
                    java.nio.file.Path parent = path.getParent();
                    pathSet.add(parent + File.separator + "*");
                    return FileVisitResult.SKIP_SIBLINGS;
                }
                return FileVisitResult.CONTINUE;
            }
        });
    }

    final List<String> toRet = new ArrayList<String>();
    for (String p : pathSet) {
        toRet.add(p);
    }
    return toRet;
}
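Note that this example exercises java.nio.file.Path#getParent rather than the Hadoop Path: the method name and semantics are analogous, but the types are unrelated. A minimal sketch of the NIO call, with a made-up jar location, shows the wildcard-classpath trick the visitor above uses:

import java.io.File;
import java.nio.file.Path;
import java.nio.file.Paths;

public class NioGetParentDemo {
    public static void main(String[] args) {
        Path jar = Paths.get("lib/deps/foo.jar");  // hypothetical jar location
        System.out.println(jar.getParent());       // lib/deps
        // As in findAllJarsInPaths, the parent becomes a wildcard classpath entry
        System.out.println(jar.getParent() + File.separator + "*"); // lib/deps/*
    }
}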
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
/**
 * Returns a boolean to denote whether a cache file is visible to all (public) or not.
 *
 * @param fs Hadoop file system
 * @param path file path
 * @return true if the path is visible to all, false otherwise
 * @throws IOException
 */
static boolean isPublic(FileSystem fs, Path path) throws IOException {
    // the leaf level file should be readable by others
    if (!checkPermissionOfOther(fs, path, FsAction.READ)) {
        return false;
    }
    return ancestorsHaveExecutePermissions(fs, path.getParent());
}
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
/**
 * Returns true if all ancestors of the specified path have the 'execute'
 * permission set for all users (i.e. that other users can traverse
 * the directory hierarchy to the given path).
 */
static boolean ancestorsHaveExecutePermissions(FileSystem fs, Path path) throws IOException {
    Path current = path;
    while (current != null) {
        // the subdirs in the path should have execute permissions for others
        if (!checkPermissionOfOther(fs, current, FsAction.EXECUTE)) {
            return false;
        }
        current = current.getParent();
    }
    return true;
}
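Both permission helpers above terminate because Hadoop's Path.getParent() returns null once the root directory is reached. A tiny sketch of that ancestor walk, printing each step with a hypothetical starting path:

import org.apache.hadoop.fs.Path;

public class AncestorWalkDemo {
    public static void main(String[] args) {
        Path current = new Path("/user/alice/cache/file.txt"); // hypothetical path
        while (current != null) {
            System.out.println(current); // /user/alice/cache/file.txt, /user/alice/cache, ..., /
            current = current.getParent(); // null at the root, which ends the loop
        }
    }
}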
From source file:com.yss.util.YarnUtil.java
License:Open Source License
@SuppressWarnings("rawtypes") public static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf, YarnConfiguration yarnConf) throws IOException { // dump stringwriter's content into FS conf/storm.yaml Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING); Path dirDst = confDst.getParent(); fs.mkdirs(dirDst);// w w w .j a v a 2 s. c o m //storm.yaml FSDataOutputStream out = fs.create(confDst); Yaml yaml = new Yaml(); OutputStreamWriter writer = new OutputStreamWriter(out); rmNulls(stormConf); yaml.dump(stormConf, writer); writer.close(); out.close(); //yarn-site.xml Path yarn_site_xml = new Path(dirDst, "yarn-site.xml"); out = fs.create(yarn_site_xml); writer = new OutputStreamWriter(out); yarnConf.writeXml(writer); writer.close(); out.close(); return dirDst; }
From source file:com.yss.util.YarnUtil.java
License:Open Source License
/**
 * Returns a boolean to denote whether a cache file is visible to all (public) or not.
 *
 * @param fs Hadoop file system
 * @param path file path
 * @return true if the path is visible to all, false otherwise
 * @throws IOException
 */
public static boolean isPublic(FileSystem fs, Path path) throws IOException {
    // the leaf level file should be readable by others
    if (!checkPermissionOfOther(fs, path, FsAction.READ)) {
        return false;
    }
    return ancestorsHaveExecutePermissions(fs, path.getParent());
}
From source file:com.zjy.mongo.splitter.BSONSplitter.java
License:Apache License
/**
 * Get the path to the ".splits" file for a BSON file.
 *
 * @param filePath the path to the BSON file.
 * @param conf the Hadoop configuration.
 * @return the path to the ".splits" file.
 */
public static Path getSplitsFilePath(final Path filePath, final Configuration conf) {
    String splitsPath = MongoConfigUtil.getBSONSplitsPath(conf);
    String splitsFileName = "." + filePath.getName() + ".splits";
    if (null == splitsPath) {
        return new Path(filePath.getParent(), splitsFileName);
    }
    return new Path(splitsPath, splitsFileName);
}
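A short usage sketch of the method above for the case where no explicit splits path is configured, so getParent() places the hidden ".splits" file next to its BSON file; the input path is made up:

import org.apache.hadoop.fs.Path;

public class SplitsPathDemo {
    public static void main(String[] args) {
        // Mirrors the null-splitsPath branch of getSplitsFilePath
        Path bsonFile = new Path("/data/dump/users.bson"); // hypothetical input
        String splitsFileName = "." + bsonFile.getName() + ".splits";
        System.out.println(new Path(bsonFile.getParent(), splitsFileName));
        // prints /data/dump/.users.bson.splits
    }
}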