List of usage examples for org.apache.hadoop.fs.Path.getName()
public String getName()
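getName() returns the final component of the path, i.e. the file or directory name after the last separator. A minimal standalone sketch of the call (the path string below is hypothetical, chosen only to show what the method returns):

import org.apache.hadoop.fs.Path;

public class PathGetNameExample {
    public static void main(String[] args) {
        // hypothetical HDFS path, used purely for illustration
        Path p = new Path("hdfs://namenode:8020/data/events/part-00000.parquet");
        // getName() returns only the final path component
        System.out.println(p.getName());             // part-00000.parquet
        // the parent keeps everything before the last component
        System.out.println(p.getParent().getName()); // events
    }
}

The examples below follow the same pattern: the returned name is used as a map key, as the target file name when copying, or as input to a name-based filter.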
From source file:com.uber.hoodie.io.storage.HoodieWrapperFileSystem.java
License:Apache License
private FSDataOutputStream wrapOutputStream(final Path path, FSDataOutputStream fsDataOutputStream)
        throws IOException {
    if (fsDataOutputStream instanceof SizeAwareFSDataOutputStream) {
        return fsDataOutputStream;
    }
    SizeAwareFSDataOutputStream os = new SizeAwareFSDataOutputStream(fsDataOutputStream, new Runnable() {
        @Override
        public void run() {
            openStreams.remove(path.getName());
        }
    });
    openStreams.put(path.getName(), os);
    return os;
}
From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java
License:Apache License
public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
        final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
            tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-later-than %s.",
            latestCommitTimestamp));

    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));

        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(String.format("The output path %s already exists, deleting", outputPath));
            fs.delete(new Path(outputDir), true);
        }

        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition,
                    latestCommitTimestamp);
            dataFiles.forEach(
                    hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));

            // also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                    HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }
            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());

            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            // copy each data file into the output partition, keeping its original file name
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()),
                    false, ifs.getConf());
        });

        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-later-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
                new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                    if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                        return true;
                    } else {
                        String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                        return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                                HoodieTimeline.LESSER_OR_EQUAL);
                    }
                });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                    + commitStatus.getPath().getName());
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                logger.error(String.format("The target output commit file (%s) already exists.",
                        targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partitions to copy.");
    }

    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        logger.info(String.format("Creating _SUCCESS under targetBasePath: %s", outputDir));
        fs.createNewFile(successTagPath);
    }
}
From source file:com.unisa.hive.util.ExcludeTmp.java
License:Open Source License
public boolean accept(Path p) {
    String name = p.getName();
    return !name.startsWith("_") && !name.startsWith(".") && !name.endsWith(".tmp");
}
From source file:com.wipro.ats.bdre.dq.DQDriver.java
License:Apache License
@Override
public int run(String[] arg) throws Exception {
    String processId = arg[0];
    String sPath = arg[1];
    String destDir = arg[2];
    Properties props = new GetProperties().getProperties(processId, "dq");
    LOGGER.debug("props=" + props);
    Configuration conf = getConf();
    conf.set("dq.process.id", processId);
    Job job = Job.getInstance(conf);
    job.setJobName("Data Quality " + processId);
    job.setJarByClass(DQDriver.class);
    job.setMapperClass(DQMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    //Reducer is not required
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    Path inputFilePath = new Path(sPath);
    FileInputFormat.addInputPath(job, inputFilePath);
    FileOutputFormat.setOutputPath(job, removeIfExistAndSetOutputPath(conf, destDir));
    MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE, TextOutputFormat.class, Text.class,
            NullWritable.class);
    if (!job.waitForCompletion(true)) {
        return 1;
    }
    Path outputDir = new Path(destDir);
    FileSystem srcFs = outputDir.getFileSystem(getConf());
    FileSystem destFs = outputDir.getFileSystem(getConf());

    //Valid Records
    Path goodFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
    //Input and quality-filtered file should have the same name (but a different path)
    Path goodDestFile = new Path(destDir + "/" + inputFilePath.getName());
    if (srcFs.exists(goodFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, goodFilesSrcDir, destFs, goodDestFile, true, conf, "");
    }
    //Invalid Records
    Path badFilesSrcDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
    Path badDestFile = new Path(destDir + "/" + DQConstants.BAD_RECORDS_FILE);
    if (srcFs.exists(badFilesSrcDir)) {
        FileUtil.copyMerge(srcFs, badFilesSrcDir, destFs, badDestFile, true, conf, "");
    }

    //Preparing report aggregation job
    Job fileReportAggregationJob = Job.getInstance(conf);
    fileReportAggregationJob.setJobName("File Report Computing " + processId);
    fileReportAggregationJob.setJarByClass(DQMain.class);
    fileReportAggregationJob.setMapperClass(DQFileReportMapper.class);
    fileReportAggregationJob.setMapOutputKeyClass(Text.class);
    fileReportAggregationJob.setMapOutputValueClass(IntWritable.class);
    fileReportAggregationJob.setReducerClass(DQFileReportReducer.class);
    fileReportAggregationJob.setOutputKeyClass(Text.class);
    fileReportAggregationJob.setOutputValueClass(Text.class);
    fileReportAggregationJob.setNumReduceTasks(1);
    Path fileReportDir = new Path(destDir + "/" + DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
    Path fileReportOutputDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    FileInputFormat.addInputPath(fileReportAggregationJob, fileReportDir);
    FileOutputFormat.setOutputPath(fileReportAggregationJob, fileReportOutputDir);
    if (!fileReportAggregationJob.waitForCompletion(true)) {
        return 1;
    }
    //Merge report records
    Path reportsSrcDir = new Path(destDir + "/" + DQConstants.AGGREGATED_REPORT_PLACEHOLDER_FOLDER);
    Path reportsDestFile = new Path(destDir + "/" + DQConstants.FILE_REPORT_FILE);
    FileUtil.copyMerge(srcFs, reportsSrcDir, destFs, reportsDestFile, true, conf, "");
    Path reportDestFile = new Path(outputDir.toString() + "/" + DQConstants.FILE_REPORT_FILE);
    //Read the report file from HDFS and report the percentage
    DQStats dqStats = getQualityStats(getConf(), reportDestFile);
    LOGGER.info("Percentage of good records :" + dqStats.getGoodPercent());
    props = new GetProperties().getProperties(processId, "dq");
    String strThreshold = props.getProperty("min.pass.threshold.percent");
    float threshold = Float.parseFloat(strThreshold);
    dqStats.setThreshold(threshold);
    //Update the result in metadata
    logResult(dqStats, processId, 0L);
    if (dqStats.getGoodPercent() < threshold) {
        LOGGER.error("DQ check did not pass");
        throw new DQValidationException(dqStats);
    }
    LOGGER.info(dqStats);
    FileChecksum hdfsChecksum = destFs.getFileChecksum(goodDestFile);
    String fileHash = hdfsChecksum == null ? "0" : hdfsChecksum.toString();
    //Return file info oozie params
    RegisterFileInfo registerFileInfo = new RegisterFileInfo();
    registerFileInfo.setBatchId(null);
    registerFileInfo.setCreationTs(new Timestamp(new Date().getTime()));
    registerFileInfo.setFileHash(fileHash);
    registerFileInfo.setFileSize(destFs.getFileStatus(goodDestFile).getLen());
    registerFileInfo.setPath(goodDestFile.toString());
    registerFileInfo.setSubProcessId(Integer.parseInt(processId));
    OozieUtil oozieUtil = new OozieUtil();
    oozieUtil.persistBeanData(registerFileInfo, false);
    return 0;
}
From source file:com.xiaomi.linden.hadoop.indexing.reduce.ShardWriter.java
License:Apache License
private void moveFromTempToPerm(Path temp, Path perm) throws IOException {
    FileStatus[] fileStatus = localFs.listStatus(temp);
    // move the files created in temp dir except segments_N and segments.gen
    for (int i = 0; i < fileStatus.length; i++) {
        Path path = fileStatus[i].getPath();
        String name = path.getName();
        try {
            if (!fs.exists(new Path(perm, name))) {
                fs.copyFromLocalFile(path, new Path(perm, name));
            } else {
                moveToTrash(conf, perm);
                fs.copyFromLocalFile(path, new Path(perm, name));
            }
        } catch (Exception e) {
            logger.error("Exception in moveFromTempToPerm", e);
        }
    }
}
From source file:com.yolodata.tbana.hadoop.mapred.shuttl.bucket.HadoopBucketFactory.java
License:Open Source License
public static HadoopBucket createUsingPathToBucket(Path path, Index index) {
    return new HadoopBucket(path, index, path.getName());
}
From source file:com.yolodata.tbana.hadoop.mapred.shuttl.bucket.search.BucketFinderTest.java
License:Open Source License
private Index createIndexWithBuckets() throws IOException {
    Path root = FileSystemTestUtils.createEmptyDir(fs);
    String[] directories = { "db_28800000_28810000_index", "db_28810001_28820000_index",
            "db_28820001_28830000_index" };
    FileSystemTestUtils.createDirectories(fs, root, directories);
    return new Index(root.toString(), root.getName());
}
From source file:com.yolodata.tbana.hadoop.mapred.shuttl.index.Index.java
License:Open Source License
public Index(Path path) {
    this.path = path.toString();
    this.name = path.getName();
}
From source file:com.yolodata.tbana.util.search.filter.BucketFilter.java
License:Open Source License
public boolean accept(Path path, SearchFilter dependency) throws IOException {
    if (!dependency.accept(path.toString())) {
        return false;
    }
    return validateBucketName(path.getName());
}
From source file:com.yolodata.tbana.util.search.filter.NameFilter.java
License:Open Source License
@Override
public boolean accept(String path) throws IOException {
    Path p = new Path(path);
    return matchesAnyName(p.getName());
}