List of usage examples for org.apache.hadoop.fs FileUtil copy
public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource, Configuration conf) throws IOException
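For orientation, here is a minimal, self-contained sketch of calling this overload directly to copy a file from the local filesystem onto the default filesystem. The paths ("/tmp/input.txt", "/user/example/input.txt") are hypothetical placeholders rather than values taken from the examples below; the call returns false if the copy could not be completed.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem localFs = FileSystem.getLocal(conf); // source: local filesystem
        FileSystem dstFs = FileSystem.get(conf);        // destination: default filesystem (e.g. HDFS)
        Path src = new Path("/tmp/input.txt");          // hypothetical source path
        Path dst = new Path("/user/example/input.txt"); // hypothetical destination path
        // deleteSource = false keeps the source file; passing true turns the copy into a move
        boolean copied = FileUtil.copy(localFs, src, dstFs, dst, false, conf);
        if (!copied) {
            throw new IOException("FileUtil.copy reported failure for " + src);
        }
    }
}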
From source file:com.quantcast.qfs.hadoop.QuantcastFileSystem.java
License:Apache License
public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(this, src, localFs, dst, delSrc, getConf());
}
From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java
License:Open Source License
/**
 * Create a copy of a path
 *
 * @param in_path
 * @param out_path
 * @return Error Message
 * @throws RemoteException
 */
@Override
public String copy(String in_path, String out_path) throws RemoteException {
    String error = null;
    try {
        Path oldP = new Path(in_path), newP = new Path(out_path);
        HdfsFileChecker hChN = new HdfsFileChecker(newP);
        HdfsFileChecker hChO = new HdfsFileChecker(oldP);
        if (!hChN.exists() && hChO.exists()) {
            FileSystem fs = NameNodeVar.getFS();
            FileUtil.copy(fs, oldP, fs, newP, false, NameNodeVar.getConf());
        } else {
            error = LanguageManagerWF.getText("HdfsInterface.ouputexists");
        }
        // hChN.close();
        // hChO.close();
    } catch (IOException e) {
        logger.error(e.getMessage());
        error = LanguageManagerWF.getText("HdfsInterface.errormove", new Object[] { e.getMessage() });
    }
    if (error != null) {
        logger.debug(error);
    }
    return error;
}
From source file:com.redsqirl.workflow.server.Workflow.java
License:Open Source License
/**
 * Save the xml part of a workflow.
 *
 * @param filePath
 *            the xml file path to write in.
 * @return null if OK, or a description of the error.
 * @throws RemoteException
 */
public String save(final String filePath) throws RemoteException {
    String error = null;
    File file = null;
    try {
        String[] path = filePath.split("/");
        String fileName = path[path.length - 1];
        String tempPath = WorkflowPrefManager.getPathuserpref() + "/tmp/" + fileName + "_"
                + RandomString.getRandomName(4);
        file = new File(tempPath);
        logger.debug("Save xml: " + file.getAbsolutePath());
        file.getParentFile().mkdirs();
        Document doc = null;
        try {
            doc = saveInXML();
        } catch (IOException e) {
            error = e.getMessage();
        }
        if (error == null) {
            logger.debug("write the file...");
            // write the content into xml file
            logger.debug("Check Null text nodes...");
            XmlUtils.checkForNullTextNodes(doc.getDocumentElement(), "");
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            DOMSource source = new DOMSource(doc);
            StreamResult result = new StreamResult(file);
            logger.debug(4);
            transformer.transform(source, result);
            logger.debug(5);
            FileSystem fs = NameNodeVar.getFS();
            fs.moveFromLocalFile(new Path(tempPath), new Path(filePath));
            if (filePath.startsWith(WorkflowPrefManager.getBackupPath())) {
                saved = false;
                this.path = null;
            } else {
                this.path = filePath;
                saved = true;
                changed = false;
                String bckPath = getBackupName(createBackupDir());
                FileUtil.copy(fs, new Path(filePath), fs, new Path(bckPath), false, NameNodeVar.getConf());
                cleanUpBackup();
            }
            logger.debug("file saved successfully");
        }
    } catch (Exception e) {
        error = LanguageManagerWF.getText("workflow.writeXml", new Object[] { e.getMessage() });
        logger.error(error, e);
        try {
            logger.debug("Attempt to delete " + file.getAbsolutePath());
            file.delete();
        } catch (Exception e1) {
        }
    }
    Log.flushAllLogs();
    return error;
}
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("indexSizes", null, "index sizes Path", true);
    addOption("startIteration", null, "start iteration number", String.valueOf(0));
    addOption("oldM", null, "old M matrix Path.", null);
    addOption("largeUserFeatures", null, "true if user x feature matrix is too large for memory",
            String.valueOf(true));
    addOption("rmseCurve", null, "true if want to extract rmse curve", String.valueOf(true));
    addOption("cleanUp", null, "true if want to clean up temporary matrix", String.valueOf(true));
    addOption("useTransform", null, "true if using logarithm as transform", String.valueOf(true));
    addOption("rateIndex", null, "0 based index for rate column in input file.", String.valueOf(2));
    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }
    try {
        /** step 0: fetch dimention of training set matrix. */
        Map<String, String> indexSizesTmp = ALSMatrixUtil.fetchTextFiles(new Path(getOption("indexSizes")),
                DELIMETER, Arrays.asList(0), Arrays.asList(1));
        numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
        numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
        lambda = Double.parseDouble(parsedArgs.get("--lambda"));
        alpha = Double.parseDouble(parsedArgs.get("--alpha"));
        implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
        numUsers = Integer.parseInt(indexSizesTmp.get("0"));
        numItems = Integer.parseInt(indexSizesTmp.get("1"));
        numTaskTrackers = HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks;
        startIteration = Integer.parseInt(parsedArgs.get("--startIteration"));
        largeUserFeatures = Boolean.parseBoolean(getOption("largeUserFeatures"));
        useRMSECurve = Boolean.parseBoolean(getOption("rmseCurve"));
        cleanUp = Boolean.parseBoolean(getOption("cleanUp"));
        useTransform = Boolean.parseBoolean(getOption("useTransform"));
        rateIndex = Integer.parseInt(getOption("rateIndex"));
        FileSystem fs = FileSystem.get(getConf());
        if (!fs.exists(pathToTransformed())) {
            if (useTransform) {
                // transform price into rating
                Job transformJob = prepareJob(getInputPath(), pathToTransformed(), TextInputFormat.class,
                        TransformColumnValueMapper.class, NullWritable.class, Text.class,
                        TextOutputFormat.class);
                transformJob.waitForCompletion(true);
            } else {
                FileUtil.copy(FileSystem.get(getConf()), getInputPath(), FileSystem.get(getConf()),
                        pathToTransformed(), false, getConf());
            }
        }
        /*
        if (getOption("oldM") != null) {
            runOnetimeSolver(pathToTransformed(), getOutputPath("U"), new Path(getOption("oldM")));
            return 0;
        }
        */
        /*
         * compute the factorization A = U M'
         *
         * where A (users x items) is the matrix of known ratings
         *       U (users x features) is the representation of users in the feature space
         *       M (items x features) is the representation of items in the feature space
         */
        if (startIteration == 0) {
            if (!fs.exists(pathToItemRatings())) {
                // create A'
                Job itemRatings = prepareJob(pathToTransformed(), pathToItemRatings(), TextInputFormat.class,
                        ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class,
                        VectorSumReducer.class, IntWritable.class, VectorWritable.class,
                        SequenceFileOutputFormat.class);
                itemRatings.setCombinerClass(VectorSumReducer.class);
                long matrixSizeExp = (long) (8L * numUsers * numFeatures * SAFE_MARGIN);
                long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT
                        / (long) HadoopClusterUtil.MAP_TASKS_PER_NODE;
                int numTaskPerDataNode = Math.max(1,
                        (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (double) matrixSizeExp));
                //log.info("matrix Size: " + matrixSizeExp + ", memorhThreshold: " + memoryThreshold + ", numTaskPerDataNode: " + numTaskPerDataNode);
                if (matrixSizeExp > memoryThreshold) {
                    //log.info("A: {}", numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                    int numReducer = Math.min(
                            numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()),
                            HadoopClusterUtil.getMaxMapTasks(getConf()));
                    //log.info("Number Of Reducer: " + numReducer);
                    itemRatings.setNumReduceTasks(numReducer);
                }
                itemRatings.waitForCompletion(true);
            }
            if (!fs.exists(pathToUserRatings())) {
                Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
                        IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
                        VectorWritable.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                userRatings.setCombinerClass(MergeVectorsCombiner.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getMaxMapTasks(getConf()));
                userRatings.waitForCompletion(true);
            }
            if (!fs.exists(getOutputPath("userItemCnt"))) {
                // count item per user
                Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnt"),
                        SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class,
                        IntWritable.class, SequenceFileOutputFormat.class);
                userItemCntsJob.setJobName("user ratings count");
                userItemCntsJob.waitForCompletion(true);
            }
            if (!fs.exists(getTempPath("averageRatings"))) {
                //TODO this could be fiddled into one of the upper jobs
                Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
                        AverageRatingMapper.class, IntWritable.class, VectorWritable.class,
                        MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
                averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
                averageItemRatings.waitForCompletion(true);
            }
            if (!fs.exists(new Path(pathToM(-1), "part-m-00000"))) {
                Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf());
                /** create an initial M */
                initializeM(averageRatings);
            }
        }
        for (int currentIteration = startIteration; currentIteration < numIterations; currentIteration++) {
            DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1),
                    getTempPath("Mtemp/tmp-" + String.valueOf(currentIteration - 1) + "/M"), numItems,
                    numFeatures);
            curM.setConf(getConf());
            DistributedRowMatrix YtransposeY = curM.times(curM);
            /** broadcast M, read A row-wise, recompute U row-wise */
            log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                    YtransposeY.getRowPath(), numItems, false);
            DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration),
                    getTempPath("Utmp/tmp-" + String.valueOf(currentIteration) + "/U"), numUsers, numFeatures);
            curU.setConf(getConf());
            DistributedRowMatrix XtransposeX = curU.times(curU);
            /** broadcast U, read A' row-wise, recompute M row-wise */
            log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                    XtransposeX.getRowPath(), numUsers, largeUserFeatures);
            /** calculate rmse on each updated matrix U, M and decide to further iteration */
            if (currentIteration > startIteration && useRMSECurve) {
                Pair<Integer, Double> UsquaredError = calculateMatrixDistanceSquared(
                        pathToU(currentIteration - 1), pathToU(currentIteration), currentIteration);
                Pair<Integer, Double> MsquaredError = calculateMatrixDistanceSquared(
                        pathToM(currentIteration - 1), pathToM(currentIteration), currentIteration);
                String currentRMSE = currentIteration + DELIMETER + UsquaredError.getFirst() + DELIMETER
                        + UsquaredError.getSecond() + DELIMETER + MsquaredError.getFirst() + DELIMETER
                        + MsquaredError.getSecond() + DefaultOptionCreator.NEWLINE;
                rmsePerIteration += currentRMSE;
                log.info("iteration {}: {}", currentIteration, currentRMSE);
            }
            if (currentIteration >= startIteration + 2 && cleanUp) {
                fs.deleteOnExit(pathToU(currentIteration - 2));
                fs.deleteOnExit(pathToM(currentIteration - 2));
            }
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (useRMSECurve) {
            HadoopClusterUtil.writeToHdfs(getConf(), getOutputPath("RMSE"), rmsePerIteration);
        }
    }
}
From source file:com.splicemachine.fs.localfs.SpliceFileSystem.java
License:Apache License
@Override
public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(this, src, this, dst, delSrc, getConf());
}
From source file:com.splicemachine.fs.localfs.SpliceFileSystem.java
License:Apache License
@Override
public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(this, src, this, dst, delSrc, getConf());
}
From source file:com.splout.db.integration.HadoopIntegrationTest.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // Validate params etc
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Splout Hadoop Compatibility Integration Test");
    try {
        jComm.parse(args);
    } catch (ParameterException e) {
        System.err.println(e.getMessage());
        jComm.usage();
        System.exit(-1);
    }
    Path tmpHdfsPath = new Path(
            "tmp-" + HadoopIntegrationTest.class.getName() + "-" + System.currentTimeMillis());
    FileSystem fS = tmpHdfsPath.getFileSystem(getConf());
    fS.mkdirs(tmpHdfsPath);
    fS.mkdirs(new Path(tmpHdfsPath, "input"));
    fS.mkdirs(new Path(tmpHdfsPath, "output"));
    boolean isLocal = FileSystem.get(conf).equals(FileSystem.getLocal(conf));
    if (!isLocal) {
        SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
    }
    tmpHdfsPath = tmpHdfsPath.makeQualified(fS);
    Path pageCounts = new Path(input);
    FileUtil.copy(FileSystem.getLocal(getConf()), pageCounts, fS, new Path(tmpHdfsPath, "input"), false,
            getConf());
    SimpleGeneratorCMD generator = new SimpleGeneratorCMD();
    generator.setConf(getConf());
    if (generator.run(new String[] { "-tb", "pagecountsintegration", "-t", "pagecounts", "-i",
            tmpHdfsPath + "/input", "-o", tmpHdfsPath + "/output", "-s",
            "projectcode:string, pagename:string, visits:int, bytes:long", "-pby", "projectcode,pagename",
            "-sep", "\" \"", "-p", "2", "-e", engine }) < 0) {
        throw new RuntimeException("Generator failed!");
    }
    SploutClient client = new SploutClient(qnode);
    QNodeStatus status = client.overview();
    long previousVersion = -1;
    if (status.getTablespaceMap().get("pagecountsintegration") != null) {
        previousVersion = status.getTablespaceMap().get("pagecountsintegration").getVersion();
    }
    DeployerCMD deployer = new DeployerCMD();
    deployer.setConf(getConf());
    if (deployer.run(new String[] { "-r", "2", "-q", qnode, "-root", tmpHdfsPath + "/output", "-ts",
            "pagecountsintegration" }) < 0) {
        throw new RuntimeException("Deployer failed!");
    }
    long waitedSoFar = 0;
    status = client.overview();
    while (status.getTablespaceMap().get("pagecountsintegration") == null
            || previousVersion == status.getTablespaceMap().get("pagecountsintegration").getVersion()) {
        Thread.sleep(2000);
        waitedSoFar += 2000;
        status = client.overview();
        if (waitedSoFar > 90000) {
            throw new RuntimeException(
                    "Deploy must have failed in Splout's server. Waiting too much for it to complete.");
        }
    }
    previousVersion = status.getTablespaceMap().get("pagecountsintegration").getVersion();
    QueryStatus qStatus = client.query("pagecountsintegration", "*", "SELECT * FROM pagecounts;", null);
    System.out.println(qStatus.getResult());
    if (qStatus.getResult() == null) {
        throw new RuntimeException("Something failed as query() is returning null!");
    }
    System.out.println("Everything fine.");
    return 1;
}
From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java
License:Apache License
/**
 * This method can be called to execute a {@link PlumeWorkflow} by using Hadoop Map-Reduce implementation.
 * It will build the execution tree, optimize it and convert each MSCR step into a MapRed job.
 * It will launch MSCR jobs in parallel when it is allowable to do so by using a ThreadPool. If one MSCR fails,
 * all the work flow is canceled. Because it stores the result in a temporary folder, it will only flush the final
 * result to the API parameter if the work flow has been executed successfully.
 *
 * @param workFlow The {@link PlumeWorkflow} to execute
 * @param outputTo Output folder where the result of the work flow will be stored if executed successfully
 *
 * @throws IOException If the work flow had to be canceled
 * @throws InterruptedException
 */
public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException {
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(workFlow);
    int nStep = 0;
    final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis();
    do {
        nStep++;
        log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName());
        // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel
        final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size());
        // Create a signal that can be flagged if one of the MSCRs fail to abort all the workFlow
        // - I have chosen an AtomicBoolean in case this flag can be re-set to false under some circumstance -
        final AtomicBoolean abort = new AtomicBoolean(false);
        // For each MSCR that can be executed concurrently...
        for (final MSCR mscr : step.mscrSteps) {
            final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId;
            final String jobId = workFlowId + "/" + mscr.getId();
            final String jobOutputPath = tmpOutputFolder + "/" + jobId;
            log.info("Triggering execution of jobId " + jobId + ". Its output will be saved to "
                    + jobOutputPath);
            // ... Get its MapRed Job
            final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath);
            final FileSystem fS = FileSystem.getLocal(job.getConfiguration());
            // ... Submit it to the ThreadPool
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        job.waitForCompletion(true);
                        // job completed successfully - materialize outputs
                        log.info("jobId " + jobId + " completed successfully, now materializing outputs.");
                        for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
                            LazyCollection<?> oCol = (LazyCollection<?>) mscr.getOutputChannels()
                                    .get(entry.getKey()).output;
                            // Move this output to somewhere recognizable - this executor's tmp folder + this PCollection's Plume Id
                            // This way, mappers that read unmaterialized collections will know where to find intermediate states.
                            FileStatus[] files = fS.listStatus(new Path(jobOutputPath));
                            Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId());
                            fS.mkdirs(materializedPath);
                            for (FileStatus file : files) {
                                if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) {
                                    FileUtil.copy(fS, file.getPath(), fS, materializedPath, false,
                                            job.getConfiguration());
                                    oCol.setFile(materializedPath.toString());
                                }
                            }
                            log.info("Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile());
                        }
                    } catch (IOException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (InterruptedException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (ClassNotFoundException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } finally {
                        latch.countDown(); // Count down under any circumstance
                    }
                }
            });
        }
        latch.await(); // wait until all MSCRs from this step are completed
        if (abort.get()) {
            throw new IOException("Current Workflow was aborted");
        }
        step = step.nextStep;
    } while (step != null);
    log.info("Workflow ended correctly.");
    // Move temporary result to where API user wants to: WARN: Local-specific implementation
    Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo));
}
From source file:com.trendmicro.hdfs.webdav.HDFSResource.java
License:Apache License
@Override
public void copy(final DavResource resource, final boolean shallow) throws DavException {
    if (!exists()) {
        throw new DavException(DavServletResponse.SC_NOT_FOUND);
    }
    if (!shallow || !isCollection()) {
        final HDFSResource dfsResource = (HDFSResource) resource;
        final Path destPath = dfsResource.getPath();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Copying '" + path.toUri().getPath() + "' to '" + destPath.toUri().getPath() + "'");
        }
        try {
            user.doAs(new PrivilegedExceptionAction<Void>() {
                public Void run() throws Exception {
                    FileSystem fs = FileSystem.get(conf);
                    FileUtil.copy(fs, path, fs, destPath, false, conf);
                    return null;
                }
            });
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return;
    }
    // TODO: Currently no support for shallow copy; however, this is
    // only relevant if the source resource is a collection
    throw new DavException(DavServletResponse.SC_FORBIDDEN, "Shallow copies are not supported");
}
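The example above runs FileUtil.copy inside a PrivilegedExceptionAction so the copy executes as the authenticated WebDAV user. A minimal sketch of that pattern, assuming a hypothetical proxy user name ("webuser") and purely illustrative paths, might look like this:

import java.security.PrivilegedExceptionAction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;

public class CopyAsUserExample {
    public static void main(String[] args) throws Exception {
        final Configuration conf = new Configuration();
        // "webuser" is a hypothetical proxy user; the real user must be configured to impersonate it
        UserGroupInformation ugi = UserGroupInformation.createProxyUser("webuser",
                UserGroupInformation.getCurrentUser());
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                FileSystem fs = FileSystem.get(conf);
                // source and destination are on the same filesystem; paths are illustrative placeholders
                FileUtil.copy(fs, new Path("/data/src.txt"), fs, new Path("/data/dst.txt"), false, conf);
                return null;
            }
        });
    }
}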
From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java
License:Apache License
public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
        final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
            tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.",
            latestCommitTimestamp));
    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));
        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(String.format("The output path %s targetBasePath already exists, deleting", outputPath));
            fs.delete(new Path(outputDir), true);
        }
        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition,
                    latestCommitTimestamp);
            dataFiles.forEach(hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));
            // also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                    HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }
            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());
            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()), false,
                    ifs.getConf());
        });
        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
                new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                    if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                        return true;
                    } else {
                        String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                        return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                                HoodieTimeline.LESSER_OR_EQUAL);
                    }
                });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                    + commitStatus.getPath().getName());
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                logger.error(String.format("The target output commit file (%s targetBasePath) already exists.",
                        targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partition to copy.");
    }
    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        logger.info(String.format("Creating _SUCCESS under targetBasePath: %s", outputDir));
        fs.createNewFile(successTagPath);
    }
}