Example usage for org.apache.hadoop.fs FileUtil copy

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileUtil copy.

Prototype

public static boolean copy(FileSystem srcFS, Path src, FileSystem dstFS, Path dst, boolean deleteSource,
        Configuration conf) throws IOException 

Document

Copy files between FileSystems.
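
For orientation, here is a minimal, self-contained sketch of calling FileUtil.copy directly. The file paths and configuration are placeholders chosen for illustration; they are not taken from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class FileUtilCopyExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Placeholder paths for illustration only.
        Path src = new Path("/tmp/source.txt");
        Path dst = new Path("/tmp/dest.txt");

        // deleteSource = false keeps the source; pass true to emulate a move.
        // The boolean return value indicates whether the copy succeeded.
        boolean copied = FileUtil.copy(fs, src, fs, dst, false, conf);
        System.out.println("Copy succeeded: " + copied);
    }
}

The source and destination FileSystem arguments may differ, which is how several of the examples below copy between HDFS and the local filesystem.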

Usage

From source file:com.quantcast.qfs.hadoop.QuantcastFileSystem.java

License:Apache License

public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(this, src, localFs, dst, delSrc, getConf());
}

From source file:com.redsqirl.workflow.server.connect.HDFSInterface.java

License:Open Source License

/**
 * Create a copy of a path.
 * 
 * @param in_path the path to copy from
 * @param out_path the path to copy to
 * @return an error message, or null on success
 * @throws RemoteException
 */
@Override
public String copy(String in_path, String out_path) throws RemoteException {
    String error = null;
    try {
        Path oldP = new Path(in_path), newP = new Path(out_path);
        HdfsFileChecker hChN = new HdfsFileChecker(newP);
        HdfsFileChecker hChO = new HdfsFileChecker(oldP);
        if (!hChN.exists() && hChO.exists()) {
            FileSystem fs = NameNodeVar.getFS();
            FileUtil.copy(fs, oldP, fs, newP, false, NameNodeVar.getConf());
        } else {
            error = LanguageManagerWF.getText("HdfsInterface.ouputexists");
        }
        // hChN.close();
        // hChO.close();

    } catch (IOException e) {
        logger.error(e.getMessage());
        error = LanguageManagerWF.getText("HdfsInterface.errormove", new Object[] { e.getMessage() });
    }
    if (error != null) {
        logger.debug(error);
    }
    return error;
}

From source file:com.redsqirl.workflow.server.Workflow.java

License:Open Source License

/**
 * Save the xml part of a workflow.
 * 
 * @param filePath
 *            the xml file path to write in.
 * @return null if OK, or a description of the error.
 * @throws RemoteException
 */
public String save(final String filePath) throws RemoteException {
    String error = null;
    File file = null;

    try {
        String[] path = filePath.split("/");
        String fileName = path[path.length - 1];
        String tempPath = WorkflowPrefManager.getPathuserpref() + "/tmp/" + fileName + "_"
                + RandomString.getRandomName(4);
        file = new File(tempPath);
        logger.debug("Save xml: " + file.getAbsolutePath());
        file.getParentFile().mkdirs();
        Document doc = null;
        try {
            doc = saveInXML();
        } catch (IOException e) {
            error = e.getMessage();
        }

        if (error == null) {
            logger.debug("write the file...");
            // write the content into xml file
            logger.debug("Check Null text nodes...");
            XmlUtils.checkForNullTextNodes(doc.getDocumentElement(), "");
            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
            DOMSource source = new DOMSource(doc);
            StreamResult result = new StreamResult(file);
            logger.debug(4);
            transformer.transform(source, result);
            logger.debug(5);

            FileSystem fs = NameNodeVar.getFS();
            fs.moveFromLocalFile(new Path(tempPath), new Path(filePath));

            if (filePath.startsWith(WorkflowPrefManager.getBackupPath())) {
                saved = false;
                this.path = null;
            } else {
                this.path = filePath;
                saved = true;
                changed = false;
                String bckPath = getBackupName(createBackupDir());
                FileUtil.copy(fs, new Path(filePath), fs, new Path(bckPath), false, NameNodeVar.getConf());
                cleanUpBackup();
            }

            logger.debug("file saved successfully");
        }
    } catch (Exception e) {
        error = LanguageManagerWF.getText("workflow.writeXml", new Object[] { e.getMessage() });
        logger.error(error, e);
        try {
            logger.debug("Attempt to delete " + file.getAbsolutePath());
            file.delete();
        } catch (Exception e1) {
        }
    }
    Log.flushAllLogs();

    return error;
}

From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("indexSizes", null, "index sizes Path", true);
    addOption("startIteration", null, "start iteration number", String.valueOf(0));
    addOption("oldM", null, "old M matrix Path.", null);
    addOption("largeUserFeatures", null, "true if user x feature matrix is too large for memory",
            String.valueOf(true));
    addOption("rmseCurve", null, "true if want to extract rmse curve", String.valueOf(true));
    addOption("cleanUp", null, "true if want to clean up temporary matrix", String.valueOf(true));
    addOption("useTransform", null, "true if using logarithm as transform", String.valueOf(true));
    addOption("rateIndex", null, "0 based index for rate column in input file.", String.valueOf(2));
    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    try {
        /** step 0: fetch dimensions of the training set matrix. */
        Map<String, String> indexSizesTmp = ALSMatrixUtil.fetchTextFiles(new Path(getOption("indexSizes")),
                DELIMETER, Arrays.asList(0), Arrays.asList(1));

        numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures"));
        numIterations = Integer.parseInt(parsedArgs.get("--numIterations"));
        lambda = Double.parseDouble(parsedArgs.get("--lambda"));
        alpha = Double.parseDouble(parsedArgs.get("--alpha"));
        implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback"));
        numUsers = Integer.parseInt(indexSizesTmp.get("0"));
        numItems = Integer.parseInt(indexSizesTmp.get("1"));

        numTaskTrackers = HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks;
        startIteration = Integer.parseInt(parsedArgs.get("--startIteration"));
        largeUserFeatures = Boolean.parseBoolean(getOption("largeUserFeatures"));
        useRMSECurve = Boolean.parseBoolean(getOption("rmseCurve"));
        cleanUp = Boolean.parseBoolean(getOption("cleanUp"));
        useTransform = Boolean.parseBoolean(getOption("useTransform"));
        rateIndex = Integer.parseInt(getOption("rateIndex"));
        FileSystem fs = FileSystem.get(getConf());
        if (!fs.exists(pathToTransformed())) {
            if (useTransform) {
                // transform price into rating
                Job transformJob = prepareJob(getInputPath(), pathToTransformed(), TextInputFormat.class,
                        TransformColumnValueMapper.class, NullWritable.class, Text.class,
                        TextOutputFormat.class);
                transformJob.waitForCompletion(true);
            } else {

                FileUtil.copy(FileSystem.get(getConf()), getInputPath(), FileSystem.get(getConf()),
                        pathToTransformed(), false, getConf());
            }
        }
        /*
        if (getOption("oldM") != null) {
          runOnetimeSolver(pathToTransformed(), getOutputPath("U"), new Path(getOption("oldM")));
          return 0;
        }
        */
        /*
         * compute the factorization A = U M'
         *
         * where A (users x items) is the matrix of known ratings
         *       U (users x features) is the representation of users in the feature space
         *       M (items x features) is the representation of items in the feature space
         */
        if (startIteration == 0) {
            if (!fs.exists(pathToItemRatings())) {
                // create A' 
                Job itemRatings = prepareJob(pathToTransformed(), pathToItemRatings(), TextInputFormat.class,
                        ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class,
                        VectorSumReducer.class, IntWritable.class, VectorWritable.class,
                        SequenceFileOutputFormat.class);
                itemRatings.setCombinerClass(VectorSumReducer.class);
                long matrixSizeExp = (long) (8L * numUsers * numFeatures * SAFE_MARGIN);
                long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT
                        / (long) HadoopClusterUtil.MAP_TASKS_PER_NODE;
                int numTaskPerDataNode = Math.max(1,
                        (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (double) matrixSizeExp));
                //log.info("matrix Size: " + matrixSizeExp + ", memorhThreshold: " + memoryThreshold + ", numTaskPerDataNode: " + numTaskPerDataNode);
                if (matrixSizeExp > memoryThreshold) {
                    //log.info("A: {}", numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                    int numReducer = Math.min(
                            numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()),
                            HadoopClusterUtil.getMaxMapTasks(getConf()));
                    //log.info("Number Of Reducer: " + numReducer);
                    itemRatings.setNumReduceTasks(numReducer);
                }

                itemRatings.waitForCompletion(true);
            }

            if (!fs.exists(pathToUserRatings())) {
                Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
                        IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class,
                        VectorWritable.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getNumberOfTaskTrackers(getConf()));
                userRatings.setCombinerClass(MergeVectorsCombiner.class);
                userRatings.setNumReduceTasks(HadoopClusterUtil.getMaxMapTasks(getConf()));
                userRatings.waitForCompletion(true);
            }
            if (!fs.exists(getOutputPath("userItemCnt"))) {
                // count item per user
                Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnt"),
                        SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class,
                        IntWritable.class, SequenceFileOutputFormat.class);
                userItemCntsJob.setJobName("user ratings count");
                userItemCntsJob.waitForCompletion(true);
            }

            if (!fs.exists(getTempPath("averageRatings"))) {
                //TODO this could be fiddled into one of the upper jobs
                Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
                        AverageRatingMapper.class, IntWritable.class, VectorWritable.class,
                        MergeVectorsReducer.class, IntWritable.class, VectorWritable.class);
                averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
                averageItemRatings.waitForCompletion(true);
            }
            if (!fs.exists(new Path(pathToM(-1), "part-m-00000"))) {
                Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf());

                /** create an initial M */
                initializeM(averageRatings);
            }
        }

        for (int currentIteration = startIteration; currentIteration < numIterations; currentIteration++) {
            DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1),
                    getTempPath("Mtemp/tmp-" + String.valueOf(currentIteration - 1) + "/M"), numItems,
                    numFeatures);
            curM.setConf(getConf());
            DistributedRowMatrix YtransposeY = curM.times(curM);
            /** broadcast M, read A row-wise, recompute U row-wise */
            log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
                    YtransposeY.getRowPath(), numItems, false);

            DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration),
                    getTempPath("Utmp/tmp-" + String.valueOf(currentIteration) + "/U"), numUsers, numFeatures);
            curU.setConf(getConf());
            DistributedRowMatrix XtransposeX = curU.times(curU);

            /** broadcast U, read A' row-wise, recompute M row-wise */
            log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
            runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
                    XtransposeX.getRowPath(), numUsers, largeUserFeatures);

            /** calculate RMSE on each updated matrix U, M and decide whether to iterate further */
            if (currentIteration > startIteration && useRMSECurve) {
                Pair<Integer, Double> UsquaredError = calculateMatrixDistanceSquared(
                        pathToU(currentIteration - 1), pathToU(currentIteration), currentIteration);
                Pair<Integer, Double> MsquaredError = calculateMatrixDistanceSquared(
                        pathToM(currentIteration - 1), pathToM(currentIteration), currentIteration);
                String currentRMSE = currentIteration + DELIMETER + UsquaredError.getFirst() + DELIMETER
                        + UsquaredError.getSecond() + DELIMETER + MsquaredError.getFirst() + DELIMETER
                        + MsquaredError.getSecond() + DefaultOptionCreator.NEWLINE;
                rmsePerIteration += currentRMSE;
                log.info("iteration {}: {}", currentIteration, currentRMSE);
            }
            if (currentIteration >= startIteration + 2 && cleanUp) {
                fs.deleteOnExit(pathToU(currentIteration - 2));
                fs.deleteOnExit(pathToM(currentIteration - 2));
            }
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return -1;
    } finally {
        if (useRMSECurve) {
            HadoopClusterUtil.writeToHdfs(getConf(), getOutputPath("RMSE"), rmsePerIteration);
        }
    }
}

From source file:com.splicemachine.fs.localfs.SpliceFileSystem.java

License:Apache License

@Override
public void copyFromLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(this, src, this, dst, delSrc, getConf());
}

From source file:com.splicemachine.fs.localfs.SpliceFileSystem.java

License:Apache License

@Override
public void copyToLocalFile(boolean delSrc, Path src, Path dst) throws IOException {
    FileUtil.copy(this, src, this, dst, delSrc, getConf());
}

From source file:com.splout.db.integration.HadoopIntegrationTest.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // Validate params etc
    JCommander jComm = new JCommander(this);
    jComm.setProgramName("Splout Hadoop Compatibility Integration Test");
    try {
        jComm.parse(args);
    } catch (ParameterException e) {
        System.err.println(e.getMessage());
        jComm.usage();
        System.exit(-1);
    }

    Path tmpHdfsPath = new Path(
            "tmp-" + HadoopIntegrationTest.class.getName() + "-" + System.currentTimeMillis());
    FileSystem fS = tmpHdfsPath.getFileSystem(getConf());
    fS.mkdirs(tmpHdfsPath);
    fS.mkdirs(new Path(tmpHdfsPath, "input"));
    fS.mkdirs(new Path(tmpHdfsPath, "output"));
    boolean isLocal = FileSystem.get(conf).equals(FileSystem.getLocal(conf));
    if (!isLocal) {
        SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
    }

    tmpHdfsPath = tmpHdfsPath.makeQualified(fS);

    Path pageCounts = new Path(input);
    FileUtil.copy(FileSystem.getLocal(getConf()), pageCounts, fS, new Path(tmpHdfsPath, "input"), false,
            getConf());

    SimpleGeneratorCMD generator = new SimpleGeneratorCMD();
    generator.setConf(getConf());
    if (generator.run(new String[] { "-tb", "pagecountsintegration", "-t", "pagecounts", "-i",
            tmpHdfsPath + "/input", "-o", tmpHdfsPath + "/output", "-s",
            "projectcode:string, pagename:string, visits:int, bytes:long", "-pby", "projectcode,pagename",
            "-sep", "\" \"", "-p", "2", "-e", engine }) < 0) {
        throw new RuntimeException("Generator failed!");
    }

    SploutClient client = new SploutClient(qnode);
    QNodeStatus status = client.overview();
    long previousVersion = -1;
    if (status.getTablespaceMap().get("pagecountsintegration") != null) {
        previousVersion = status.getTablespaceMap().get("pagecountsintegration").getVersion();
    }

    DeployerCMD deployer = new DeployerCMD();
    deployer.setConf(getConf());
    if (deployer.run(new String[] { "-r", "2", "-q", qnode, "-root", tmpHdfsPath + "/output", "-ts",
            "pagecountsintegration" }) < 0) {
        throw new RuntimeException("Deployer failed!");
    }

    long waitedSoFar = 0;

    status = client.overview();
    while (status.getTablespaceMap().get("pagecountsintegration") == null
            || previousVersion == status.getTablespaceMap().get("pagecountsintegration").getVersion()) {
        Thread.sleep(2000);
        waitedSoFar += 2000;
        status = client.overview();
        if (waitedSoFar > 90000) {
            throw new RuntimeException(
                    "Deploy must have failed in Splout's server. Waiting too much for it to complete.");
        }
    }

    previousVersion = status.getTablespaceMap().get("pagecountsintegration").getVersion();

    QueryStatus qStatus = client.query("pagecountsintegration", "*", "SELECT * FROM pagecounts;", null);
    System.out.println(qStatus.getResult());

    if (qStatus.getResult() == null) {
        throw new RuntimeException("Something failed as query() is returning null!");
    }

    System.out.println("Everything fine.");
    return 1;
}

From source file:com.tdunning.plume.local.lazy.MapRedExecutor.java

License:Apache License

/**
 * This method can be called to execute a {@link PlumeWorkflow} by using Hadoop Map-Reduce implementation.
 * It will build the execution tree, optimize it and convert each MSCR step into a MapRed job. 
 * It will launch MSCR jobs in parallel, using a ThreadPool, whenever it is allowable to do so. If one MSCR fails,
 * the whole workflow is canceled. Because it stores the result in a temporary folder, it only flushes the final
 * result to the given output folder if the workflow has executed successfully.
 * 
 * @param workFlow The {@link PlumeWorkflow} to execute 
 * @param outputTo Output folder where the result of the work flow will be stored if executed successfully
 * 
 * @throws IOException If the work flow had to be canceled
 * @throws InterruptedException 
 */
public void execute(PlumeWorkflow workFlow, String outputTo) throws IOException, InterruptedException {
    Optimizer optimizer = new Optimizer();
    ExecutionStep step = optimizer.optimize(workFlow);
    int nStep = 0;
    final String workFlowId = workFlow.getClass().getName() + "-" + System.currentTimeMillis();
    do {
        nStep++;
        log.info("Begin execution step " + nStep + " for workflow " + workFlow.getClass().getName());
        // Create a latch to mark the end of a concurrent step where all MSCRs can be executed in parallel
        final CountDownLatch latch = new CountDownLatch(step.mscrSteps.size());
        // Create a signal that can be flagged if one of the MSCRs fail to abort all the workFlow
        // - I have chosen an AtomicBoolean in case this flag can be re-set to false under some circumstance -
        final AtomicBoolean abort = new AtomicBoolean(false);
        // For each MSCR that can be executed concurrently...
        for (final MSCR mscr : step.mscrSteps) {
            final String workFlowOutputPath = tmpOutputFolder + "/" + workFlowId;
            final String jobId = workFlowId + "/" + mscr.getId();
            final String jobOutputPath = tmpOutputFolder + "/" + jobId;
            log.info("Triggering execution of jobId " + jobId + ". Its output will be saved to "
                    + jobOutputPath);
            // ... Get its MapRed Job
            final Job job = getMapRed(mscr, workFlow, workFlowOutputPath, jobOutputPath);
            final FileSystem fS = FileSystem.getLocal(job.getConfiguration());
            // ... Submit it to the ThreadPool
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        job.waitForCompletion(true);
                        // job completed successfully - materialize outputs
                        log.info("jobId " + jobId + " completed successfully, now materializing outputs.");
                        for (Map.Entry<PCollection<?>, Integer> entry : mscr.getNumberedChannels().entrySet()) {
                            LazyCollection<?> oCol = (LazyCollection<?>) mscr.getOutputChannels()
                                    .get(entry.getKey()).output;
                            // Move this output to somewhere recognizable - this executor's tmp folder + this PCollection's Plume Id
                            // This way, mappers that read unmaterialized collections will know where to find intermediate states.
                            FileStatus[] files = fS.listStatus(new Path(jobOutputPath));
                            Path materializedPath = new Path(workFlowOutputPath + "/" + oCol.getPlumeId());
                            fS.mkdirs(materializedPath);
                            for (FileStatus file : files) {
                                if (file.getPath().getName().startsWith(entry.getValue() + "-r-")) {
                                    FileUtil.copy(fS, file.getPath(), fS, materializedPath, false,
                                            job.getConfiguration());
                                    oCol.setFile(materializedPath.toString());
                                }
                            }
                            log.info(
                                    "Materialized plume output " + oCol.getPlumeId() + " to " + oCol.getFile());
                        }
                    } catch (IOException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (InterruptedException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } catch (ClassNotFoundException e) {
                        log.warn("One Job failed: " + jobId + ", current Workflow will be aborted ", e);
                        abort.set(true); // Flag the premature end of this workflow
                    } finally {
                        latch.countDown(); // Count down under any circumstance
                    }
                }
            });
        }
        latch.await(); // wait until all MSCRs from this step are completed
        if (abort.get()) {
            throw new IOException("Current Workflow was aborted");
        }
        step = step.nextStep;
    } while (step != null);
    log.info("Workflow ended correctly.");
    // Move temporary result to where API user wants to: WARN: Local-specific implementation
    Files.move(new File(tmpOutputFolder + "/" + workFlowId), new File(outputTo));
}

From source file:com.trendmicro.hdfs.webdav.HDFSResource.java

License:Apache License

@Override
public void copy(final DavResource resource, final boolean shallow) throws DavException {
    if (!exists()) {
        throw new DavException(DavServletResponse.SC_NOT_FOUND);
    }
    if (!shallow || !isCollection()) {
        final HDFSResource dfsResource = (HDFSResource) resource;
        final Path destPath = dfsResource.getPath();
        if (LOG.isDebugEnabled()) {
            LOG.debug("Copying '" + path.toUri().getPath() + "' to '" + destPath.toUri().getPath() + "'");
        }
        try {
            user.doAs(new PrivilegedExceptionAction<Void>() {
                public Void run() throws Exception {
                    FileSystem fs = FileSystem.get(conf);
                    FileUtil.copy(fs, path, fs, destPath, false, conf);
                    return null;
                }
            });
        } catch (IOException e) {
            throw new RuntimeException(e);
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
        return;
    }
    // TODO: Currently no support for shallow copy; however, this is
    // only relevant if the source resource is a collection
    throw new DavException(DavServletResponse.SC_FORBIDDEN, "Shallow copies are not supported");
}

From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java

License:Apache License

public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir,
        final boolean shouldAssumeDatePartitioning) throws IOException {
    FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration());
    final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration());
    final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir);
    final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata,
            tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants());
    // Get the latest commit
    Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline()
            .filterCompletedInstants().lastInstant();
    if (!latestCommit.isPresent()) {
        logger.warn("No commits present. Nothing to snapshot");
        return;
    }
    final String latestCommitTimestamp = latestCommit.get().getTimestamp();
    logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.",
            latestCommitTimestamp));

    List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning);
    if (partitions.size() > 0) {
        logger.info(String.format("The job needs to copy %d partitions.", partitions.size()));

        // Make sure the output directory is empty
        Path outputPath = new Path(outputDir);
        if (fs.exists(outputPath)) {
            logger.warn(
                    String.format("The output path %s targetBasePath already exists, deleting", outputPath));
            fs.delete(new Path(outputDir), true);
        }

        jsc.parallelize(partitions, partitions.size()).flatMap(partition -> {
            // Only take latest version files <= latestCommit.
            FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get());
            List<Tuple2<String, String>> filePaths = new ArrayList<>();
            Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition,
                    latestCommitTimestamp);
            dataFiles.forEach(
                    hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath())));

            // also need to copy over partition metadata
            Path partitionMetaFile = new Path(new Path(baseDir, partition),
                    HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE);
            if (fs1.exists(partitionMetaFile)) {
                filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString()));
            }

            return filePaths.iterator();
        }).foreach(tuple -> {
            String partition = tuple._1();
            Path sourceFilePath = new Path(tuple._2());
            Path toPartitionPath = new Path(outputDir, partition);
            FileSystem ifs = FSUtils.getFs(baseDir, serConf.get());

            if (!ifs.exists(toPartitionPath)) {
                ifs.mkdirs(toPartitionPath);
            }
            FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()), false,
                    ifs.getConf());
        });

        // Also copy the .commit files
        logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp));
        FileStatus[] commitFilesToCopy = fs.listStatus(
                new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> {
                    if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) {
                        return true;
                    } else {
                        String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName());
                        return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp,
                                HoodieTimeline.LESSER_OR_EQUAL);
                    }
                });
        for (FileStatus commitStatus : commitFilesToCopy) {
            Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/"
                    + commitStatus.getPath().getName());
            if (!fs.exists(targetFilePath.getParent())) {
                fs.mkdirs(targetFilePath.getParent());
            }
            if (fs.exists(targetFilePath)) {
                logger.error(String.format("The target output commit file (%s targetBasePath) already exists.",
                        targetFilePath));
            }
            FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf());
        }
    } else {
        logger.info("The job has 0 partition to copy.");
    }

    // Create the _SUCCESS tag
    Path successTagPath = new Path(outputDir + "/_SUCCESS");
    if (!fs.exists(successTagPath)) {
        logger.info(String.format("Creating _SUCCESS under targetBasePath: $s", outputDir));
        fs.createNewFile(successTagPath);
    }
}