Example usage for org.apache.hadoop.fs FileSystem rename

Introduction

In this page you can find the example usage for org.apache.hadoop.fs FileSystem rename.

Prototype

public abstract boolean rename(Path src, Path dst) throws IOException;

Source Link

Document

Renames Path src to Path dst.

Usage

From source file:com.linkedin.mr_kluj.StagedOutputJob.java

License:Apache License

@Override
public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", stagingPrefix, System.currentTimeMillis()));

    FileOutputFormat.setOutputPath(this, stagedPath);

    final Thread hook = new Thread(new Runnable() {
        public void run() {
            try {
                killJob();/*from  w  w  w  .  j  a va 2s .  c  o  m*/
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });

    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal = super.waitForCompletion(verbose);

    Runtime.getRuntime().removeShutdownHook(hook);

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        fs.mkdirs(actualOutputPath);

        if (getConfiguration().getBoolean("com.linkedin.mr_kluj.delete.output.path", true)) {
            log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
            fs.delete(actualOutputPath, true);
        }

        for (FileStatus fileStatus : FSUtils.spiderPath(fs, stagedPath)) {
            Path thisStagedPath = fileStatus.getPath();
            Path thisActualOutputPath = new Path(fileStatus.getPath().toString().replace(stagedPath.toString(),
                    actualOutputPath.toString()));

            log.info(String.format("Moving from staged path[%s] to final resting place[%s]", thisStagedPath,
                    thisActualOutputPath));
            fs.mkdirs(thisActualOutputPath.getParent());
            if (!fs.rename(thisStagedPath, thisActualOutputPath)) {
                log.info("Rename failed!");
                return false;
            }
        }

        return true;
    }

    log.warn("retVal was false for some reason...");
    return retVal;
}

From source file:com.linkedin.pinot.hadoop.job.SegmentCreationJob.java

License:Apache License

public void run() throws Exception {
    LOGGER.info("Starting {}", getClass().getSimpleName());

    FileSystem fs = FileSystem.get(getConf());
    Path inputPathPattern = new Path(_inputSegmentDir);

    if (fs.exists(new Path(_stagingDir))) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(new Path(_stagingDir), true);
    }/*ww w .  ja  va  2s .co m*/
    fs.mkdirs(new Path(_stagingDir));
    fs.mkdirs(new Path(_stagingDir + "/input/"));

    if (fs.exists(new Path(_outputDir))) {
        LOGGER.warn("Found the output folder, deleting it");
        fs.delete(new Path(_outputDir), true);
    }
    fs.mkdirs(new Path(_outputDir));

    List<FileStatus> inputDataFiles = new ArrayList<FileStatus>();
    FileStatus[] fileStatusArr = fs.globStatus(inputPathPattern);
    for (FileStatus fileStatus : fileStatusArr) {
        inputDataFiles.addAll(getDataFilesFromPath(fs, fileStatus.getPath()));
    }

    for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
        FileStatus file = inputDataFiles.get(seqId);
        String completeFilePath = " " + file.getPath().toString() + " " + seqId;
        Path newOutPutFile = new Path((_stagingDir + "/input/"
                + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
        FSDataOutputStream stream = fs.create(newOutPutFile);
        stream.writeUTF(completeFilePath);
        stream.flush();
        stream.close();
    }

    Job job = Job.getInstance(getConf());

    job.setJarByClass(SegmentCreationJob.class);
    job.setJobName(_jobName);

    job.setMapperClass(HadoopSegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(_stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(_stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set("data.schema", new ObjectMapper().writeValueAsString(_dataSchema));

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);
    for (Object key : _properties.keySet()) {
        job.getConfiguration().set(key.toString(), _properties.getProperty(key.toString()));
    }

    if (_depsJarPath != null && _depsJarPath.length() > 0) {
        addDepsJarToDistributedCache(new Path(_depsJarPath), job);
    }

    // Submit the job for execution.
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", _stagingDir + "/output/segmentTar", _outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(_stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(_outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", _stagingDir);
    fs.delete(new Path(_stagingDir), true);
}

From source file:com.linkedin.thirdeye.bootstrap.segment.create.SegmentCreationPhaseJob.java

License:Apache License

public Job run() throws Exception {

    Job job = Job.getInstance(getConf());

    job.setJarByClass(SegmentCreationPhaseJob.class);
    job.setJobName(name);//w  ww  .j a v  a 2 s . com

    FileSystem fs = FileSystem.get(getConf());

    Configuration configuration = job.getConfiguration();

    String schemaPath = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEMA_PATH);
    LOGGER.info("Schema path : {}", schemaPath);
    String configPath = getAndSetConfiguration(configuration, SEGMENT_CREATION_CONFIG_PATH);
    LOGGER.info("Config path : {}", configPath);
    Schema dataSchema = createSchema(configPath);
    LOGGER.info("Data schema : {}", dataSchema);
    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
    LOGGER.info("Input path : {}", inputSegmentDir);
    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputDir);
    String stagingDir = new File(outputDir, TEMP).getAbsolutePath();
    LOGGER.info("Staging dir : {}", stagingDir);
    String tableName = getAndSetConfiguration(configuration, SEGMENT_CREATION_SEGMENT_TABLE_NAME);
    LOGGER.info("Segment table name : {}", tableName);

    // Create temporary directory
    if (fs.exists(new Path(stagingDir))) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(new Path(stagingDir), true);
    }
    fs.mkdirs(new Path(stagingDir));
    fs.mkdirs(new Path(stagingDir + "/input/"));

    if (fs.exists(new Path(outputDir))) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(new Path(outputDir), true);
    }
    fs.mkdirs(new Path(outputDir));

    Path inputPathPattern = new Path(inputSegmentDir);
    List<FileStatus> inputDataFiles = Arrays.asList(fs.listStatus(inputPathPattern));
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((stagingDir + "/input/"
                    + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_')
                    + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set("data.schema", OBJECT_MAPPER.writeValueAsString(dataSchema));
    if (!fs.exists(new Path(schemaPath))) {
        OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValue(fs.create(new Path(schemaPath), false),
                dataSchema);
    }

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);
    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", stagingDir);
    fs.delete(new Path(stagingDir), true);

    return job;
}

From source file:com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob.java

License:Apache License

public Job run() throws Exception {

    Job job = Job.getInstance(getConf());

    job.setJarByClass(SegmentCreationPhaseJob.class);
    job.setJobName(name);/*from  www  .j a v a 2s  .  c  o m*/

    FileSystem fs = FileSystem.get(getConf());

    Configuration configuration = job.getConfiguration();

    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
    LOGGER.info("Input path : {}", inputSegmentDir);
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
    LOGGER.info("Schema : {}", avroSchema);
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputDir);
    Path stagingDir = new Path(outputDir, TEMP);
    LOGGER.info("Staging dir : {}", stagingDir);
    String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
    LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
    String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
    LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
    String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
    LOGGER.info("Segment schedule : {}", schedule);
    String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
    configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
    LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));

    // Create temporary directory
    if (fs.exists(stagingDir)) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(stagingDir, true);
    }
    fs.mkdirs(stagingDir);
    fs.mkdirs(new Path(stagingDir + "/input/"));

    // Create output directory
    if (fs.exists(new Path(outputDir))) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(new Path(outputDir), true);
    }
    fs.mkdirs(new Path(outputDir));

    // Read input files
    List<FileStatus> inputDataFiles = new ArrayList<>();
    for (String input : inputSegmentDir.split(",")) {
        Path inputPathPattern = new Path(input);
        inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
    }
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((stagingDir + "/input/"
                    + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_')
                    + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);
    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", stagingDir);
    fs.delete(stagingDir, true);

    return job;
}

From source file:com.linkedin.whiteelephant.mapreduce.lib.job.StagedOutputJob.java

License:Apache License

@Override
public boolean waitForCompletion(boolean verbose)
        throws IOException, InterruptedException, ClassNotFoundException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);
    final Path stagedPath = new Path(String.format("%s/%s/staged", stagingPrefix, System.currentTimeMillis()));

    FileOutputFormat.setOutputPath(this, stagedPath);

    final Thread hook = new Thread(new Runnable() {
        @Override//from  w w  w .ja  v  a  2s .  c  om
        public void run() {
            try {
                killJob();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    });

    Runtime.getRuntime().addShutdownHook(hook);

    final boolean retVal = super.waitForCompletion(verbose);
    Runtime.getRuntime().removeShutdownHook(hook);

    if (retVal) {
        FileSystem fs = actualOutputPath.getFileSystem(getConfiguration());

        fs.mkdirs(actualOutputPath);

        log.info(String.format("Deleting data at old path[%s]", actualOutputPath));
        fs.delete(actualOutputPath, true);

        log.info(String.format("Moving from staged path[%s] to final resting place[%s]", stagedPath,
                actualOutputPath));
        return fs.rename(stagedPath, actualOutputPath);
    }

    log.warn("retVal was false for some reason...");
    return retVal;
}

From source file:com.liveramp.cascading_ext.FileSystemHelper.java

License:Apache License

/**
 * Safely renames a path by retrying the operation <code>numTries</code> times
 * and sleeping <code>delayBetweenTries</code> seconds between each try. If it
 * still fails, it throws an IOException.
 *
 * @param fs                the filesystem object
 * @param src               the directory to be renamed
 * @param dst               the new name of the directory
 * @param numTries          number of tries to attempt the operation
 * @param delayBetweenTries the sleep delta between tries in millis
 * @throws IOException/*from  w  w w.  j  a va2s .  co  m*/
 */
public static void safeRename(FileSystem fs, Path src, Path dst, int numTries, long delayBetweenTries)
        throws IOException {
    while (numTries-- > 0) {
        if (fs.rename(src, dst)) {
            return;
        } else {
            try {
                Thread.sleep(delayBetweenTries);
            } catch (InterruptedException ie) {
                throw new RuntimeException(ie);
            }
        }
    }

    throw new IOException("Could not rename the file from \"" + src + "\" to \"" + dst + "\"!");
}

From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java

License:Apache License

public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger)
        throws IOException {
    if (!fs.exists(srcDir)) {
        return;/*  w w  w  .  ja v a  2 s .  c om*/
    }
    if (fs.exists(srcDir) && !fs.isDirectory(srcDir)) {
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
    fs.delete(srcDir);
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

/**
 * Closes the current part file.//  w  w  w.ja  v a 2 s  . c  o  m
 *
 * <p>
 * This moves the current in-progress part file to a pending file and adds it to the list
 * of pending files in our bucket state.
 */
private void closeCurrentPartFile(BucketState<T> bucketState) throws Exception {
    if (bucketState.isWriterOpen) {
        bucketState.writer.close();
        bucketState.isWriterOpen = false;
    }

    if (bucketState.currentFile != null) {
        Path currentPartPath = new Path(bucketState.currentFile);
        Path inProgressPath = new Path(currentPartPath.getParent(),
                inProgressPrefix + currentPartPath.getName()).suffix(inProgressSuffix);
        Path pendingPath = new Path(currentPartPath.getParent(), pendingPrefix + currentPartPath.getName())
                .suffix(pendingSuffix);
        FileSystem fs = inProgressPath.getFileSystem(hadoopConf);
        fs.rename(inProgressPath, pendingPath);
        LOG.debug("Moving in-progress bucket {} to pending file {}", inProgressPath, pendingPath);
        bucketState.pendingFiles.add(currentPartPath.toString());
        bucketState.currentFile = null;
    }
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    synchronized (state.bucketStates) {
        Iterator<Map.Entry<String, BucketState<T>>> it = state.bucketStates.entrySet().iterator();
        while (it.hasNext()) {
            BucketState<T> bucketState = it.next().getValue();
            synchronized (bucketState.pendingFilesPerCheckpoint) {
                Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
                Set<Long> checkpointsToRemove = new HashSet<>();
                for (Long pastCheckpointId : pastCheckpointIds) {
                    if (pastCheckpointId <= checkpointId) {
                        LOG.debug("Moving pending files to final location for checkpoint {}", pastCheckpointId);
                        // All the pending files are buckets that have been completed but are waiting to be renamed
                        // to their final name
                        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                            Path finalPath = new Path(filename);
                            Path pendingPath = new Path(finalPath.getParent(),
                                    pendingPrefix + finalPath.getName()).suffix(pendingSuffix);

                            FileSystem fs = pendingPath.getFileSystem(hadoopConf);
                            fs.rename(pendingPath, finalPath);
                            LOG.debug(/*  w ww  .  j  av  a2s .  c o  m*/
                                    "Moving pending file {} to final location having completed checkpoint {}.",
                                    pendingPath, pastCheckpointId);
                        }
                        checkpointsToRemove.add(pastCheckpointId);
                    }
                }
                if (!bucketState.isWriterOpen && bucketState.pendingFiles.isEmpty()) {
                    // We've dealt with all the pending files and the writer for this bucket is not currently open.
                    // Therefore this bucket is currently inactive and we can remove it from our state.
                    it.remove();
                } else {
                    for (Long toRemove : checkpointsToRemove) {
                        bucketState.pendingFilesPerCheckpoint.remove(toRemove);
                    }
                }
            }
        }
    }
}

From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java

License:Apache License

@Override
public void restoreState(State<T> state) {
    this.state = state;

    FileSystem fs;
    try {/*from w  w w  .j  a  v  a  2 s  .c  om*/
        fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }

    for (BucketState<T> bucketState : state.bucketStates.values()) {
        // we can clean all the pending files since they where renamed to final files
        // after this checkpoint was successful
        bucketState.pendingFiles.clear();

        if (bucketState.currentFile != null) {
            // We were writing to a file when the last checkpoint occured. This file can either
            // be still in-progress or became a pending file at some point after the checkpoint.
            // Either way, we have to truncate it back to a valid state (or write a .valid-length)
            // file that specifies up to which length it is valid and rename it to the final name
            // before starting a new bucket file.
            Path partPath = new Path(bucketState.currentFile);
            try {
                Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                        .suffix(pendingSuffix);
                Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                        .suffix(inProgressSuffix);

                if (fs.exists(partPendingPath)) {
                    LOG.debug(
                            "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                            partPath);
                    // has been moved to pending in the mean time, rename to final location
                    fs.rename(partPendingPath, partPath);
                } else if (fs.exists(partInProgressPath)) {
                    LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                    // it was still in progress, rename to final path
                    fs.rename(partInProgressPath, partPath);
                } else if (fs.exists(partPath)) {
                    LOG.debug("In-Progress file {} was already moved to final location {}.",
                            bucketState.currentFile, partPath);
                } else {
                    LOG.debug(
                            "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                    + "it was moved to final location by a previous snapshot restore",
                            bucketState.currentFile);
                }

                refTruncate = reflectTruncate(fs);
                // truncate it or write a ".valid-length" file to specify up to which point it is valid
                if (refTruncate != null) {
                    LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                    // some-one else might still hold the lease from a previous try, we are
                    // recovering, after all ...
                    if (fs instanceof DistributedFileSystem) {
                        DistributedFileSystem dfs = (DistributedFileSystem) fs;
                        LOG.debug("Trying to recover file lease {}", partPath);
                        dfs.recoverLease(partPath);
                        boolean isclosed = dfs.isFileClosed(partPath);
                        StopWatch sw = new StopWatch();
                        sw.start();
                        while (!isclosed) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            isclosed = dfs.isFileClosed(partPath);
                        }
                    }
                    Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                            bucketState.currentFileValidLength);
                    if (!truncated) {
                        LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                        // we must wait for the asynchronous truncate operation to complete
                        StopWatch sw = new StopWatch();
                        sw.start();
                        long newLen = fs.getFileStatus(partPath).getLen();
                        while (newLen != bucketState.currentFileValidLength) {
                            if (sw.getTime() > asyncTimeout) {
                                break;
                            }
                            try {
                                Thread.sleep(500);
                            } catch (InterruptedException e1) {
                                // ignore it
                            }
                            newLen = fs.getFileStatus(partPath).getLen();
                        }
                        if (newLen != bucketState.currentFileValidLength) {
                            throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                    + bucketState.currentFileValidLength + " is " + newLen + ".");
                        }
                    }

                } else {
                    LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                            bucketState.currentFileValidLength);
                    Path validLengthFilePath = new Path(partPath.getParent(),
                            validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                    if (!fs.exists(validLengthFilePath)) {
                        FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                        lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                        lengthFileOut.close();
                    }
                }

                // Now that we've restored the bucket to a valid state, reset the current file info
                bucketState.currentFile = null;
                bucketState.currentFileValidLength = -1;
            } catch (IOException e) {
                LOG.error("Error while restoring BucketingSink state.", e);
                throw new RuntimeException("Error while restoring BucketingSink state.", e);
            } catch (InvocationTargetException | IllegalAccessException e) {
                LOG.error("Cound not invoke truncate.", e);
                throw new RuntimeException("Could not invoke truncate.", e);
            }
        }

        LOG.debug("Clearing pending/in-progress files.");

        // Move files that are confirmed by a checkpoint but did not get moved to final location
        // because the checkpoint notification did not happen before a failure

        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        LOG.debug("Moving pending files to final location on restore.");
        for (Long pastCheckpointId : pastCheckpointIds) {
            // All the pending files are buckets that have been completed but are waiting to be renamed
            // to their final name
            for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                Path finalPath = new Path(filename);
                Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                        .suffix(pendingSuffix);

                try {
                    if (fs.exists(pendingPath)) {
                        LOG.debug(
                                "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                                pendingPath, pastCheckpointId);
                        fs.rename(pendingPath, finalPath);
                    }
                } catch (IOException e) {
                    LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}",
                            pendingPath, finalPath, e);
                    throw new RuntimeException(
                            "Error while renaming pending file " + pendingPath + " to final path " + finalPath,
                            e);
                }
            }
        }

        synchronized (bucketState.pendingFilesPerCheckpoint) {
            bucketState.pendingFilesPerCheckpoint.clear();
        }
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}