List of usage examples for org.apache.hadoop.fs FileSystem rename
public abstract boolean rename(Path src, Path dst) throws IOException;
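Note on the contract before the examples: rename returns a boolean, and many implementations (HDFS in particular) signal failure by returning false rather than throwing, so callers are expected to check the return value. A minimal hedged sketch (paths are illustrative, not from any example below):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path src = new Path("/tmp/example/source.txt"); // illustrative paths
        Path dst = new Path("/tmp/example/renamed.txt");
        // rename often returns false on failure (e.g. missing src, or an
        // existing dst on HDFS) rather than throwing, so check the result.
        if (!fs.rename(src, dst)) {
            throw new IOException("Failed to rename " + src + " to " + dst);
        }
    }
}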
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private void moveFiles(Path srcDir, Path destDir) throws IOException {
    if (!srcDir.equals(destDir)) {
        // TODO: src and dest may be on different FS
        FileSystem fs = destDir.getFileSystem(jobConf);
        Preconditions.checkState(fs.exists(destDir) || fs.mkdirs(destDir),
                "Failed to create dest path " + destDir);
        if (overwrite) {
            // delete existing files for overwrite
            // TODO: support setting auto-purge?
            final boolean purge = true;
            // Note we assume the srcDir is a hidden dir, otherwise it will be deleted if it's a sub-dir of destDir
            FileStatus[] existingFiles = fs.listStatus(destDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
            if (existingFiles != null) {
                HiveShim hiveShim = HiveShimLoader.loadHiveShim();
                for (FileStatus existingFile : existingFiles) {
                    Preconditions.checkState(hiveShim.moveToTrash(fs, existingFile.getPath(), jobConf, purge),
                            "Failed to overwrite existing file " + existingFile);
                }
            }
        }
        FileStatus[] srcFiles = fs.listStatus(srcDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
        for (FileStatus srcFile : srcFiles) {
            Path srcPath = srcFile.getPath();
            Path destPath = new Path(destDir, srcPath.getName());
            int count = 1;
            // rename returns false while destPath is taken; probe with _copy_N suffixes
            while (!fs.rename(srcPath, destPath)) {
                String name = srcPath.getName() + "_copy_" + count;
                destPath = new Path(destDir, name);
                count++;
            }
        }
    }
}
From source file:org.apache.flink.hadoopcompatibility.mapreduce.HadoopOutputFormat.java
License:Apache License
/**
 * Commits the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    try {
        this.recordWriter.close(this.context);
    } catch (InterruptedException e) {
        throw new IOException("Could not close RecordWriter.", e);
    }
    if (this.fileOutputCommitter.needsTaskCommit(this.context)) {
        this.fileOutputCommitter.commitTask(this.context);
    }
    this.fileOutputCommitter.commitJob(this.context);
    Path outputPath = new Path(this.configuration.get("mapred.output.dir"));
    // rename tmp-* files to final name
    FileSystem fs = FileSystem.get(outputPath.toUri(), this.configuration);
    final Pattern p = Pattern.compile("tmp-(.)-([0-9]+)");
    // isDirectory does not work in hadoop 1
    if (fs.getFileStatus(outputPath).isDir()) {
        FileStatus[] files = fs.listStatus(outputPath);
        for (FileStatus f : files) {
            Matcher m = p.matcher(f.getPath().getName());
            if (m.matches()) {
                int part = Integer.valueOf(m.group(2));
                fs.rename(f.getPath(), new Path(outputPath.toString() + "/" + part));
            }
        }
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
/**
 * Closes the current part file.
 *
 * <p>This moves the current in-progress part file to a pending file and adds it to the list
 * of pending files in our bucket state.
 */
private void closeCurrentPartFile() throws Exception {
    if (isWriterOpen) {
        writer.close();
        isWriterOpen = false;
    }
    if (currentPartPath != null) {
        Path inProgressPath = new Path(currentPartPath.getParent(),
                inProgressPrefix + currentPartPath.getName()).suffix(inProgressSuffix);
        Path pendingPath = new Path(currentPartPath.getParent(),
                pendingPrefix + currentPartPath.getName()).suffix(pendingSuffix);
        FileSystem fs = inProgressPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
        fs.rename(inProgressPath, pendingPath);
        LOG.debug("Moving in-progress bucket {} to pending file {}", inProgressPath, pendingPath);
        this.bucketState.pendingFiles.add(currentPartPath.toString());
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        Set<Long> checkpointsToRemove = new HashSet<>();
        for (Long pastCheckpointId : pastCheckpointIds) {
            if (pastCheckpointId <= checkpointId) {
                LOG.debug("Moving pending files to final location for checkpoint {}", pastCheckpointId);
                // All the pending files are buckets that have been completed but are waiting to be renamed
                // to their final name
                for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                    Path finalPath = new Path(filename);
                    Path pendingPath = new Path(finalPath.getParent(),
                            pendingPrefix + finalPath.getName()).suffix(pendingSuffix);
                    FileSystem fs = pendingPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
                    fs.rename(pendingPath, finalPath);
                    LOG.debug("Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                }
                checkpointsToRemove.add(pastCheckpointId);
            }
        }
        for (Long toRemove : checkpointsToRemove) {
            bucketState.pendingFilesPerCheckpoint.remove(toRemove);
        }
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);
            if (fs.exists(partPendingPath)) {
                LOG.debug("In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the meantime, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug("In-progress file {} was neither moved to pending nor is still in progress. Possibly, "
                        + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }
            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try, we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);
                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }
            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }
            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Could not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }
    LOG.debug("Clearing pending/in-progress files.");
    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure
    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);
            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug("(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }
    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {
        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);
        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}
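The reflectTruncate helper referenced above is not shown in this excerpt. RollingSink resolves truncate reflectively because FileSystem#truncate(Path, long) only exists from Hadoop 2.7 onward. A minimal sketch of such a lookup, assuming the same null-means-unsupported convention (illustrative, not the verbatim Flink source):

import java.lang.reflect.Method;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class TruncateSupport {
    // Resolve FileSystem#truncate at runtime so the code still runs against
    // Hadoop versions older than 2.7, which lack the method entirely.
    static Method reflectTruncate(FileSystem fs) {
        try {
            return fs.getClass().getMethod("truncate", Path.class, long.class);
        } catch (NoSuchMethodException e) {
            // pre-2.7 Hadoop: caller falls back to writing a .valid-length file
            return null;
        }
    }
}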
From source file:org.apache.flume.sink.customhdfs.BucketWriter.java
License:Apache License
/**
 * Rename bucketPath file from .tmp to permanent location.
 */
// When this bucket writer is rolled based on rollCount or
// rollSize, the same instance is reused for the new file. But if
// the previous file was not closed/renamed,
// the bucket writer fields no longer point to it and hence need
// to be passed in from the thread attempting to close it. Even
// when the bucket writer is closed due to close timeout,
// this method can get called from the scheduled thread so the
// file gets closed later - so an implicit reference to this
// bucket writer would still be alive in the Callable instance.
private void renameBucket(String bucketPath, String targetPath, final FileSystem fs)
        throws IOException, InterruptedException {
    if (bucketPath.equals(targetPath)) {
        return;
    }
    final Path srcPath = new Path(bucketPath);
    final Path dstPath = new Path(targetPath);
    callWithTimeout(new CallRunner<Void>() {
        @Override
        public Void call() throws Exception {
            if (fs.exists(srcPath)) { // could block
                LOG.info("Renaming " + srcPath + " to " + dstPath);
                renameTries.incrementAndGet();
                fs.rename(srcPath, dstPath); // could block
                if (impalaTableFill.workable) {
                    impalaTableFill.impalaTableFillData(dstPath.toString());
                }
            }
            return null;
        }
    });
}
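callWithTimeout and CallRunner are BucketWriter helpers not shown in this excerpt; they exist because exists() and rename() can block indefinitely when the NameNode is unresponsive. A minimal sketch of such a wrapper, assuming a single-threaded executor and a millisecond timeout (names and shape are illustrative, not Flume's exact helper):

import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

class TimedHdfsCaller {
    private final ExecutorService pool = Executors.newSingleThreadExecutor();
    private final long callTimeoutMs;

    TimedHdfsCaller(long callTimeoutMs) {
        this.callTimeoutMs = callTimeoutMs;
    }

    // Run a potentially blocking HDFS call on a worker thread and give up
    // (with interrupt) once the deadline passes.
    <T> T callWithTimeout(Callable<T> callable) throws IOException, InterruptedException {
        Future<T> future = pool.submit(callable);
        try {
            return callTimeoutMs > 0 ? future.get(callTimeoutMs, TimeUnit.MILLISECONDS) : future.get();
        } catch (TimeoutException e) {
            future.cancel(true); // interrupt the blocked call
            throw new IOException("HDFS call timed out after " + callTimeoutMs + " ms", e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                throw (IOException) cause;
            }
            throw new RuntimeException(cause);
        }
    }
}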
From source file:org.apache.gobblin.compaction.mapreduce.CompactorOutputCommitter.java
License:Apache License
/**
 * Commits the task, moving files to their final committed location by delegating to
 * {@link FileOutputCommitter} to perform the actual moving. First, renames the
 * files to include the count of records contained within the file and a timestamp,
 * in the form {recordCount}.{timestamp}.avro. Then, the files are moved to their
 * committed location.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    Path workPath = getWorkPath();
    FileSystem fs = workPath.getFileSystem(context.getConfiguration());
    if (fs.exists(workPath)) {
        long recordCount = getRecordCountFromCounter(context,
                RecordKeyDedupReducerBase.EVENT_COUNTER.RECORD_COUNT);
        String fileNamePrefix;
        if (recordCount == 0) {
            // recordCount == 0 indicates that it is a map-only, non-dedup job, and thus record count should
            // be obtained from mapper counter.
            fileNamePrefix = CompactionRecordCountProvider.M_OUTPUT_FILE_PREFIX;
            recordCount = getRecordCountFromCounter(context, RecordKeyMapperBase.EVENT_COUNTER.RECORD_COUNT);
        } else {
            fileNamePrefix = CompactionRecordCountProvider.MR_OUTPUT_FILE_PREFIX;
        }
        String fileName = CompactionRecordCountProvider.constructFileName(fileNamePrefix,
                "." + compactionFileExtension, recordCount);
        for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return FilenameUtils.isExtension(path.getName(), compactionFileExtension);
            }
        })) {
            Path newPath = new Path(status.getPath().getParent(), fileName);
            LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
            fs.rename(status.getPath(), newPath);
        }
    }
    super.commitTask(context);
}
From source file:org.apache.gobblin.compaction.verify.InputRecordCountHelper.java
License:Apache License
private static void saveState(FileSystem fs, Path dir, State state) throws IOException {
    Path tmpFile = new Path(dir, STATE_FILE + ".tmp");
    Path newFile = new Path(dir, STATE_FILE);
    fs.delete(tmpFile, false);
    try (DataOutputStream dataOutputStream = new DataOutputStream(
            fs.create(new Path(dir, STATE_FILE + ".tmp")))) {
        state.write(dataOutputStream);
    }
    // Caution: We are deleting right before renaming because rename doesn't support
    // atomic overwrite options from the FileSystem API.
    fs.delete(newFile, false);
    fs.rename(tmpFile, newFile);
}
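The delete-then-rename sequence above leaves a short window in which no state file exists. Where available, FileContext offers a rename with an overwrite option that closes that window; a minimal sketch assuming the same tmp-then-commit layout (illustrative alternative, not part of the Gobblin source):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

class AtomicCommit {
    // FileContext.rename accepts Options.Rename.OVERWRITE, replacing the
    // destination in one step (atomically on HDFS) instead of delete + rename.
    static void commitTmpFile(Configuration conf, Path tmpFile, Path finalFile) throws IOException {
        FileContext fc = FileContext.getFileContext(conf);
        fc.rename(tmpFile, finalFile, Options.Rename.OVERWRITE);
    }
}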
From source file:org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitter.java
License:Apache License
/**
 * Merges all the splits for a given file.
 * Should be called on the target/destination file system (after blocks have been copied to targetFs).
 * @param fs {@link FileSystem} where file parts exist.
 * @param file {@link CopyableFile} to merge.
 * @param workUnits {@link WorkUnitState}s for all parts of this file.
 * @param parentPath {@link Path} where the parts of the file are located.
 * @return a {@link WorkUnit} equivalent to the distcp work unit if the file had not been split.
 * @throws IOException
 */
private static WorkUnitState mergeSplits(FileSystem fs, CopyableFile file, Collection<WorkUnitState> workUnits,
        Path parentPath) throws IOException {
    log.info(String.format("File %s was written in %d parts. Merging.", file.getDestination(),
            workUnits.size()));
    Path[] parts = new Path[workUnits.size()];
    for (WorkUnitState workUnit : workUnits) {
        if (!isSplitWorkUnit(workUnit)) {
            throw new IOException("Not a split work unit.");
        }
        Split split = getSplit(workUnit).get();
        parts[split.getSplitNumber()] = new Path(parentPath, split.getPartName());
    }
    Path target = new Path(parentPath, file.getDestination().getName());
    fs.rename(parts[0], target);
    fs.concat(target, Arrays.copyOfRange(parts, 1, parts.length));
    WorkUnitState finalWorkUnit = workUnits.iterator().next();
    finalWorkUnit.removeProp(SPLIT_KEY);
    return finalWorkUnit;
}
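Note that FileSystem#concat is optional: the base FileSystem implementation throws UnsupportedOperationException, and HDFS is the main filesystem that implements it. A defensive sketch of the same rename-then-concat merge, with a clearer error on unsupported filesystems (class and method names are made up for illustration):

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class SplitMerger {
    // Stitch ordered part files into 'target': rename part 0 into place,
    // then concat the remaining parts onto it.
    static void mergeParts(FileSystem fs, Path[] orderedParts, Path target) throws IOException {
        if (!fs.rename(orderedParts[0], target)) {
            throw new IOException("Failed to rename " + orderedParts[0] + " to " + target);
        }
        if (orderedParts.length > 1) {
            try {
                fs.concat(target, Arrays.copyOfRange(orderedParts, 1, orderedParts.length));
            } catch (UnsupportedOperationException e) {
                throw new IOException("concat not supported by " + fs.getUri() + "; cannot merge splits", e);
            }
        }
    }
}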
From source file:org.apache.gobblin.util.HadoopUtils.java
License:Apache License
/**
 * A wrapper around {@link FileSystem#rename(Path, Path)} which throws {@link IOException} if
 * {@link FileSystem#rename(Path, Path)} returns false.
 */
public static void renamePath(FileSystem fs, Path oldName, Path newName, boolean overwrite)
        throws IOException {
    if (!fs.exists(oldName)) {
        throw new FileNotFoundException(
                String.format("Failed to rename %s to %s: src not found", oldName, newName));
    }
    if (fs.exists(newName)) {
        if (overwrite) {
            HadoopUtils.moveToTrash(fs, newName);
        } else {
            throw new FileAlreadyExistsException(
                    String.format("Failed to rename %s to %s: dst already exists", oldName, newName));
        }
    }
    if (!fs.rename(oldName, newName)) {
        throw new IOException(String.format("Failed to rename %s to %s", oldName, newName));
    }
}
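A usage sketch of this wrapper (paths are illustrative): with overwrite=true an existing destination is first moved to trash, and a false return from the underlying rename surfaces as an IOException instead of being silently ignored.

import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenamePathExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Overwriting rename: any failure is reported as an exception.
        HadoopUtils.renamePath(fs, new Path("/data/staging/part-0"), new Path("/data/final/part-0"), true);
    }
}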