Example usage for org.apache.hadoop.fs FileSystem rename

List of usage examples for org.apache.hadoop.fs FileSystem rename

Introduction

This page collects usage examples of org.apache.hadoop.fs FileSystem rename.

Prototype

public abstract boolean rename(Path src, Path dst) throws IOException;

Document

Renames Path src to Path dst.
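
FileSystem.rename reports failure by returning false rather than throwing on most implementations, so callers normally check the result. A minimal sketch of a direct call (the paths and configuration below are purely illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path src = new Path("/tmp/staging/part-00000"); // hypothetical source
        Path dst = new Path("/tmp/final/part-00000");   // hypothetical destination
        FileSystem fs = src.getFileSystem(conf);
        // rename returns false (rather than throwing) when it cannot complete,
        // e.g. if src does not exist or dst's parent directory is missing
        if (!fs.rename(src, dst)) {
            throw new IOException("Failed to rename " + src + " to " + dst);
        }
    }
}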

Usage

From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java

License:Apache License

private void moveFiles(Path srcDir, Path destDir) throws IOException {
    if (!srcDir.equals(destDir)) {
        // TODO: src and dest may be on different FS
        FileSystem fs = destDir.getFileSystem(jobConf);
        Preconditions.checkState(fs.exists(destDir) || fs.mkdirs(destDir),
                "Failed to create dest path " + destDir);
        if (overwrite) {
            // delete existing files for overwrite
            // TODO: support setting auto-purge?
            final boolean purge = true;
            // Note we assume the srcDir is a hidden dir, otherwise it will be deleted if it's a sub-dir of destDir
            FileStatus[] existingFiles = fs.listStatus(destDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
            if (existingFiles != null) {
                HiveShim hiveShim = HiveShimLoader.loadHiveShim();
                for (FileStatus existingFile : existingFiles) {
                    Preconditions.checkState(hiveShim.moveToTrash(fs, existingFile.getPath(), jobConf, purge),
                            "Failed to overwrite existing file " + existingFile);
                }
            }
        }
        FileStatus[] srcFiles = fs.listStatus(srcDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
        for (FileStatus srcFile : srcFiles) {
            Path srcPath = srcFile.getPath();
            Path destPath = new Path(destDir, srcPath.getName());
            int count = 1;
            while (!fs.rename(srcPath, destPath)) {
                String name = srcPath.getName() + "_copy_" + count;
                destPath = new Path(destDir, name);
                count++;
            }
        }
    }
}
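
Note the TODO above: FileSystem.rename cannot move a file between two different filesystems. A hedged sketch for the cross-filesystem case (not part of the Flink code above), using FileUtil.copy with deleteSource=true to emulate a move; the helper name and paths are illustrative:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

// Sketch: "move" a file across filesystems by copying it and deleting the source.
// Unlike rename, this is not atomic and rewrites the data.
public static void moveAcrossFileSystems(Path src, Path dst, Configuration conf) throws IOException {
    FileSystem srcFs = src.getFileSystem(conf);
    FileSystem dstFs = dst.getFileSystem(conf);
    if (!FileUtil.copy(srcFs, src, dstFs, dst, /* deleteSource */ true, conf)) {
        throw new IOException("Failed to move " + src + " to " + dst);
    }
}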

From source file:org.apache.flink.hadoopcompatibility.mapreduce.HadoopOutputFormat.java

License:Apache License

/**
 * Commits the task by moving the output file out of the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    try {
        this.recordWriter.close(this.context);
    } catch (InterruptedException e) {
        throw new IOException("Could not close RecordReader.", e);
    }

    if (this.fileOutputCommitter.needsTaskCommit(this.context)) {
        this.fileOutputCommitter.commitTask(this.context);
    }
    this.fileOutputCommitter.commitJob(this.context);

    Path outputPath = new Path(this.configuration.get("mapred.output.dir"));

    // rename tmp-* files to final name
    FileSystem fs = FileSystem.get(outputPath.toUri(), this.configuration);

    final Pattern p = Pattern.compile("tmp-(.)-([0-9]+)");

    // isDirectory does not work in hadoop 1
    if (fs.getFileStatus(outputPath).isDir()) {
        FileStatus[] files = fs.listStatus(outputPath);

        for (FileStatus f : files) {
            Matcher m = p.matcher(f.getPath().getName());
            if (m.matches()) {
                int part = Integer.valueOf(m.group(2));
                fs.rename(f.getPath(), new Path(outputPath.toString() + "/" + part));
            }
        }
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

/**
 * Closes the current part file.
 *
 * <p>
 * This moves the current in-progress part file to a pending file and adds it to the list
 * of pending files in our bucket state.
 */
private void closeCurrentPartFile() throws Exception {
    if (isWriterOpen) {
        writer.close();
        isWriterOpen = false;
    }

    if (currentPartPath != null) {
        Path inProgressPath = new Path(currentPartPath.getParent(),
                inProgressPrefix + currentPartPath.getName()).suffix(inProgressSuffix);
        Path pendingPath = new Path(currentPartPath.getParent(), pendingPrefix + currentPartPath.getName())
                .suffix(pendingSuffix);
        FileSystem fs = inProgressPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
        fs.rename(inProgressPath, pendingPath);
        LOG.debug("Moving in-progress bucket {} to pending file {}", inProgressPath, pendingPath);
        this.bucketState.pendingFiles.add(currentPartPath.toString());
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        Set<Long> checkpointsToRemove = new HashSet<>();
        for (Long pastCheckpointId : pastCheckpointIds) {
            if (pastCheckpointId <= checkpointId) {
                LOG.debug("Moving pending files to final location for checkpoint {}", pastCheckpointId);
                // All the pending files are buckets that have been completed but are waiting to be renamed
                // to their final name
                for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                    Path finalPath = new Path(filename);
                    Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                            .suffix(pendingSuffix);

                    FileSystem fs = pendingPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
                    fs.rename(pendingPath, finalPath);
                    LOG.debug("Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                }
                checkpointsToRemove.add(pastCheckpointId);
            }
        }
        for (Long toRemove : checkpointsToRemove) {
            bucketState.pendingFilesPerCheckpoint.remove(toRemove);
        }
    }
}

From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java

License:Apache License

@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clear all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // still be in-progress or have become a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);

            if (fs.exists(partPendingPath)) {
                LOG.debug(
                        "In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the meantime, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-Progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug(
                        "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, "
                                + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }

            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try; we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);

                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }

            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }

            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Cound not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }

    LOG.debug("Clearing pending/in-progress files.");

    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure

    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);

            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug(
                            "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }

    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {

        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);

        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}

From source file:org.apache.flume.sink.customhdfs.BucketWriter.java

License:Apache License

/**
 * Rename bucketPath file from .tmp to permanent location.
 */
// When this bucket writer is rolled based on rollCount or
// rollSize, the same instance is reused for the new file. But if
// the previous file was not closed/renamed,
// the bucket writer fields no longer point to it and hence need
// to be passed in from the thread attempting to close it. Even
// when the bucket writer is closed due to close timeout,
// this method can get called from the scheduled thread so the
// file gets closed later - so an implicit reference to this
// bucket writer would still be alive in the Callable instance.
private void renameBucket(String bucketPath, String targetPath, final FileSystem fs)
        throws IOException, InterruptedException {
    if (bucketPath.equals(targetPath)) {
        return;
    }

    final Path srcPath = new Path(bucketPath);
    final Path dstPath = new Path(targetPath);

    callWithTimeout(new CallRunner<Void>() {
        @Override
        public Void call() throws Exception {
            if (fs.exists(srcPath)) { // could block
                LOG.info("Renaming " + srcPath + " to " + dstPath);
                renameTries.incrementAndGet();
                fs.rename(srcPath, dstPath); // could block
                if (impalaTableFill.workable) {
                    impalaTableFill.impalaTableFillData(dstPath.toString());
                }
            }
            return null;
        }
    });
}

From source file:org.apache.gobblin.compaction.mapreduce.CompactorOutputCommitter.java

License:Apache License

/**
 * Commits the task, moving files to their final committed location by delegating to
 * {@link FileOutputCommitter} to perform the actual moving. First, renames the
 * files to include the count of records contained within the file and a timestamp,
 * in the form {recordCount}.{timestamp}.avro. Then, the files are moved to their
 * committed location.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    Path workPath = getWorkPath();
    FileSystem fs = workPath.getFileSystem(context.getConfiguration());

    if (fs.exists(workPath)) {
        long recordCount = getRecordCountFromCounter(context,
                RecordKeyDedupReducerBase.EVENT_COUNTER.RECORD_COUNT);
        String fileNamePrefix;
        if (recordCount == 0) {

            // recordCount == 0 indicates that it is a map-only, non-dedup job, and thus record count should
            // be obtained from mapper counter.
            fileNamePrefix = CompactionRecordCountProvider.M_OUTPUT_FILE_PREFIX;
            recordCount = getRecordCountFromCounter(context, RecordKeyMapperBase.EVENT_COUNTER.RECORD_COUNT);
        } else {
            fileNamePrefix = CompactionRecordCountProvider.MR_OUTPUT_FILE_PREFIX;
        }
        String fileName = CompactionRecordCountProvider.constructFileName(fileNamePrefix,
                "." + compactionFileExtension, recordCount);

        for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return FilenameUtils.isExtension(path.getName(), compactionFileExtension);
            }
        })) {
            Path newPath = new Path(status.getPath().getParent(), fileName);
            LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
            fs.rename(status.getPath(), newPath);
        }
    }

    super.commitTask(context);
}

From source file:org.apache.gobblin.compaction.verify.InputRecordCountHelper.java

License:Apache License

private static void saveState(FileSystem fs, Path dir, State state) throws IOException {
    Path tmpFile = new Path(dir, STATE_FILE + ".tmp");
    Path newFile = new Path(dir, STATE_FILE);
    fs.delete(tmpFile, false);
    try (DataOutputStream dataOutputStream = new DataOutputStream(
            fs.create(new Path(dir, STATE_FILE + ".tmp")))) {
        state.write(dataOutputStream);
    }

    // Caution: We are deleting right before renaming because the FileSystem rename API does not support an atomic overwrite option.
    fs.delete(newFile, false);
    fs.rename(tmpFile, newFile);
}
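
The delete-then-rename above leaves a short window in which newFile does not exist, because FileSystem.rename has no overwrite flag. Where an atomic replace is needed and the underlying filesystem supports it (e.g. HDFS), one alternative is the FileContext API; the sketch below is only an illustration (the method name and paths are hypothetical) and not part of the Gobblin code above:

import java.io.IOException;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

// Sketch: overwrite-capable rename via FileContext instead of delete + FileSystem.rename.
public static void atomicReplace(Path tmpFile, Path finalFile) throws IOException {
    FileContext fc = FileContext.getFileContext(finalFile.toUri());
    fc.rename(tmpFile, finalFile, Options.Rename.OVERWRITE); // replaces finalFile if it already exists
}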

From source file:org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitter.java

License:Apache License

/**
 * Merges all the splits for a given file.
 * Should be called on the target/destination file system (after blocks have been copied to targetFs).
 * @param fs {@link FileSystem} where file parts exist.
 * @param file {@link CopyableFile} to merge.
 * @param workUnits {@link WorkUnitState}s for all parts of this file.
 * @param parentPath {@link Path} where the parts of the file are located.
 * @return a {@link WorkUnit} equivalent to the distcp work unit if the file had not been split.
 * @throws IOException
 */
private static WorkUnitState mergeSplits(FileSystem fs, CopyableFile file, Collection<WorkUnitState> workUnits,
        Path parentPath) throws IOException {

    log.info(String.format("File %s was written in %d parts. Merging.", file.getDestination(),
            workUnits.size()));
    Path[] parts = new Path[workUnits.size()];
    for (WorkUnitState workUnit : workUnits) {
        if (!isSplitWorkUnit(workUnit)) {
            throw new IOException("Not a split work unit.");
        }
        Split split = getSplit(workUnit).get();
        parts[split.getSplitNumber()] = new Path(parentPath, split.getPartName());
    }

    Path target = new Path(parentPath, file.getDestination().getName());

    fs.rename(parts[0], target);
    fs.concat(target, Arrays.copyOfRange(parts, 1, parts.length));

    WorkUnitState finalWorkUnit = workUnits.iterator().next();
    finalWorkUnit.removeProp(SPLIT_KEY);
    return finalWorkUnit;
}

From source file:org.apache.gobblin.util.HadoopUtils.java

License:Apache License

/**
 * A wrapper around {@link FileSystem#rename(Path, Path)} which throws {@link IOException} if
 * {@link FileSystem#rename(Path, Path)} returns false.
 */
public static void renamePath(FileSystem fs, Path oldName, Path newName, boolean overwrite) throws IOException {
    if (!fs.exists(oldName)) {
        throw new FileNotFoundException(
                String.format("Failed to rename %s to %s: src not found", oldName, newName));
    }
    if (fs.exists(newName)) {
        if (overwrite) {
            HadoopUtils.moveToTrash(fs, newName);
        } else {
            throw new FileAlreadyExistsException(
                    String.format("Failed to rename %s to %s: dst already exists", oldName, newName));
        }
    }
    if (!fs.rename(oldName, newName)) {
        throw new IOException(String.format("Failed to rename %s to %s", oldName, newName));
    }
}
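
A hypothetical call site for this wrapper (the paths are made up; overwrite = true moves an existing destination to the trash before renaming):

// Hypothetical usage of HadoopUtils.renamePath; paths are illustrative.
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
HadoopUtils.renamePath(fs, new Path("/data/staging/output.avro"),
        new Path("/data/final/output.avro"), true);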