List of usage examples for org.apache.hadoop.fs FileSystem rename
public abstract boolean rename(Path src, Path dst) throws IOException;
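Note on the contract before the examples: rename returns a boolean, and many implementations (HDFS in particular) signal failure by returning false rather than throwing, so callers are expected to check the return value. A minimal hedged sketch (paths are illustrative, not from any example below):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());
        Path src = new Path("/tmp/example/source.txt"); // illustrative paths
        Path dst = new Path("/tmp/example/renamed.txt");
        // rename often returns false on failure (e.g. missing src, or an
        // existing dst on HDFS) rather than throwing, so check the result.
        if (!fs.rename(src, dst)) {
            throw new IOException("Failed to rename " + src + " to " + dst);
        }
    }
}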
From source file:org.apache.flink.batch.connectors.hive.HiveTableOutputFormat.java
License:Apache License
private void moveFiles(Path srcDir, Path destDir) throws IOException {
    if (!srcDir.equals(destDir)) {
        // TODO: src and dest may be on different FS
        FileSystem fs = destDir.getFileSystem(jobConf);
        Preconditions.checkState(fs.exists(destDir) || fs.mkdirs(destDir),
                "Failed to create dest path " + destDir);
        if (overwrite) {
            // delete existing files for overwrite
            // TODO: support setting auto-purge?
            final boolean purge = true;
            // Note we assume the srcDir is a hidden dir, otherwise it will be deleted if it's a sub-dir of destDir
            FileStatus[] existingFiles = fs.listStatus(destDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
            if (existingFiles != null) {
                HiveShim hiveShim = HiveShimLoader.loadHiveShim();
                for (FileStatus existingFile : existingFiles) {
                    Preconditions.checkState(hiveShim.moveToTrash(fs, existingFile.getPath(), jobConf, purge),
                            "Failed to overwrite existing file " + existingFile);
                }
            }
        }
        FileStatus[] srcFiles = fs.listStatus(srcDir, FileUtils.HIDDEN_FILES_PATH_FILTER);
        for (FileStatus srcFile : srcFiles) {
            Path srcPath = srcFile.getPath();
            Path destPath = new Path(destDir, srcPath.getName());
            int count = 1;
            // rename returns false while destPath is taken; probe with _copy_N suffixes
            while (!fs.rename(srcPath, destPath)) {
                String name = srcPath.getName() + "_copy_" + count;
                destPath = new Path(destDir, name);
                count++;
            }
        }
    }
}
From source file:org.apache.flink.hadoopcompatibility.mapreduce.HadoopOutputFormat.java
License:Apache License
/**
 * Commits the task by moving the output file out from the temporary directory.
 * @throws IOException
 */
@Override
public void close() throws IOException {
    try {
        this.recordWriter.close(this.context);
    } catch (InterruptedException e) {
        throw new IOException("Could not close RecordWriter.", e);
    }
    if (this.fileOutputCommitter.needsTaskCommit(this.context)) {
        this.fileOutputCommitter.commitTask(this.context);
    }
    this.fileOutputCommitter.commitJob(this.context);
    Path outputPath = new Path(this.configuration.get("mapred.output.dir"));
    // rename tmp-* files to final name
    FileSystem fs = FileSystem.get(outputPath.toUri(), this.configuration);
    final Pattern p = Pattern.compile("tmp-(.)-([0-9]+)");
    // isDirectory does not work in hadoop 1
    if (fs.getFileStatus(outputPath).isDir()) {
        FileStatus[] files = fs.listStatus(outputPath);
        for (FileStatus f : files) {
            Matcher m = p.matcher(f.getPath().getName());
            if (m.matches()) {
                int part = Integer.valueOf(m.group(2));
                fs.rename(f.getPath(), new Path(outputPath.toString() + "/" + part));
            }
        }
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
/**
 * Closes the current part file.
 *
 * <p>This moves the current in-progress part file to a pending file and adds it to the list
 * of pending files in our bucket state.
 */
private void closeCurrentPartFile() throws Exception {
    if (isWriterOpen) {
        writer.close();
        isWriterOpen = false;
    }
    if (currentPartPath != null) {
        Path inProgressPath = new Path(currentPartPath.getParent(),
                inProgressPrefix + currentPartPath.getName()).suffix(inProgressSuffix);
        Path pendingPath = new Path(currentPartPath.getParent(),
                pendingPrefix + currentPartPath.getName()).suffix(pendingSuffix);
        FileSystem fs = inProgressPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
        fs.rename(inProgressPath, pendingPath);
        LOG.debug("Moving in-progress bucket {} to pending file {}", inProgressPath, pendingPath);
        this.bucketState.pendingFiles.add(currentPartPath.toString());
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void notifyCheckpointComplete(long checkpointId) throws Exception {
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
        Set<Long> checkpointsToRemove = new HashSet<>();
        for (Long pastCheckpointId : pastCheckpointIds) {
            if (pastCheckpointId <= checkpointId) {
                LOG.debug("Moving pending files to final location for checkpoint {}", pastCheckpointId);
                // All the pending files are buckets that have been completed but are waiting to be renamed
                // to their final name
                for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
                    Path finalPath = new Path(filename);
                    Path pendingPath = new Path(finalPath.getParent(),
                            pendingPrefix + finalPath.getName()).suffix(pendingSuffix);
                    FileSystem fs = pendingPath.getFileSystem(new org.apache.hadoop.conf.Configuration());
                    fs.rename(pendingPath, finalPath);
                    LOG.debug("Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                }
                checkpointsToRemove.add(pastCheckpointId);
            }
        }
        for (Long toRemove : checkpointsToRemove) {
            bucketState.pendingFilesPerCheckpoint.remove(toRemove);
        }
    }
}
From source file:org.apache.flink.streaming.connectors.fs.RollingSink.java
License:Apache License
@Override
public void restoreState(BucketState state) {
    bucketState = state;
    // we can clean all the pending files since they were renamed to final files
    // after this checkpoint was successful
    bucketState.pendingFiles.clear();
    FileSystem fs = null;
    try {
        fs = new Path(basePath).getFileSystem(new org.apache.hadoop.conf.Configuration());
    } catch (IOException e) {
        LOG.error("Error while creating FileSystem in checkpoint restore.", e);
        throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e);
    }
    if (bucketState.currentFile != null) {
        // We were writing to a file when the last checkpoint occurred. This file can either
        // be still in-progress or became a pending file at some point after the checkpoint.
        // Either way, we have to truncate it back to a valid state (or write a .valid-length
        // file that specifies up to which length it is valid) and rename it to the final name
        // before starting a new bucket file.
        Path partPath = new Path(bucketState.currentFile);
        try {
            Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName())
                    .suffix(pendingSuffix);
            Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName())
                    .suffix(inProgressSuffix);
            if (fs.exists(partPendingPath)) {
                LOG.debug("In-progress file {} has been moved to pending after checkpoint, moving to final location.",
                        partPath);
                // has been moved to pending in the meantime, rename to final location
                fs.rename(partPendingPath, partPath);
            } else if (fs.exists(partInProgressPath)) {
                LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath);
                // it was still in progress, rename to final path
                fs.rename(partInProgressPath, partPath);
            } else if (fs.exists(partPath)) {
                LOG.debug("In-progress file {} was already moved to final location {}.",
                        bucketState.currentFile, partPath);
            } else {
                LOG.debug("In-progress file {} was neither moved to pending nor is still in progress. Possibly, "
                        + "it was moved to final location by a previous snapshot restore",
                        bucketState.currentFile);
            }
            refTruncate = reflectTruncate(fs);
            // truncate it or write a ".valid-length" file to specify up to which point it is valid
            if (refTruncate != null) {
                LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength);
                // someone else might still hold the lease from a previous try, we are
                // recovering, after all ...
                if (fs instanceof DistributedFileSystem) {
                    DistributedFileSystem dfs = (DistributedFileSystem) fs;
                    LOG.debug("Trying to recover file lease {}", partPath);
                    dfs.recoverLease(partPath);
                    boolean isclosed = dfs.isFileClosed(partPath);
                    StopWatch sw = new StopWatch();
                    sw.start();
                    while (!isclosed) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        isclosed = dfs.isFileClosed(partPath);
                    }
                }
                Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath,
                        bucketState.currentFileValidLength);
                if (!truncated) {
                    LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath);
                    // we must wait for the asynchronous truncate operation to complete
                    StopWatch sw = new StopWatch();
                    sw.start();
                    long newLen = fs.getFileStatus(partPath).getLen();
                    while (newLen != bucketState.currentFileValidLength) {
                        if (sw.getTime() > asyncTimeout) {
                            break;
                        }
                        try {
                            Thread.sleep(500);
                        } catch (InterruptedException e1) {
                            // ignore it
                        }
                        newLen = fs.getFileStatus(partPath).getLen();
                    }
                    if (newLen != bucketState.currentFileValidLength) {
                        throw new RuntimeException("Truncate did not truncate to right length. Should be "
                                + bucketState.currentFileValidLength + " is " + newLen + ".");
                    }
                }
            } else {
                LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath,
                        bucketState.currentFileValidLength);
                Path validLengthFilePath = new Path(partPath.getParent(),
                        validLengthPrefix + partPath.getName()).suffix(validLengthSuffix);
                if (!fs.exists(validLengthFilePath)) {
                    FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath);
                    lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength));
                    lengthFileOut.close();
                }
            }
            // invalidate in the state object
            bucketState.currentFile = null;
            bucketState.currentFileValidLength = -1;
        } catch (IOException e) {
            LOG.error("Error while restoring RollingSink state.", e);
            throw new RuntimeException("Error while restoring RollingSink state.", e);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LOG.error("Could not invoke truncate.", e);
            throw new RuntimeException("Could not invoke truncate.", e);
        }
    }
    LOG.debug("Clearing pending/in-progress files.");
    // Move files that are confirmed by a checkpoint but did not get moved to final location
    // because the checkpoint notification did not happen before a failure
    Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet();
    LOG.debug("Moving pending files to final location on restore.");
    for (Long pastCheckpointId : pastCheckpointIds) {
        // All the pending files are buckets that have been completed but are waiting to be renamed
        // to their final name
        for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) {
            Path finalPath = new Path(filename);
            Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName())
                    .suffix(pendingSuffix);
            try {
                if (fs.exists(pendingPath)) {
                    LOG.debug("(RESTORE) Moving pending file {} to final location after complete checkpoint {}.",
                            pendingPath, pastCheckpointId);
                    fs.rename(pendingPath, finalPath);
                }
            } catch (IOException e) {
                LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath,
                        finalPath, e);
                throw new RuntimeException(
                        "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e);
            }
        }
    }
    bucketState.pendingFiles.clear();
    synchronized (bucketState.pendingFilesPerCheckpoint) {
        bucketState.pendingFilesPerCheckpoint.clear();
    }
    // we need to get this here since open() has not yet been called
    int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
    // delete pending files
    try {
        RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true);
        while (bucketFiles.hasNext()) {
            LocatedFileStatus file = bucketFiles.next();
            if (file.getPath().toString().endsWith(pendingSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
            if (file.getPath().toString().endsWith(inProgressSuffix)) {
                // only delete files that contain our subtask index
                if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) {
                    LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString());
                    fs.delete(file.getPath(), true);
                }
            }
        }
    } catch (IOException e) {
        LOG.error("Error while deleting old pending files: {}", e);
        throw new RuntimeException("Error while deleting old pending files.", e);
    }
}
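The reflectTruncate helper referenced above is not shown in this excerpt. RollingSink resolves truncate reflectively because FileSystem#truncate(Path, long) only exists from Hadoop 2.7 onward. A minimal sketch of such a lookup, assuming the same null-means-unsupported convention (illustrative, not the verbatim Flink source):

import java.lang.reflect.Method;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class TruncateSupport {
    // Resolve FileSystem#truncate at runtime so the code still runs against
    // Hadoop versions older than 2.7, which lack the method entirely.
    static Method reflectTruncate(FileSystem fs) {
        try {
            return fs.getClass().getMethod("truncate", Path.class, long.class);
        } catch (NoSuchMethodException e) {
            // pre-2.7 Hadoop: caller falls back to writing a .valid-length file
            return null;
        }
    }
}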
From source file:org.apache.flume.sink.customhdfs.BucketWriter.java
License:Apache License
/**
 * Rename bucketPath file from .tmp to permanent location.
 */
// When this bucket writer is rolled based on rollCount or
// rollSize, the same instance is reused for the new file. But if
// the previous file was not closed/renamed,
// the bucket writer fields no longer point to it and hence need
// to be passed in from the thread attempting to close it. Even
// when the bucket writer is closed due to close timeout,
// this method can get called from the scheduled thread so the
// file gets closed later - so an implicit reference to this
// bucket writer would still be alive in the Callable instance.
private void renameBucket(String bucketPath, String targetPath, final FileSystem fs)
        throws IOException, InterruptedException {
    if (bucketPath.equals(targetPath)) {
        return;
    }
    final Path srcPath = new Path(bucketPath);
    final Path dstPath = new Path(targetPath);
    callWithTimeout(new CallRunner<Void>() {
        @Override
        public Void call() throws Exception {
            if (fs.exists(srcPath)) { // could block
                LOG.info("Renaming " + srcPath + " to " + dstPath);
                renameTries.incrementAndGet();
                fs.rename(srcPath, dstPath); // could block
                if (impalaTableFill.workable) {
                    impalaTableFill.impalaTableFillData(dstPath.toString());
                }
            }
            return null;
        }
    });
}
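callWithTimeout and CallRunner are BucketWriter helpers not shown in this excerpt; they exist because exists() and rename() can block indefinitely when the NameNode is unresponsive. A minimal sketch of such a wrapper, assuming a single-threaded executor and a millisecond timeout (names and shape are illustrative, not Flume's exact helper):

import java.io.IOException;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

class TimedHdfsCaller {
    private final ExecutorService pool = Executors.newSingleThreadExecutor();
    private final long callTimeoutMs;

    TimedHdfsCaller(long callTimeoutMs) {
        this.callTimeoutMs = callTimeoutMs;
    }

    // Run a potentially blocking HDFS call on a worker thread and give up
    // (with interrupt) once the deadline passes.
    <T> T callWithTimeout(Callable<T> callable) throws IOException, InterruptedException {
        Future<T> future = pool.submit(callable);
        try {
            return callTimeoutMs > 0 ? future.get(callTimeoutMs, TimeUnit.MILLISECONDS) : future.get();
        } catch (TimeoutException e) {
            future.cancel(true); // interrupt the blocked call
            throw new IOException("HDFS call timed out after " + callTimeoutMs + " ms", e);
        } catch (ExecutionException e) {
            Throwable cause = e.getCause();
            if (cause instanceof IOException) {
                throw (IOException) cause;
            }
            throw new RuntimeException(cause);
        }
    }
}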
From source file:org.apache.gobblin.compaction.mapreduce.CompactorOutputCommitter.java
License:Apache License
/**
 * Commits the task, moving files to their final committed location by delegating to
 * {@link FileOutputCommitter} to perform the actual moving. First, renames the
 * files to include the count of records contained within the file and a timestamp,
 * in the form {recordCount}.{timestamp}.avro. Then, the files are moved to their
 * committed location.
 */
@Override
public void commitTask(TaskAttemptContext context) throws IOException {
    Path workPath = getWorkPath();
    FileSystem fs = workPath.getFileSystem(context.getConfiguration());
    if (fs.exists(workPath)) {
        long recordCount = getRecordCountFromCounter(context,
                RecordKeyDedupReducerBase.EVENT_COUNTER.RECORD_COUNT);
        String fileNamePrefix;
        if (recordCount == 0) {
            // recordCount == 0 indicates that it is a map-only, non-dedup job, and thus record count should
            // be obtained from mapper counter.
            fileNamePrefix = CompactionRecordCountProvider.M_OUTPUT_FILE_PREFIX;
            recordCount = getRecordCountFromCounter(context, RecordKeyMapperBase.EVENT_COUNTER.RECORD_COUNT);
        } else {
            fileNamePrefix = CompactionRecordCountProvider.MR_OUTPUT_FILE_PREFIX;
        }
        String fileName = CompactionRecordCountProvider.constructFileName(fileNamePrefix,
                "." + compactionFileExtension, recordCount);
        for (FileStatus status : fs.listStatus(workPath, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return FilenameUtils.isExtension(path.getName(), compactionFileExtension);
            }
        })) {
            Path newPath = new Path(status.getPath().getParent(), fileName);
            LOG.info(String.format("Renaming %s to %s", status.getPath(), newPath));
            fs.rename(status.getPath(), newPath);
        }
    }
    super.commitTask(context);
}
From source file:org.apache.gobblin.compaction.verify.InputRecordCountHelper.java
License:Apache License
private static void saveState(FileSystem fs, Path dir, State state) throws IOException {
    Path tmpFile = new Path(dir, STATE_FILE + ".tmp");
    Path newFile = new Path(dir, STATE_FILE);
    fs.delete(tmpFile, false);
    try (DataOutputStream dataOutputStream = new DataOutputStream(
            fs.create(new Path(dir, STATE_FILE + ".tmp")))) {
        state.write(dataOutputStream);
    }
    // Caution: We are deleting right before renaming because rename doesn't support
    // atomic overwrite options from the FileSystem API.
    fs.delete(newFile, false);
    fs.rename(tmpFile, newFile);
}
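The delete-then-rename sequence above leaves a short window in which no state file exists. Where available, FileContext offers a rename with an overwrite option that closes that window; a minimal sketch assuming the same tmp-then-commit layout (illustrative alternative, not part of the Gobblin source):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Path;

class AtomicCommit {
    // FileContext.rename accepts Options.Rename.OVERWRITE, replacing the
    // destination in one step (atomically on HDFS) instead of delete + rename.
    static void commitTmpFile(Configuration conf, Path tmpFile, Path finalFile) throws IOException {
        FileContext fc = FileContext.getFileContext(conf);
        fc.rename(tmpFile, finalFile, Options.Rename.OVERWRITE);
    }
}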
From source file:org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitter.java
License:Apache License
/**
 * Merges all the splits for a given file.
 * Should be called on the target/destination file system (after blocks have been copied to targetFs).
 * @param fs {@link FileSystem} where file parts exist.
 * @param file {@link CopyableFile} to merge.
 * @param workUnits {@link WorkUnitState}s for all parts of this file.
 * @param parentPath {@link Path} where the parts of the file are located.
 * @return a {@link WorkUnit} equivalent to the distcp work unit if the file had not been split.
 * @throws IOException
 */
private static WorkUnitState mergeSplits(FileSystem fs, CopyableFile file, Collection<WorkUnitState> workUnits,
        Path parentPath) throws IOException {
    log.info(String.format("File %s was written in %d parts. Merging.", file.getDestination(),
            workUnits.size()));
    Path[] parts = new Path[workUnits.size()];
    for (WorkUnitState workUnit : workUnits) {
        if (!isSplitWorkUnit(workUnit)) {
            throw new IOException("Not a split work unit.");
        }
        Split split = getSplit(workUnit).get();
        parts[split.getSplitNumber()] = new Path(parentPath, split.getPartName());
    }
    Path target = new Path(parentPath, file.getDestination().getName());
    fs.rename(parts[0], target);
    fs.concat(target, Arrays.copyOfRange(parts, 1, parts.length));
    WorkUnitState finalWorkUnit = workUnits.iterator().next();
    finalWorkUnit.removeProp(SPLIT_KEY);
    return finalWorkUnit;
}
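Note that FileSystem#concat is optional: the base FileSystem implementation throws UnsupportedOperationException, and HDFS is the main filesystem that implements it. A defensive sketch of the same rename-then-concat merge, with a clearer error on unsupported filesystems (class and method names are made up for illustration):

import java.io.IOException;
import java.util.Arrays;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

class SplitMerger {
    // Stitch ordered part files into 'target': rename part 0 into place,
    // then concat the remaining parts onto it.
    static void mergeParts(FileSystem fs, Path[] orderedParts, Path target) throws IOException {
        if (!fs.rename(orderedParts[0], target)) {
            throw new IOException("Failed to rename " + orderedParts[0] + " to " + target);
        }
        if (orderedParts.length > 1) {
            try {
                fs.concat(target, Arrays.copyOfRange(orderedParts, 1, orderedParts.length));
            } catch (UnsupportedOperationException e) {
                throw new IOException("concat not supported by " + fs.getUri() + "; cannot merge splits", e);
            }
        }
    }
}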
From source file:org.apache.gobblin.util.HadoopUtils.java
License:Apache License
/**
 * A wrapper around {@link FileSystem#rename(Path, Path)} which throws {@link IOException} if
 * {@link FileSystem#rename(Path, Path)} returns false.
 */
public static void renamePath(FileSystem fs, Path oldName, Path newName, boolean overwrite)
        throws IOException {
    if (!fs.exists(oldName)) {
        throw new FileNotFoundException(
                String.format("Failed to rename %s to %s: src not found", oldName, newName));
    }
    if (fs.exists(newName)) {
        if (overwrite) {
            HadoopUtils.moveToTrash(fs, newName);
        } else {
            throw new FileAlreadyExistsException(
                    String.format("Failed to rename %s to %s: dst already exists", oldName, newName));
        }
    }
    if (!fs.rename(oldName, newName)) {
        throw new IOException(String.format("Failed to rename %s to %s", oldName, newName));
    }
}
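A usage sketch of this wrapper (paths are illustrative): with overwrite=true an existing destination is first moved to trash, and a false return from the underlying rename surfaces as an IOException instead of being silently ignored.

import org.apache.gobblin.util.HadoopUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenamePathExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Overwriting rename: any failure is reported as an exception.
        HadoopUtils.renamePath(fs, new Path("/data/staging/part-0"), new Path("/data/final/part-0"), true);
    }
}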