List of usage examples for org.apache.hadoop.fs.Path.getFileSystem
public FileSystem getFileSystem(Configuration conf) throws IOException
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/** * Setup output format appropriately/* www. j av a 2s.c o m*/ * * @param job - Job handle * @throws IOException - Exception if any */ private void configureOutputFormat(Job job) throws IOException { final Configuration configuration = job.getConfiguration(); Path targetPath = inputOptions.getTargetPath(); targetPath = targetPath.makeQualified(targetPath.getFileSystem(configuration)); if (inputOptions.shouldAtomicCommit()) { Path workDir = inputOptions.getAtomicWorkPath(); if (workDir == null) { workDir = targetPath.getParent(); } workDir = new Path(workDir, WIP_PREFIX + targetPath.getName() + rand.nextInt()); FileSystem workFS = workDir.getFileSystem(configuration); FileSystem targetFS = targetPath.getFileSystem(configuration); if (!DistCpUtils.compareFs(targetFS, workFS)) { throw new IllegalArgumentException("Work path " + workDir + " and target path " + targetPath + " are in different file system"); } CopyOutputFormat.setWorkingDirectory(job, workDir); } else { CopyOutputFormat.setWorkingDirectory(job, targetPath); } CopyOutputFormat.setCommitDirectory(job, targetPath); Path counterFilePath = inputOptions.getOutPutDirectory(); if (counterFilePath == null) { LOG.error("Output directory is null for distcp"); } else { LOG.info("DistCp output directory path: " + counterFilePath); CopyOutputFormat.setOutputPath(job, counterFilePath); } }
From source file:com.inmobi.conduit.distcp.tools.FileBasedCopyListing.java
License:Apache License
protected static List<Path> fetchFileList(Path sourceListing, Configuration conf) throws IOException { List<Path> result = new ArrayList<Path>(); FileSystem fs = sourceListing.getFileSystem(conf); BufferedReader input = null;//from ww w.j a v a 2 s. c o m try { input = new BufferedReader(new InputStreamReader(fs.open(sourceListing))); String line = input.readLine(); while (line != null) { result.add(new Path(line)); line = input.readLine(); } } finally { IOUtils.closeStream(input); } return result; }
From source file:com.inmobi.conduit.distcp.tools.GlobbedCopyListing.java
License:Apache License
/**
 * Implementation of CopyListing::buildListing().
 * Expands every source path as a glob and hands the expanded paths to the
 * simple listing, which writes the actual copy-listing file.
 *
 * @param pathToListingFile: The location at which the copy-listing file
 *                           is to be created.
 * @param options: Input Options for DistCp (indicating source/target paths.)
 * @throws IOException if the source list is empty or a source glob matches
 *                     nothing (raised as InvalidInputException), or if
 *                     globbing / listing fails
 */
@Override
public void doBuildListing(Path pathToListingFile, DistCpOptions options) throws IOException {
  final List<Path> expanded = new ArrayList<Path>();
  if (options.getSourcePaths().isEmpty()) {
    throw new InvalidInputException("Nothing to process. Source paths::EMPTY");
  }

  for (Path source : options.getSourcePaths()) {
    final FileStatus[] matches = source.getFileSystem(getConf()).globStatus(source);
    if (matches == null || matches.length == 0) {
      throw new InvalidInputException(source + " doesn't exist");
    }
    for (FileStatus match : matches) {
      expanded.add(match.getPath());
    }
  }

  // Carry the relevant switches over to the options built on the expanded paths.
  final DistCpOptions globbedOptions = new DistCpOptions(expanded, options.getTargetPath());
  globbedOptions.setSyncFolder(options.shouldSyncFolder());
  globbedOptions.setOverwrite(options.shouldOverwrite());
  globbedOptions.setDeleteMissing(options.shouldDeleteMissing());
  globbedOptions.setPreserveSrcPath(options.shouldPreserveSrcPath());
  globbedOptions.setSkipPathValidation(options.isSkipPathValidation());
  globbedOptions.setUseSimpleFileListing(options.isUseSimpleFileListing());

  simpleListing.buildListing(pathToListingFile, globbedOptions);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Best-effort removal of the temporary files left by the attempts of this
 * job, both in the target work path and in its parent directory.
 * Any failure is logged as a warning and otherwise ignored.
 *
 * @param context job context supplying the configuration and the job id
 */
private void cleanupTempFiles(JobContext context) {
  try {
    final Configuration jobConf = HadoopCompat.getConfiguration(context);
    final String workPathName = jobConf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH);
    final Path workPath = new Path(workPathName);
    final FileSystem workFs = workPath.getFileSystem(jobConf);
    final String id = HadoopCompat.getJobId(context).toString();

    deleteAttemptTempFiles(workPath, workFs, id);
    deleteAttemptTempFiles(workPath.getParent(), workFs, id);
  } catch (Throwable t) {
    LOG.warn("Unable to cleanup temp files", t);
  }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/** * Cleanup meta folder and other temporary files * * @param conf - Job Configuration/*from w w w . j ava 2 s . c om*/ */ private void cleanup(Configuration conf) { Path metaFolder = new Path(conf.get(DistCpConstants.CONF_LABEL_META_FOLDER)); try { FileSystem fs = metaFolder.getFileSystem(conf); LOG.info("Cleaning up temporary work folder: " + metaFolder); fs.delete(metaFolder, true); } catch (IOException ignore) { LOG.error("Exception encountered ", ignore); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void preserveFileAttributes(Configuration conf) throws IOException { String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS); LOG.info("About to preserve attributes: " + attrSymbols); EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols); Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH)); FileSystem clusterFS = sourceListing.getFileSystem(conf); SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf); long totalLen = clusterFS.getFileStatus(sourceListing).getLen(); Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); long preservedEntries = 0; try {/*from w w w .j a v a 2 s . c om*/ FileStatus srcFileStatus = new FileStatus(); Text srcRelPath = new Text(); while (sourceReader.next(srcRelPath, srcFileStatus)) { if (!srcFileStatus.isDir()) continue; Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath); //Skip the root folder, preserve the status after atomic commit is complete //If it is changed any earlier, then atomic commit may fail if (targetRoot.equals(targetFile)) continue; FileSystem targetFS = targetFile.getFileSystem(conf); DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes); HadoopCompat.progress(taskAttemptContext); HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. [" + sourceReader.getPosition() * 100 / totalLen + "%]"); } } finally { IOUtils.closeStream(sourceReader); } LOG.info("Preserved status on " + preservedEntries + " dir entries on target"); }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Removes from the target every entry that is not present in the source
 * listing.
 *
 * <p>A listing of the final target path is built next to the source
 * listing ({@code targetListing.seq}), both listings are sorted, and the
 * two sorted sequence files are walked in parallel: for each target entry
 * the source cursor is advanced while the target key is greater than the
 * source key; a target entry whose key then equals the source key is kept,
 * any other target entry is deleted recursively. A target entry that no
 * longer exists is counted as deleted.
 *
 * @param conf job configuration holding the listing path and the final
 *             target path
 * @throws IOException if a listing cannot be built or read, or if an entry
 *                     exists on the target but cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from "
      + "target that are missing in source");

  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(conf, null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  SequenceFile.Reader sourceReader = null;
  SequenceFile.Reader targetReader = null;
  long deletedEntries = 0;
  try {
    // Both readers are opened inside the try: if opening the second one
    // fails, the first one is still closed by the finally block.
    sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    FileStatus srcFileStatus = new FileStatus();
    Text srcRelPath = new Text();
    FileStatus trgtFileStatus = new FileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Advance the source cursor until it is no longer behind the target.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      // Present on both sides: keep it.
      if (srcAvailable && trgtRelPath.equals(srcRelPath)) {
        continue;
      }

      // An entry that is already gone counts as successfully removed.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath())
          || targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      HadoopCompat.progress(taskAttemptContext);
      HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
          + targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Atomically publishes the copied data by renaming the work directory to
 * the final directory.
 *
 * <p>If both the final and the work directory exist, the pre-existing final
 * directory is deleted first. When the rename reports failure, the commit
 * is still treated as successful if the final directory exists and the
 * work directory does not (the data may already have been moved).
 *
 * @param conf job configuration holding the work and final target paths
 * @throws IOException if pre-existing final data cannot be cleared, or if
 *                     the data could not be moved to the final directory
 */
private void commitData(Configuration conf) throws IOException {
  final Path workDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));
  final Path finalDir = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  final FileSystem targetFS = workDir.getFileSystem(conf);

  LOG.info("Atomic commit enabled. Moving " + workDir + " to " + finalDir);

  final boolean staleFinalData = targetFS.exists(finalDir) && targetFS.exists(workDir);
  if (staleFinalData && !targetFS.delete(finalDir, true)) {
    LOG.error("Unable to delete pre-existing final-data at " + finalDir);
    throw new IOException("Atomic commit failed. Pre-existing final data" + " in " + finalDir
        + " could not be cleared, before commit.");
  }

  boolean committed = targetFS.rename(workDir, finalDir);
  if (!committed) {
    LOG.warn("Rename failed. Perhaps data already moved. Verifying...");
    committed = targetFS.exists(finalDir) && !targetFS.exists(workDir);
  }

  if (!committed) {
    LOG.error("Unable to commit data to " + finalDir);
    throw new IOException(
        "Atomic commit failed. Temporary data in " + workDir + ", Unable to move to " + finalDir);
  }

  LOG.info("Data committed successfully to " + finalDir);
  HadoopCompat.setStatus(taskAttemptContext, "Data committed successfully to " + finalDir);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
@Override public void setup(Context context) throws IOException, InterruptedException { conf = context.getConfiguration();/*from w w w. ja v a 2s .co m*/ syncFolders = conf.getBoolean(DistCpOptionSwitch.SYNC_FOLDERS.getConfigLabel(), false); ignoreFailures = conf.getBoolean(DistCpOptionSwitch.IGNORE_FAILURES.getConfigLabel(), false); skipCrc = conf.getBoolean(DistCpOptionSwitch.SKIP_CRC.getConfigLabel(), false); overWrite = conf.getBoolean(DistCpOptionSwitch.OVERWRITE.getConfigLabel(), false); preserve = DistCpUtils.unpackAttributes(conf.get(DistCpOptionSwitch.PRESERVE_STATUS.getConfigLabel())); targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH)); targetFS = targetFinalPath.getFileSystem(conf); if (targetFS.exists(targetFinalPath) && targetFS.isFile(targetFinalPath)) { overWrite = true; // When target is an existing file, overwrite it. } if (conf.get(DistCpConstants.CONF_LABEL_SSL_CONF) != null) { initializeSSLConf(); } startEpoch = System.currentTimeMillis(); }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/** * Implementation of the Mapper<>::map(). Does the copy. * @param relPath: The target path.//from ww w .j a v a 2 s. c om * @param sourceFileStatus: The source path. * @throws IOException */ @Override public void map(Text relPath, FileStatus sourceFileStatus, Context context) throws IOException, InterruptedException { Path sourcePath = sourceFileStatus.getPath(); Map<Long, Long> received = null; if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) { received = new HashMap<Long, Long>(); } if (LOG.isDebugEnabled()) LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath); Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString()); EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context); final String description = "Copying " + sourcePath + " to " + target; context.setStatus(description); LOG.info(description); try { FileStatus sourceCurrStatus; FileSystem sourceFS; try { sourceFS = sourcePath.getFileSystem(conf); sourceCurrStatus = sourceFS.getFileStatus(sourcePath); } catch (FileNotFoundException e) { throw new IOException(new RetriableFileCopyCommand.CopyReadException(e)); } FileStatus targetStatus = null; try { targetStatus = targetFS.getFileStatus(target); } catch (FileNotFoundException ignore) { } if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) { throw new IOException("Can't replace " + target + ". 
Target is " + getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus)); } if (sourceCurrStatus.isDir()) { createTargetDirsWithRetry(description, target, context); return; } if (skipFile(sourceFS, sourceCurrStatus, target)) { LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target); updateSkipCounters(context, sourceCurrStatus); } else { String streamName = null; if (!relPath.toString().isEmpty()) { Path relativePath = new Path(relPath.toString()); if (relativePath.depth() > 2) { // path is for mirror service and is of format // /conduit/streams/<streamName>/2013/09/12 Path tmpPath = relativePath; while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) { tmpPath = tmpPath.getParent(); } streamName = tmpPath.getName(); } else { // path is for merge service and of form /<stream name>/filename.gz streamName = relativePath.getParent().getName(); } } copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received); // generate audit counters if (received != null) { for (Entry<Long, Long> entry : received.entrySet()) { String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(), entry.getKey(), entry.getValue()); context.write(NullWritable.get(), new Text(counterNameValue)); } } } DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes); } catch (IOException exception) { handleFailures(exception, sourceFileStatus, target, context); } }