List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
From source file:com.ibm.stocator.fs.swift2d.systemtests.TestSwiftFileSystemBasicOps.java
License:Apache License
@Test(timeout = SwiftTestConstants.SWIFT_TEST_TIMEOUT) public void testOverwriteDirectory() throws Throwable { Path path = new Path(getBaseURI() + "/test/testOverwriteDirectory"); try {//from w ww. j a va 2 s. c om sFileSystem.mkdirs(path.getParent()); String text = "Testing a put to a file " + System.currentTimeMillis(); writeTextFile(sFileSystem, path, text, false); assertFileHasLength(sFileSystem, path, text.length()); } finally { delete(sFileSystem, path); } }
From source file:com.iflytek.spider.util.LockUtil.java
License:Apache License
/** * Create a lock file./* ww w .java 2 s. c o m*/ * @param fs filesystem * @param lockFile name of the lock file * @param accept if true, and the target file exists, consider it valid. If false * and the target file exists, throw an IOException. * @throws IOException if accept is false, and the target file already exists, * or if it's a directory. */ public static void createLockFile(FileSystem fs, Path lockFile, boolean accept) throws IOException { if (fs.exists(lockFile)) { if (!accept) throw new IOException("lock file " + lockFile + " already exists."); if (fs.getFileStatus(lockFile).isDir()) throw new IOException("lock file " + lockFile + " already exists and is a directory."); // do nothing - the file already exists. } else { // make sure parents exist fs.mkdirs(lockFile.getParent()); fs.createNewFile(lockFile); } }
From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java
License:Open Source License
/** Returns the temporal (or not) directory in which to place raw files * @param currentPath/*from w ww . j av a 2 s. c o m*/ * @return * @throws Exception */ private Path createArchivePath(Path currentPath) throws Exception { final String timeGroupingFormat = TimeUtils.getTimePeriod(Optionals .of(() -> _dataBucket.data_schema().storage_schema().processed().grouping_time_period()).orElse("")) .validation(fail -> "", success -> TimeUtils.getTimeBasedSuffix(success, Optional.of(ChronoUnit.MINUTES))); final String timeGroup = timeGroupingFormat.isEmpty() ? IStorageService.NO_TIME_SUFFIX : (new SimpleDateFormat(timeGroupingFormat)).format(start); Path storedPath = Path.mergePaths(currentPath.getParent().getParent().getParent().getParent() // (ie up 3 to the root, ie managed_bucket==first subdir) , new Path(IStorageService.STORED_DATA_SUFFIX_RAW + timeGroup)); return storedPath; }
From source file:com.inmobi.conduit.distcp.DistcpBaseService.java
License:Apache License
/**
 * Extracts the first path component that follows the destination cluster's final
 * destination root in {@code destnPath}.
 *
 * @param destnPath a path whose string form begins with the final destination root
 * @return the name of the depth-1 ancestor of the path once the root prefix is stripped
 */
protected String getTopicNameFromDestnPath(Path destnPath) {
  final String fullPath = destnPath.toString();
  // Normalise the configured root through Path so its length matches the prefix in fullPath.
  final int rootLength = new Path(destCluster.getFinalDestDirRoot()).toString().length();

  Path cursor = new Path(fullPath.substring(rootLength));
  // Climb until only the first component below the root remains.
  while (cursor.depth() != 1) {
    cursor = cursor.getParent();
  }
  return cursor.getName();
}
From source file:com.inmobi.conduit.distcp.MirrorStreamService.java
License:Apache License
private void createCommitPaths(LinkedHashMap<FileStatus, Path> commitPaths, List<FileStatus> streamPaths) { /* Path eg in streamPaths - * /conduit/system/distcp_mirror_<srcCluster>_<destCluster>/conduit/streams * /<streamName>/2012/1/13/15/7/<hostname>-<streamName>-2012-01-16-07 * -21_00000.gz/*from w w w. ja va2 s . c o m*/ * * or it could be an emptyDir like * /* Path eg in streamPaths - * /conduit/system/distcp_mirror_<srcCluster>_<destCluster>/conduit/streams * /<streamName>/2012/1/13/15/7/ * */ for (FileStatus fileStatus : streamPaths) { String fileName = null; Path prefixDir = null; if (fileStatus.isDir()) { //empty directory prefixDir = fileStatus.getPath(); } else { fileName = fileStatus.getPath().getName(); prefixDir = fileStatus.getPath().getParent(); } Path min = prefixDir; Path hr = min.getParent(); Path day = hr.getParent(); Path month = day.getParent(); Path year = month.getParent(); Path streamName = year.getParent(); String finalPath = getDestCluster().getFinalDestDirRoot() + File.separator + streamName.getName() + File.separator + year.getName() + File.separator + month.getName() + File.separator + day.getName() + File.separator + hr.getName() + File.separator + min.getName(); if (fileName != null) { finalPath += File.separator + fileName; } commitPaths.put(fileStatus, new Path(finalPath)); LOG.debug("Going to commit [" + fileStatus.getPath() + "] to [" + finalPath + "]"); } }
From source file:com.inmobi.conduit.distcp.tools.DistCp.java
License:Apache License
/**
 * Sets the working, commit and output directories of the copy job.
 *
 * <p>With atomic commit the working directory is a randomly suffixed sibling-style directory
 * (under the configured atomic work path, or under the target's parent when none is
 * configured) that must live on the same file system as the target. Without atomic commit
 * the target itself is the working directory.
 *
 * @param job - Job handle
 * @throws IOException - Exception if any
 */
private void configureOutputFormat(Job job) throws IOException {
  final Configuration conf = job.getConfiguration();
  final Path rawTarget = inputOptions.getTargetPath();
  final Path qualifiedTarget = rawTarget.makeQualified(rawTarget.getFileSystem(conf));

  if (!inputOptions.shouldAtomicCommit()) {
    CopyOutputFormat.setWorkingDirectory(job, qualifiedTarget);
  } else {
    Path workParent = inputOptions.getAtomicWorkPath();
    if (workParent == null) {
      workParent = qualifiedTarget.getParent();
    }
    final Path workDir = new Path(workParent, WIP_PREFIX + qualifiedTarget.getName() + rand.nextInt());

    final FileSystem workFs = workDir.getFileSystem(conf);
    final FileSystem targetFs = qualifiedTarget.getFileSystem(conf);
    if (!DistCpUtils.compareFs(targetFs, workFs)) {
      throw new IllegalArgumentException("Work path " + workDir + " and target path " + qualifiedTarget
          + " are in different file system");
    }
    CopyOutputFormat.setWorkingDirectory(job, workDir);
  }
  CopyOutputFormat.setCommitDirectory(job, qualifiedTarget);

  final Path counterFilePath = inputOptions.getOutPutDirectory();
  if (counterFilePath != null) {
    LOG.info("DistCp output directory path: " + counterFilePath);
    CopyOutputFormat.setOutputPath(job, counterFilePath);
  } else {
    // Only logged; the job is still configured without an output path.
    LOG.error("Output directory is null for distcp");
  }
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
private void cleanupTempFiles(JobContext context) { try {//from w w w . j av a2 s . c om Configuration conf = HadoopCompat.getConfiguration(context); Path targetWorkPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH)); FileSystem targetFS = targetWorkPath.getFileSystem(conf); String jobId = HadoopCompat.getJobId(context).toString(); deleteAttemptTempFiles(targetWorkPath, targetFS, jobId); deleteAttemptTempFiles(targetWorkPath.getParent(), targetFS, jobId); } catch (Throwable t) { LOG.warn("Unable to cleanup temp files", t); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java
License:Apache License
/**
 * Removes from the target every entry that is not present in the source listing.
 *
 * <p>A listing of the final target path is built next to the source listing, both listings
 * are sorted, and the two sorted sequence files are walked in step. Each target entry with
 * no equal relative path in the source is deleted recursively (an entry that no longer
 * exists counts as deleted). Progress and a percentage status are reported after each
 * handled target entry.
 *
 * <p>Both readers are opened inside the guarded region so that a failure while opening the
 * second reader still closes the first one.
 *
 * @param conf job configuration holding the listing file path and the final target path
 * @throws IOException if a listing cannot be read or a target entry cannot be deleted
 */
private void deleteMissing(Configuration conf) throws IOException {
  LOG.info("-delete option is enabled. About to remove entries from " + "target that are missing in source");

  Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
  FileSystem clusterFS = sourceListing.getFileSystem(conf);
  Path sortedSourceListing = DistCpUtils.sortListing(clusterFS, conf, sourceListing);

  // Build and sort a listing of what currently exists at the target.
  Path targetListing = new Path(sourceListing.getParent(), "targetListing.seq");
  CopyListing target = new GlobbedCopyListing(conf, null);

  List<Path> targets = new ArrayList<Path>(1);
  Path targetFinalPath = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_FINAL_PATH));
  targets.add(targetFinalPath);
  DistCpOptions options = new DistCpOptions(targets, new Path("/NONE"));

  target.buildListing(targetListing, options);
  Path sortedTargetListing = DistCpUtils.sortListing(clusterFS, conf, targetListing);
  long totalLen = clusterFS.getFileStatus(sortedTargetListing).getLen();

  SequenceFile.Reader sourceReader = null;
  SequenceFile.Reader targetReader = null;
  long deletedEntries = 0;
  try {
    sourceReader = new SequenceFile.Reader(clusterFS, sortedSourceListing, conf);
    targetReader = new SequenceFile.Reader(clusterFS, sortedTargetListing, conf);

    FileStatus srcFileStatus = new FileStatus();
    Text srcRelPath = new Text();
    FileStatus trgtFileStatus = new FileStatus();
    Text trgtRelPath = new Text();

    FileSystem targetFS = targetFinalPath.getFileSystem(conf);
    boolean srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
    while (targetReader.next(trgtRelPath, trgtFileStatus)) {
      // Advance the source cursor until it is at or past the current target entry.
      while (srcAvailable && trgtRelPath.compareTo(srcRelPath) > 0) {
        srcAvailable = sourceReader.next(srcRelPath, srcFileStatus);
      }

      if (srcAvailable && trgtRelPath.equals(srcRelPath)) {
        continue; // present in source, keep it
      }

      // An entry that is already gone is treated the same as a successful delete.
      boolean result = (!targetFS.exists(trgtFileStatus.getPath())
          || targetFS.delete(trgtFileStatus.getPath(), true));
      if (result) {
        LOG.info("Deleted " + trgtFileStatus.getPath() + " - Missing at source");
        deletedEntries++;
      } else {
        throw new IOException("Unable to delete " + trgtFileStatus.getPath());
      }
      HadoopCompat.progress(taskAttemptContext);
      HadoopCompat.setStatus(taskAttemptContext, "Deleting missing files from target. ["
          + targetReader.getPosition() * 100 / totalLen + "%]");
    }
  } finally {
    // closeStream tolerates null, so this is safe even if a reader was never opened.
    IOUtils.closeStream(sourceReader);
    IOUtils.closeStream(targetReader);
  }
  LOG.info("Deleted " + deletedEntries + " from target: " + targets.get(0));
}
From source file:com.inmobi.conduit.distcp.tools.mapred.CopyMapper.java
License:Apache License
/** * Implementation of the Mapper<>::map(). Does the copy. * @param relPath: The target path./*w w w . ja v a 2 s . c om*/ * @param sourceFileStatus: The source path. * @throws IOException */ @Override public void map(Text relPath, FileStatus sourceFileStatus, Context context) throws IOException, InterruptedException { Path sourcePath = sourceFileStatus.getPath(); Map<Long, Long> received = null; if (context.getConfiguration().getBoolean(ConduitConstants.AUDIT_ENABLED_KEY, true)) { received = new HashMap<Long, Long>(); } if (LOG.isDebugEnabled()) LOG.debug("DistCpMapper::map(): Received " + sourcePath + ", " + relPath); Path target = new Path(targetWorkPath.makeQualified(targetFS) + relPath.toString()); EnumSet<DistCpOptions.FileAttribute> fileAttributes = getFileAttributeSettings(context); final String description = "Copying " + sourcePath + " to " + target; context.setStatus(description); LOG.info(description); try { FileStatus sourceCurrStatus; FileSystem sourceFS; try { sourceFS = sourcePath.getFileSystem(conf); sourceCurrStatus = sourceFS.getFileStatus(sourcePath); } catch (FileNotFoundException e) { throw new IOException(new RetriableFileCopyCommand.CopyReadException(e)); } FileStatus targetStatus = null; try { targetStatus = targetFS.getFileStatus(target); } catch (FileNotFoundException ignore) { } if (targetStatus != null && (targetStatus.isDir() != sourceCurrStatus.isDir())) { throw new IOException("Can't replace " + target + ". 
Target is " + getFileType(targetStatus) + ", Source is " + getFileType(sourceCurrStatus)); } if (sourceCurrStatus.isDir()) { createTargetDirsWithRetry(description, target, context); return; } if (skipFile(sourceFS, sourceCurrStatus, target)) { LOG.info("Skipping copy of " + sourceCurrStatus.getPath() + " to " + target); updateSkipCounters(context, sourceCurrStatus); } else { String streamName = null; if (!relPath.toString().isEmpty()) { Path relativePath = new Path(relPath.toString()); if (relativePath.depth() > 2) { // path is for mirror service and is of format // /conduit/streams/<streamName>/2013/09/12 Path tmpPath = relativePath; while (tmpPath.getParent() != null && !tmpPath.getParent().getName().equals("streams")) { tmpPath = tmpPath.getParent(); } streamName = tmpPath.getName(); } else { // path is for merge service and of form /<stream name>/filename.gz streamName = relativePath.getParent().getName(); } } copyFileWithRetry(description, sourceCurrStatus, target, context, fileAttributes, received); // generate audit counters if (received != null) { for (Entry<Long, Long> entry : received.entrySet()) { String counterNameValue = getCounterNameValue(streamName, sourcePath.getName(), entry.getKey(), entry.getValue()); context.write(NullWritable.get(), new Text(counterNameValue)); } } } DistCpUtils.preserve(target.getFileSystem(conf), target, sourceCurrStatus, fileAttributes); } catch (IOException exception) { handleFailures(exception, sourceFileStatus, target, context); } }
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.DynamicInputChunkSet.java
License:Apache License
/**
 * Constructor, to initialize the context in which DynamicInputChunks are
 * used. The chunk root is a "chunkDir" directory next to the listing file, and chunk file
 * names are prefixed with the listing file's name followed by ".chunk.".
 *
 * @param configuration The Configuration instance, as received from the
 * DynamicInputFormat or DynamicRecordReader.
 * @throws IOException Exception in case of failure.
 */
public DynamicInputChunkSet(Configuration configuration) throws IOException {
  this.configuration = configuration;

  final Path listing = new Path(getListingFilePath(configuration));
  final Path listingDir = listing.getParent();

  chunkRootPath = new Path(listingDir, "chunkDir");
  fs = chunkRootPath.getFileSystem(configuration);
  chunkFilePrefix = listing.getName() + ".chunk.";
}