List of usage examples for org.apache.hadoop.fs.Path#getParent()
public Path getParent()
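Before the full examples below, a minimal sketch of the method's behavior: getParent() drops the last path component and returns the enclosing directory as a new Path, or null when the path is already a root. The host, directory, and file names here are illustrative only.

import org.apache.hadoop.fs.Path;

public class GetParentSketch {
    public static void main(String[] args) {
        Path file = new Path("hdfs://namenode/user/alice/data/part-00000.avro");

        // Parent of a file path is its enclosing directory.
        Path dir = file.getParent();            // hdfs://namenode/user/alice/data
        // getParent() can be chained to walk upward through ancestors.
        Path grandParent = dir.getParent();     // hdfs://namenode/user/alice

        // At the root, getParent() returns null, so callers must guard against NPEs.
        Path root = new Path("/");
        System.out.println(dir);
        System.out.println(grandParent);
        System.out.println(root.getParent());   // prints "null"
    }
}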
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(final Configuration conf, final JobConf jobConf, final Arguments args)
        throws IOException {
    jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

    // set boolean values
    final boolean update = args.flags.contains(Options.UPDATE);
    final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
    jobConf.setBoolean(Options.UPDATE.propertyname, update);
    jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
    jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
        args.flags.contains(Options.IGNORE_READ_FAILURES));
    jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
        args.flags.contains(Options.PRESERVE_STATUS));

    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobConf);
    Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
    jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());
    long maxBytesPerMap = conf.getLong(BYTES_PER_MAP_LABEL, BYTES_PER_MAP);

    FileSystem dstfs = args.dst.getFileSystem(conf);
    boolean dstExists = dstfs.exists(args.dst);
    boolean dstIsDir = false;
    if (dstExists) {
        dstIsDir = dstfs.getFileStatus(args.dst).isDir();
    }

    // default logPath
    Path logPath = args.log;
    if (logPath == null) {
        String filename = "_distcp_logs_" + randomId;
        if (!dstExists || !dstIsDir) {
            Path parent = args.dst.getParent();
            if (null == parent) {
                // If dst is '/' on S3, it might not exist yet, but dst.getParent()
                // will return null. In this case, use '/' as its own parent to prevent
                // NPE errors below.
                parent = args.dst;
            }
            if (!dstfs.exists(parent)) {
                dstfs.mkdirs(parent);
            }
            logPath = new Path(parent, filename);
        } else {
            logPath = new Path(args.dst, filename);
        }
    }
    FileOutputFormat.setOutputPath(jobConf, logPath);

    // create src list, dst list
    FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

    Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
    jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
    SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
        LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

    Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
    SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
        Text.class, Text.class, SequenceFile.CompressionType.NONE);

    Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
    jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
    SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
        Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

    // handle the case where the destination directory doesn't exist
    // and we've only a single src directory OR we're updating/overwriting
    // the contents of the destination directory.
    final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
    int srcCount = 0, cnsyncf = 0, dirsyn = 0;
    long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
    try {
        for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
            final Path src = srcItr.next();
            FileSystem srcfs = src.getFileSystem(conf);
            FileStatus srcfilestat = srcfs.getFileStatus(src);
            Path root = special && srcfilestat.isDir() ? src : src.getParent();
            if (srcfilestat.isDir()) {
                ++srcCount;
            }

            Stack<FileStatus> pathstack = new Stack<>();
            for (pathstack.push(srcfilestat); !pathstack.empty();) {
                FileStatus cur = pathstack.pop();
                FileStatus[] children = srcfs.listStatus(cur.getPath());
                for (int i = 0; i < children.length; i++) {
                    boolean skipfile = false;
                    final FileStatus child = children[i];
                    final String dst = makeRelative(root, child.getPath());
                    ++srcCount;

                    if (child.isDir()) {
                        pathstack.push(child);
                    } else {
                        // skip file if the src and the dst files are the same.
                        skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
                        // skip file if it exceed file limit or size limit
                        skipfile |= fileCount == args.filelimit
                            || byteCount + child.getLen() > args.sizelimit;

                        if (!skipfile) {
                            ++fileCount;
                            byteCount += child.getLen();

                            // if (LOG.isTraceEnabled()) {
                            //     LOG.trace("adding file " + child.getPath());
                            // }

                            ++cnsyncf;
                            cbsyncs += child.getLen();
                            if (cnsyncf > SYNC_FILE_MAX || cbsyncs > maxBytesPerMap) {
                                src_writer.sync();
                                dst_writer.sync();
                                cnsyncf = 0;
                                cbsyncs = 0L;
                            }
                        }
                    }

                    if (!skipfile) {
                        src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                            new FilePair(child, dst));
                    }

                    dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
                }

                if (cur.isDir()) {
                    String dst = makeRelative(root, cur.getPath());
                    dir_writer.append(new Text(dst), new FilePair(cur, dst));
                    if (++dirsyn > SYNC_FILE_MAX) {
                        dirsyn = 0;
                        dir_writer.sync();
                    }
                }
            }
        }
    } finally {
        checkAndClose(src_writer);
        checkAndClose(dst_writer);
        checkAndClose(dir_writer);
    }

    FileStatus dststatus = null;
    try {
        dststatus = dstfs.getFileStatus(args.dst);
    } catch (FileNotFoundException fnfe) {
        getLogger().info(args.dst + " does not exist.");
    }

    // create dest path dir if copying > 1 file
    if (dststatus == null) {
        if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
            throw new IOException("Failed to create" + args.dst);
        }
    }

    final Path sorted = new Path(jobDirectory, "_distcp_sorted");
    checkDuplication(jobfs, dstfilelist, sorted, conf);

    if (dststatus != null && args.flags.contains(Options.DELETE)) {
        deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
    }

    Path tmpDir = new Path(
        (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
        "_distcp_tmp_" + randomId);
    jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());

    // Explicitly create the tmpDir to ensure that it can be cleaned
    // up by fullyDelete() later.
    tmpDir.getFileSystem(conf).mkdirs(tmpDir);

    getLogger().info("srcCount=" + srcCount);
    jobConf.setInt(SRC_COUNT_LABEL, srcCount);
    jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
    setMapCount(byteCount, jobConf);
}
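The null check on args.dst.getParent() in the example above is the detail worth isolating: when the destination is a filesystem root, getParent() returns null, so the code falls back to using the path itself before calling mkdirs(). A condensed, hypothetical sketch of that guard follows; ensureParentDir and its arguments are illustrative helpers, not part of DistCp.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class ParentDirGuard {
    // Assumed helper, not part of DistCp: ensure a path's parent directory exists,
    // treating a root path (where getParent() is null) as its own parent.
    static Path ensureParentDir(FileSystem fs, Path dst) throws IOException {
        Path parent = dst.getParent();
        if (parent == null) {
            // dst is a root such as "/" or "s3://bucket/"; use it as its own parent.
            parent = dst;
        }
        if (!fs.exists(parent)) {
            fs.mkdirs(parent);
        }
        return parent;
    }
}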
From source file:fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils.java
License:LGPL
/**
 * Create a new path with the same parent directory and basename but with
 * another extension.
 * @param path base path to use
 * @param extension extension to add
 * @return a new Path object
 */
public static Path newPathWithOtherExtension(final Path path, final String extension) {

    if (path == null) {
        throw new NullPointerException("Path is null");
    }

    if (extension == null) {
        throw new NullPointerException("Extension is null");
    }

    return new Path(path.getParent(), StringUtils.basename(path.getName()) + extension);
}
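A possible call site for the helper above, with made-up file names and assuming this StringUtils.basename() strips the original extension ("sample1.fastq" becomes "sample1"): the result keeps the same parent directory via getParent() while swapping the extension.

Path input = new Path("hdfs://cluster/work/sample1.fastq");
// Under the stated assumption, output is hdfs://cluster/work/sample1.sam,
// i.e. the same parent directory with a different extension.
Path output = PathUtils.newPathWithOtherExtension(input, ".sam");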
From source file:gobblin.cluster.GobblinHelixJobLauncher.java
License:Apache License
/**
 * Persist a single {@link WorkUnit} (flattened) to a file.
 */
private String persistWorkUnit(final Path workUnitFileDir, final WorkUnit workUnit,
        ParallelRunner stateSerDeRunner) throws IOException {
    final StateStore stateStore;
    String workUnitFileName = workUnit.getId();

    if (workUnit instanceof MultiWorkUnit) {
        workUnitFileName += MULTI_WORK_UNIT_FILE_EXTENSION;
        stateStore = stateStores.mwuStateStore;
    } else {
        workUnitFileName += WORK_UNIT_FILE_EXTENSION;
        stateStore = stateStores.wuStateStore;
    }

    Path workUnitFile = new Path(workUnitFileDir, workUnitFileName);
    final String fileName = workUnitFile.getName();
    final String storeName = workUnitFile.getParent().getName();
    stateSerDeRunner.submitCallable(new Callable<Void>() {
        @Override
        public Void call() throws Exception {
            stateStore.put(storeName, fileName, workUnit);
            return null;
        }
    }, "Serialize state to store " + storeName + " file " + fileName);

    return workUnitFile.toString();
}
From source file:gobblin.cluster.GobblinHelixTask.java
License:Apache License
@Override
public TaskResult run() {
    SharedResourcesBroker<GobblinScopeTypes> globalBroker = null;
    try (Closer closer = Closer.create()) {
        closer.register(MDC.putCloseable(ConfigurationKeys.JOB_NAME_KEY, this.jobName));
        closer.register(MDC.putCloseable(ConfigurationKeys.JOB_KEY_KEY, this.jobKey));

        Path workUnitFilePath = new Path(
            this.taskConfig.getConfigMap().get(GobblinClusterConfigurationKeys.WORK_UNIT_FILE_PATH));

        String fileName = workUnitFilePath.getName();
        String storeName = workUnitFilePath.getParent().getName();
        WorkUnit workUnit;

        if (workUnitFilePath.getName().endsWith(AbstractJobLauncher.MULTI_WORK_UNIT_FILE_EXTENSION)) {
            workUnit = stateStores.mwuStateStore.getAll(storeName, fileName).get(0);
        } else {
            workUnit = stateStores.wuStateStore.getAll(storeName, fileName).get(0);
        }

        // The list of individual WorkUnits (flattened) to run
        List<WorkUnit> workUnits = Lists.newArrayList();

        if (workUnit instanceof MultiWorkUnit) {
            // Flatten the MultiWorkUnit so the job configuration properties can be added to each individual WorkUnits
            List<WorkUnit> flattenedWorkUnits =
                JobLauncherUtils.flattenWorkUnits(((MultiWorkUnit) workUnit).getWorkUnits());
            workUnits.addAll(flattenedWorkUnits);
        } else {
            workUnits.add(workUnit);
        }

        globalBroker = SharedResourcesBrokerFactory.createDefaultTopLevelBroker(
            ConfigFactory.parseProperties(this.jobState.getProperties()),
            GobblinScopeTypes.GLOBAL.defaultScopeInstance());
        SharedResourcesBroker<GobblinScopeTypes> jobBroker = globalBroker
            .newSubscopedBuilder(new JobScopeInstance(this.jobState.getJobName(), this.jobState.getJobId()))
            .build();

        GobblinMultiTaskAttempt.runWorkUnits(this.jobId, this.participantId, this.jobState, workUnits,
            this.taskStateTracker, this.taskExecutor, this.stateStores.taskStateStore,
            GobblinMultiTaskAttempt.CommitPolicy.IMMEDIATE, jobBroker);

        return new TaskResult(TaskResult.Status.COMPLETED,
            String.format("completed tasks: %d", workUnits.size()));
    } catch (InterruptedException ie) {
        Thread.currentThread().interrupt();
        return new TaskResult(TaskResult.Status.CANCELED, "");
    } catch (Throwable t) {
        LOGGER.error("GobblinHelixTask failed due to " + t.getMessage(), t);
        return new TaskResult(TaskResult.Status.ERROR, Throwables.getStackTraceAsString(t));
    } finally {
        if (globalBroker != null) {
            try {
                globalBroker.close();
            } catch (IOException ioe) {
                LOGGER.error("Could not close shared resources broker.", ioe);
            }
        }
    }
}
From source file:gobblin.compaction.mapreduce.MRCompactor.java
License:Apache License
/**
 * Rename all the source directories for a specific dataset
 */
public static void renameSourceDirAsCompactionComplete(FileSystem fs, Dataset dataset) {
    try {
        for (Path path : dataset.getRenamePaths()) {
            Path newPath = new Path(path.getParent(),
                path.getName() + MRCompactor.COMPACTION_RENAME_SOURCE_DIR_SUFFIX);
            LOG.info("[{}] Renaming {} to {}", dataset.getDatasetName(), path, newPath);
            fs.rename(path, newPath);
        }
    } catch (Exception e) {
        LOG.error("Rename input path failed", e);
    }
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunnerFilenameRecordCountProviderTest.java
License:Apache License
@Test
public void testFileNameRecordCountProvider() throws IOException {
    String originalFilename = "test.123.avro";
    String suffixPattern = Pattern.quote(".late") + "[\\d]*";

    Path testDir = new Path("/tmp/compactorFilenameRecordCountProviderTest");
    FileSystem fs = FileSystem.getLocal(new Configuration());
    try {
        if (fs.exists(testDir)) {
            fs.delete(testDir, true);
        }
        fs.mkdirs(testDir);

        RecordCountProvider originFileNameFormat = new IngestionRecordCountProvider();
        LateFileRecordCountProvider lateFileRecordCountProvider =
            new LateFileRecordCountProvider(originFileNameFormat);

        Path firstOutput = lateFileRecordCountProvider.constructLateFilePath(originalFilename, fs, testDir);
        Assert.assertEquals(new Path(testDir, originalFilename), firstOutput);
        Assert.assertEquals(123, lateFileRecordCountProvider.getRecordCount(firstOutput));

        fs.create(firstOutput);
        Pattern pattern1 = Pattern.compile(
            Pattern.quote(Files.getNameWithoutExtension(originalFilename)) + suffixPattern + "\\.avro");
        Path secondOutput = lateFileRecordCountProvider.constructLateFilePath(firstOutput.getName(), fs, testDir);
        Assert.assertEquals(testDir, secondOutput.getParent());
        Assert.assertTrue(pattern1.matcher(secondOutput.getName()).matches());
        Assert.assertEquals(123, lateFileRecordCountProvider.getRecordCount(secondOutput));

        fs.create(secondOutput);
        Pattern pattern2 = Pattern.compile(
            Files.getNameWithoutExtension(originalFilename) + suffixPattern + suffixPattern + "\\.avro");
        Path thirdOutput = lateFileRecordCountProvider.constructLateFilePath(secondOutput.getName(), fs, testDir);
        Assert.assertEquals(testDir, thirdOutput.getParent());
        Assert.assertTrue(pattern2.matcher(thirdOutput.getName()).matches());
        Assert.assertEquals(123, lateFileRecordCountProvider.getRecordCount(thirdOutput));
    } finally {
        fs.delete(testDir, true);
    }
}
From source file:gobblin.compliance.restore.RestorableHivePartitionDataset.java
License:Apache License
public void restore() throws IOException {
    State state = new State(this.state);
    this.datasetOwnerFs = ProxyUtils.getOwnerFs(state, this.datasetOwner);
    try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.datasetOwner,
        this.datasetToRestoreOwner, this.trashOwner)) {
        if (this.state.getPropAsBoolean(ComplianceConfigurationKeys.COMPLIANCE_JOB_SIMULATE,
            ComplianceConfigurationKeys.DEFAULT_COMPLIANCE_JOB_SIMULATE)) {
            log.info("Simulating restore of " + datasetURN() + " with " + this.datasetToRestore.datasetURN());
            return;
        }

        Path trashPartitionLocation = getTrashPartitionLocation();
        executeTrashTableQueries(queryExecutor);
        this.datasetOwnerFs.mkdirs(trashPartitionLocation.getParent());
        this.datasetOwnerFs.rename(getLocation(), trashPartitionLocation);
        FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
        HadoopUtils.setPermissions(trashPartitionLocation.getParent(), this.datasetOwner, this.trashOwner,
            this.datasetOwnerFs, permission);
        log.info("Moved dataset " + datasetURN() + " from " + getLocation() + " to trash location "
            + trashPartitionLocation);
        this.datasetOwnerFs.mkdirs(getLocation().getParent());
        this.datasetOwnerFs.rename(this.datasetToRestore.getLocation(), getLocation().getParent());
        HadoopUtils.setPermissions(getLocation().getParent(), this.datasetOwner, this.trashOwner,
            this.datasetOwnerFs, permission);
        log.info("Moved data from backup " + this.datasetToRestore.getLocation() + " to location "
            + getLocation());
        executeDropPartitionQueries(queryExecutor);
    }
}
From source file:gobblin.compliance.retention.HivePartitionVersionRetentionReaper.java
License:Apache License
/**
 * If simulate is set to true, will simply return.
 * If a version is pointing to a non-existing location, then drop the partition and close the jdbc connection.
 * If a version is pointing to the same location as the dataset, then drop the partition and close the jdbc connection.
 * If a version is staging, its data will be deleted and its metadata is dropped.
 * If a version is a backup, its data will be moved to a backup dir, the current metadata will be dropped, and it
 * will be registered in the backup db.
 */
@Override
public void clean() throws IOException {
    Path versionLocation = ((HivePartitionRetentionVersion) this.datasetVersion).getLocation();
    Path datasetLocation = ((CleanableHivePartitionDataset) this.cleanableDataset).getLocation();
    String completeName = ((HivePartitionRetentionVersion) this.datasetVersion).datasetURN();
    State state = new State(this.state);
    this.versionOwnerFs = ProxyUtils.getOwnerFs(state, this.versionOwner);
    try (HiveProxyQueryExecutor queryExecutor = ProxyUtils.getQueryExecutor(state, this.versionOwner,
        this.backUpOwner)) {
        Path newVersionLocation = getNewVersionLocation();

        if (!this.versionOwnerFs.exists(versionLocation)) {
            log.info("Data versionLocation doesn't exist. Metadata will be dropped for the version "
                + completeName);
        } else if (datasetLocation.toString().equalsIgnoreCase(versionLocation.toString())) {
            log.info(
                "Dataset location is same as version location. Won't delete the data but metadata will be dropped for the version "
                    + completeName);
        } else if (this.simulate) {
            log.info("Simulate is set to true. Won't move the version " + completeName);
            return;
        } else if (completeName.contains(ComplianceConfigurationKeys.STAGING)) {
            log.info("Deleting data from version " + completeName);
            this.versionOwnerFs.delete(versionLocation, true);
        } else if (completeName.contains(ComplianceConfigurationKeys.BACKUP)) {
            executeAlterQueries(queryExecutor);
            log.info("Creating new dir " + newVersionLocation.getParent().toString());
            this.versionOwnerFs.mkdirs(newVersionLocation.getParent());
            log.info("Moving data from " + versionLocation + " to " + newVersionLocation);
            this.versionOwnerFs.rename(versionLocation, newVersionLocation);
            FsPermission permission = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.NONE);
            HadoopUtils.setPermissions(newVersionLocation.getParent(), this.versionOwner, this.backUpOwner,
                this.versionOwnerFs, permission);
        }
        executeDropVersionQueries(queryExecutor);
    }
}
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStore.java
License:Apache License
/**
 * Deploy configs provided by {@link FsDeploymentConfig#getDeployableConfigSource()} to HDFS.
 * For each {@link ConfigStream} returned by {@link DeployableConfigSource#getConfigStreams()}, creates a resource on HDFS.
 * <br>
 * <ul> Does the following:
 * <li> Read {@link ConfigStream}s and write them to HDFS
 * <li> Create parent directories of {@link ConfigStream#getConfigPath()} if required
 * <li> Set {@link FsDeploymentConfig#getStorePermissions()} on all resources created on HDFS
 * <li> Update current active version in the store metadata file.
 * </ul>
 *
 * <p>
 * For example: If "test-root" is a resource in classpath and all resources under it need to be deployed,
 * <br>
 * <b>In Classpath:</b><br>
 * <blockquote> <code>
 * test-root<br>
 *  /data<br>
 *   /set1<br>
 *    /main.conf<br>
 *  /tag<br>
 *   /tag1<br>
 *    /main.conf<br>
 * </code> </blockquote>
 * </p>
 *
 * <p>
 * A new version 2.0.0 {@link FsDeploymentConfig#getNewVersion()} is created on HDFS under
 * <code>this.physicalStoreRoot/_CONFIG_STORE</code>
 * <br>
 * <b>On HDFS after deploy:</b><br>
 * <blockquote> <code>
 * /_CONFIG_STORE<br>
 *  /2.0.0<br>
 *   /data<br>
 *    /set1<br>
 *     /main.conf<br>
 *   /tag<br>
 *    /tag1<br>
 *     /main.conf<br>
 * </code> </blockquote>
 * </p>
 */
@Override
public void deploy(FsDeploymentConfig deploymentConfig) throws IOException {
    log.info("Deploying with config : " + deploymentConfig);
    Path hdfsconfigStoreRoot = new Path(this.physicalStoreRoot.getPath(), CONFIG_STORE_NAME);

    if (!this.fs.exists(hdfsconfigStoreRoot)) {
        throw new IOException("Config store root not present at " + this.physicalStoreRoot.getPath());
    }

    Path hdfsNewVersionPath = new Path(hdfsconfigStoreRoot, deploymentConfig.getNewVersion());

    if (!this.fs.exists(hdfsNewVersionPath)) {
        this.fs.mkdirs(hdfsNewVersionPath, deploymentConfig.getStorePermissions());

        Set<ConfigStream> confStreams = deploymentConfig.getDeployableConfigSource().getConfigStreams();

        for (ConfigStream confStream : confStreams) {
            String confAtPath = confStream.getConfigPath();
            log.info("Copying resource at : " + confAtPath);
            Path hdsfConfPath = new Path(hdfsNewVersionPath, confAtPath);

            if (!this.fs.exists(hdsfConfPath.getParent())) {
                this.fs.mkdirs(hdsfConfPath.getParent());
            }

            // If an empty directory needs to be created it may not have a stream.
            if (confStream.getInputStream().isPresent()) {
                // Read the resource as a stream from the classpath and write it to HDFS
                try (SeekableFSInputStream inputStream = new SeekableFSInputStream(
                        confStream.getInputStream().get());
                     FSDataOutputStream os = this.fs.create(hdsfConfPath, false)) {
                    StreamUtils.copy(inputStream, os);
                }
            }
        }

        // Set permission for newly copied files
        for (FileStatus fileStatus : FileListUtils.listPathsRecursively(this.fs, hdfsNewVersionPath,
            FileListUtils.NO_OP_PATH_FILTER)) {
            this.fs.setPermission(fileStatus.getPath(), deploymentConfig.getStorePermissions());
        }
    } else {
        log.warn(String.format(
            "STORE WITH VERSION %s ALREADY EXISTS. NEW RESOURCES WILL NOT BE COPIED. ONLY STORE MEATADATA FILE WILL BE UPDATED TO %s",
            deploymentConfig.getNewVersion(), deploymentConfig.getNewVersion()));
    }

    this.storeMetadata.setCurrentVersion(deploymentConfig.getNewVersion());

    log.info(String.format("New version %s of config store deployed at %s", deploymentConfig.getNewVersion(),
        hdfsconfigStoreRoot));
}
From source file:gobblin.config.store.hdfs.SimpleHadoopFilesystemConfigStoreFactory.java
License:Apache License
/**
 * This method determines the physical location of the {@link SimpleHadoopFilesystemConfigStore} root directory on HDFS. It does
 * this by taking the {@link URI} given by the user and back-tracing the path. It checks if each parent directory
 * contains the folder {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME}. It then assumes this {@link Path} is the root
 * directory.
 *
 * <p>
 * If the given configKey does not have an authority, then this method assumes the given {@link URI#getPath()} does
 * not contain the dataset root. In which case it uses the {@link #getDefaultRootDir()} as the root directory. If
 * the default root dir does not contain the {@link SimpleHadoopFilesystemConfigStore#CONFIG_STORE_NAME} then a
 * {@link ConfigStoreCreationException} is thrown.
 * </p>
 */
private URI getStoreRoot(FileSystem fs, URI configKey) throws ConfigStoreCreationException {
    if (Strings.isNullOrEmpty(configKey.getAuthority())) {
        if (getDefaultStoreURILazy() != null) {
            return getDefaultStoreURILazy();
        } else if (isAuthorityRequired()) {
            throw new ConfigStoreCreationException(configKey, "No default store has been configured.");
        }
    }

    Path path = new Path(configKey.getPath());

    while (path != null) {
        try {
            // the absolute URI may point to a non-existent path for
            // 1. a phantom node
            // 2. a URI that did not specify the version
            if (fs.exists(path)) {
                for (FileStatus fileStatus : fs.listStatus(path)) {
                    if (fileStatus.isDirectory() && fileStatus.getPath().getName()
                            .equals(SimpleHadoopFilesystemConfigStore.CONFIG_STORE_NAME)) {
                        return fs.getUri().resolve(fileStatus.getPath().getParent().toUri());
                    }
                }
            }
        } catch (IOException e) {
            throw new ConfigStoreCreationException(configKey, e);
        }

        path = path.getParent();
    }

    throw new ConfigStoreCreationException(configKey, "Cannot find the store root!");
}
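The loop above illustrates a reusable idiom: because getParent() eventually returns null at the filesystem root, it can drive a simple upward walk over a path's ancestors. A minimal, hypothetical sketch of that idiom follows; PathAncestors, findAncestorContaining, and the marker name are assumptions for illustration, not part of Gobblin or Hadoop.

import java.io.IOException;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class PathAncestors {
    // Walk from 'start' up to the root, returning the first ancestor that
    // contains a child directory named 'marker', or null if none does.
    static Path findAncestorContaining(FileSystem fs, Path start, String marker) throws IOException {
        for (Path p = start; p != null; p = p.getParent()) {
            Path candidate = new Path(p, marker);
            if (fs.exists(candidate) && fs.getFileStatus(candidate).isDirectory()) {
                return p;
            }
        }
        return null; // getParent() returned null: walked past the root without a match
    }
}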