List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
From source file:org.apache.gobblin.publisher.TimePartitionedDataPublisher.java
License:Apache License
/** * This method needs to be overridden for TimePartitionedDataPublisher, since the output folder structure * contains timestamp, we have to move the files recursively. * * For example, move {writerOutput}/2015/04/08/15/output.avro to {publisherOutput}/2015/04/08/15/output.avro *///from w ww . j av a 2 s. c o m @Override protected void addWriterOutputToExistingDir(Path writerOutput, Path publisherOutput, WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException { for (FileStatus status : FileListUtils.listFilesRecursively(this.writerFileSystemByBranches.get(branchId), writerOutput)) { String filePathStr = status.getPath().toString(); String pathSuffix = filePathStr .substring(filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1); Path outputPath = new Path(publisherOutput, pathSuffix); WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId), outputPath.getParent(), this.permissions.get(branchId), this.retrierConfig); movePath(parallelRunner, workUnitState, status.getPath(), outputPath, branchId); } }
From source file:org.apache.gobblin.publisher.TimestampDataPublisher.java
License:Apache License
/** * Update destination path to put db and table name in format "dbname.tablename" using {@link #getDbTableName(String)} * and include timestamp/*from ww w .j a v a2 s .c o m*/ * * Input dst format: {finaldir}/{schemaName} * Output dst format: {finaldir}/{dbname.tablename}/{currenttimestamp} */ @Override protected void movePath(ParallelRunner parallelRunner, State state, Path src, Path dst, int branchId) throws IOException { String outputDir = dst.getParent().toString(); String schemaName = dst.getName(); Path newDst = new Path(new Path(outputDir, getDbTableName(schemaName)), timestamp); if (!this.publisherFileSystemByBranches.get(branchId).exists(newDst)) { WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId), newDst.getParent(), this.permissions.get(branchId), this.retrierConfig); } super.movePath(parallelRunner, state, src, newDst, branchId); }
From source file:org.apache.gobblin.runtime.embedded.EmbeddedGobblin.java
License:Apache License
/** * Launch the Gobblin job asynchronously. This method will return when the Gobblin job has started. * @return a {@link JobExecutionDriver}. This object is a future that will resolve when the Gobblin job finishes. * @throws TimeoutException if the Gobblin job does not start within the launch timeout. *///from ww w. java 2 s . co m @NotOnCli public JobExecutionDriver runAsync() throws TimeoutException, InterruptedException { // Run function to distribute jars to workers in distributed mode this.distributeJarsFunction.run(); Config sysProps = ConfigFactory.parseMap(this.builtConfigMap).withFallback(this.defaultSysConfig); Config userConfig = ConfigFactory.parseMap(this.userConfigMap); JobSpec jobSpec; if (this.jobFile.isPresent()) { try { Path jobFilePath = this.jobFile.get(); PullFileLoader loader = new PullFileLoader(jobFilePath.getParent(), jobFilePath.getFileSystem(new Configuration()), PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS, PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS); Config jobConfig = userConfig.withFallback(loader.loadPullFile(jobFilePath, sysProps, false)); ImmutableFSJobCatalog.JobSpecConverter converter = new ImmutableFSJobCatalog.JobSpecConverter( jobFilePath.getParent(), Optional.<String>absent()); jobSpec = converter.apply(jobConfig); } catch (IOException ioe) { throw new RuntimeException("Failed to run embedded Gobblin.", ioe); } } else { Config finalConfig = userConfig.withFallback(sysProps); if (this.template != null) { try { finalConfig = this.template.getResolvedConfig(finalConfig); } catch (SpecNotFoundException | JobTemplate.TemplateException exc) { throw new RuntimeException(exc); } } jobSpec = this.specBuilder.withConfig(finalConfig).build(); } ResolvedJobSpec resolvedJobSpec; try { resolvedJobSpec = new ResolvedJobSpec(jobSpec); } catch (SpecNotFoundException | JobTemplate.TemplateException exc) { throw new RuntimeException("Failed to resolved template.", exc); } final JobCatalog jobCatalog = new 
StaticJobCatalog(Optional.of(this.useLog), Lists.<JobSpec>newArrayList(resolvedJobSpec)); SimpleGobblinInstanceEnvironment instanceEnvironment = new SimpleGobblinInstanceEnvironment( "EmbeddedGobblinInstance", this.useLog, getSysConfig()); StandardGobblinInstanceDriver.Builder builder = new StandardGobblinInstanceDriver.Builder( Optional.<GobblinInstanceEnvironment>of(instanceEnvironment)).withLog(this.useLog) .withJobCatalog(jobCatalog).withImmediateJobScheduler(); for (GobblinInstancePluginFactory plugin : this.plugins) { builder.addPlugin(plugin); } final GobblinInstanceDriver driver = builder.build(); EmbeddedJobLifecycleListener listener = new EmbeddedJobLifecycleListener(this.useLog); driver.registerJobLifecycleListener(listener); driver.startAsync(); boolean started = listener.awaitStarted(this.launchTimeout.getTimeout(), this.launchTimeout.getTimeUnit()); if (!started) { dumpJStackOnTimeout("Launch"); log.warn("Timeout waiting for job to start. Aborting."); driver.stopAsync(); driver.awaitTerminated(this.shutdownTimeout.getTimeout(), this.shutdownTimeout.getTimeUnit()); throw new TimeoutException("Timeout waiting for job to start."); } final JobExecutionDriver jobDriver = listener.getJobDriver(); // Stop the Gobblin instance driver when the job finishes. Futures.addCallback(jobDriver, new FutureCallback<JobExecutionResult>() { @Override public void onSuccess(@Nullable JobExecutionResult result) { stopGobblinInstanceDriver(); } @Override public void onFailure(Throwable t) { stopGobblinInstanceDriver(); } private void stopGobblinInstanceDriver() { try { driver.stopAsync(); driver.awaitTerminated(EmbeddedGobblin.this.shutdownTimeout.getTimeout(), EmbeddedGobblin.this.shutdownTimeout.getTimeUnit()); } catch (TimeoutException te) { dumpJStackOnTimeout("stop gobblin instance driver"); log.error("Failed to shutdown Gobblin instance driver."); } } }); return listener.getJobDriver(); }
From source file:org.apache.gobblin.runtime.spec_store.FSSpecStore.java
License:Apache License
/**
 * Derives the spec group from a spec path: the name of the path's parent directory.
 *
 * @param specUri path of the spec
 * @return empty string for topology spec, as topologies do not have a group,
 *         group name for flow spec
 */
public static String getSpecGroup(Path specUri) {
  Path groupDir = specUri.getParent();
  return groupDir.getName();
}
From source file:org.apache.gobblin.service.modules.core.GitConfigMonitor.java
License:Apache License
/** * check whether the file has the proper naming and hierarchy * @param configFilePath the relative path from the repo root * @return false if the file does not conform */// w w w . j av a 2 s .c om private boolean checkConfigFilePath(String configFilePath) { // The config needs to stored at configDir/flowGroup/flowName.(pull|job|json|conf) Path configFile = new Path(configFilePath); String fileExtension = Files.getFileExtension(configFile.getName()); if (configFile.depth() != CONFIG_FILE_DEPTH || !configFile.getParent().getParent().getName().equals(folderName) || !(PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS.contains(fileExtension) || PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS.contains(fileExtension))) { log.warn("Changed file does not conform to directory structure and file name format, skipping: " + configFilePath); return false; } return true; }
From source file:org.apache.gobblin.service.modules.core.GitFlowGraphMonitor.java
License:Apache License
/**
 * Helper to check if a file has proper hierarchy.
 *
 * Walks {@code depth - 1} levels up from {@code filePath} and checks that the directory reached is
 * named {@code folderName}.
 *
 * @param filePath path of the node/edge file
 * @param depth expected depth of the file
 * @return true if the file conforms to the expected hierarchy; false if {@code filePath} is null,
 *         if the path has fewer ancestors than {@code depth} requires, or if the ancestor's name differs
 */
private boolean checkFileLevelRelativeToRoot(Path filePath, int depth) {
  if (filePath == null) {
    return false;
  }

  Path path = filePath;
  for (int i = 0; i < depth - 1; i++) {
    path = path.getParent();
    if (path == null) {
      // Walked past the root: the file is shallower than expected, so it cannot conform.
      return false;
    }
  }

  return path.getName().equals(folderName);
}
From source file:org.apache.gobblin.service.modules.core.GitFlowGraphMonitor.java
License:Apache License
/** * Helper that overrides the data.node.id property with name derived from the node file path * @param nodeConfig node config/*from w w w . j av a2s . c o m*/ * @param nodeFilePath path of the node file * @return config with overridden data.node.id */ private Config getNodeConfigWithOverrides(Config nodeConfig, Path nodeFilePath) { String nodeId = nodeFilePath.getParent().getName(); return nodeConfig.withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY, ConfigValueFactory.fromAnyRef(nodeId)); }
From source file:org.apache.gobblin.service.modules.core.GitFlowGraphMonitor.java
License:Apache License
/** * Helper that overrides the flow edge properties with name derived from the edge file path * @param edgeConfig edge config/*from w w w . j av a 2s . co m*/ * @param edgeFilePath path of the edge file * @return config with overridden edge properties */ private Config getEdgeConfigWithOverrides(Config edgeConfig, Path edgeFilePath) { String source = edgeFilePath.getParent().getParent().getName(); String destination = edgeFilePath.getParent().getName(); String edgeName = Files.getNameWithoutExtension(edgeFilePath.getName()); return edgeConfig .withValue(FlowGraphConfigurationKeys.FLOW_EDGE_SOURCE_KEY, ConfigValueFactory.fromAnyRef(source)) .withValue(FlowGraphConfigurationKeys.FLOW_EDGE_DESTINATION_KEY, ConfigValueFactory.fromAnyRef(destination)) .withValue(FlowGraphConfigurationKeys.FLOW_EDGE_ID_KEY, ConfigValueFactory.fromAnyRef(getEdgeId(source, destination, edgeName))); }
From source file:org.apache.gobblin.util.HadoopUtilsTest.java
License:Apache License
@Test public void testMoveToTrash() throws IOException { Path hadoopUtilsTestDir = new Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir"); Configuration conf = new Configuration(); // Set the time to keep it in trash to 10 minutes. // 0 means object will be deleted instantly. conf.set("fs.trash.interval", "10"); FileSystem fs = FileSystem.getLocal(conf); Trash trash = new Trash(fs, conf); TrashPolicy trashPolicy = TrashPolicy.getInstance(conf, fs, fs.getHomeDirectory()); Path trashPath = trashPolicy.getCurrentTrashDir(); fs.mkdirs(hadoopUtilsTestDir);/*from w w w . j ava2 s . co m*/ Assert.assertTrue(fs.exists(hadoopUtilsTestDir)); trash.moveToTrash(hadoopUtilsTestDir.getParent()); Assert.assertFalse(fs.exists(hadoopUtilsTestDir)); Assert.assertTrue(fs.exists(trashPath)); }
From source file:org.apache.gobblin.util.JobLauncherUtils.java
License:Apache License
/**
 * Cleanup staging data of all tasks of a job.
 *
 * Deletes the writer staging directory and the writer output directory (and each one's parent directory
 * if that parent is left empty). If a row-level error file is configured and error-dir cleaning is
 * enabled, the error path is deleted as well.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 * @throws IOException if a file system operation fails
 * @throws IllegalArgumentException if the writer staging or output directory property is missing
 */
public static void cleanJobStagingData(State state, Logger logger) throws IOException {
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR);
  Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR),
      "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR);

  String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI, ConfigurationKeys.LOCAL_FS_URI);
  FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

  Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR));
  logger.info("Cleaning up staging directory " + jobStagingPath);
  deletePathAndEmptyParent(fs, jobStagingPath, logger);

  Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
  logger.info("Cleaning up output directory " + jobOutputPath);
  deletePathAndEmptyParent(fs, jobOutputPath, logger);

  if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)
      && state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) {
    Path jobErrPath = new Path(state.getProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE));
    // Use the caller-supplied logger here too, so all cleanup messages end up in the same log.
    logger.info("Cleaning up err directory : " + jobErrPath);
    HadoopUtils.deleteIfExists(fs, jobErrPath, true);
  }
}

/**
 * Recursively deletes {@code path} and then its parent directory if the parent exists and is empty.
 */
private static void deletePathAndEmptyParent(FileSystem fs, Path path, Logger logger) throws IOException {
  HadoopUtils.deletePath(fs, path, true);

  // getParent() yields null for a root path; there is nothing further to clean up in that case.
  Path parent = path.getParent();
  if (parent != null && fs.exists(parent) && fs.listStatus(parent).length == 0) {
    logger.info("Deleting directory " + parent);
    HadoopUtils.deletePath(fs, parent, true);
  }
}