Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path#getParent, collected from open-source projects.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
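
Before diving into the project examples, here is a minimal standalone sketch (not taken from any of the projects below) that demonstrates both halves of the contract: a non-root path yields its parent, and the root yields null.

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/data/2015/04/08/output.avro");
        System.out.println(file.getParent());          // prints /data/2015/04/08
        System.out.println(new Path("/").getParent()); // prints null: the root has no parent
    }
}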

Usage

From source file: org.apache.gobblin.publisher.TimePartitionedDataPublisher.java

License: Apache License

/**
 * This method needs to be overridden for TimePartitionedDataPublisher: since the output folder structure
 * contains a timestamp, the files have to be moved recursively.
 *
 * For example, move {writerOutput}/2015/04/08/15/output.avro to {publisherOutput}/2015/04/08/15/output.avro
 */
@Override
protected void addWriterOutputToExistingDir(Path writerOutput, Path publisherOutput,
        WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException {

    for (FileStatus status : FileListUtils.listFilesRecursively(this.writerFileSystemByBranches.get(branchId),
            writerOutput)) {
        String filePathStr = status.getPath().toString();
        String pathSuffix = filePathStr
                .substring(filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1);
        Path outputPath = new Path(publisherOutput, pathSuffix);

        WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId),
                outputPath.getParent(), this.permissions.get(branchId), this.retrierConfig);

        movePath(parallelRunner, workUnitState, status.getPath(), outputPath, branchId);
    }
}
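
To make the path arithmetic concrete, here is a minimal sketch with hypothetical writer and publisher directories. It assumes, as the code above does, that every listed file lives under writerOutput, so the suffix starts right after the writerOutput prefix; getParent() then yields the directory that must exist before the move.

    Path writerOutput = new Path("/writer-output");          // hypothetical
    Path publisherOutput = new Path("/publisher-output");    // hypothetical
    Path source = new Path("/writer-output/2015/04/08/15/output.avro");

    String pathSuffix = source.toString().substring(writerOutput.toString().length() + 1);
    Path outputPath = new Path(publisherOutput, pathSuffix); // /publisher-output/2015/04/08/15/output.avro
    Path dirToCreate = outputPath.getParent();               // /publisher-output/2015/04/08/15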

From source file: org.apache.gobblin.publisher.TimestampDataPublisher.java

License: Apache License

/**
 * Updates the destination path to put the db and table name in the format "dbname.tablename" using {@link #getDbTableName(String)}
 * and to include the timestamp
 *
 * Input dst format: {finaldir}/{schemaName}
 * Output dst format: {finaldir}/{dbname.tablename}/{currenttimestamp}
 */
@Override
protected void movePath(ParallelRunner parallelRunner, State state, Path src, Path dst, int branchId)
        throws IOException {

    String outputDir = dst.getParent().toString();
    String schemaName = dst.getName();
    Path newDst = new Path(new Path(outputDir, getDbTableName(schemaName)), timestamp);

    if (!this.publisherFileSystemByBranches.get(branchId).exists(newDst)) {
        WriterUtils.mkdirsWithRecursivePermissionWithRetry(this.publisherFileSystemByBranches.get(branchId),
                newDst.getParent(), this.permissions.get(branchId), this.retrierConfig);
    }

    super.movePath(parallelRunner, state, src, newDst, branchId);
}
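
With a hypothetical destination path, the decomposition at the top of the method looks as follows: getParent() keeps the final directory, while getName() isolates the schema name that is rewritten into the "dbname.tablename" form.

    Path dst = new Path("/final-dir/mySchema");    // {finaldir}/{schemaName}, hypothetical
    String outputDir = dst.getParent().toString(); // "/final-dir"
    String schemaName = dst.getName();             // "mySchema"
    // newDst then becomes /final-dir/{dbname.tablename}/{currenttimestamp}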

From source file: org.apache.gobblin.runtime.embedded.EmbeddedGobblin.java

License: Apache License

/**
 * Launch the Gobblin job asynchronously. This method will return when the Gobblin job has started.
 * @return a {@link JobExecutionDriver}. This object is a future that will resolve when the Gobblin job finishes.
 * @throws TimeoutException if the Gobblin job does not start within the launch timeout.
 */
@NotOnCli
public JobExecutionDriver runAsync() throws TimeoutException, InterruptedException {
    // Run function to distribute jars to workers in distributed mode
    this.distributeJarsFunction.run();

    Config sysProps = ConfigFactory.parseMap(this.builtConfigMap).withFallback(this.defaultSysConfig);
    Config userConfig = ConfigFactory.parseMap(this.userConfigMap);

    JobSpec jobSpec;
    if (this.jobFile.isPresent()) {
        try {
            Path jobFilePath = this.jobFile.get();
            PullFileLoader loader = new PullFileLoader(jobFilePath.getParent(),
                    jobFilePath.getFileSystem(new Configuration()),
                    PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS,
                    PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
            Config jobConfig = userConfig.withFallback(loader.loadPullFile(jobFilePath, sysProps, false));
            ImmutableFSJobCatalog.JobSpecConverter converter = new ImmutableFSJobCatalog.JobSpecConverter(
                    jobFilePath.getParent(), Optional.<String>absent());
            jobSpec = converter.apply(jobConfig);
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to run embedded Gobblin.", ioe);
        }
    } else {
        Config finalConfig = userConfig.withFallback(sysProps);
        if (this.template != null) {
            try {
                finalConfig = this.template.getResolvedConfig(finalConfig);
            } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
                throw new RuntimeException(exc);
            }
        }
        jobSpec = this.specBuilder.withConfig(finalConfig).build();
    }

    ResolvedJobSpec resolvedJobSpec;
    try {
        resolvedJobSpec = new ResolvedJobSpec(jobSpec);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new RuntimeException("Failed to resolved template.", exc);
    }
    final JobCatalog jobCatalog = new StaticJobCatalog(Optional.of(this.useLog),
            Lists.<JobSpec>newArrayList(resolvedJobSpec));

    SimpleGobblinInstanceEnvironment instanceEnvironment = new SimpleGobblinInstanceEnvironment(
            "EmbeddedGobblinInstance", this.useLog, getSysConfig());

    StandardGobblinInstanceDriver.Builder builder = new StandardGobblinInstanceDriver.Builder(
            Optional.<GobblinInstanceEnvironment>of(instanceEnvironment)).withLog(this.useLog)
                    .withJobCatalog(jobCatalog).withImmediateJobScheduler();

    for (GobblinInstancePluginFactory plugin : this.plugins) {
        builder.addPlugin(plugin);
    }

    final GobblinInstanceDriver driver = builder.build();

    EmbeddedJobLifecycleListener listener = new EmbeddedJobLifecycleListener(this.useLog);
    driver.registerJobLifecycleListener(listener);

    driver.startAsync();

    boolean started = listener.awaitStarted(this.launchTimeout.getTimeout(), this.launchTimeout.getTimeUnit());
    if (!started) {
        dumpJStackOnTimeout("Launch");
        log.warn("Timeout waiting for job to start. Aborting.");
        driver.stopAsync();
        driver.awaitTerminated(this.shutdownTimeout.getTimeout(), this.shutdownTimeout.getTimeUnit());
        throw new TimeoutException("Timeout waiting for job to start.");
    }

    final JobExecutionDriver jobDriver = listener.getJobDriver();
    // Stop the Gobblin instance driver when the job finishes.
    Futures.addCallback(jobDriver, new FutureCallback<JobExecutionResult>() {
        @Override
        public void onSuccess(@Nullable JobExecutionResult result) {

            stopGobblinInstanceDriver();
        }

        @Override
        public void onFailure(Throwable t) {
            stopGobblinInstanceDriver();
        }

        private void stopGobblinInstanceDriver() {
            try {
                driver.stopAsync();
                driver.awaitTerminated(EmbeddedGobblin.this.shutdownTimeout.getTimeout(),
                        EmbeddedGobblin.this.shutdownTimeout.getTimeUnit());
            } catch (TimeoutException te) {
                dumpJStackOnTimeout("stop gobblin instance driver");
                log.error("Failed to shutdown Gobblin instance driver.");
            }
        }
    });

    return listener.getJobDriver();
}
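
The getParent() call here has one narrow job: the directory containing the pull file becomes the root against which PullFileLoader resolves job files. A hypothetical job file makes the mapping explicit.

    Path jobFilePath = new Path("/jobs/team-a/my-job.pull"); // hypothetical
    Path loaderRoot = jobFilePath.getParent();               // /jobs/team-a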

From source file: org.apache.gobblin.runtime.spec_store.FSSpecStore.java

License: Apache License

/**
 * @param specUri path of the spec
 * @return empty string for topology spec, as topologies do not have a group,
 *         group name for flow spec
 */
public static String getSpecGroup(Path specUri) {
    return specUri.getParent().getName();
}
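
Assuming flow specs are stored as {storeRoot}/{group}/{name}, the group is simply the name of the enclosing directory. The topology case in the javadoc also falls out of Path semantics: for a spec stored directly under the filesystem root, getParent() is the root path, whose getName() is the empty string.

    Path specUri = new Path("/specs/salesGroup/dailyFlow"); // hypothetical flow spec
    String group = specUri.getParent().getName();           // "salesGroup"
    new Path("/topologyA").getParent().getName();           // "" for a spec at the root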

From source file: org.apache.gobblin.service.modules.core.GitConfigMonitor.java

License: Apache License

/**
 * Checks whether the file has the proper naming and hierarchy
 * @param configFilePath the relative path from the repo root
 * @return false if the file does not conform
 */
private boolean checkConfigFilePath(String configFilePath) {
    // The config needs to be stored at configDir/flowGroup/flowName.(pull|job|json|conf)
    Path configFile = new Path(configFilePath);
    String fileExtension = Files.getFileExtension(configFile.getName());

    if (configFile.depth() != CONFIG_FILE_DEPTH
            || !configFile.getParent().getParent().getName().equals(folderName)
            || !(PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS.contains(fileExtension)
                    || PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS.contains(fileExtension))) {
        log.warn("Changed file does not conform to directory structure and file name format, skipping: "
                + configFilePath);

        return false;
    }

    return true;
}
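
Assuming CONFIG_FILE_DEPTH is 3 and folderName is the config directory's name (both assumptions for illustration), a conforming path decomposes like this; the chained getParent() calls climb from the file to the flow group and then to the directory whose name is verified.

    Path configFile = new Path("configDir/flowGroup/flowName.pull"); // hypothetical
    configFile.depth();                           // 3
    configFile.getParent().getName();             // "flowGroup"
    configFile.getParent().getParent().getName(); // "configDir", compared to folderName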

From source file: org.apache.gobblin.service.modules.core.GitFlowGraphMonitor.java

License: Apache License

/**
 * Helper to check if a file has proper hierarchy.
 * @param filePath path of the node/edge file
 * @param depth expected depth of the file
 * @return true if the file conforms to the expected hierarchy
 */
private boolean checkFileLevelRelativeToRoot(Path filePath, int depth) {
    if (filePath == null) {
        return false;
    }
    Path path = filePath;
    for (int i = 0; i < depth - 1; i++) {
        path = path.getParent();
    }
    if (!path.getName().equals(folderName)) {
        return false;
    }
    return true;
}
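
Tracing the loop with a hypothetical edge file at depth 4 shows how repeated getParent() calls climb toward the root. Note that getParent() returns null once the root is passed, so callers are expected to supply a depth no greater than the file's actual depth.

    Path path = new Path("flowGraphDir/source/dest/edge.conf"); // hypothetical, depth 4
    // depth - 1 = 3 iterations: edge.conf -> dest -> source -> flowGraphDir
    for (int i = 0; i < 3; i++) {
        path = path.getParent();
    }
    path.getName(); // "flowGraphDir", compared against folderName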

From source file: org.apache.gobblin.service.modules.core.GitFlowGraphMonitor.java

License: Apache License

/**
 * Helper that overrides the data.node.id property with the name derived from the node file path
 * @param nodeConfig node config
 * @param nodeFilePath path of the node file
 * @return config with overridden data.node.id
 */
private Config getNodeConfigWithOverrides(Config nodeConfig, Path nodeFilePath) {
    String nodeId = nodeFilePath.getParent().getName();
    return nodeConfig.withValue(FlowGraphConfigurationKeys.DATA_NODE_ID_KEY,
            ConfigValueFactory.fromAnyRef(nodeId));
}
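
Assuming each node file sits in a directory named after the node, e.g. {flowGraphDir}/{nodeName}/{nodeName}.properties, the node id falls out of the parent directory's name.

    Path nodeFilePath = new Path("flowGraphDir/node-a/node-a.properties"); // hypothetical
    String nodeId = nodeFilePath.getParent().getName();                    // "node-a"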

From source file: org.apache.gobblin.service.modules.core.GitFlowGraphMonitor.java

License: Apache License

/**
 * Helper that overrides the flow edge properties with names derived from the edge file path
 * @param edgeConfig edge config
 * @param edgeFilePath path of the edge file
 * @return config with overridden edge properties
 */
private Config getEdgeConfigWithOverrides(Config edgeConfig, Path edgeFilePath) {
    String source = edgeFilePath.getParent().getParent().getName();
    String destination = edgeFilePath.getParent().getName();
    String edgeName = Files.getNameWithoutExtension(edgeFilePath.getName());

    return edgeConfig
            .withValue(FlowGraphConfigurationKeys.FLOW_EDGE_SOURCE_KEY, ConfigValueFactory.fromAnyRef(source))
            .withValue(FlowGraphConfigurationKeys.FLOW_EDGE_DESTINATION_KEY,
                    ConfigValueFactory.fromAnyRef(destination))
            .withValue(FlowGraphConfigurationKeys.FLOW_EDGE_ID_KEY,
                    ConfigValueFactory.fromAnyRef(getEdgeId(source, destination, edgeName)));
}
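
Assuming edges live at {flowGraphDir}/{source}/{destination}/{edgeName}.conf, the three extractions map onto a hypothetical path as follows (Files here is Guava's com.google.common.io.Files).

    Path edgeFilePath = new Path("flowGraphDir/node-a/node-b/a-to-b.conf"); // hypothetical
    String source = edgeFilePath.getParent().getParent().getName();           // "node-a"
    String destination = edgeFilePath.getParent().getName();                  // "node-b"
    String edgeName = Files.getNameWithoutExtension(edgeFilePath.getName());  // "a-to-b"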

From source file: org.apache.gobblin.util.HadoopUtilsTest.java

License: Apache License

@Test
public void testMoveToTrash() throws IOException {
    Path hadoopUtilsTestDir = new Path(Files.createTempDir().getAbsolutePath(), "HadoopUtilsTestDir");
    Configuration conf = new Configuration();
    // Set the time to keep it in trash to 10 minutes.
    // 0 means object will be deleted instantly.
    conf.set("fs.trash.interval", "10");
    FileSystem fs = FileSystem.getLocal(conf);
    Trash trash = new Trash(fs, conf);
    TrashPolicy trashPolicy = TrashPolicy.getInstance(conf, fs, fs.getHomeDirectory());
    Path trashPath = trashPolicy.getCurrentTrashDir();

    fs.mkdirs(hadoopUtilsTestDir);
    Assert.assertTrue(fs.exists(hadoopUtilsTestDir));
    trash.moveToTrash(hadoopUtilsTestDir.getParent());
    Assert.assertFalse(fs.exists(hadoopUtilsTestDir));
    Assert.assertTrue(fs.exists(trashPath));
}
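
Note that the test passes hadoopUtilsTestDir.getParent(), i.e. the temporary directory created by Files.createTempDir(), to moveToTrash. Moving the parent relocates everything beneath it, which is why the test can assert that hadoopUtilsTestDir has disappeared from its original location while the trash directory now exists.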

From source file: org.apache.gobblin.util.JobLauncherUtils.java

License: Apache License

/**
 * Cleans up the staging data of all tasks of a job.
 *
 * @param state a {@link State} instance storing job configuration properties
 * @param logger a {@link Logger} used for logging
 */
public static void cleanJobStagingData(State state, Logger logger) throws IOException {
    Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_STAGING_DIR),
            "Missing required property " + ConfigurationKeys.WRITER_STAGING_DIR);
    Preconditions.checkArgument(state.contains(ConfigurationKeys.WRITER_OUTPUT_DIR),
            "Missing required property " + ConfigurationKeys.WRITER_OUTPUT_DIR);

    String writerFsUri = state.getProp(ConfigurationKeys.WRITER_FILE_SYSTEM_URI,
            ConfigurationKeys.LOCAL_FS_URI);
    FileSystem fs = getFsWithProxy(state, writerFsUri, WriterUtils.getFsConfiguration(state));

    Path jobStagingPath = new Path(state.getProp(ConfigurationKeys.WRITER_STAGING_DIR));
    logger.info("Cleaning up staging directory " + jobStagingPath);
    HadoopUtils.deletePath(fs, jobStagingPath, true);

    if (fs.exists(jobStagingPath.getParent()) && fs.listStatus(jobStagingPath.getParent()).length == 0) {
        logger.info("Deleting directory " + jobStagingPath.getParent());
        HadoopUtils.deletePath(fs, jobStagingPath.getParent(), true);
    }

    Path jobOutputPath = new Path(state.getProp(ConfigurationKeys.WRITER_OUTPUT_DIR));
    logger.info("Cleaning up output directory " + jobOutputPath);
    HadoopUtils.deletePath(fs, jobOutputPath, true);

    if (fs.exists(jobOutputPath.getParent()) && fs.listStatus(jobOutputPath.getParent()).length == 0) {
        logger.info("Deleting directory " + jobOutputPath.getParent());
        HadoopUtils.deletePath(fs, jobOutputPath.getParent(), true);
    }

    if (state.contains(ConfigurationKeys.ROW_LEVEL_ERR_FILE)) {
        if (state.getPropAsBoolean(ConfigurationKeys.CLEAN_ERR_DIR, ConfigurationKeys.DEFAULT_CLEAN_ERR_DIR)) {
            Path jobErrPath = new Path(state.getProp(ConfigurationKeys.ROW_LEVEL_ERR_FILE));
            logger.info("Cleaning up err directory: " + jobErrPath);
            HadoopUtils.deleteIfExists(fs, jobErrPath, true);
        }
    }
}
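
In both cleanup blocks, getParent() serves as garbage collection for the enclosing directory: after the staging or output directory itself is deleted, its parent is removed as well, but only when fs.listStatus shows it has become empty, so sibling data is never touched.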