Example usage for org.apache.hadoop.fs Path getParent

Introduction

On this page you can find example usage of org.apache.hadoop.fs Path getParent.

Prototype

public Path getParent() 

Document

Returns the parent of a path or null if at root.
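
A minimal, self-contained sketch of that contract (class name and paths are hypothetical):

import org.apache.hadoop.fs.Path;

public class GetParentDemo {
    public static void main(String[] args) {
        Path file = new Path("/data/2015/04/08/output.avro");
        System.out.println(file.getParent());             // /data/2015/04/08
        System.out.println(file.getParent().getParent()); // /data/2015/04
        System.out.println(new Path("/").getParent());    // null: the root has no parent
    }
}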

Usage

From source file: gobblin.publisher.TimePartitionedDataPublisher.java

License: Apache License

/**
 * This method needs to be overridden for TimePartitionedDataPublisher: since the output folder structure
 * contains timestamps, the files have to be moved recursively.
 *
 * For example, move {writerOutput}/2015/04/08/15/output.avro to {publisherOutput}/2015/04/08/15/output.avro
 */
@Override
protected void addWriterOutputToExistingDir(Path writerOutput, Path publisherOutput,
        WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException {

    for (FileStatus status : FileListUtils.listFilesRecursively(this.writerFileSystemByBranches.get(branchId),
            writerOutput)) {
        String filePathStr = status.getPath().toString();
        String pathSuffix = filePathStr
                .substring(filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1);
        Path outputPath = new Path(publisherOutput, pathSuffix);

        WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
                outputPath.getParent(), this.permissions.get(branchId));

        movePath(parallelRunner, workUnitState, status.getPath(), outputPath, branchId);
    }
}
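
To isolate what getParent() contributes here, a sketch of the same suffix arithmetic with hypothetical paths (the indexOf call in the original reduces to a fixed offset when the prefix starts at index 0):

Path writerOutput = new Path("/writer-output");
Path filePath = new Path("/writer-output/2015/04/08/15/output.avro");
// Everything after "<writerOutput>/" is the time-partitioned suffix
String pathSuffix = filePath.toString().substring(writerOutput.toString().length() + 1);
Path outputPath = new Path(new Path("/publisher-output"), pathSuffix);
// The directory that must be created with recursive permissions:
Path dirToCreate = outputPath.getParent();  // /publisher-output/2015/04/08/15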

From source file: gobblin.publisher.TimestampDataPublisher.java

License: Apache License

/**
 * Update the destination path to use the db and table name in the format "dbname.tablename", via {@link #getDbTableName(String)},
 * and include a timestamp.
 *
 * Input dst format: {finaldir}/{schemaName}
 * Output dst format: {finaldir}/{dbname.tablename}/{currenttimestamp}
 */
@Override
protected void movePath(ParallelRunner parallelRunner, State state, Path src, Path dst, int branchId)
        throws IOException {

    String outputDir = dst.getParent().toString();
    String schemaName = dst.getName();
    Path newDst = new Path(new Path(outputDir, getDbTableName(schemaName)), timestamp);

    if (!this.publisherFileSystemByBranches.get(branchId).exists(newDst)) {
        WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
                newDst.getParent(), this.permissions.get(branchId));
    }

    super.movePath(parallelRunner, state, src, newDst, branchId);
}
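
A sketch of the rewrite with hypothetical values ("mydb.mytable" stands in for the result of getDbTableName):

Path dst = new Path("/final/my_schema");        // {finaldir}/{schemaName}
String outputDir = dst.getParent().toString();  // "/final"
String schemaName = dst.getName();              // "my_schema"
String timestamp = "1428505200000";             // hypothetical publish timestamp
Path newDst = new Path(new Path(outputDir, "mydb.mytable"), timestamp);
// newDst = /final/mydb.mytable/1428505200000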

From source file: gobblin.qualitychecker.row.RowLevelErrFileWriter.java

License: Apache License

/**
 * Open a BufferedWriter.
 * @param errFilePath path to write the file
 */
public void open(Path errFilePath) throws IOException {
    this.fs.mkdirs(errFilePath.getParent());
    OutputStream os = this.closer
            .register(this.fs.exists(errFilePath) ? this.fs.append(errFilePath) : this.fs.create(errFilePath));
    this.writer = this.closer.register(
            new BufferedWriter(new OutputStreamWriter(os, ConfigurationKeys.DEFAULT_CHARSET_ENCODING)));
}
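
The mkdirs-on-parent guard before create/append is a reusable pattern on its own. A standalone sketch with a hypothetical path (note that append() is not supported by every Hadoop FileSystem implementation):

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ErrFileSketch {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path errFilePath = new Path("/tmp/quality/errors.txt");  // hypothetical
        fs.mkdirs(errFilePath.getParent());  // ensure the parent directory exists first
        try (OutputStream os = fs.exists(errFilePath) ? fs.append(errFilePath) : fs.create(errFilePath)) {
            os.write("bad record\n".getBytes(StandardCharsets.UTF_8));
        }
    }
}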

From source file: gobblin.runtime.embedded.EmbeddedGobblin.java

License: Apache License

/**
 * Launch the Gobblin job asynchronously. This method will return when the Gobblin job has started.
 * @return a {@link JobExecutionDriver}. This object is a future that will resolve when the Gobblin job finishes.
 * @throws TimeoutException if the Gobblin job does not start within the launch timeout.
 */
@NotOnCli
public JobExecutionDriver runAsync() throws TimeoutException, InterruptedException {
    // Run function to distribute jars to workers in distributed mode
    this.distributeJarsFunction.run();

    Config sysProps = ConfigFactory.parseMap(this.builtConfigMap).withFallback(this.defaultSysConfig);
    Config userConfig = ConfigFactory.parseMap(this.userConfigMap);

    JobSpec jobSpec;
    if (this.jobFile.isPresent()) {
        try {
            Path jobFilePath = this.jobFile.get();
            PullFileLoader loader = new PullFileLoader(jobFilePath.getParent(),
                    jobFilePath.getFileSystem(new Configuration()),
                    PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS,
                    PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
            Config jobConfig = userConfig.withFallback(loader.loadPullFile(jobFilePath, sysProps, false));
            ImmutableFSJobCatalog.JobSpecConverter converter = new ImmutableFSJobCatalog.JobSpecConverter(
                    jobFilePath.getParent(), Optional.<String>absent());
            jobSpec = converter.apply(jobConfig);
        } catch (IOException ioe) {
            throw new RuntimeException("Failed to run embedded Gobblin.", ioe);
        }
    } else {
        Config finalConfig = userConfig.withFallback(sysProps);
        if (this.template != null) {
            try {
                finalConfig = this.template.getResolvedConfig(finalConfig);
            } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
                throw new RuntimeException(exc);
            }
        }
        jobSpec = this.specBuilder.withConfig(finalConfig).build();
    }

    ResolvedJobSpec resolvedJobSpec;
    try {
        resolvedJobSpec = new ResolvedJobSpec(jobSpec);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new RuntimeException("Failed to resolve template.", exc);
    }
    final JobCatalog jobCatalog = new StaticJobCatalog(Optional.of(this.useLog),
            Lists.<JobSpec>newArrayList(resolvedJobSpec));

    SimpleGobblinInstanceEnvironment instanceEnvironment = new SimpleGobblinInstanceEnvironment(
            "EmbeddedGobblinInstance", this.useLog, getSysConfig());

    StandardGobblinInstanceDriver.Builder builder = new StandardGobblinInstanceDriver.Builder(
            Optional.<GobblinInstanceEnvironment>of(instanceEnvironment)).withLog(this.useLog)
                    .withJobCatalog(jobCatalog).withImmediateJobScheduler();

    for (GobblinInstancePluginFactory plugin : this.plugins) {
        builder.addPlugin(plugin);
    }

    final GobblinInstanceDriver driver = builder.build();

    EmbeddedJobLifecycleListener listener = new EmbeddedJobLifecycleListener(this.useLog);
    driver.registerJobLifecycleListener(listener);

    driver.startAsync();

    boolean started = listener.awaitStarted(this.launchTimeout.getTimeout(), this.launchTimeout.getTimeUnit());
    if (!started) {
        log.warn("Timeout waiting for job to start. Aborting.");
        driver.stopAsync();
        driver.awaitTerminated(this.shutdownTimeout.getTimeout(), this.shutdownTimeout.getTimeUnit());
        throw new TimeoutException("Timeout waiting for job to start.");
    }

    final JobExecutionDriver jobDriver = listener.getJobDriver();
    // Stop the Gobblin instance driver when the job finishes.
    Futures.addCallback(jobDriver, new FutureCallback<JobExecutionResult>() {
        @Override
        public void onSuccess(@Nullable JobExecutionResult result) {

            stopGobblinInstanceDriver();
        }

        @Override
        public void onFailure(Throwable t) {
            stopGobblinInstanceDriver();
        }

        private void stopGobblinInstanceDriver() {
            try {
                driver.stopAsync();
                driver.awaitTerminated(EmbeddedGobblin.this.shutdownTimeout.getTimeout(),
                        EmbeddedGobblin.this.shutdownTimeout.getTimeUnit());
            } catch (TimeoutException te) {
                log.error("Failed to shutdown Gobblin instance driver.");
            }
        }
    });

    return listener.getJobDriver();
}
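
A usage sketch for the method above (job name and config key/value are hypothetical, and the fluent setters are assumed from EmbeddedGobblin's builder-style API); JobExecutionDriver is a future, so the caller can block on the result:

EmbeddedGobblin embedded = new EmbeddedGobblin("my-job")
        .setConfiguration("source.class", "my.example.Source");  // hypothetical key/value
JobExecutionDriver driver = embedded.runAsync();
JobExecutionResult result = driver.get();  // resolves when the Gobblin job finishes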

From source file: gobblin.scheduler.PathAlterationListenerAdaptorForMonitor.java

License: Apache License

@Override
public void onFileCreate(Path path) {
    String fileExtension = path.getName().substring(path.getName().lastIndexOf('.') + 1);
    String noExtFileName = path.getName().substring(0, path.getName().lastIndexOf('.'));
    if (fileExtension.equalsIgnoreCase(SchedulerUtils.JOB_PROPS_FILE_EXTENSION)) {
        //check that no other common properties file pre-existed
        try {
            if (checkCommonPropExistance(path.getParent(), noExtFileName)) {
                return;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        LOG.info("Detected creation of common properties file " + path);
        // New .properties file found with some new attributes; reschedule jobs.
        loadNewCommonConfigAndHandleNewJob(path, JobScheduler.Action.RESCHEDULE);
        return;
    }
    if (!jobScheduler.jobConfigFileExtensions.contains(fileExtension)) {
        return;
    }
    LOG.info("Detected new job configuration file " + path.toString());

    loadNewJobConfigAndHandleNewJob(path, JobScheduler.Action.SCHEDULE);
}
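
The name/parent bookkeeping at the top of the listener, isolated with a hypothetical path:

Path path = new Path("/jobs/my-job.pull");  // hypothetical job config file
String name = path.getName();               // "my-job.pull"
String fileExtension = name.substring(name.lastIndexOf('.') + 1);  // "pull"
String noExtFileName = name.substring(0, name.lastIndexOf('.'));   // "my-job"
Path jobDir = path.getParent();  // /jobs, the directory scanned for shared .properties files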

From source file: gobblin.source.extractor.extract.google.GoogleDriveFileSystem.java

License: Apache License

/**
 * org.apache.hadoop.fs.Path assumes that file system names use a separator and that "/" is the separator.
 * When org.apache.hadoop.fs.Path sees "/" in a path String, it splits it into parent and name. As a fileID is a random
 * String determined by Google and can itself contain "/", this method checks whether parent and name were separated and
 * restores "/" back into the file ID.
 *
 * @param p the Path to convert
 * @return the restored file ID
 */
public static String toFileId(Path p) {
    if (p.isRoot()) {
        return "";
    }
    final String format = "%s" + Path.SEPARATOR + "%s";
    if (p.getParent() != null && StringUtils.isEmpty(p.getParent().getName())) {
        return p.getName();
    }
    return String.format(format, toFileId(p.getParent()), p.getName());
}
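
For example (hypothetical file ID), an ID containing "/" survives the round-trip because the recursion walks back up through getParent() and rejoins parent and name:

Path p = new Path("abc/def");  // Path splits this into parent "abc" and name "def"
String fileId = toFileId(p);   // "abc/def": the "/" is restored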

From source file: gobblin.util.HadoopUtils.java

License: Apache License

public static void deletePathAndEmptyAncestors(FileSystem fs, Path f, boolean recursive) throws IOException {
    deletePath(fs, f, recursive);
    Path parent = f.getParent();
    while (parent != null) {
        if (fs.exists(parent) && fs.listStatus(parent).length == 0) {
            deletePath(fs, parent, true);
            parent = parent.getParent();
        } else {
            break;
        }
    }
}
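
A usage sketch with a hypothetical layout:

FileSystem fs = FileSystem.get(new Configuration());
// part-0 is assumed to be the only entry anywhere under /data/2015
HadoopUtils.deletePathAndEmptyAncestors(fs, new Path("/data/2015/04/08/part-0"), false);
// 08, 04 and 2015 are removed once empty; /data survives if it still has other children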

From source file: gobblin.util.HadoopUtils.java

License: Apache License

/**
 * Renames {@code from} to {@code to} if {@code to} doesn't exist, in a non-thread-safe way.
 *
 * @param fs filesystem where rename will be executed.
 * @param from origin {@link Path}.
 * @param to target {@link Path}.
 * @return true if rename succeeded, false if the target already exists.
 * @throws IOException if rename failed for reasons other than the target already existing.
 */
public static boolean unsafeRenameIfNotExists(FileSystem fs, Path from, Path to) throws IOException {
    if (!fs.exists(to)) {
        if (!fs.exists(to.getParent())) {
            fs.mkdirs(to.getParent());
        }

        if (!renamePathHandleLocalFSRace(fs, from, to)) {
            if (!fs.exists(to)) {
                throw new IOException(String.format("Failed to rename %s to %s.", from, to));
            }

            return false;
        }
        return true;
    }
    return false;
}
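
A usage sketch (staging and publish paths are hypothetical); since the exists/rename pair is not atomic, callers must tolerate a false return when another writer wins the race:

Path staging = new Path("/staging/part-0");    // hypothetical
Path publish = new Path("/published/part-0");  // hypothetical
boolean renamed = HadoopUtils.unsafeRenameIfNotExists(fs, staging, publish);
if (!renamed) {
    // the target already existed (possibly created concurrently); not an error here
}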

From source file: gobblin.util.HeapDumpForTaskUtils.java

License: Apache License

/**
 * Generates the dumpScript, which is used when an OOM error is thrown during task execution.
 * The generated dumpScript puts the .prof files into the DUMP_FOLDER within the same directory as the dumpScript.
 *
 * Users need to add the following options to the task java.opts:
 *
 * -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./heapFileName.hprof -XX:OnOutOfMemoryError=./dumpScriptFileName
 *
 * @param dumpScript The path to the dumpScript, which needs to be added to the Distributed cache.
 * To use it, simply add the path of the dumpScript to the gobblin config key job.hdfs.files.
 * @param fs File system
 * @param heapFileName the name of the .prof file.
 * @param chmod chmod command for the dump script. For an hdfs file, e.g., "hadoop fs -chmod 755"
 * @throws IOException
 */
public static void generateDumpScript(Path dumpScript, FileSystem fs, String heapFileName, String chmod)
        throws IOException {
    if (fs.exists(dumpScript)) {
        LOG.info("Heap dump script already exists: " + dumpScript);
        return;
    }

    try (BufferedWriter scriptWriter = new BufferedWriter(
            new OutputStreamWriter(fs.create(dumpScript), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))) {
        Path dumpDir = new Path(dumpScript.getParent(), DUMP_FOLDER);
        if (!fs.exists(dumpDir)) {
            fs.mkdirs(dumpDir);
        }

        scriptWriter.write("#!/bin/sh\n");
        scriptWriter.write("if [ -n \"$HADOOP_PREFIX\" ]; then\n");
        scriptWriter.write("  ${HADOOP_PREFIX}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir
                + "/${PWD//\\//_}.hprof\n");
        scriptWriter.write("else\n");
        scriptWriter.write("  ${HADOOP_HOME}/bin/hadoop dfs -put " + heapFileName + " " + dumpDir
                + "/${PWD//\\//_}.hprof\n");
        scriptWriter.write("fi\n");

    } catch (IOException ioe) {
        LOG.error("Heap dump script was not generated successfully.");
        if (fs.exists(dumpScript)) {
            fs.delete(dumpScript, true);
        }
        throw ioe;
    }
    Runtime.getRuntime().exec(chmod + " " + dumpScript);
}

From source file: gobblin.util.HeapDumpForTaskUtilsTest.java

License: Apache License

@Test
public void testGenerateDumpScript() throws IOException {
    Path dumpScript = new Path(TEST_DIR, SCRIPT_NAME);
    HeapDumpForTaskUtils.generateDumpScript(dumpScript, this.fs, "test.hprof", "chmod 777 ");
    Assert.assertEquals(true, this.fs.exists(dumpScript));
    Assert.assertEquals(true, this.fs.exists(new Path(dumpScript.getParent(), "dumps")));
    Closer closer = Closer.create();
    try {
        BufferedReader scriptReader = closer
                .register(new BufferedReader(new InputStreamReader(this.fs.open(dumpScript))));
        Assert.assertEquals("#!/bin/sh", scriptReader.readLine());
        Assert.assertEquals("if [ -n \"$HADOOP_PREFIX\" ]; then", scriptReader.readLine());
        Assert.assertEquals(
                "  ${HADOOP_PREFIX}/bin/hadoop dfs -put test.hprof dumpScript/dumps/${PWD//\\//_}.hprof",
                scriptReader.readLine());
        Assert.assertEquals("else", scriptReader.readLine());
        Assert.assertEquals(
                "  ${HADOOP_HOME}/bin/hadoop dfs -put test.hprof dumpScript/dumps/${PWD//\\//_}.hprof",
                scriptReader.readLine());
        Assert.assertEquals("fi", scriptReader.readLine());
    } catch (Throwable t) {
        closer.rethrow(t);
    } finally {
        closer.close();
    }
}