List of usage examples for org.apache.hadoop.fs Path getParent
public Path getParent()
From source file:gobblin.publisher.TimePartitionedDataPublisher.java
License:Apache License
/** * This method needs to be overridden for TimePartitionedDataPublisher, since the output folder structure * contains timestamp, we have to move the files recursively. * * For example, move {writerOutput}/2015/04/08/15/output.avro to {publisherOutput}/2015/04/08/15/output.avro *///from ww w . j a v a2 s .c o m @Override protected void addWriterOutputToExistingDir(Path writerOutput, Path publisherOutput, WorkUnitState workUnitState, int branchId, ParallelRunner parallelRunner) throws IOException { for (FileStatus status : FileListUtils.listFilesRecursively(this.writerFileSystemByBranches.get(branchId), writerOutput)) { String filePathStr = status.getPath().toString(); String pathSuffix = filePathStr .substring(filePathStr.indexOf(writerOutput.toString()) + writerOutput.toString().length() + 1); Path outputPath = new Path(publisherOutput, pathSuffix); WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId), outputPath.getParent(), this.permissions.get(branchId)); movePath(parallelRunner, workUnitState, status.getPath(), outputPath, branchId); } }
From source file:gobblin.publisher.TimestampDataPublisher.java
License:Apache License
/**
 * Rewrites the destination so that the last path component becomes the "dbname.tablename" form
 * produced by {@link #getDbTableName(String)}, appends the timestamp, and then delegates the move
 * to the superclass.
 *
 * <pre>
 * Input dst format:  {finaldir}/{schemaName}
 * Output dst format: {finaldir}/{dbname.tablename}/{currenttimestamp}
 * </pre>
 *
 * @param parallelRunner runner handed through to the superclass implementation
 * @param state state handed through to the superclass implementation
 * @param src path to move
 * @param dst original destination; its parent and name are used to build the new destination
 * @param branchId index used to look up the branch's publisher file system and permissions
 * @throws IOException if the existence check, directory creation or the move fails
 */
@Override
protected void movePath(ParallelRunner parallelRunner, State state, Path src, Path dst, int branchId)
    throws IOException {
  Path tableDir = new Path(dst.getParent().toString(), getDbTableName(dst.getName()));
  Path timestampedDst = new Path(tableDir, timestamp);

  boolean destinationMissing = !this.publisherFileSystemByBranches.get(branchId).exists(timestampedDst);
  if (destinationMissing) {
    // Only the parent is created here; the timestamped destination itself is left to the move.
    WriterUtils.mkdirsWithRecursivePermission(this.publisherFileSystemByBranches.get(branchId),
        timestampedDst.getParent(), this.permissions.get(branchId));
  }

  super.movePath(parallelRunner, state, src, timestampedDst, branchId);
}
From source file:gobblin.qualitychecker.row.RowLevelErrFileWriter.java
License:Apache License
/** * Open a BufferedWriter/*from ww w .j ava2 s .com*/ * @param errFilePath path to write the file */ public void open(Path errFilePath) throws IOException { this.fs.mkdirs(errFilePath.getParent()); OutputStream os = this.closer .register(this.fs.exists(errFilePath) ? this.fs.append(errFilePath) : this.fs.create(errFilePath)); this.writer = this.closer.register( new BufferedWriter(new OutputStreamWriter(os, ConfigurationKeys.DEFAULT_CHARSET_ENCODING))); }
From source file:gobblin.runtime.embedded.EmbeddedGobblin.java
License:Apache License
/** * Launch the Gobblin job asynchronously. This method will return when the Gobblin job has started. * @return a {@link JobExecutionDriver}. This object is a future that will resolve when the Gobblin job finishes. * @throws TimeoutException if the Gobblin job does not start within the launch timeout. *//*from www. j av a 2s . c o m*/ @NotOnCli public JobExecutionDriver runAsync() throws TimeoutException, InterruptedException { // Run function to distribute jars to workers in distributed mode this.distributeJarsFunction.run(); Config sysProps = ConfigFactory.parseMap(this.builtConfigMap).withFallback(this.defaultSysConfig); Config userConfig = ConfigFactory.parseMap(this.userConfigMap); JobSpec jobSpec; if (this.jobFile.isPresent()) { try { Path jobFilePath = this.jobFile.get(); PullFileLoader loader = new PullFileLoader(jobFilePath.getParent(), jobFilePath.getFileSystem(new Configuration()), PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS, PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS); Config jobConfig = userConfig.withFallback(loader.loadPullFile(jobFilePath, sysProps, false)); ImmutableFSJobCatalog.JobSpecConverter converter = new ImmutableFSJobCatalog.JobSpecConverter( jobFilePath.getParent(), Optional.<String>absent()); jobSpec = converter.apply(jobConfig); } catch (IOException ioe) { throw new RuntimeException("Failed to run embedded Gobblin.", ioe); } } else { Config finalConfig = userConfig.withFallback(sysProps); if (this.template != null) { try { finalConfig = this.template.getResolvedConfig(finalConfig); } catch (SpecNotFoundException | JobTemplate.TemplateException exc) { throw new RuntimeException(exc); } } jobSpec = this.specBuilder.withConfig(finalConfig).build(); } ResolvedJobSpec resolvedJobSpec; try { resolvedJobSpec = new ResolvedJobSpec(jobSpec); } catch (SpecNotFoundException | JobTemplate.TemplateException exc) { throw new RuntimeException("Failed to resolved template.", exc); } final JobCatalog jobCatalog = new 
StaticJobCatalog(Optional.of(this.useLog), Lists.<JobSpec>newArrayList(resolvedJobSpec)); SimpleGobblinInstanceEnvironment instanceEnvironment = new SimpleGobblinInstanceEnvironment( "EmbeddedGobblinInstance", this.useLog, getSysConfig()); StandardGobblinInstanceDriver.Builder builder = new StandardGobblinInstanceDriver.Builder( Optional.<GobblinInstanceEnvironment>of(instanceEnvironment)).withLog(this.useLog) .withJobCatalog(jobCatalog).withImmediateJobScheduler(); for (GobblinInstancePluginFactory plugin : this.plugins) { builder.addPlugin(plugin); } final GobblinInstanceDriver driver = builder.build(); EmbeddedJobLifecycleListener listener = new EmbeddedJobLifecycleListener(this.useLog); driver.registerJobLifecycleListener(listener); driver.startAsync(); boolean started = listener.awaitStarted(this.launchTimeout.getTimeout(), this.launchTimeout.getTimeUnit()); if (!started) { log.warn("Timeout waiting for job to start. Aborting."); driver.stopAsync(); driver.awaitTerminated(this.shutdownTimeout.getTimeout(), this.shutdownTimeout.getTimeUnit()); throw new TimeoutException("Timeout waiting for job to start."); } final JobExecutionDriver jobDriver = listener.getJobDriver(); // Stop the Gobblin instance driver when the job finishes. Futures.addCallback(jobDriver, new FutureCallback<JobExecutionResult>() { @Override public void onSuccess(@Nullable JobExecutionResult result) { stopGobblinInstanceDriver(); } @Override public void onFailure(Throwable t) { stopGobblinInstanceDriver(); } private void stopGobblinInstanceDriver() { try { driver.stopAsync(); driver.awaitTerminated(EmbeddedGobblin.this.shutdownTimeout.getTimeout(), EmbeddedGobblin.this.shutdownTimeout.getTimeUnit()); } catch (TimeoutException te) { log.error("Failed to shutdown Gobblin instance driver."); } } }); return listener.getJobDriver(); }
From source file:gobblin.scheduler.PathAlterationListenerAdaptorForMonitor.java
License:Apache License
@Override public void onFileCreate(Path path) { String fileExtension = path.getName().substring(path.getName().lastIndexOf('.') + 1); String noExtFileName = path.getName().substring(0, path.getName().lastIndexOf('.')); if (fileExtension.equalsIgnoreCase(SchedulerUtils.JOB_PROPS_FILE_EXTENSION)) { //check no other properties pre-existed try {//from w w w .j a v a2 s. co m if (checkCommonPropExistance(path.getParent(), noExtFileName)) { return; } } catch (IOException e) { e.printStackTrace(); } LOG.info("Detected creation of common properties file" + path.toString()); // New .properties file founded with some new attributes, reschedule jobs. loadNewCommonConfigAndHandleNewJob(path, JobScheduler.Action.RESCHEDULE); return; } if (!jobScheduler.jobConfigFileExtensions.contains(fileExtension)) { return; } LOG.info("Detected new job configuration file " + path.toString()); loadNewJobConfigAndHandleNewJob(path, JobScheduler.Action.SCHEDULE); }
From source file:gobblin.source.extractor.extract.google.GoogleDriveFileSystem.java
License:Apache License
/**
 * Converts a {@link Path} back into a Google Drive file ID.
 *
 * <p>org.apache.hadoop.fs.Path assumes the file system naming uses "/" as separator, so a "/" in
 * the path string is split into parent and name. A file ID is a string determined by Google and
 * may itself contain "/"; this method walks up the parents and joins the names with "/" again to
 * restore the original ID.
 *
 * @param p path to convert
 * @return the empty string for a root path, otherwise the path's name components joined by
 *         {@link Path#SEPARATOR}, stopping at the first ancestor with an empty name
 */
public static String toFileId(Path p) {
  if (p.isRoot()) {
    return "";
  }

  Path parent = p.getParent();
  boolean parentIsNameless = parent != null && StringUtils.isEmpty(parent.getName());
  if (parentIsNameless) {
    // Top-most named component: nothing to prepend.
    return p.getName();
  }

  return toFileId(parent) + Path.SEPARATOR + p.getName();
}
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/**
 * Deletes {@code f} and then walks up its ancestors, deleting each one for as long as it exists
 * and has no entries. The walk stops at the first ancestor that is missing or non-empty, or when
 * there is no further parent.
 *
 * @param fs file system to operate on
 * @param f path to delete
 * @param recursive passed to {@code deletePath} for {@code f} itself
 * @throws IOException if a file system call fails
 */
public static void deletePathAndEmptyAncestors(FileSystem fs, Path f, boolean recursive) throws IOException {
  deletePath(fs, f, recursive);

  for (Path ancestor = f.getParent(); ancestor != null; ancestor = ancestor.getParent()) {
    boolean emptyDirectory = fs.exists(ancestor) && fs.listStatus(ancestor).length == 0;
    if (!emptyDirectory) {
      return;
    }
    deletePath(fs, ancestor, true);
  }
}
From source file:gobblin.util.HadoopUtils.java
License:Apache License
/**
 * Renames {@code from} to {@code to} if {@code to} doesn't exist, in a non-thread-safe way.
 * The parent directory of {@code to} is created first when it is missing.
 *
 * @param fs filesystem where rename will be executed.
 * @param from origin {@link Path}.
 * @param to target {@link Path}.
 * @return true if rename succeeded, false if the target already exists.
 * @throws IOException if rename failed for reasons other than target exists.
 */
public static boolean unsafeRenameIfNotExists(FileSystem fs, Path from, Path to) throws IOException {
  if (fs.exists(to)) {
    return false;
  }

  Path targetDir = to.getParent();
  if (!fs.exists(targetDir)) {
    fs.mkdirs(targetDir);
  }

  if (renamePathHandleLocalFSRace(fs, from, to)) {
    return true;
  }

  // The rename reported failure: if the target exists now, treat it as "already exists".
  if (fs.exists(to)) {
    return false;
  }
  throw new IOException(String.format("Failed to rename %s to %s.", from, to));
}
From source file:gobblin.util.HeapDumpForTaskUtils.java
License:Apache License
/**
 * Generate the dumpScript, which is used when an OOM error is thrown during task execution.
 * The generated script puts the heap file into the DUMP_FOLDER located in the same directory as
 * the dumpScript. Nothing is written when the script already exists.
 *
 * <p>User needs to add the following options to the task java.opts:
 *
 * <pre>
 * -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./heapFileName.hprof -XX:OnOutOfMemoryError=./dumpScriptFileName
 * </pre>
 *
 * @param dumpScript The path to the dumpScript, which needs to be added to the Distributed cache.
 *        To use it, simply put the path of dumpScript to the gobblin config: job.hdfs.files.
 * @param fs File system
 * @param heapFileName the name of the heap dump file.
 * @param chmod chmod for the dump script. For hdfs file, e.g, "hadoop fs -chmod 755"
 * @throws IOException if the script cannot be written; a partially written script is deleted first
 */
public static void generateDumpScript(Path dumpScript, FileSystem fs, String heapFileName, String chmod)
    throws IOException {
  if (fs.exists(dumpScript)) {
    LOG.info("Heap dump script already exists: " + dumpScript);
    return;
  }

  try (BufferedWriter scriptWriter = new BufferedWriter(
      new OutputStreamWriter(fs.create(dumpScript), ConfigurationKeys.DEFAULT_CHARSET_ENCODING))) {
    Path dumpDir = new Path(dumpScript.getParent(), DUMP_FOLDER);
    if (!fs.exists(dumpDir)) {
      fs.mkdirs(dumpDir);
    }

    // Source file and destination are the same for both branches of the generated script.
    String putArguments = heapFileName + " " + dumpDir + "/${PWD//\\//_}.hprof\n";

    scriptWriter.write("#!/bin/sh\n");
    scriptWriter.write("if [ -n \"$HADOOP_PREFIX\" ]; then\n");
    scriptWriter.write(" ${HADOOP_PREFIX}/bin/hadoop dfs -put " + putArguments);
    scriptWriter.write("else\n");
    scriptWriter.write(" ${HADOOP_HOME}/bin/hadoop dfs -put " + putArguments);
    scriptWriter.write("fi\n");
  } catch (IOException ioe) {
    LOG.error("Heap dump script is not generated successfully.");
    // Do not leave a half-written script behind.
    if (fs.exists(dumpScript)) {
      fs.delete(dumpScript, true);
    }
    throw ioe;
  }

  // Apply the caller-supplied chmod command to the freshly written script.
  Runtime.getRuntime().exec(chmod + " " + dumpScript);
}
From source file:gobblin.util.HeapDumpForTaskUtilsTest.java
License:Apache License
@Test public void testGenerateDumpScript() throws IOException { Path dumpScript = new Path(TEST_DIR, SCRIPT_NAME); HeapDumpForTaskUtils.generateDumpScript(dumpScript, this.fs, "test.hprof", "chmod 777 "); Assert.assertEquals(true, this.fs.exists(dumpScript)); Assert.assertEquals(true, this.fs.exists(new Path(dumpScript.getParent(), "dumps"))); Closer closer = Closer.create();//from w w w. ja v a 2 s .c o m try { BufferedReader scriptReader = closer .register(new BufferedReader(new InputStreamReader(this.fs.open(dumpScript)))); Assert.assertEquals("#!/bin/sh", scriptReader.readLine()); Assert.assertEquals("if [ -n \"$HADOOP_PREFIX\" ]; then", scriptReader.readLine()); Assert.assertEquals( " ${HADOOP_PREFIX}/bin/hadoop dfs -put test.hprof dumpScript/dumps/${PWD//\\//_}.hprof", scriptReader.readLine()); Assert.assertEquals("else", scriptReader.readLine()); Assert.assertEquals( " ${HADOOP_HOME}/bin/hadoop dfs -put test.hprof dumpScript/dumps/${PWD//\\//_}.hprof", scriptReader.readLine()); Assert.assertEquals("fi", scriptReader.readLine()); } catch (Throwable t) { closer.rethrow(t); } finally { closer.close(); } }