List of usage examples for org.apache.hadoop.mapreduce.filecache DistributedCache addFileToClassPath
public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs)
From source file:gobblin.runtime.mapreduce.MRJobLauncher.java
License:Apache License
/** * Add framework or job-specific jars to the classpath through DistributedCache * so the mappers can use them.//from w ww . j ava 2s. c om */ @SuppressWarnings("deprecation") private void addJars(Path jarFileDir, String jarFileList, Configuration conf) throws IOException { LocalFileSystem lfs = FileSystem.getLocal(conf); for (String jarFile : SPLITTER.split(jarFileList)) { Path srcJarFile = new Path(jarFile); FileStatus[] fileStatusList = lfs.globStatus(srcJarFile); for (FileStatus status : fileStatusList) { // DistributedCache requires absolute path, so we need to use makeQualified. Path destJarFile = new Path(this.fs.makeQualified(jarFileDir), status.getPath().getName()); if (!this.fs.exists(destJarFile)) { // Copy the jar file from local file system to HDFS this.fs.copyFromLocalFile(status.getPath(), destJarFile); } // Then add the jar file on HDFS to the classpath LOG.info(String.format("Adding %s to classpath", destJarFile)); DistributedCache.addFileToClassPath(destJarFile, conf, this.fs); } } }
From source file:gobblin.runtime.mapreduce.MRJobLauncher.java
License:Apache License
private void addHdfsJars(String hdfsJarFileList, Configuration conf) throws IOException { for (String jarFile : SPLITTER.split(hdfsJarFileList)) { FileStatus[] status = this.fs.listStatus(new Path(jarFile)); for (FileStatus fileStatus : status) { if (!fileStatus.isDirectory()) { Path path = new Path(jarFile, fileStatus.getPath().getName()); LOG.info(String.format("Adding %s to classpath", path)); DistributedCache.addFileToClassPath(path, conf, this.fs); }//from w w w .ja va 2s . c o m } } }
From source file:org.apache.gobblin.runtime.mapreduce.MRJobLauncher.java
License:Apache License
/** * Add framework or job-specific jars to the classpath through DistributedCache * so the mappers can use them./*from ww w. j a v a 2 s .com*/ */ @SuppressWarnings("deprecation") private void addJars(Path jarFileDir, String jarFileList, Configuration conf) throws IOException { LocalFileSystem lfs = FileSystem.getLocal(conf); for (String jarFile : SPLITTER.split(jarFileList)) { Path srcJarFile = new Path(jarFile); FileStatus[] fileStatusList = lfs.globStatus(srcJarFile); for (FileStatus status : fileStatusList) { // For each FileStatus there are chances it could fail in copying at the first attempt, due to file-existence // or file-copy is ongoing by other job instance since all Gobblin jobs share the same jar file directory. // the retryCount is to avoid cases (if any) where retry is going too far and causes job hanging. int retryCount = 0; boolean shouldFileBeAddedIntoDC = true; Path destJarFile = calculateDestJarFile(status, jarFileDir); // Adding destJarFile into HDFS until it exists and the size of file on targetPath matches the one on local path. while (!this.fs.exists(destJarFile) || fs.getFileStatus(destJarFile).getLen() != status.getLen()) { try { if (this.fs.exists(destJarFile) && fs.getFileStatus(destJarFile).getLen() != status.getLen()) { Thread.sleep(WAITING_TIME_ON_IMCOMPLETE_UPLOAD); throw new IOException("Waiting for file to complete on uploading ... "); } // Set the first parameter as false for not deleting sourceFile // Set the second parameter as false for not overwriting existing file on the target, by default it is true. // If the file is preExisted but overwrite flag set to false, then an IOException if thrown. this.fs.copyFromLocalFile(false, false, status.getPath(), destJarFile); } catch (IOException | InterruptedException e) { LOG.warn("Path:" + destJarFile + " is not copied successfully. Will require retry."); retryCount += 1; if (retryCount >= this.jarFileMaximumRetry) { LOG.error("The jar file:" + destJarFile + "failed in being copied into hdfs", e); // If retry reaches upper limit, skip copying this file. shouldFileBeAddedIntoDC = false; break; } } } if (shouldFileBeAddedIntoDC) { // Then add the jar file on HDFS to the classpath LOG.info(String.format("Adding %s to classpath", destJarFile)); DistributedCache.addFileToClassPath(destJarFile, conf, this.fs); } } } }