Example usage for org.apache.hadoop.mapreduce.filecache DistributedCache addFileToClassPath

List of usage examples for org.apache.hadoop.mapreduce.filecache.DistributedCache.addFileToClassPath

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.filecache.DistributedCache.addFileToClassPath.

Prototype

public static void addFileToClassPath(Path file, Configuration conf, FileSystem fs) 

Source Link

Document

Add a file path to the current set of classpath entries.

Usage

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Add framework or job-specific jars to the classpath through DistributedCache
 * so the mappers can use them.
 *
 * <p>Each entry of {@code jarFileList} is resolved as a glob on the local file system. Every match is
 * copied into {@code jarFileDir} on {@code this.fs}, unless a file with the same name already exists
 * there, and is then registered on the job classpath.
 *
 * @param jarFileDir directory on {@code this.fs} that receives the uploaded jars
 * @param jarFileList local jar paths or glob patterns, split with {@code SPLITTER}
 * @param conf job configuration that receives the classpath entries
 * @throws IOException if globbing, the existence check, or the copy fails
 */
@SuppressWarnings("deprecation")
private void addJars(Path jarFileDir, String jarFileList, Configuration conf) throws IOException {
    LocalFileSystem lfs = FileSystem.getLocal(conf);
    for (String jarFile : SPLITTER.split(jarFileList)) {
        Path srcJarFile = new Path(jarFile);
        FileStatus[] fileStatusList = lfs.globStatus(srcJarFile);
        if (fileStatusList == null) {
            // globStatus returns null (not an empty array) when a non-glob path does not exist;
            // skip the entry instead of failing with a NullPointerException.
            LOG.warn(String.format("Jar file %s does not exist on the local file system, skipping it", srcJarFile));
            continue;
        }
        for (FileStatus status : fileStatusList) {
            // DistributedCache requires absolute path, so we need to use makeQualified.
            Path destJarFile = new Path(this.fs.makeQualified(jarFileDir), status.getPath().getName());
            if (!this.fs.exists(destJarFile)) {
                // Copy the jar file from local file system to HDFS
                this.fs.copyFromLocalFile(status.getPath(), destJarFile);
            }
            // Then add the jar file on HDFS to the classpath
            LOG.info(String.format("Adding %s to classpath", destJarFile));
            DistributedCache.addFileToClassPath(destJarFile, conf, this.fs);
        }
    }
}

From source file:gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Adds the files found directly under each listed location on {@code this.fs} to the job classpath
 * through DistributedCache.
 *
 * <p>Entries reported as directories are skipped, so the listing is not recursive.
 *
 * @param hdfsJarFileList locations to list, split with {@code SPLITTER}
 * @param conf job configuration that receives the classpath entries
 * @throws IOException if listing a location fails
 */
private void addHdfsJars(String hdfsJarFileList, Configuration conf) throws IOException {
    for (String location : SPLITTER.split(hdfsJarFileList)) {
        FileStatus[] entries = this.fs.listStatus(new Path(location));
        for (FileStatus entry : entries) {
            if (entry.isDirectory()) {
                continue;
            }
            // Rebuild the path from the configured location plus the entry's file name.
            String entryName = entry.getPath().getName();
            Path classpathEntry = new Path(location, entryName);
            LOG.info(String.format("Adding %s to classpath", classpathEntry));
            DistributedCache.addFileToClassPath(classpathEntry, conf, this.fs);
        }
    }
}

From source file:org.apache.gobblin.runtime.mapreduce.MRJobLauncher.java

License:Apache License

/**
 * Add framework or job-specific jars to the classpath through DistributedCache
 * so the mappers can use them./*from ww  w.  j a v  a  2  s .com*/
 */
@SuppressWarnings("deprecation")
private void addJars(Path jarFileDir, String jarFileList, Configuration conf) throws IOException {
    LocalFileSystem lfs = FileSystem.getLocal(conf);
    for (String jarFile : SPLITTER.split(jarFileList)) {
        Path srcJarFile = new Path(jarFile);
        FileStatus[] fileStatusList = lfs.globStatus(srcJarFile);

        for (FileStatus status : fileStatusList) {
            // For each FileStatus there are chances it could fail in copying at the first attempt, due to file-existence
            // or file-copy is ongoing by other job instance since all Gobblin jobs share the same jar file directory.
            // the retryCount is to avoid cases (if any) where retry is going too far and causes job hanging.
            int retryCount = 0;
            boolean shouldFileBeAddedIntoDC = true;
            Path destJarFile = calculateDestJarFile(status, jarFileDir);
            // Adding destJarFile into HDFS until it exists and the size of file on targetPath matches the one on local path.
            while (!this.fs.exists(destJarFile) || fs.getFileStatus(destJarFile).getLen() != status.getLen()) {
                try {
                    if (this.fs.exists(destJarFile)
                            && fs.getFileStatus(destJarFile).getLen() != status.getLen()) {
                        Thread.sleep(WAITING_TIME_ON_IMCOMPLETE_UPLOAD);
                        throw new IOException("Waiting for file to complete on uploading ... ");
                    }
                    // Set the first parameter as false for not deleting sourceFile
                    // Set the second parameter as false for not overwriting existing file on the target, by default it is true.
                    // If the file is preExisted but overwrite flag set to false, then an IOException if thrown.
                    this.fs.copyFromLocalFile(false, false, status.getPath(), destJarFile);
                } catch (IOException | InterruptedException e) {
                    LOG.warn("Path:" + destJarFile + " is not copied successfully. Will require retry.");
                    retryCount += 1;
                    if (retryCount >= this.jarFileMaximumRetry) {
                        LOG.error("The jar file:" + destJarFile + "failed in being copied into hdfs", e);
                        // If retry reaches upper limit, skip copying this file.
                        shouldFileBeAddedIntoDC = false;
                        break;
                    }
                }
            }
            if (shouldFileBeAddedIntoDC) {
                // Then add the jar file on HDFS to the classpath
                LOG.info(String.format("Adding %s to classpath", destJarFile));
                DistributedCache.addFileToClassPath(destJarFile, conf, this.fs);
            }
        }
    }
}