Example usage for org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALARCHIVES

List of usage examples for org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALARCHIVES

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALARCHIVES.

Prototype

String CACHE_LOCALARCHIVES

To view the source code for org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALARCHIVES, click the Source Link below.

Click Source Link

Usage

From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java

License:Apache License

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 *
 * Only the 0th worker performs the localization; all other workers block on a
 * synchronization barrier until the 0th worker signals completion.
 *
 * @param conf job configuration
 * @throws IOException if resource download or a file-system operation fails
 */
public void setup(Configuration conf) throws IOException {
    // If we are not 0th worker, wait for 0th worker to set up the cache, then return.
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                    WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    //CODE CHANGE FROM ORIGINAL FILE:
    //We need to clear the resources from jar files, since they are distributed through the IG.
    //
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload: each resource gets a distinct
    // destination subdirectory derived from this counter.
    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath.
    // Keyed by the fully-resolved URI path so they can be matched against
    // localized resources below.
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    // Localize the resources: download them all in parallel, then wait for
    // each download and create a symlink in the work directory.
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException(
                        "Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.keySet().contains(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        // BUGFIX: the array was previously sized with localArchives.size().
        // Collection.toArray(T[]) null-pads an oversized array, so when there
        // were more archives than files CACHE_LOCALFILES picked up null entries.
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;

    //If we are 0th worker, signal action complete
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java

License:Apache License

/**
 * Prepare job resources: resolve the classpath list and, when requested,
 * download the job submission directory contents to the local file system.
 *
 * @param download {@code true} If need to download resources.
 * @param jobLocDir Work directory for the job.
 * @throws IgniteCheckedException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException {
    try {
        if (jobLocDir.exists()) {
            throw new IgniteCheckedException(
                    "Local job directory already exists: " + jobLocDir.getAbsolutePath());
        }

        JobConf jobConf = ctx.getJobConf();

        Collection<URL> cpUrls = new ArrayList<>();

        String mrDirName = jobConf.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (mrDirName == null) {
            // No staging directory configured: just create an empty local job dir.
            if (!jobLocDir.mkdirs()) {
                throw new IgniteCheckedException(
                        "Failed to create local job directory: " + jobLocDir.getAbsolutePath());
            }
        }
        else {
            stagingDir = new Path(new URI(mrDirName));

            if (download) {
                FileSystem stagingFs = job.fileSystem(stagingDir.toUri(), jobConf);

                if (!stagingFs.exists(stagingDir)) {
                    throw new IgniteCheckedException(
                            "Failed to find map-reduce submission directory (does not exist): " + stagingDir);
                }

                if (!FileUtil.copy(stagingFs, stagingDir, jobLocDir, false, jobConf)) {
                    throw new IgniteCheckedException(
                            "Failed to copy job submission directory contents to local file system [path="
                                    + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId="
                                    + jobId + ']');
                }
            }

            File jobJar = new File(jobLocDir, "job.jar");

            cpUrls.add(jobJar.toURI().toURL());

            rsrcSet.add(jobJar);
            rsrcSet.add(new File(jobLocDir, "job.xml"));
        }

        // Localize cache files/archives and gather classpath entries.
        processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
        processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES);
        processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, cpUrls, null);
        processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, cpUrls, null);

        if (!cpUrls.isEmpty()) {
            clsPath = cpUrls.toArray(new URL[cpUrls.size()]);
        }

        setLocalFSWorkingDirectory(jobLocDir);
    }
    catch (URISyntaxException | IOException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License:Apache License

/**
 * Prepare job resources: resolve the classpath list and, when requested,
 * download the job submission directory contents to the local file system.
 *
 * @param download {@code true} If need to download resources.
 * @param jobLocDir Work directory for the job.
 * @throws IgniteCheckedException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException {
    try {
        if (jobLocDir.exists()) {
            throw new IgniteCheckedException(
                    "Local job directory already exists: " + jobLocDir.getAbsolutePath());
        }

        JobConf jobConf = ctx.getJobConf();

        String mrDirName = jobConf.get("mapreduce.job.dir");

        if (mrDirName == null) {
            // No staging directory configured: just create an empty local job dir.
            if (!jobLocDir.mkdirs()) {
                throw new IgniteCheckedException(
                        "Failed to create local job directory: " + jobLocDir.getAbsolutePath());
            }
        }
        else {
            stagingDir = new Path(new URI(mrDirName));

            if (download) {
                FileSystem stagingFs = FileSystem.get(stagingDir.toUri(), jobConf);

                if (!stagingFs.exists(stagingDir)) {
                    throw new IgniteCheckedException(
                            "Failed to find map-reduce submission directory (does not exist): " + stagingDir);
                }

                if (!FileUtil.copy(stagingFs, stagingDir, jobLocDir, false, jobConf)) {
                    throw new IgniteCheckedException(
                            "Failed to copy job submission directory contents to local file system [path="
                                    + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId="
                                    + jobId + ']');
                }
            }

            File jobJar = new File(jobLocDir, "job.jar");

            Collection<URL> cpUrls = new ArrayList<>();

            cpUrls.add(jobJar.toURI().toURL());

            rsrcSet.add(jobJar);
            rsrcSet.add(new File(jobLocDir, "job.xml"));

            // Localize cache files/archives and gather classpath entries.
            processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
            processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null,
                    MRJobConfig.CACHE_LOCALARCHIVES);
            processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, cpUrls, null);
            processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, cpUrls, null);

            if (!cpUrls.isEmpty()) {
                clsPath = cpUrls.toArray(new URL[cpUrls.size()]);
            }
        }

        setLocalFSWorkingDirectory(jobLocDir);
    }
    catch (URISyntaxException | IOException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License:Open Source License

/**
 * Prepare job resources. Resolve the classpath list and download it if needed.
 *
 * @param download {@code true} If need to download resources.
 * @param jobLocDir Work directory for the job.
 * @throws GridException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws GridException {
    try {
        // This call is expected to create the local job directory itself; a
        // pre-existing one indicates a conflicting earlier preparation.
        if (jobLocDir.exists())
            throw new GridException("Local job directory already exists: " + jobLocDir.getAbsolutePath());

        JobConf cfg = ctx.getJobConf();

        // Staging directory set by the Hadoop client on submission; null means
        // the job did not come through the standard staging flow.
        String mrDir = cfg.get("mapreduce.job.dir");

        if (mrDir != null) {
            stagingDir = new Path(new URI(mrDir));

            if (download) {
                FileSystem fs = FileSystem.get(stagingDir.toUri(), cfg);

                if (!fs.exists(stagingDir))
                    throw new GridException(
                            "Failed to find map-reduce submission directory (does not exist): " + stagingDir);

                // Copy the whole staging directory locally (source not deleted).
                if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg))
                    throw new GridException(
                            "Failed to copy job submission directory contents to local file system " + "[path="
                                    + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId="
                                    + jobId + ']');
            }

            File jarJobFile = new File(jobLocDir, "job.jar");

            Collection<URL> clsPathUrls = new ArrayList<>();

            clsPathUrls.add(jarJobFile.toURI().toURL());

            // NOTE(review): rsrcList presumably tracks localized files for later
            // cleanup/release — confirm against the rest of the class.
            rsrcList.add(jarJobFile);
            rsrcList.add(new File(jobLocDir, "job.xml"));

            // Localize distributed-cache files/archives; the last two calls also
            // collect classpath URLs (the config-key argument is null for those).
            processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
            processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null,
                    MRJobConfig.CACHE_LOCALARCHIVES);
            processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null);
            processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null);

            if (!clsPathUrls.isEmpty()) {
                clsPath = new URL[clsPathUrls.size()];

                clsPathUrls.toArray(clsPath);
            }
        } else if (!jobLocDir.mkdirs())
            throw new GridException("Failed to create local job directory: " + jobLocDir.getAbsolutePath());

        setLocalFSWorkingDirectory(jobLocDir);
    } catch (URISyntaxException | IOException e) {
        throw new GridException(e);
    }
}