List of usage examples for org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALARCHIVES
String CACHE_LOCALARCHIVES
To view the source code for org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALARCHIVES, click the Source Link.
From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java
License:Apache License
/** * Set up the distributed cache by localizing the resources, and updating * the configuration with references to the localized resources. * @param conf job configuration//from w ww.j a va 2 s . c o m * @throws IOException */ public void setup(Configuration conf) throws IOException { //If we are not 0th worker, wait for 0th worker to set up the cache if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) { try { InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS, WAIT_GRANULARITY_MS); } catch (Exception e) { throw new RuntimeException(e); } return; } File workDir = new File(System.getProperty("user.dir")); // Generate YARN local resources objects corresponding to the distributed // cache configuration Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>(); MRApps.setupDistributedCache(conf, localResources); //CODE CHANGE FROM ORIGINAL FILE: //We need to clear the resources from jar files, since they are distributed through the IG. // Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator(); while (iterator.hasNext()) { Entry<String, LocalResource> entry = iterator.next(); if (entry.getKey().endsWith(".jar")) { iterator.remove(); } } // Generating unique numbers for FSDownload. 
AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis()); // Find which resources are to be put on the local classpath Map<String, Path> classpaths = new HashMap<String, Path>(); Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf); if (archiveClassPaths != null) { for (Path p : archiveClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf); if (fileClassPaths != null) { for (Path p : fileClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } // Localize the resources LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR); FileContext localFSFileContext = FileContext.getLocalFSFileContext(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); ExecutorService exec = null; try { ThreadFactory tf = new ThreadFactoryBuilder() .setNameFormat("LocalDistributedCacheManager Downloader #%d").build(); exec = Executors.newCachedThreadPool(tf); Path destPath = localDirAllocator.getLocalPathForWrite(".", conf); Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap(); for (LocalResource resource : localResources.values()) { Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource); Future<Path> future = exec.submit(download); resourcesToPaths.put(resource, future); } for (Entry<String, LocalResource> entry : localResources.entrySet()) { LocalResource resource = entry.getValue(); Path path; try { path = resourcesToPaths.get(resource).get(); } catch (InterruptedException e) { throw new IOException(e); } catch (ExecutionException 
e) { throw new IOException(e); } String pathString = path.toUri().toString(); String link = entry.getKey(); String target = new File(path.toUri()).getPath(); symlink(workDir, target, link); if (resource.getType() == LocalResourceType.ARCHIVE) { localArchives.add(pathString); } else if (resource.getType() == LocalResourceType.FILE) { localFiles.add(pathString); } else if (resource.getType() == LocalResourceType.PATTERN) { //PATTERN is not currently used in local mode throw new IllegalArgumentException( "Resource type PATTERN is not " + "implemented yet. " + resource.getResource()); } Path resourcePath; try { resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource()); } catch (URISyntaxException e) { throw new IOException(e); } LOG.info(String.format("Localized %s as %s", resourcePath, path)); String cp = resourcePath.toUri().getPath(); if (classpaths.keySet().contains(cp)) { localClasspaths.add(path.toUri().getPath().toString()); } } } finally { if (exec != null) { exec.shutdown(); } } // Update the configuration object with localized data. if (!localArchives.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()]))); } if (!localFiles.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.arrayToString(localFiles.toArray(new String[localArchives.size()]))); } setupCalled = true; //If we are 0th worker, signal action complete if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) { try { InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME); } catch (Exception e) { throw new RuntimeException(e); } } }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java
License:Apache License
/** * Prepare job resources. Resolve the classpath list and download it if needed. * * @param download {@code true} If need to download resources. * @param jobLocDir Work directory for the job. * @throws IgniteCheckedException If failed. */// www . j a va2s .co m public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException { try { if (jobLocDir.exists()) throw new IgniteCheckedException( "Local job directory already exists: " + jobLocDir.getAbsolutePath()); JobConf cfg = ctx.getJobConf(); Collection<URL> clsPathUrls = new ArrayList<>(); String mrDir = cfg.get(MRJobConfig.MAPREDUCE_JOB_DIR); if (mrDir != null) { stagingDir = new Path(new URI(mrDir)); if (download) { FileSystem fs = job.fileSystem(stagingDir.toUri(), cfg); if (!fs.exists(stagingDir)) throw new IgniteCheckedException("Failed to find map-reduce submission " + "directory (does not exist): " + stagingDir); if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg)) throw new IgniteCheckedException("Failed to copy job submission directory " + "contents to local file system " + "[path=" + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId=" + jobId + ']'); } File jarJobFile = new File(jobLocDir, "job.jar"); clsPathUrls.add(jarJobFile.toURI().toURL()); rsrcSet.add(jarJobFile); rsrcSet.add(new File(jobLocDir, "job.xml")); } else if (!jobLocDir.mkdirs()) throw new IgniteCheckedException( "Failed to create local job directory: " + jobLocDir.getAbsolutePath()); processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES); processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES); processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null); processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null); if (!clsPathUrls.isEmpty()) clsPath = clsPathUrls.toArray(new URL[clsPathUrls.size()]); setLocalFSWorkingDirectory(jobLocDir); } 
catch (URISyntaxException | IOException e) { throw new IgniteCheckedException(e); } }
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java
License:Apache License
/** * Prepare job resources. Resolve the classpath list and download it if needed. * * @param download {@code true} If need to download resources. * @param jobLocDir Work directory for the job. * @throws IgniteCheckedException If failed. *///from w ww . j a v a2s . c o m public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException { try { if (jobLocDir.exists()) throw new IgniteCheckedException( "Local job directory already exists: " + jobLocDir.getAbsolutePath()); JobConf cfg = ctx.getJobConf(); String mrDir = cfg.get("mapreduce.job.dir"); if (mrDir != null) { stagingDir = new Path(new URI(mrDir)); if (download) { FileSystem fs = FileSystem.get(stagingDir.toUri(), cfg); if (!fs.exists(stagingDir)) throw new IgniteCheckedException( "Failed to find map-reduce submission directory (does not exist): " + stagingDir); if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg)) throw new IgniteCheckedException( "Failed to copy job submission directory contents to local file system " + "[path=" + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId=" + jobId + ']'); } File jarJobFile = new File(jobLocDir, "job.jar"); Collection<URL> clsPathUrls = new ArrayList<>(); clsPathUrls.add(jarJobFile.toURI().toURL()); rsrcSet.add(jarJobFile); rsrcSet.add(new File(jobLocDir, "job.xml")); processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES); processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES); processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null); processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null); if (!clsPathUrls.isEmpty()) { clsPath = new URL[clsPathUrls.size()]; clsPathUrls.toArray(clsPath); } } else if (!jobLocDir.mkdirs()) throw new IgniteCheckedException( "Failed to create local job directory: " + jobLocDir.getAbsolutePath()); setLocalFSWorkingDirectory(jobLocDir); } 
catch (URISyntaxException | IOException e) { throw new IgniteCheckedException(e); } }
From source file:org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java
License:Open Source License
/** * Prepare job resources. Resolve the classpath list and download it if needed. * * @param download {@code true} If need to download resources. * @param jobLocDir Work directory for the job. * @throws GridException If failed./*from www .j a v a 2 s. c om*/ */ public void prepareJobEnvironment(boolean download, File jobLocDir) throws GridException { try { if (jobLocDir.exists()) throw new GridException("Local job directory already exists: " + jobLocDir.getAbsolutePath()); JobConf cfg = ctx.getJobConf(); String mrDir = cfg.get("mapreduce.job.dir"); if (mrDir != null) { stagingDir = new Path(new URI(mrDir)); if (download) { FileSystem fs = FileSystem.get(stagingDir.toUri(), cfg); if (!fs.exists(stagingDir)) throw new GridException( "Failed to find map-reduce submission directory (does not exist): " + stagingDir); if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg)) throw new GridException( "Failed to copy job submission directory contents to local file system " + "[path=" + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId=" + jobId + ']'); } File jarJobFile = new File(jobLocDir, "job.jar"); Collection<URL> clsPathUrls = new ArrayList<>(); clsPathUrls.add(jarJobFile.toURI().toURL()); rsrcList.add(jarJobFile); rsrcList.add(new File(jobLocDir, "job.xml")); processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES); processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES); processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null); processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null); if (!clsPathUrls.isEmpty()) { clsPath = new URL[clsPathUrls.size()]; clsPathUrls.toArray(clsPath); } } else if (!jobLocDir.mkdirs()) throw new GridException("Failed to create local job directory: " + jobLocDir.getAbsolutePath()); setLocalFSWorkingDirectory(jobLocDir); } catch (URISyntaxException | IOException e) { throw new 
GridException(e); } }