List of usage examples for org.apache.hadoop.fs LocalDirAllocator getLocalPathForWrite
public Path getLocalPathForWrite(String pathStr, Configuration conf) throws IOException
From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java
License:Apache License
/** * Set up the distributed cache by localizing the resources, and updating * the configuration with references to the localized resources. * @param conf job configuration//from w ww . ja v a 2 s.c o m * @throws IOException */ public void setup(Configuration conf) throws IOException { //If we are not 0th worker, wait for 0th worker to set up the cache if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) { try { InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS, WAIT_GRANULARITY_MS); } catch (Exception e) { throw new RuntimeException(e); } return; } File workDir = new File(System.getProperty("user.dir")); // Generate YARN local resources objects corresponding to the distributed // cache configuration Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>(); MRApps.setupDistributedCache(conf, localResources); //CODE CHANGE FROM ORIGINAL FILE: //We need to clear the resources from jar files, since they are distributed through the IG. // Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator(); while (iterator.hasNext()) { Entry<String, LocalResource> entry = iterator.next(); if (entry.getKey().endsWith(".jar")) { iterator.remove(); } } // Generating unique numbers for FSDownload. AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis()); // Find which resources are to be put on the local classpath Map<String, Path> classpaths = new HashMap<String, Path>(); Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf); if (archiveClassPaths != null) { for (Path p : archiveClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf); if (fileClassPaths != null) { for (Path p : fileClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } // Localize the resources LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR); FileContext localFSFileContext = FileContext.getLocalFSFileContext(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); ExecutorService exec = null; try { ThreadFactory tf = new ThreadFactoryBuilder() .setNameFormat("LocalDistributedCacheManager Downloader #%d").build(); exec = Executors.newCachedThreadPool(tf); Path destPath = localDirAllocator.getLocalPathForWrite(".", conf); Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap(); for (LocalResource resource : localResources.values()) { Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource); Future<Path> future = exec.submit(download); resourcesToPaths.put(resource, future); } for (Entry<String, LocalResource> entry : localResources.entrySet()) { LocalResource resource = entry.getValue(); Path path; try { path = resourcesToPaths.get(resource).get(); } catch (InterruptedException e) { throw new IOException(e); } catch (ExecutionException e) { throw new IOException(e); } String pathString = path.toUri().toString(); String link = entry.getKey(); String target = new File(path.toUri()).getPath(); symlink(workDir, target, link); if (resource.getType() == LocalResourceType.ARCHIVE) { localArchives.add(pathString); } else if (resource.getType() == LocalResourceType.FILE) { localFiles.add(pathString); } else if (resource.getType() == LocalResourceType.PATTERN) { //PATTERN is not currently used in local mode throw new IllegalArgumentException( "Resource type PATTERN is not " + "implemented yet. " + resource.getResource()); } Path resourcePath; try { resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource()); } catch (URISyntaxException e) { throw new IOException(e); } LOG.info(String.format("Localized %s as %s", resourcePath, path)); String cp = resourcePath.toUri().getPath(); if (classpaths.keySet().contains(cp)) { localClasspaths.add(path.toUri().getPath().toString()); } } } finally { if (exec != null) { exec.shutdown(); } } // Update the configuration object with localized data. if (!localArchives.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()]))); } if (!localFiles.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.arrayToString(localFiles.toArray(new String[localArchives.size()]))); } setupCalled = true; //If we are 0th worker, signal action complete if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) { try { InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME); } catch (Exception e) { throw new RuntimeException(e); } } }
From source file:org.apache.tez.mapreduce.processor.MapUtils.java
License:Apache License
public static void configureLocalDirs(Configuration conf, String localDir) throws IOException { String[] localSysDirs = new String[1]; localSysDirs[0] = localDir;//from ww w . ja va 2s. c o m conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localSysDirs); conf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, localDir); LOG.info(TezRuntimeFrameworkConfigs.LOCAL_DIRS + " for child: " + conf.get(TezRuntimeFrameworkConfigs.LOCAL_DIRS)); LOG.info(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR + " for child: " + conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR)); LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); Path workDir = null; // First, try to find the JOB_LOCAL_DIR on this host. try { workDir = lDirAlloc.getLocalPathToRead("work", conf); } catch (DiskErrorException e) { // DiskErrorException means dir not found. If not found, it will // be created below. } if (workDir == null) { // JOB_LOCAL_DIR doesn't exist on this host -- Create it. workDir = lDirAlloc.getLocalPathForWrite("work", conf); FileSystem lfs = FileSystem.getLocal(conf).getRaw(); boolean madeDir = false; try { madeDir = lfs.mkdirs(workDir); } catch (FileAlreadyExistsException e) { // Since all tasks will be running in their own JVM, the race condition // exists where multiple tasks could be trying to create this directory // at the same time. If this task loses the race, it's okay because // the directory already exists. madeDir = true; workDir = lDirAlloc.getLocalPathToRead("work", conf); } if (!madeDir) { throw new IOException("Mkdirs failed to create " + workDir.toString()); } } conf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString()); }
From source file:org.apache.tez.mapreduce.processor.MRTask.java
License:Apache License
private void configureLocalDirs() throws IOException { // TODO NEWTEZ Is most of this functionality required ? jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs()); if (jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR) == null) { jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name())); }//from w w w .j a v a 2 s. co m jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs()); LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); Path workDir = null; // First, try to find the JOB_LOCAL_DIR on this host. try { workDir = lDirAlloc.getLocalPathToRead("work", jobConf); } catch (DiskErrorException e) { // DiskErrorException means dir not found. If not found, it will // be created below. } if (workDir == null) { // JOB_LOCAL_DIR doesn't exist on this host -- Create it. workDir = lDirAlloc.getLocalPathForWrite("work", jobConf); FileSystem lfs = FileSystem.getLocal(jobConf).getRaw(); boolean madeDir = false; try { madeDir = lfs.mkdirs(workDir); } catch (FileAlreadyExistsException e) { // Since all tasks will be running in their own JVM, the race condition // exists where multiple tasks could be trying to create this directory // at the same time. If this task loses the race, it's okay because // the directory already exists. madeDir = true; workDir = lDirAlloc.getLocalPathToRead("work", jobConf); } if (!madeDir) { throw new IOException("Mkdirs failed to create " + workDir.toString()); } } // TODO NEWTEZ Is this required ? jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString()); jobConf.set(MRJobConfig.JOB_LOCAL_DIR, workDir.toString()); }