List of usage examples for the org.apache.hadoop.fs.LocalDirAllocator constructor LocalDirAllocator(String)
public LocalDirAllocator(String contextCfgItemName)
From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java
License:Apache License
/** * Set up the distributed cache by localizing the resources, and updating * the configuration with references to the localized resources. * @param conf job configuration//from w w w . j a v a 2 s. com * @throws IOException */ public void setup(Configuration conf) throws IOException { //If we are not 0th worker, wait for 0th worker to set up the cache if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) { try { InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS, WAIT_GRANULARITY_MS); } catch (Exception e) { throw new RuntimeException(e); } return; } File workDir = new File(System.getProperty("user.dir")); // Generate YARN local resources objects corresponding to the distributed // cache configuration Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>(); MRApps.setupDistributedCache(conf, localResources); //CODE CHANGE FROM ORIGINAL FILE: //We need to clear the resources from jar files, since they are distributed through the IG. // Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator(); while (iterator.hasNext()) { Entry<String, LocalResource> entry = iterator.next(); if (entry.getKey().endsWith(".jar")) { iterator.remove(); } } // Generating unique numbers for FSDownload. 
AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis()); // Find which resources are to be put on the local classpath Map<String, Path> classpaths = new HashMap<String, Path>(); Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf); if (archiveClassPaths != null) { for (Path p : archiveClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf); if (fileClassPaths != null) { for (Path p : fileClassPaths) { FileSystem remoteFS = p.getFileSystem(conf); p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory())); classpaths.put(p.toUri().getPath().toString(), p); } } // Localize the resources LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR); FileContext localFSFileContext = FileContext.getLocalFSFileContext(); UserGroupInformation ugi = UserGroupInformation.getCurrentUser(); ExecutorService exec = null; try { ThreadFactory tf = new ThreadFactoryBuilder() .setNameFormat("LocalDistributedCacheManager Downloader #%d").build(); exec = Executors.newCachedThreadPool(tf); Path destPath = localDirAllocator.getLocalPathForWrite(".", conf); Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap(); for (LocalResource resource : localResources.values()) { Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf, new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource); Future<Path> future = exec.submit(download); resourcesToPaths.put(resource, future); } for (Entry<String, LocalResource> entry : localResources.entrySet()) { LocalResource resource = entry.getValue(); Path path; try { path = resourcesToPaths.get(resource).get(); } catch (InterruptedException e) { throw new IOException(e); } catch (ExecutionException 
e) { throw new IOException(e); } String pathString = path.toUri().toString(); String link = entry.getKey(); String target = new File(path.toUri()).getPath(); symlink(workDir, target, link); if (resource.getType() == LocalResourceType.ARCHIVE) { localArchives.add(pathString); } else if (resource.getType() == LocalResourceType.FILE) { localFiles.add(pathString); } else if (resource.getType() == LocalResourceType.PATTERN) { //PATTERN is not currently used in local mode throw new IllegalArgumentException( "Resource type PATTERN is not " + "implemented yet. " + resource.getResource()); } Path resourcePath; try { resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource()); } catch (URISyntaxException e) { throw new IOException(e); } LOG.info(String.format("Localized %s as %s", resourcePath, path)); String cp = resourcePath.toUri().getPath(); if (classpaths.keySet().contains(cp)) { localClasspaths.add(path.toUri().getPath().toString()); } } } finally { if (exec != null) { exec.shutdown(); } } // Update the configuration object with localized data. if (!localArchives.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()]))); } if (!localFiles.isEmpty()) { conf.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.arrayToString(localFiles.toArray(new String[localArchives.size()]))); } setupCalled = true; //If we are 0th worker, signal action complete if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) { try { InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME); } catch (Exception e) { throw new RuntimeException(e); } } }
From source file:org.apache.tajo.engine.planner.physical.ExternalSortExec.java
License:Apache License
private ExternalSortExec(final TaskAttemptContext context, final SortNode plan) throws PhysicalPlanningException { super(context, plan.getInSchema(), plan.getOutSchema(), null, plan.getSortKeys()); this.plan = plan; this.meta = CatalogUtil.newTableMeta(StoreType.ROWFILE); this.defaultFanout = context.getConf().getIntVar(ConfVars.EXECUTOR_EXTERNAL_SORT_FANOUT); if (defaultFanout < 2) { throw new PhysicalPlanningException( ConfVars.EXECUTOR_EXTERNAL_SORT_FANOUT.varname + " cannot be lower than 2"); }/*from ww w.j a va 2s. com*/ // TODO - sort buffer and core num should be changed to use the allocated container resource. this.sortBufferBytesNum = context.getQueryContext().getLong(SessionVars.EXTSORT_BUFFER_SIZE) * StorageUnit.MB; this.allocatedCoreNum = context.getConf().getIntVar(ConfVars.EXECUTOR_EXTERNAL_SORT_THREAD_NUM); this.executorService = Executors.newFixedThreadPool(this.allocatedCoreNum); this.inMemoryTable = new ArrayList<Tuple>(100000); this.sortTmpDir = getExecutorTmpDir(); localDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname); localFS = new RawLocalFileSystem(); }
From source file:org.apache.tajo.storage.HashShuffleAppenderManager.java
License:Apache License
public HashShuffleAppenderManager(TajoConf systemConf) throws IOException { this.systemConf = systemConf; // initialize LocalDirAllocator lDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname); // initialize DFS and LocalFileSystems defaultFS = TajoConf.getTajoRootDir(systemConf).getFileSystem(systemConf); localFS = FileSystem.getLocal(systemConf); pageSize = systemConf.getIntVar(ConfVars.SHUFFLE_HASH_APPENDER_PAGE_VOLUME) * 1024 * 1024; }
From source file:org.apache.tajo.worker.LocalFetcher.java
License:Apache License
@VisibleForTesting public LocalFetcher(TajoConf conf, URI uri, String tableName) throws IOException { super(conf, uri); this.maxUrlLength = conf.getIntVar(ConfVars.PULLSERVER_FETCH_URL_MAX_LENGTH); this.tableName = tableName; this.localFileSystem = new LocalFileSystem(); this.localDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname); this.pullServerService = null; String scheme = uri.getScheme() == null ? "http" : uri.getScheme(); this.host = uri.getHost() == null ? "localhost" : uri.getHost(); this.port = uri.getPort(); if (port == -1) { if (scheme.equalsIgnoreCase("http")) { this.port = 80; } else if (scheme.equalsIgnoreCase("https")) { this.port = 443; }/* w ww .jav a 2 s. c o m*/ } bootstrap = new Bootstrap() .group(NettyUtils.getSharedEventLoopGroup(NettyUtils.GROUP.FETCHER, conf.getIntVar(ConfVars.SHUFFLE_RPC_CLIENT_WORKER_THREAD_NUM))) .channel(NioSocketChannel.class).option(ChannelOption.ALLOCATOR, NettyUtils.ALLOCATOR) .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.getIntVar(ConfVars.SHUFFLE_FETCHER_CONNECT_TIMEOUT) * 1000) .option(ChannelOption.SO_RCVBUF, 1048576) // set 1M .option(ChannelOption.TCP_NODELAY, true); }
From source file:org.apache.tajo.worker.TajoWorker.java
License:Apache License
@Override public void serviceInit(Configuration conf) throws Exception { if (!(conf instanceof TajoConf)) { throw new IllegalArgumentException("conf should be a TajoConf type."); }//from w ww . ja v a 2 s . c om Runtime.getRuntime().addShutdownHook(new Thread(new ShutdownHook())); this.systemConf = (TajoConf) conf; RackResolver.init(systemConf); serviceTracker = ServiceTrackerFactory.get(systemConf); this.workerContext = new WorkerContext(); this.lDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname); String resourceManagerClassName = systemConf.getVar(ConfVars.RESOURCE_MANAGER_CLASS); boolean randomPort = true; if (resourceManagerClassName.indexOf(TajoWorkerResourceManager.class.getName()) >= 0) { randomPort = false; } int clientPort = systemConf.getSocketAddrVar(ConfVars.WORKER_CLIENT_RPC_ADDRESS).getPort(); int peerRpcPort = systemConf.getSocketAddrVar(ConfVars.WORKER_PEER_RPC_ADDRESS).getPort(); int qmManagerPort = systemConf.getSocketAddrVar(ConfVars.WORKER_QM_RPC_ADDRESS).getPort(); if (randomPort) { clientPort = 0; peerRpcPort = 0; qmManagerPort = 0; systemConf.setIntVar(ConfVars.PULLSERVER_PORT, 0); } this.dispatcher = new AsyncDispatcher(); addIfService(dispatcher); tajoWorkerManagerService = new TajoWorkerManagerService(workerContext, peerRpcPort); addIfService(tajoWorkerManagerService); // querymaster worker tajoWorkerClientService = new TajoWorkerClientService(workerContext, clientPort); addIfService(tajoWorkerClientService); queryMasterManagerService = new QueryMasterManagerService(workerContext, qmManagerPort); addIfService(queryMasterManagerService); // taskrunner worker taskRunnerManager = new TaskRunnerManager(workerContext, dispatcher); addService(taskRunnerManager); workerHeartbeatThread = new WorkerHeartbeatService(workerContext); addIfService(workerHeartbeatThread); int httpPort = 0; if (!TajoPullServerService.isStandalone()) { pullService = new TajoPullServerService(); addIfService(pullService); } if 
(!systemConf.get(CommonTestingUtil.TAJO_TEST_KEY, "FALSE").equalsIgnoreCase("TRUE")) { httpPort = initWebServer(); } super.serviceInit(conf); int pullServerPort; if (pullService != null) { pullServerPort = pullService.getPort(); } else { pullServerPort = getStandAlonePullServerPort(); } this.connectionInfo = new WorkerConnectionInfo(tajoWorkerManagerService.getBindAddr().getHostName(), tajoWorkerManagerService.getBindAddr().getPort(), pullServerPort, tajoWorkerClientService.getBindAddr().getPort(), queryMasterManagerService.getBindAddr().getPort(), httpPort); LOG.info("Tajo Worker is initialized." + " connection :" + connectionInfo.toString()); try { hashShuffleAppenderManager = new HashShuffleAppenderManager(systemConf); } catch (IOException e) { LOG.fatal(e.getMessage(), e); System.exit(-1); } taskHistoryWriter = new HistoryWriter(workerContext.getWorkerName(), false); addIfService(taskHistoryWriter); taskHistoryWriter.init(conf); historyReader = new HistoryReader(workerContext.getWorkerName(), this.systemConf); diagnoseTajoWorker(); }
From source file:org.apache.tez.engine.common.shuffle.impl.Shuffle.java
License:Apache License
public Shuffle(TezEngineTaskContext taskContext, RunningTaskContext runningTaskContext, Configuration conf, int tasksInDegree, TezTaskReporter reporter, Processor combineProcessor) throws IOException { this.taskContext = taskContext; this.runningTaskContext = runningTaskContext; this.conf = conf; this.reporter = reporter; this.metrics = new ShuffleClientMetrics(taskContext.getTaskAttemptId(), this.conf, this.taskContext.getUser(), this.taskContext.getJobName()); this.tasksInDegree = tasksInDegree; FileSystem localFS = FileSystem.getLocal(this.conf); LocalDirAllocator localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS); copyPhase = this.runningTaskContext.getProgress().addPhase("copy"); mergePhase = this.runningTaskContext.getProgress().addPhase("merge"); // TODO TEZ Get rid of Map / Reduce references. TezCounter shuffledMapsCounter = reporter.getCounter(TaskCounter.SHUFFLED_MAPS); TezCounter reduceShuffleBytes = reporter.getCounter(TaskCounter.REDUCE_SHUFFLE_BYTES); TezCounter failedShuffleCounter = reporter.getCounter(TaskCounter.FAILED_SHUFFLE); TezCounter spilledRecordsCounter = reporter.getCounter(TaskCounter.SPILLED_RECORDS); TezCounter reduceCombineInputCounter = reporter.getCounter(TaskCounter.COMBINE_INPUT_RECORDS); TezCounter mergedMapOutputsCounter = reporter.getCounter(TaskCounter.MERGED_MAP_OUTPUTS); scheduler = new ShuffleScheduler(this.conf, tasksInDegree, runningTaskContext.getStatus(), this, copyPhase, shuffledMapsCounter, reduceShuffleBytes, failedShuffleCounter); merger = new MergeManager(this.taskContext.getTaskAttemptId(), this.conf, localFS, localDirAllocator, reporter, combineProcessor, spilledRecordsCounter, reduceCombineInputCounter, mergedMapOutputsCounter, this, mergePhase); }
From source file:org.apache.tez.mapreduce.processor.map.TestMapProcessor.java
License:Apache License
/**
 * Locates the output file of the task identified by {@code outputContext}
 * among the configured local directories.
 *
 * @param jobConf       configuration handed to the local directory allocator
 * @param outputContext context supplying the unique identifier of the output
 * @return the readable local path of the output file
 * @throws IOException propagated from the local directory lookup
 */
private Path getMapOutputFile(Configuration jobConf, OutputContext outputContext) throws IOException {
    Path attemptDir = new Path(Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR, outputContext.getUniqueIdentifier());
    Path relativeOutput = new Path(attemptDir, Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);

    LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    return allocator.getLocalPathToRead(relativeOutput.toString(), jobConf);
}
From source file:org.apache.tez.mapreduce.processor.MapUtils.java
License:Apache License
public static void configureLocalDirs(Configuration conf, String localDir) throws IOException { String[] localSysDirs = new String[1]; localSysDirs[0] = localDir;//w ww . j a v a 2s . c o m conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localSysDirs); conf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, localDir); LOG.info(TezRuntimeFrameworkConfigs.LOCAL_DIRS + " for child: " + conf.get(TezRuntimeFrameworkConfigs.LOCAL_DIRS)); LOG.info(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR + " for child: " + conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR)); LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); Path workDir = null; // First, try to find the JOB_LOCAL_DIR on this host. try { workDir = lDirAlloc.getLocalPathToRead("work", conf); } catch (DiskErrorException e) { // DiskErrorException means dir not found. If not found, it will // be created below. } if (workDir == null) { // JOB_LOCAL_DIR doesn't exist on this host -- Create it. workDir = lDirAlloc.getLocalPathForWrite("work", conf); FileSystem lfs = FileSystem.getLocal(conf).getRaw(); boolean madeDir = false; try { madeDir = lfs.mkdirs(workDir); } catch (FileAlreadyExistsException e) { // Since all tasks will be running in their own JVM, the race condition // exists where multiple tasks could be trying to create this directory // at the same time. If this task loses the race, it's okay because // the directory already exists. madeDir = true; workDir = lDirAlloc.getLocalPathToRead("work", conf); } if (!madeDir) { throw new IOException("Mkdirs failed to create " + workDir.toString()); } } conf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString()); }
From source file:org.apache.tez.mapreduce.processor.MRTask.java
License:Apache License
private void configureLocalDirs() throws IOException { // TODO NEWTEZ Is most of this functionality required ? jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs()); if (jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR) == null) { jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name())); }/* w w w . j av a 2s . c o m*/ jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs()); LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS); Path workDir = null; // First, try to find the JOB_LOCAL_DIR on this host. try { workDir = lDirAlloc.getLocalPathToRead("work", jobConf); } catch (DiskErrorException e) { // DiskErrorException means dir not found. If not found, it will // be created below. } if (workDir == null) { // JOB_LOCAL_DIR doesn't exist on this host -- Create it. workDir = lDirAlloc.getLocalPathForWrite("work", jobConf); FileSystem lfs = FileSystem.getLocal(jobConf).getRaw(); boolean madeDir = false; try { madeDir = lfs.mkdirs(workDir); } catch (FileAlreadyExistsException e) { // Since all tasks will be running in their own JVM, the race condition // exists where multiple tasks could be trying to create this directory // at the same time. If this task loses the race, it's okay because // the directory already exists. madeDir = true; workDir = lDirAlloc.getLocalPathToRead("work", jobConf); } if (!madeDir) { throw new IOException("Mkdirs failed to create " + workDir.toString()); } } // TODO NEWTEZ Is this required ? jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString()); jobConf.set(MRJobConfig.JOB_LOCAL_DIR, workDir.toString()); }
From source file:org.apache.tez.runtime.library.broadcast.input.BroadcastInputManager.java
License:Apache License
public BroadcastInputManager(String uniqueIdentifier, Configuration conf) { this.conf = conf; this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier); this.localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS); // Setup configuration final float maxInMemCopyUse = conf.getFloat(TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT); if (maxInMemCopyUse > 1.0 || maxInMemCopyUse < 0.0) { throw new IllegalArgumentException("Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + maxInMemCopyUse); }//from w w w. ja va 2 s.c o m // Allow unit tests to fix Runtime memory this.memoryLimit = (long) (conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE)) * maxInMemCopyUse); final float singleShuffleMemoryLimitPercent = conf.getFloat( TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT); if (singleShuffleMemoryLimitPercent <= 0.0f || singleShuffleMemoryLimitPercent > 1.0f) { throw new IllegalArgumentException( "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": " + singleShuffleMemoryLimitPercent); } this.maxSingleShuffleLimit = (long) (memoryLimit * singleShuffleMemoryLimitPercent); LOG.info("BroadcastInputManager -> " + "MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: " + this.maxSingleShuffleLimit); }