Example usage for the org.apache.hadoop.fs.LocalDirAllocator constructor LocalDirAllocator(String)

List of usage examples for the org.apache.hadoop.fs.LocalDirAllocator constructor LocalDirAllocator(String)

Introduction

On this page you can find example usages of the org.apache.hadoop.fs.LocalDirAllocator constructor LocalDirAllocator(String).

Prototype

public LocalDirAllocator(String contextCfgItemName) 

Source Link

Document

Create an allocator object

Usage

From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java

License:Apache License

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 *
 * <p>When more than one worker is running, only worker 0 performs the localization. Every other
 * worker blocks on the synchronization barrier until worker 0 signals completion and then returns
 * without localizing anything itself.
 *
 * @param conf job configuration; updated in place with the localized archive and file lists
 * @throws IOException if localizing a resource fails; interruption, download failures and
 *         malformed resource URLs are wrapped in an {@code IOException}
 */
public void setup(Configuration conf) throws IOException {
    //If we are not 0th worker, wait for 0th worker to set up the cache
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                    WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    //CODE CHANGE FROM ORIGINAL FILE:
    //We need to clear the resources from jar files, since they are distributed through the IG.
    //
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload.
    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath.
    // Keys are the path component of the fully resolved remote path.
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath(), p);
        }
    }

    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath(), p);
        }
    }

    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);

        // Submit every download first so they run concurrently, then collect the results below.
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers further up can still observe the interruption.
                Thread.currentThread().interrupt();
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException(
                        "Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.containsKey(cp)) {
                localClasspaths.add(path.toUri().getPath());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        // The array must be sized from localFiles itself: an array longer than the list is
        // returned as-is by toArray, padded with nulls.
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;

    //If we are  0th worker, signal action complete
    // NOTE(review): if anything above throws, this signal is never sent, so the other workers
    // presumably wait until their barrier timeout expires - confirm that is acceptable.
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}

From source file:org.apache.tajo.engine.planner.physical.ExternalSortExec.java

License:Apache License

private ExternalSortExec(final TaskAttemptContext context, final SortNode plan)
        throws PhysicalPlanningException {
    super(context, plan.getInSchema(), plan.getOutSchema(), null, plan.getSortKeys());

    this.plan = plan;
    this.meta = CatalogUtil.newTableMeta(StoreType.ROWFILE);

    this.defaultFanout = context.getConf().getIntVar(ConfVars.EXECUTOR_EXTERNAL_SORT_FANOUT);
    if (defaultFanout < 2) {
        throw new PhysicalPlanningException(
                ConfVars.EXECUTOR_EXTERNAL_SORT_FANOUT.varname + " cannot be lower than 2");
    }/*from  ww  w.j  a  va 2s.  com*/
    // TODO - sort buffer and core num should be changed to use the allocated container resource.
    this.sortBufferBytesNum = context.getQueryContext().getLong(SessionVars.EXTSORT_BUFFER_SIZE)
            * StorageUnit.MB;
    this.allocatedCoreNum = context.getConf().getIntVar(ConfVars.EXECUTOR_EXTERNAL_SORT_THREAD_NUM);
    this.executorService = Executors.newFixedThreadPool(this.allocatedCoreNum);
    this.inMemoryTable = new ArrayList<Tuple>(100000);

    this.sortTmpDir = getExecutorTmpDir();
    localDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname);
    localFS = new RawLocalFileSystem();
}

From source file:org.apache.tajo.storage.HashShuffleAppenderManager.java

License:Apache License

public HashShuffleAppenderManager(TajoConf systemConf) throws IOException {
    this.systemConf = systemConf;

    // initialize LocalDirAllocator
    lDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname);

    // initialize DFS and LocalFileSystems
    defaultFS = TajoConf.getTajoRootDir(systemConf).getFileSystem(systemConf);
    localFS = FileSystem.getLocal(systemConf);
    pageSize = systemConf.getIntVar(ConfVars.SHUFFLE_HASH_APPENDER_PAGE_VOLUME) * 1024 * 1024;
}

From source file:org.apache.tajo.worker.LocalFetcher.java

License:Apache License

/**
 * Test-visible constructor that leaves the pull server service unset.
 *
 * <p>Host defaults to {@code localhost} and scheme to {@code http} when the URI omits them. When
 * the URI carries no port, 80 is used for http and 443 for https; for any other scheme the port
 * stays at -1.
 *
 * @param conf      configuration supplying URL length, thread count and connect timeout
 * @param uri       location to fetch from
 * @param tableName name of the table being fetched
 * @throws IOException declared by the constructor signature
 */
@VisibleForTesting
public LocalFetcher(TajoConf conf, URI uri, String tableName) throws IOException {
    super(conf, uri);
    this.maxUrlLength = conf.getIntVar(ConfVars.PULLSERVER_FETCH_URL_MAX_LENGTH);
    this.tableName = tableName;
    this.localFileSystem = new LocalFileSystem();
    this.localDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname);
    this.pullServerService = null;

    final String uriScheme = uri.getScheme();
    final String scheme = (uriScheme != null) ? uriScheme : "http";

    final String uriHost = uri.getHost();
    if (uriHost != null) {
        this.host = uriHost;
    } else {
        this.host = "localhost";
    }

    // Fall back to the scheme's well-known port when the URI does not specify one.
    int resolvedPort = uri.getPort();
    if (resolvedPort == -1) {
        if ("http".equalsIgnoreCase(scheme)) {
            resolvedPort = 80;
        } else if ("https".equalsIgnoreCase(scheme)) {
            resolvedPort = 443;
        }
    }
    this.port = resolvedPort;

    final int workerThreads = conf.getIntVar(ConfVars.SHUFFLE_RPC_CLIENT_WORKER_THREAD_NUM);
    // The configured timeout is multiplied by 1000 for the millisecond-based channel option.
    final int connectTimeoutMillis = conf.getIntVar(ConfVars.SHUFFLE_FETCHER_CONNECT_TIMEOUT) * 1000;

    this.bootstrap = new Bootstrap()
            .group(NettyUtils.getSharedEventLoopGroup(NettyUtils.GROUP.FETCHER, workerThreads))
            .channel(NioSocketChannel.class)
            .option(ChannelOption.ALLOCATOR, NettyUtils.ALLOCATOR)
            .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, connectTimeoutMillis)
            .option(ChannelOption.SO_RCVBUF, 1048576) // set 1M
            .option(ChannelOption.TCP_NODELAY, true);
}

From source file:org.apache.tajo.worker.TajoWorker.java

License:Apache License

/**
 * Initializes the worker: registers the shutdown hook, creates and registers the child
 * services, initializes them through the superclass, and then builds the connection info,
 * shuffle appender manager and history reader/writer.
 *
 * <p>All RPC ports (and the pull server port) are set to 0 unless the configured resource
 * manager class name contains {@code TajoWorkerResourceManager}.
 *
 * @param conf configuration; must be a {@code TajoConf}
 * @throws IllegalArgumentException if {@code conf} is not a {@code TajoConf}
 * @throws Exception if service initialization fails
 */
@Override
public void serviceInit(Configuration conf) throws Exception {
    if (!(conf instanceof TajoConf)) {
        throw new IllegalArgumentException("conf should be a TajoConf type.");
    }
    Runtime.getRuntime().addShutdownHook(new Thread(new ShutdownHook()));

    this.systemConf = (TajoConf) conf;
    RackResolver.init(systemConf);

    serviceTracker = ServiceTrackerFactory.get(systemConf);

    this.workerContext = new WorkerContext();
    this.lDirAllocator = new LocalDirAllocator(ConfVars.WORKER_TEMPORAL_DIR.varname);

    // Use the configured ports only when the resource manager is TajoWorkerResourceManager.
    final String resourceManagerClassName = systemConf.getVar(ConfVars.RESOURCE_MANAGER_CLASS);
    final boolean randomPort = !resourceManagerClassName.contains(TajoWorkerResourceManager.class.getName());

    final int configuredClientPort = systemConf.getSocketAddrVar(ConfVars.WORKER_CLIENT_RPC_ADDRESS).getPort();
    final int configuredPeerRpcPort = systemConf.getSocketAddrVar(ConfVars.WORKER_PEER_RPC_ADDRESS).getPort();
    final int configuredQmPort = systemConf.getSocketAddrVar(ConfVars.WORKER_QM_RPC_ADDRESS).getPort();

    final int clientPort = randomPort ? 0 : configuredClientPort;
    final int peerRpcPort = randomPort ? 0 : configuredPeerRpcPort;
    final int qmManagerPort = randomPort ? 0 : configuredQmPort;
    if (randomPort) {
        systemConf.setIntVar(ConfVars.PULLSERVER_PORT, 0);
    }

    this.dispatcher = new AsyncDispatcher();
    addIfService(dispatcher);

    tajoWorkerManagerService = new TajoWorkerManagerService(workerContext, peerRpcPort);
    addIfService(tajoWorkerManagerService);

    // querymaster worker
    tajoWorkerClientService = new TajoWorkerClientService(workerContext, clientPort);
    addIfService(tajoWorkerClientService);

    queryMasterManagerService = new QueryMasterManagerService(workerContext, qmManagerPort);
    addIfService(queryMasterManagerService);

    // taskrunner worker
    taskRunnerManager = new TaskRunnerManager(workerContext, dispatcher);
    addService(taskRunnerManager);

    workerHeartbeatThread = new WorkerHeartbeatService(workerContext);
    addIfService(workerHeartbeatThread);

    // An embedded pull server is created only when it is not running standalone.
    if (!TajoPullServerService.isStandalone()) {
        pullService = new TajoPullServerService();
        addIfService(pullService);
    }

    // The web server is skipped when the test key is set to TRUE.
    int httpPort = 0;
    final boolean testMode = systemConf.get(CommonTestingUtil.TAJO_TEST_KEY, "FALSE").equalsIgnoreCase("TRUE");
    if (!testMode) {
        httpPort = initWebServer();
    }

    super.serviceInit(conf);

    final int pullServerPort = (pullService != null)
            ? pullService.getPort()
            : getStandAlonePullServerPort();

    this.connectionInfo = new WorkerConnectionInfo(
            tajoWorkerManagerService.getBindAddr().getHostName(),
            tajoWorkerManagerService.getBindAddr().getPort(),
            pullServerPort,
            tajoWorkerClientService.getBindAddr().getPort(),
            queryMasterManagerService.getBindAddr().getPort(),
            httpPort);

    LOG.info("Tajo Worker is initialized. connection :" + connectionInfo.toString());

    try {
        hashShuffleAppenderManager = new HashShuffleAppenderManager(systemConf);
    } catch (IOException e) {
        // Failing to create the appender manager terminates the JVM.
        LOG.fatal(e.getMessage(), e);
        System.exit(-1);
    }

    taskHistoryWriter = new HistoryWriter(workerContext.getWorkerName(), false);
    addIfService(taskHistoryWriter);
    taskHistoryWriter.init(conf);

    historyReader = new HistoryReader(workerContext.getWorkerName(), this.systemConf);

    diagnoseTajoWorker();
}

From source file:org.apache.tez.engine.common.shuffle.impl.Shuffle.java

License:Apache License

/**
 * Creates the shuffle for one task: stores the contexts, sets up metrics and the
 * copy/merge progress phases, and builds the scheduler and merge manager.
 *
 * @param taskContext        engine-level context of the task
 * @param runningTaskContext context providing progress and status of the running task
 * @param conf               configuration
 * @param tasksInDegree      value passed to the scheduler as the number of inputs
 * @param reporter           reporter used to obtain counters
 * @param combineProcessor   processor handed to the merge manager
 * @throws IOException if the local file system cannot be obtained
 */
public Shuffle(TezEngineTaskContext taskContext, RunningTaskContext runningTaskContext, Configuration conf,
        int tasksInDegree, TezTaskReporter reporter, Processor combineProcessor) throws IOException {
    this.taskContext = taskContext;
    this.runningTaskContext = runningTaskContext;
    this.conf = conf;
    this.reporter = reporter;
    this.metrics = new ShuffleClientMetrics(
            taskContext.getTaskAttemptId(), conf, taskContext.getUser(), taskContext.getJobName());
    this.tasksInDegree = tasksInDegree;

    final FileSystem localFileSystem = FileSystem.getLocal(conf);
    final LocalDirAllocator dirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

    this.copyPhase = runningTaskContext.getProgress().addPhase("copy");
    this.mergePhase = runningTaskContext.getProgress().addPhase("merge");

    // TODO TEZ Get rid of Map / Reduce references.
    final TezCounter shuffledMaps = reporter.getCounter(TaskCounter.SHUFFLED_MAPS);
    final TezCounter shuffleBytes = reporter.getCounter(TaskCounter.REDUCE_SHUFFLE_BYTES);
    final TezCounter failedShuffles = reporter.getCounter(TaskCounter.FAILED_SHUFFLE);
    final TezCounter spilledRecords = reporter.getCounter(TaskCounter.SPILLED_RECORDS);
    final TezCounter combineInputRecords = reporter.getCounter(TaskCounter.COMBINE_INPUT_RECORDS);
    final TezCounter mergedMapOutputs = reporter.getCounter(TaskCounter.MERGED_MAP_OUTPUTS);

    this.scheduler = new ShuffleScheduler(
            conf, tasksInDegree, runningTaskContext.getStatus(), this, this.copyPhase,
            shuffledMaps, shuffleBytes, failedShuffles);
    this.merger = new MergeManager(
            taskContext.getTaskAttemptId(), conf, localFileSystem, dirAllocator,
            reporter, combineProcessor, spilledRecords, combineInputRecords,
            mergedMapOutputs, this, this.mergePhase);
}

From source file:org.apache.tez.mapreduce.processor.map.TestMapProcessor.java

License:Apache License

/**
 * Locates the output file of the task identified by {@code outputContext} among the
 * configured local directories.
 *
 * @param jobConf       configuration holding the local directory list
 * @param outputContext context whose unique identifier names the task's output directory
 * @return readable local path of the output file
 * @throws IOException if the allocator cannot find the file
 */
private Path getMapOutputFile(Configuration jobConf, OutputContext outputContext) throws IOException {
    LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Relative location: <task output dir>/<unique id>/<output file name>
    Path attemptDir = new Path(Constants.TEZ_RUNTIME_TASK_OUTPUT_DIR, outputContext.getUniqueIdentifier());
    Path relativeOutput = new Path(attemptDir, Constants.TEZ_RUNTIME_TASK_OUTPUT_FILENAME_STRING);

    return allocator.getLocalPathToRead(relativeOutput.toString(), jobConf);
}

From source file:org.apache.tez.mapreduce.processor.MapUtils.java

License:Apache License

/**
 * Points the configuration at a single local directory and makes sure a {@code work}
 * directory exists beneath the configured local dirs, recording it as the job-local dir.
 *
 * @param conf     configuration to update
 * @param localDir directory used both as the only local dir and as the task resource dir
 * @throws IOException if the work directory cannot be located or created
 */
public static void configureLocalDirs(Configuration conf, String localDir) throws IOException {
    final String[] singleLocalDir = new String[] { localDir };

    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, singleLocalDir);
    conf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, localDir);

    LOG.info(TezRuntimeFrameworkConfigs.LOCAL_DIRS + " for child: "
            + conf.get(TezRuntimeFrameworkConfigs.LOCAL_DIRS));
    LOG.info(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR + " for child: "
            + conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR));

    final LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Look for an existing JOB_LOCAL_DIR on this host first.
    Path workDir = null;
    try {
        workDir = allocator.getLocalPathToRead("work", conf);
    } catch (DiskErrorException e) {
        // Treated as "directory not found"; it is created below.
    }

    if (workDir == null) {
        // JOB_LOCAL_DIR is missing on this host, so create it.
        workDir = allocator.getLocalPathForWrite("work", conf);
        final FileSystem rawLocalFs = FileSystem.getLocal(conf).getRaw();
        boolean created;
        try {
            created = rawLocalFs.mkdirs(workDir);
        } catch (FileAlreadyExistsException e) {
            // Tasks run in separate JVMs and may race to create this directory.
            // Losing the race is fine: the directory exists, so just look it up again.
            created = true;
            workDir = allocator.getLocalPathToRead("work", conf);
        }
        if (!created) {
            throw new IOException("Mkdirs failed to create " + workDir.toString());
        }
    }

    conf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString());
}

From source file:org.apache.tez.mapreduce.processor.MRTask.java

License:Apache License

/**
 * Copies the processor's work directories into the job configuration and makes sure a
 * {@code work} directory exists beneath them, recording it as the job-local dir under both
 * the Tez and MapReduce keys.
 *
 * @throws IOException if the work directory cannot be located or created
 */
private void configureLocalDirs() throws IOException {
    // TODO NEWTEZ Is most of this functionality required ?
    jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs());
    // Default the task resource dir to the PWD environment variable when it is not set.
    if (null == jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR)) {
        jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name()));
    }

    jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs());

    final LocalDirAllocator allocator = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);

    // Look for an existing JOB_LOCAL_DIR on this host first.
    Path workDir = null;
    try {
        workDir = allocator.getLocalPathToRead("work", jobConf);
    } catch (DiskErrorException e) {
        // Treated as "directory not found"; it is created below.
    }

    if (workDir == null) {
        // JOB_LOCAL_DIR is missing on this host, so create it.
        workDir = allocator.getLocalPathForWrite("work", jobConf);
        final FileSystem rawLocalFs = FileSystem.getLocal(jobConf).getRaw();
        boolean created;
        try {
            created = rawLocalFs.mkdirs(workDir);
        } catch (FileAlreadyExistsException e) {
            // Tasks run in separate JVMs and may race to create this directory.
            // Losing the race is fine: the directory exists, so just look it up again.
            created = true;
            workDir = allocator.getLocalPathToRead("work", jobConf);
        }
        if (!created) {
            throw new IOException("Mkdirs failed to create " + workDir.toString());
        }
    }

    // TODO NEWTEZ Is this required ?
    final String jobLocalDir = workDir.toString();
    jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, jobLocalDir);
    jobConf.set(MRJobConfig.JOB_LOCAL_DIR, jobLocalDir);
}

From source file:org.apache.tez.runtime.library.broadcast.input.BroadcastInputManager.java

License:Apache License

/**
 * Creates the manager and derives its memory limits from the configuration.
 *
 * <p>The overall limit is the task memory (or, if unset, the smaller of the JVM's max memory and
 * {@code Integer.MAX_VALUE}) multiplied by the input buffer fraction. The per-shuffle limit is
 * that value multiplied by the single-shuffle fraction.
 *
 * @param uniqueIdentifier identifier passed to the output file name allocator
 * @param conf             configuration to read the fractions and task memory from
 * @throws IllegalArgumentException if the buffer fraction is outside [0, 1] or the
 *         single-shuffle fraction is outside (0, 1]
 */
public BroadcastInputManager(String uniqueIdentifier, Configuration conf) {
    this.conf = conf;

    this.fileNameAllocator = new TezTaskOutputFiles(conf, uniqueIdentifier);
    this.localDirAllocator = new LocalDirAllocator(TezJobConfig.LOCAL_DIRS);

    // Fraction of memory usable for in-memory copies.
    final float bufferPercent = conf.getFloat(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT);
    if (bufferPercent < 0.0 || bufferPercent > 1.0) {
        throw new IllegalArgumentException("Invalid value for "
                + TezJobConfig.TEZ_RUNTIME_SHUFFLE_INPUT_BUFFER_PERCENT + ": " + bufferPercent);
    }

    // Allow unit tests to fix Runtime memory
    final long defaultTaskMemory = Math.min(Runtime.getRuntime().maxMemory(), Integer.MAX_VALUE);
    final long taskMemory = conf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, defaultTaskMemory);
    this.memoryLimit = (long) (taskMemory * bufferPercent);

    // Fraction of the overall limit that a single shuffle may take.
    final float singleShufflePercent = conf.getFloat(
            TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT,
            TezJobConfig.DEFAULT_TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT);
    if (singleShufflePercent > 1.0f || singleShufflePercent <= 0.0f) {
        throw new IllegalArgumentException(
                "Invalid value for " + TezJobConfig.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT + ": "
                        + singleShufflePercent);
    }

    this.maxSingleShuffleLimit = (long) (memoryLimit * singleShufflePercent);

    LOG.info("BroadcastInputManager -> MemoryLimit: " + this.memoryLimit + ", maxSingleMemLimit: "
            + this.maxSingleShuffleLimit);
}