Example usage for org.apache.hadoop.fs LocalDirAllocator getLocalPathForWrite

List of usage examples for org.apache.hadoop.fs LocalDirAllocator getLocalPathForWrite

Introduction

In this page you can find the example usage for org.apache.hadoop.fs LocalDirAllocator getLocalPathForWrite.

Prototype

public Path getLocalPathForWrite(String pathStr, Configuration conf) throws IOException 

Source Link

Document

Get a path from the local FS.

Usage

From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java

License:Apache License

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 * @param conf job configuration//from   w ww  .  ja v  a  2 s.c  o m
 * @throws IOException
 */
public void setup(Configuration conf) throws IOException {
    //If we are not 0th worker, wait for 0th worker to set up the cache
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                    WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    //CODE CHANGE FROM ORIGINAL FILE:
    //We need to clear the resources from jar files, since they are distributed through the IG.
    //
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload.

    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException(
                        "Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.keySet().contains(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localArchives.size()])));
    }
    setupCalled = true;

    //If we are  0th worker, signal action complete
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}

From source file:org.apache.tez.mapreduce.processor.MapUtils.java

License:Apache License

public static void configureLocalDirs(Configuration conf, String localDir) throws IOException {
    String[] localSysDirs = new String[1];
    localSysDirs[0] = localDir;//from   ww w  .  ja  va  2s.  c  o  m

    conf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, localSysDirs);
    conf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, localDir);

    LOG.info(TezRuntimeFrameworkConfigs.LOCAL_DIRS + " for child: "
            + conf.get(TezRuntimeFrameworkConfigs.LOCAL_DIRS));
    LOG.info(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR + " for child: "
            + conf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR));

    LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    Path workDir = null;
    // First, try to find the JOB_LOCAL_DIR on this host.
    try {
        workDir = lDirAlloc.getLocalPathToRead("work", conf);
    } catch (DiskErrorException e) {
        // DiskErrorException means dir not found. If not found, it will
        // be created below.
    }
    if (workDir == null) {
        // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
        workDir = lDirAlloc.getLocalPathForWrite("work", conf);
        FileSystem lfs = FileSystem.getLocal(conf).getRaw();
        boolean madeDir = false;
        try {
            madeDir = lfs.mkdirs(workDir);
        } catch (FileAlreadyExistsException e) {
            // Since all tasks will be running in their own JVM, the race condition
            // exists where multiple tasks could be trying to create this directory
            // at the same time. If this task loses the race, it's okay because
            // the directory already exists.
            madeDir = true;
            workDir = lDirAlloc.getLocalPathToRead("work", conf);
        }
        if (!madeDir) {
            throw new IOException("Mkdirs failed to create " + workDir.toString());
        }
    }
    conf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString());
}

From source file:org.apache.tez.mapreduce.processor.MRTask.java

License:Apache License

private void configureLocalDirs() throws IOException {
    // TODO NEWTEZ Is most of this functionality required ?
    jobConf.setStrings(TezRuntimeFrameworkConfigs.LOCAL_DIRS, processorContext.getWorkDirs());
    if (jobConf.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR) == null) {
        jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR, System.getenv(Environment.PWD.name()));
    }//from   w  w  w .j  a v  a  2  s. co  m

    jobConf.setStrings(MRConfig.LOCAL_DIR, processorContext.getWorkDirs());

    LocalDirAllocator lDirAlloc = new LocalDirAllocator(TezRuntimeFrameworkConfigs.LOCAL_DIRS);
    Path workDir = null;
    // First, try to find the JOB_LOCAL_DIR on this host.
    try {
        workDir = lDirAlloc.getLocalPathToRead("work", jobConf);
    } catch (DiskErrorException e) {
        // DiskErrorException means dir not found. If not found, it will
        // be created below.
    }
    if (workDir == null) {
        // JOB_LOCAL_DIR doesn't exist on this host -- Create it.
        workDir = lDirAlloc.getLocalPathForWrite("work", jobConf);
        FileSystem lfs = FileSystem.getLocal(jobConf).getRaw();
        boolean madeDir = false;
        try {
            madeDir = lfs.mkdirs(workDir);
        } catch (FileAlreadyExistsException e) {
            // Since all tasks will be running in their own JVM, the race condition
            // exists where multiple tasks could be trying to create this directory
            // at the same time. If this task loses the race, it's okay because
            // the directory already exists.
            madeDir = true;
            workDir = lDirAlloc.getLocalPathToRead("work", jobConf);
        }
        if (!madeDir) {
            throw new IOException("Mkdirs failed to create " + workDir.toString());
        }
    }
    // TODO NEWTEZ Is this required ?
    jobConf.set(MRFrameworkConfigs.JOB_LOCAL_DIR, workDir.toString());
    jobConf.set(MRJobConfig.JOB_LOCAL_DIR, workDir.toString());
}