Example usage for org.apache.hadoop.mapreduce MRJobConfig CACHE_LOCALFILES

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce MRJobConfig.CACHE_LOCALFILES, collected from open-source projects.

Prototype

public static final String CACHE_LOCALFILES = "mapreduce.job.cache.local.files";

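Before the project-specific examples, here is a minimal, self-contained sketch of reading the property. The file paths are hypothetical; in a real job the framework populates this property after localizing the distributed cache.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.util.StringUtils;

public class CacheLocalFilesSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Hypothetical values; normally the MapReduce framework sets this
        // property after localizing the distributed cache.
        conf.set(MRJobConfig.CACHE_LOCALFILES, "/tmp/cache/lookup.txt,/tmp/cache/stopwords.txt");

        // Read the comma-separated list back as Path objects, mirroring the
        // DistributedResourceManager example below.
        Path[] localFiles = StringUtils.stringToPath(conf.getStrings(MRJobConfig.CACHE_LOCALFILES));
        for (Path p : localFiles) {
            System.out.println("Localized file: " + p);
        }
    }
}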

Usage

From source file: com.conversantmedia.mapreduce.tool.DistributedResourceManager.java

License: Apache License

/**
 * Locates the resources in the configuration and distributed cache, etc., 
 * and sets them on the provided mapper instance.
 * @param bean            the object to inspect for resource annotations
 * @param config         the job configuration
 * @throws ToolException   if there are errors with reflection or the cache
 */
@SuppressWarnings("unchecked")
public static void initializeResources(Object bean, Configuration config) throws ToolException {
    try {
        List<Field> fields = MaraAnnotationUtil.INSTANCE.findAnnotatedFields(bean.getClass(), Resource.class);
        Path[] files = org.apache.hadoop.util.StringUtils
                .stringToPath(config.getStrings(MRJobConfig.CACHE_LOCALFILES));
        for (Field field : fields) {
            Resource resAnnotation = field.getAnnotation(Resource.class);
            String key = StringUtils.isEmpty(resAnnotation.name()) ? field.getName() : resAnnotation.name();
            String resourceId = config.get(CONFIGKEYBASE_RESOURCE + key);
            if (resourceId != null) {
                String[] parts = StringUtils.split(resourceId, VALUE_SEP);
                String className = parts[0];
                String valueString = parts[1];

                // Retrieve the value
                Object value = getResourceValue(field, valueString, className, files);

                setFieldValue(field, bean, value);
            }
        }
    } catch (IllegalArgumentException | IOException | ClassNotFoundException | IllegalAccessException e) {
        throw new ToolException(e);
    }
}
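For context, the bean handed to initializeResources(...) is typically a mapper whose fields carry the mara @Resource annotation inspected above. A hypothetical sketch follows; the field name, field type, and resource key are illustrative, and the annotation import (the mara one, not javax.annotation.Resource) is assumed:

// Hypothetical bean. @Resource is the mara annotation read by
// initializeResources(...) above; when name is omitted, the field
// name itself becomes the lookup key.
public class StopwordFilterMapper {
    @Resource(name = "stopwords")
    private java.util.Set<String> stopwords; // injected from the distributed cache
}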

From source file: com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java

License: Apache License

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 * @param conf job configuration
 * @throws IOException
 */
public void setup(Configuration conf) throws IOException {
    //If we are not the 0th worker, wait for the 0th worker to set up the cache
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                    WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    //CODE CHANGE FROM ORIGINAL FILE:
    //We need to clear the resources from jar files, since they are distributed through the IG.
    //
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload.

    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException(
                        "Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.keySet().contains(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;

    //If we are the 0th worker, signal action complete
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}
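Note the final step: the localized paths are written back into the configuration under MRJobConfig.CACHE_LOCALARCHIVES and MRJobConfig.CACHE_LOCALFILES, so code that later queries the configuration for local cache files sees the localized copies. The synchronization barrier confines the actual download work to worker 0; all other workers block at the top of setup() until worker 0 signals completion.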

From source file: it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License: Apache License

/**
 * Test PipesMapRunner: test the transfer of data from the reader.
 *
 * @throws Exception
 */
@Test
public void testRunner() throws Exception {
    // clean old password files
    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(Submitter.IS_JAVA_RR, "true");
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        job.setInputFormatClass(DummyInputFormat.class);
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);

        InputSplit isplit = isplits.get(0);

        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext);

        TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit);
        // stub for client
        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        // token for authorization
        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);
        PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper = new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>(
                context);

        initStdOut(conf);
        mapper.run(context);
        String stdOut = readStdOut(conf);

        // test part of the translated data: the client and the test share
        // a common stdOut file
        // check version
        assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0"));
        // check key and value classes
        assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable"));
        assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable"));
        // test have sent all data from reader
        assertTrue(stdOut.contains("value:0.0"));
        assertTrue(stdOut.contains("value:9.0"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}
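In this and the two pipes tests that follow, MRJobConfig.CACHE_LOCALFILES is set directly to the absolute path of a stub command that plays the role of the external pipes client. The pipes runtime resolves the first local cache file entry as the executable to launch (in Hadoop's implementation via DistributedCache.getLocalCacheFiles(conf)), so pointing the property at the stub lets the test exercise the wire protocol without a real C++ binary.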

From source file: it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License: Apache License

/**
 * Test org.apache.hadoop.mapreduce.pipes.Application.
 * Tests internal functions:
 *     MessageType.REGISTER_COUNTER, INCREMENT_COUNTER, STATUS, PROGRESS...
 *
 * @throws Throwable
 */

@Test
public void testApplication() throws Throwable {

    System.err.println("testApplication");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub");
        //getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        MapContextImpl<IntWritable, Text, IntWritable, Text> context = new MapContextImpl<IntWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, null, writer, null, reporter, null);

        System.err.println("ready to launch application");
        Application<IntWritable, Text, IntWritable, Text> application = new Application<IntWritable, Text, IntWritable, Text>(
                context, reader);
        System.err.println("done");

        application.getDownlink().flush();
        application.getDownlink().mapItem(new IntWritable(3), new Text("txt"));
        application.getDownlink().flush();
        application.waitForFinish();

        // test getDownlink().mapItem();
        String stdOut = readStdOut(conf);
        assertTrue(stdOut.contains("key:3"));
        assertTrue(stdOut.contains("value:txt"));

        assertEquals(0.0, context.getProgress(), 0.01);
        assertNotNull(context.getCounter("group", "name"));

        // test status MessageType.STATUS
        assertEquals(context.getStatus(), "PROGRESS");
        // check MessageType.PROGRESS
        assertEquals(0.55f, reader.getProgress(), 0.001);
        application.getDownlink().close();
        // test MessageType.OUTPUT
        stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile"));
        assertTrue(stdOut.contains("key:123"));
        assertTrue(stdOut.contains("value:value"));
        try {
            // try to abort
            application.abort(new Throwable());
            fail();
        } catch (IOException e) {
            // abort works ?
            assertEquals("pipe child exception", e.getMessage());
        }
    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }
}

From source file: it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java

License: Apache License

/**
 * Test org.apache.hadoop.mapreduce.pipes.PipesReducer:
 * test the transfer of data, both key and value.
 *
 * @throws Exception
 */
@Test
public void testPipesReducer() throws Exception {
    System.err.println("testPipesReducer");

    File[] psw = cleanTokenPasswordFile();
    try {
        JobID jobId = new JobID("201408272347", 0);
        TaskID taskId = new TaskID(jobId, TaskType.MAP, 0);
        TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0);

        Job job = new Job(new Configuration());
        job.setJobID(jobId);
        Configuration conf = job.getConfiguration();
        conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString());
        FileSystem fs = new RawLocalFileSystem();
        fs.setConf(conf);

        File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub");
        conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath());
        System.err.println("fCommand" + fCommand.getAbsolutePath());

        Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(),
                "password".getBytes(), new Text("kind"), new Text("service"));
        TokenCache.setJobToken(token, job.getCredentials());
        conf.setBoolean(MRJobConfig.SKIP_RECORDS, true);

        TestReporter reporter = new TestReporter();
        DummyInputFormat input_format = new DummyInputFormat();
        List<InputSplit> isplits = input_format.getSplits(job);
        InputSplit isplit = isplits.get(0);
        TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid);

        RecordWriter<IntWritable, Text> writer = new TestRecordWriter(
                new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile"));

        BooleanWritable bw = new BooleanWritable(true);
        List<Text> texts = new ArrayList<Text>();
        texts.add(new Text("first"));
        texts.add(new Text("second"));
        texts.add(new Text("third"));

        DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator();

        ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>(
                conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class,
                Text.class);

        PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>();
        reducer.setup(context);

        initStdOut(conf);
        reducer.reduce(bw, texts, context);
        reducer.cleanup(context);
        String stdOut = readStdOut(conf);

        // test data: key
        assertTrue(stdOut.contains("reducer key :true"));
        // and values
        assertTrue(stdOut.contains("reduce value  :first"));
        assertTrue(stdOut.contains("reduce value  :second"));
        assertTrue(stdOut.contains("reduce value  :third"));

    } finally {
        if (psw != null) {
            // remove password files
            for (File file : psw) {
                file.deleteOnExit();
            }
        }
    }

}

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2JobResourceManager.java

License: Apache License

/**
 * Prepare job resources. Resolve the classpath list and download it if needed.
 *
 * @param download {@code true} if resources need to be downloaded.
 * @param jobLocDir Work directory for the job.
 * @throws IgniteCheckedException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException {
    try {
        if (jobLocDir.exists())
            throw new IgniteCheckedException(
                    "Local job directory already exists: " + jobLocDir.getAbsolutePath());

        JobConf cfg = ctx.getJobConf();

        Collection<URL> clsPathUrls = new ArrayList<>();

        String mrDir = cfg.get(MRJobConfig.MAPREDUCE_JOB_DIR);

        if (mrDir != null) {
            stagingDir = new Path(new URI(mrDir));

            if (download) {
                FileSystem fs = job.fileSystem(stagingDir.toUri(), cfg);

                if (!fs.exists(stagingDir))
                    throw new IgniteCheckedException("Failed to find map-reduce submission "
                            + "directory (does not exist): " + stagingDir);

                if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg))
                    throw new IgniteCheckedException("Failed to copy job submission directory "
                            + "contents to local file system " + "[path=" + stagingDir + ", locDir="
                            + jobLocDir.getAbsolutePath() + ", jobId=" + jobId + ']');
            }

            File jarJobFile = new File(jobLocDir, "job.jar");

            clsPathUrls.add(jarJobFile.toURI().toURL());

            rsrcSet.add(jarJobFile);
            rsrcSet.add(new File(jobLocDir, "job.xml"));
        } else if (!jobLocDir.mkdirs())
            throw new IgniteCheckedException(
                    "Failed to create local job directory: " + jobLocDir.getAbsolutePath());

        processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
        processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null, MRJobConfig.CACHE_LOCALARCHIVES);
        processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null);
        processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null);

        if (!clsPathUrls.isEmpty())
            clsPath = clsPathUrls.toArray(new URL[clsPathUrls.size()]);

        setLocalFSWorkingDirectory(jobLocDir);
    } catch (URISyntaxException | IOException e) {
        throw new IgniteCheckedException(e);
    }
}
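As in the local-mode manager above, the localized file and archive lists are recorded under MRJobConfig.CACHE_LOCALFILES and MRJobConfig.CACHE_LOCALARCHIVES; here processFiles(...) is handed the property name to update (or null when the entries only contribute to the classpath), presumably so that user code reading the standard Hadoop keys behaves the same under the Ignite job tracker as under a regular Hadoop runtime.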

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License: Apache License

/**
 * Prepare job resources. Resolve the classpath list and download it if needed.
 *
 * @param download {@code true} if resources need to be downloaded.
 * @param jobLocDir Work directory for the job.
 * @throws IgniteCheckedException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws IgniteCheckedException {
    try {
        if (jobLocDir.exists())
            throw new IgniteCheckedException(
                    "Local job directory already exists: " + jobLocDir.getAbsolutePath());

        JobConf cfg = ctx.getJobConf();

        String mrDir = cfg.get("mapreduce.job.dir");

        if (mrDir != null) {
            stagingDir = new Path(new URI(mrDir));

            if (download) {
                FileSystem fs = FileSystem.get(stagingDir.toUri(), cfg);

                if (!fs.exists(stagingDir))
                    throw new IgniteCheckedException(
                            "Failed to find map-reduce submission directory (does not exist): " + stagingDir);

                if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg))
                    throw new IgniteCheckedException(
                            "Failed to copy job submission directory contents to local file system " + "[path="
                                    + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId="
                                    + jobId + ']');
            }

            File jarJobFile = new File(jobLocDir, "job.jar");

            Collection<URL> clsPathUrls = new ArrayList<>();

            clsPathUrls.add(jarJobFile.toURI().toURL());

            rsrcSet.add(jarJobFile);
            rsrcSet.add(new File(jobLocDir, "job.xml"));

            processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
            processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null,
                    MRJobConfig.CACHE_LOCALARCHIVES);
            processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null);
            processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null);

            if (!clsPathUrls.isEmpty()) {
                clsPath = new URL[clsPathUrls.size()];

                clsPathUrls.toArray(clsPath);
            }
        } else if (!jobLocDir.mkdirs())
            throw new IgniteCheckedException(
                    "Failed to create local job directory: " + jobLocDir.getAbsolutePath());

        setLocalFSWorkingDirectory(jobLocDir);
    } catch (URISyntaxException | IOException e) {
        throw new IgniteCheckedException(e);
    }
}

From source file: org.apache.rya.accumulo.mr.merge.mappers.BaseCopyToolMapper.java

License: Apache License

/**
 * Fixes the "splits.txt" file path in the "mapreduce.job.cache.local.files" property. The path contains the
 * {@link URI} "file:" prefix, which causes {@link KeyRangePartitioner} to throw a {@code FileNotFoundException}
 * when it attempts to open the file.
 */
private void fixSplitsInCachedLocalFiles() {
    if (useCopyFileOutput) {
        // The "mapreduce.job.cache.local.files" property contains a comma-separated
        // list of cached local file paths.
        final String cachedLocalFiles = parentConfig.get(MRJobConfig.CACHE_LOCALFILES);
        if (cachedLocalFiles != null) {
            final List<String> cachedLocalFilesList = Lists
                    .newArrayList(Splitter.on(',').split(cachedLocalFiles));
            final List<String> formattedCachedLocalFilesList = new ArrayList<>();
            for (final String cachedLocalFile : cachedLocalFilesList) {
                String pathToAdd = cachedLocalFile;
                if (cachedLocalFile.endsWith("splits.txt")) {
                    URI uri = null;
                    try {
                        uri = new URI(cachedLocalFile);
                        pathToAdd = uri.getPath();
                    } catch (final URISyntaxException e) {
                        log.error("Invalid syntax in local cache file path", e);
                    }
                }
                formattedCachedLocalFilesList.add(pathToAdd);
            }
            final String formattedCachedLocalFiles = Joiner.on(',').join(formattedCachedLocalFilesList);
            if (!cachedLocalFiles.equals(formattedCachedLocalFiles)) {
                parentConfig.set(MRJobConfig.CACHE_LOCALFILES, formattedCachedLocalFiles);
            }
        }
    }
}
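The repair hinges on URI.getPath() stripping the "file:" scheme. A minimal, self-contained illustration follows; the splits path is hypothetical:

import java.net.URI;
import java.net.URISyntaxException;

public class SplitsPathDemo {
    public static void main(String[] args) throws URISyntaxException {
        // A cache entry as it might appear in mapreduce.job.cache.local.files
        // (hypothetical path).
        String cachedLocalFile = "file:/tmp/hadoop-local/splits.txt";

        // URI.getPath() drops the "file:" scheme, leaving a plain filesystem
        // path that KeyRangePartitioner can open.
        String fixed = new URI(cachedLocalFile).getPath();
        System.out.println(fixed); // prints /tmp/hadoop-local/splits.txt
    }
}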

From source file: org.gridgain.grid.kernal.processors.hadoop.v2.GridHadoopV2JobResourceManager.java

License: Open Source License

/**
 * Prepare job resources. Resolve the classpath list and download it if needed.
 *
 * @param download {@code true} if resources need to be downloaded.
 * @param jobLocDir Work directory for the job.
 * @throws GridException If failed.
 */
public void prepareJobEnvironment(boolean download, File jobLocDir) throws GridException {
    try {
        if (jobLocDir.exists())
            throw new GridException("Local job directory already exists: " + jobLocDir.getAbsolutePath());

        JobConf cfg = ctx.getJobConf();

        String mrDir = cfg.get("mapreduce.job.dir");

        if (mrDir != null) {
            stagingDir = new Path(new URI(mrDir));

            if (download) {
                FileSystem fs = FileSystem.get(stagingDir.toUri(), cfg);

                if (!fs.exists(stagingDir))
                    throw new GridException(
                            "Failed to find map-reduce submission directory (does not exist): " + stagingDir);

                if (!FileUtil.copy(fs, stagingDir, jobLocDir, false, cfg))
                    throw new GridException(
                            "Failed to copy job submission directory contents to local file system " + "[path="
                                    + stagingDir + ", locDir=" + jobLocDir.getAbsolutePath() + ", jobId="
                                    + jobId + ']');
            }

            File jarJobFile = new File(jobLocDir, "job.jar");

            Collection<URL> clsPathUrls = new ArrayList<>();

            clsPathUrls.add(jarJobFile.toURI().toURL());

            rsrcList.add(jarJobFile);
            rsrcList.add(new File(jobLocDir, "job.xml"));

            processFiles(jobLocDir, ctx.getCacheFiles(), download, false, null, MRJobConfig.CACHE_LOCALFILES);
            processFiles(jobLocDir, ctx.getCacheArchives(), download, true, null,
                    MRJobConfig.CACHE_LOCALARCHIVES);
            processFiles(jobLocDir, ctx.getFileClassPaths(), download, false, clsPathUrls, null);
            processFiles(jobLocDir, ctx.getArchiveClassPaths(), download, true, clsPathUrls, null);

            if (!clsPathUrls.isEmpty()) {
                clsPath = new URL[clsPathUrls.size()];

                clsPathUrls.toArray(clsPath);
            }
        } else if (!jobLocDir.mkdirs())
            throw new GridException("Failed to create local job directory: " + jobLocDir.getAbsolutePath());

        setLocalFSWorkingDirectory(jobLocDir);
    } catch (URISyntaxException | IOException e) {
        throw new GridException(e);
    }
}
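Apart from the exception type (GridException instead of IgniteCheckedException) and the name of the resource collection, this GridGain-era class is essentially the same as the two Ignite examples above; it predates the project's move to Apache as Ignite.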