Example usage for org.apache.hadoop.mapreduce.filecache DistributedCache getCacheFiles

List of usage examples for org.apache.hadoop.mapreduce.filecache DistributedCache getCacheFiles

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce.filecache DistributedCache getCacheFiles.

Prototype

@Deprecated
public static URI[] getCacheFiles(Configuration conf) throws IOException 

Source Link

Document

Get cache files set in the Configuration.

Usage

From source file: it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java

License: Apache License

/**
 * Configures a pipes job: installs the pipes mapper/reducer/partitioner
 * wrappers wherever no Java implementation is requested, defaults the
 * key/value classes to Text, wires in the Avro bridge input/output formats
 * when requested, and prepends the pipes executable to the
 * distributed-cache file list.
 *
 * @param job the job to configure; its Configuration is updated in place
 * @throws IOException if the executable URI cannot be parsed
 * @throws ClassNotFoundException if a configured class cannot be resolved
 */
private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException {
    Configuration conf = job.getConfiguration();
    // Map output types default to Text below unless a Java mapper is used.
    if (!getIsJavaMapper(conf)) {
        job.setMapperClass(PipesMapper.class);
        // Remember the user's partitioner, then install the pipes wrapper.
        setJavaPartitioner(conf, job.getPartitionerClass());
        job.setPartitionerClass(PipesPartitioner.class);
    }
    if (!getIsJavaReducer(conf)) {
        job.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            job.setOutputFormatClass(NullOutputFormat.class);
        }
    }
    String text = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, text);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, text);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, text);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, text);

    // PipesNonJavaInputFormat is needed to handle progress reporting from
    // C++ RecordReaders when neither reader nor mapper is Java-side.
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        job.setInputFormatClass(PipesNonJavaInputFormat.class);
    }

    if (avroInput != null) {
        if (explicitInputFormat) {
            conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class);
        } // otherwise the bridge falls back to the appropriate Avro IF
        switch (avroInput) {
        case K:
            job.setInputFormatClass(PydoopAvroInputKeyBridge.class);
            break;
        case V:
            job.setInputFormatClass(PydoopAvroInputValueBridge.class);
            break;
        case KV:
            job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro input type");
        }
    }
    if (avroOutput != null) {
        if (explicitOutputFormat) {
            conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class);
        } // otherwise the bridge falls back to the appropriate Avro OF
        conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name());
        switch (avroOutput) {
        case K:
            job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class);
            break;
        case V:
            job.setOutputFormatClass(PydoopAvroOutputValueBridge.class);
            break;
        case KV:
            job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class);
            break;
        default:
            throw new IllegalArgumentException("Bad Avro output type");
        }
    }

    String executable = getExecutable(conf);
    if (executable == null) {
        throw new IllegalArgumentException("No application program defined.");
    }
    // Add default debug scripts only when the executable is expressed as
    // <path>#<executable>.
    // FIXME: this is kind of useless if the pipes program is not in c++
    if (executable.contains("#")) {
        // Default gdb command scripts for map and reduce tasks.
        String debugScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, debugScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, debugScript);
    }
    // Prepend the executable to whatever cache files are already configured.
    URI[] cached = DistributedCache.getCacheFiles(conf);
    URI[] merged;
    if (cached == null) {
        merged = new URI[1];
    } else {
        merged = new URI[cached.length + 1];
        System.arraycopy(cached, 0, merged, 1, cached.length);
    }
    try {
        merged[0] = new URI(executable);
    } catch (URISyntaxException e) {
        String msg = "Problem parsing executable URI " + executable;
        IOException wrapped = new IOException(msg);
        wrapped.initCause(e);
        throw wrapped;
    }
    DistributedCache.setCacheFiles(merged, conf);
}

From source file: it.crs4.pydoop.pipes.Submitter.java

License: Apache License

/**
 * Configures a pipes job on the given JobConf: installs the pipes
 * map-runner/reducer/partitioner wrappers wherever no Java implementation
 * is requested, defaults the key/value classes to Text, and prepends the
 * pipes executable to the distributed-cache file list.
 *
 * @param conf the job configuration, updated in place
 * @throws IOException if the executable URI cannot be parsed
 */
private static void setupPipesJob(JobConf conf) throws IOException {
    // default map output types to Text
    if (!getIsJavaMapper(conf)) {
        conf.setMapRunnerClass(PipesMapRunner.class);
        // Save the user's partitioner and hook in our's.
        setJavaPartitioner(conf, conf.getPartitionerClass());
        conf.setPartitionerClass(PipesPartitioner.class);
    }
    if (!getIsJavaReducer(conf)) {
        conf.setReducerClass(PipesReducer.class);
        if (!getIsJavaRecordWriter(conf)) {
            conf.setOutputFormat(NullOutputFormat.class);
        }
    }
    String textClassname = Text.class.getName();
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname);
    setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname);

    // Use PipesNonJavaInputFormat if necessary to handle progress reporting
    // from C++ RecordReaders ...
    if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) {
        conf.setClass(Submitter.INPUT_FORMAT, conf.getInputFormat().getClass(), InputFormat.class);
        conf.setInputFormat(PipesNonJavaInputFormat.class);
    }

    String exec = getExecutable(conf);
    if (exec == null) {
        throw new IllegalArgumentException("No application program defined.");
    }
    // add default debug script only when executable is expressed as
    // <path>#<executable>
    if (exec.contains("#")) {
        // set default gdb commands for map and reduce task
        String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script";
        setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript);
        setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript);
    }
    // Prepend the executable to whatever cache files are already configured.
    URI[] fileCache = DistributedCache.getCacheFiles(conf);
    if (fileCache == null) {
        fileCache = new URI[1];
    } else {
        URI[] tmp = new URI[fileCache.length + 1];
        System.arraycopy(fileCache, 0, tmp, 1, fileCache.length);
        fileCache = tmp;
    }
    try {
        fileCache[0] = new URI(exec);
    } catch (URISyntaxException e) {
        // Fixed typo in the error message ("execable" -> "executable"),
        // matching the wording used by the mapreduce-side twin of this method.
        IOException ie = new IOException("Problem parsing executable URI " + exec);
        ie.initCause(e);
        throw ie;
    }
    DistributedCache.setCacheFiles(fileCache, conf);
}

From source file: org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl.java

License: Apache License

/**
 * Get the cache files set in this context's Configuration.
 *
 * @return a URI array of the files set in the Configuration; may be
 *         {@code null} when no cache files have been set (callers are
 *         expected to null-check)
 * @throws IOException if the cache-file list cannot be read from the
 *         Configuration
 */

public URI[] getCacheFiles() throws IOException {
    // Delegates to the deprecated DistributedCache helper, which reads the
    // configured cache-file URIs from this context's Configuration.
    return DistributedCache.getCacheFiles(conf);
}

From source file: org.apache.tez.mapreduce.processor.MRTask.java

License: Apache License

/**
 * Set up the DistributedCache related configs to make
 * {@link DistributedCache#getLocalCacheFiles(Configuration)} and
 * {@link DistributedCache#getLocalCacheArchives(Configuration)} working.
 *
 * @param job/*from  w  w  w .ja v a  2s. c o  m*/
 * @throws IOException
 */
private static void setupDistributedCacheConfig(final JobConf job) throws IOException {

    String localWorkDir = (job.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR));
    // ^ ^ all symlinks are created in the current work-dir

    // Update the configuration object with localized archives.
    URI[] cacheArchives = DistributedCache.getCacheArchives(job);
    if (cacheArchives != null) {
        List<String> localArchives = new ArrayList<String>();
        for (int i = 0; i < cacheArchives.length; ++i) {
            URI u = cacheArchives[i];
            Path p = new Path(u);
            Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment());
            String linkName = name.toUri().getPath();
            localArchives.add(new Path(localWorkDir, linkName).toUri().getPath());
        }
        if (!localArchives.isEmpty()) {
            job.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.join(localArchives, ','));
        }
    }

    // Update the configuration object with localized files.
    URI[] cacheFiles = DistributedCache.getCacheFiles(job);
    if (cacheFiles != null) {
        List<String> localFiles = new ArrayList<String>();
        for (int i = 0; i < cacheFiles.length; ++i) {
            URI u = cacheFiles[i];
            Path p = new Path(u);
            Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment());
            String linkName = name.toUri().getPath();
            localFiles.add(new Path(localWorkDir, linkName).toUri().getPath());
        }
        if (!localFiles.isEmpty()) {
            job.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.join(localFiles, ','));
        }
    }
}

From source file: org.apache.tez.mapreduce.task.MRRuntimeTask.java

License: Apache License

/**
 * Set up the DistributedCache related configs to make
 * {@link DistributedCache#getLocalCacheFiles(Configuration)} and
 * {@link DistributedCache#getLocalCacheArchives(Configuration)} working.
 * /*ww  w  .  jav  a2s.  c  o  m*/
 * @param job
 * @throws IOException
 */
private static void setupDistributedCacheConfig(final JobConf job) throws IOException {

    String localWorkDir = (job.get(TezJobConfig.TASK_LOCAL_RESOURCE_DIR));
    // ^ ^ all symlinks are created in the current work-dir

    // Update the configuration object with localized archives.
    URI[] cacheArchives = DistributedCache.getCacheArchives(job);
    if (cacheArchives != null) {
        List<String> localArchives = new ArrayList<String>();
        for (int i = 0; i < cacheArchives.length; ++i) {
            URI u = cacheArchives[i];
            Path p = new Path(u);
            Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment());
            String linkName = name.toUri().getPath();
            localArchives.add(new Path(localWorkDir, linkName).toUri().getPath());
        }
        if (!localArchives.isEmpty()) {
            job.set(MRJobConfig.CACHE_LOCALARCHIVES,
                    StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
        }
    }

    // Update the configuration object with localized files.
    URI[] cacheFiles = DistributedCache.getCacheFiles(job);
    if (cacheFiles != null) {
        List<String> localFiles = new ArrayList<String>();
        for (int i = 0; i < cacheFiles.length; ++i) {
            URI u = cacheFiles[i];
            Path p = new Path(u);
            Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment());
            String linkName = name.toUri().getPath();
            localFiles.add(new Path(localWorkDir, linkName).toUri().getPath());
        }
        if (!localFiles.isEmpty()) {
            job.set(MRJobConfig.CACHE_LOCALFILES,
                    StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
        }
    }
}