List of usage examples for org.apache.hadoop.mapreduce.filecache.DistributedCache#getCacheFiles
@Deprecated public static URI[] getCacheFiles(Configuration conf) throws IOException
From source file:it.crs4.pydoop.mapreduce.pipes.CommandLineParser.java
License:Apache License
private static void setupPipesJob(Job job) throws IOException, ClassNotFoundException { Configuration conf = job.getConfiguration(); // default map output types to Text if (!getIsJavaMapper(conf)) { job.setMapperClass(PipesMapper.class); // Save the user's partitioner and hook in our's. setJavaPartitioner(conf, job.getPartitionerClass()); job.setPartitionerClass(PipesPartitioner.class); }//from w w w.jav a 2 s .c o m if (!getIsJavaReducer(conf)) { job.setReducerClass(PipesReducer.class); if (!getIsJavaRecordWriter(conf)) { job.setOutputFormatClass(NullOutputFormat.class); } } String textClassname = Text.class.getName(); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname); // Use PipesNonJavaInputFormat if necessary to handle progress reporting // from C++ RecordReaders ... if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) { conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class); job.setInputFormatClass(PipesNonJavaInputFormat.class); } if (avroInput != null) { if (explicitInputFormat) { conf.setClass(Submitter.INPUT_FORMAT, job.getInputFormatClass(), InputFormat.class); } // else let the bridge fall back to the appropriate Avro IF switch (avroInput) { case K: job.setInputFormatClass(PydoopAvroInputKeyBridge.class); break; case V: job.setInputFormatClass(PydoopAvroInputValueBridge.class); break; case KV: job.setInputFormatClass(PydoopAvroInputKeyValueBridge.class); break; default: throw new IllegalArgumentException("Bad Avro input type"); } } if (avroOutput != null) { if (explicitOutputFormat) { conf.setClass(Submitter.OUTPUT_FORMAT, job.getOutputFormatClass(), OutputFormat.class); } // else let the bridge fall back to the appropriate Avro OF conf.set(props.getProperty("AVRO_OUTPUT"), avroOutput.name()); switch 
(avroOutput) { case K: job.setOutputFormatClass(PydoopAvroOutputKeyBridge.class); break; case V: job.setOutputFormatClass(PydoopAvroOutputValueBridge.class); break; case KV: job.setOutputFormatClass(PydoopAvroOutputKeyValueBridge.class); break; default: throw new IllegalArgumentException("Bad Avro output type"); } } String exec = getExecutable(conf); if (exec == null) { String msg = "No application program defined."; throw new IllegalArgumentException(msg); } // add default debug script only when executable is expressed as // <path>#<executable> //FIXME: this is kind of useless if the pipes program is not in c++ if (exec.contains("#")) { // set default gdb commands for map and reduce task String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script"; setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript); setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript); } URI[] fileCache = DistributedCache.getCacheFiles(conf); if (fileCache == null) { fileCache = new URI[1]; } else { URI[] tmp = new URI[fileCache.length + 1]; System.arraycopy(fileCache, 0, tmp, 1, fileCache.length); fileCache = tmp; } try { fileCache[0] = new URI(exec); } catch (URISyntaxException e) { String msg = "Problem parsing executable URI " + exec; IOException ie = new IOException(msg); ie.initCause(e); throw ie; } DistributedCache.setCacheFiles(fileCache, conf); }
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
private static void setupPipesJob(JobConf conf) throws IOException { // default map output types to Text if (!getIsJavaMapper(conf)) { conf.setMapRunnerClass(PipesMapRunner.class); // Save the user's partitioner and hook in our's. setJavaPartitioner(conf, conf.getPartitionerClass()); conf.setPartitionerClass(PipesPartitioner.class); }/*w w w. j av a 2 s.co m*/ if (!getIsJavaReducer(conf)) { conf.setReducerClass(PipesReducer.class); if (!getIsJavaRecordWriter(conf)) { conf.setOutputFormat(NullOutputFormat.class); } } String textClassname = Text.class.getName(); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.MAP_OUTPUT_VALUE_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_KEY_CLASS, textClassname); setIfUnset(conf, MRJobConfig.OUTPUT_VALUE_CLASS, textClassname); // Use PipesNonJavaInputFormat if necessary to handle progress reporting // from C++ RecordReaders ... if (!getIsJavaRecordReader(conf) && !getIsJavaMapper(conf)) { conf.setClass(Submitter.INPUT_FORMAT, conf.getInputFormat().getClass(), InputFormat.class); conf.setInputFormat(PipesNonJavaInputFormat.class); } String exec = getExecutable(conf); if (exec == null) { throw new IllegalArgumentException("No application program defined."); } // add default debug script only when executable is expressed as // <path>#<executable> if (exec.contains("#")) { // set default gdb commands for map and reduce task String defScript = "$HADOOP_PREFIX/src/c++/pipes/debug/pipes-default-script"; setIfUnset(conf, MRJobConfig.MAP_DEBUG_SCRIPT, defScript); setIfUnset(conf, MRJobConfig.REDUCE_DEBUG_SCRIPT, defScript); } URI[] fileCache = DistributedCache.getCacheFiles(conf); if (fileCache == null) { fileCache = new URI[1]; } else { URI[] tmp = new URI[fileCache.length + 1]; System.arraycopy(fileCache, 0, tmp, 1, fileCache.length); fileCache = tmp; } try { fileCache[0] = new URI(exec); } catch (URISyntaxException e) { IOException ie = new IOException("Problem parsing execable 
URI " + exec); ie.initCause(e); throw ie; } DistributedCache.setCacheFiles(fileCache, conf); }
From source file:org.apache.tez.mapreduce.hadoop.mapreduce.JobContextImpl.java
License:Apache License
/**
 * Returns the cache files registered in this context's configuration.
 *
 * @return the URI array reported by
 *         {@link DistributedCache#getCacheFiles(Configuration)} for {@code conf}
 * @throws IOException propagated from the underlying lookup
 */
public URI[] getCacheFiles() throws IOException {
  final URI[] cacheFiles = DistributedCache.getCacheFiles(conf);
  return cacheFiles;
}
From source file:org.apache.tez.mapreduce.processor.MRTask.java
License:Apache License
/** * Set up the DistributedCache related configs to make * {@link DistributedCache#getLocalCacheFiles(Configuration)} and * {@link DistributedCache#getLocalCacheArchives(Configuration)} working. * * @param job/*from w w w .ja v a 2s. c o m*/ * @throws IOException */ private static void setupDistributedCacheConfig(final JobConf job) throws IOException { String localWorkDir = (job.get(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR)); // ^ ^ all symlinks are created in the current work-dir // Update the configuration object with localized archives. URI[] cacheArchives = DistributedCache.getCacheArchives(job); if (cacheArchives != null) { List<String> localArchives = new ArrayList<String>(); for (int i = 0; i < cacheArchives.length; ++i) { URI u = cacheArchives[i]; Path p = new Path(u); Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment()); String linkName = name.toUri().getPath(); localArchives.add(new Path(localWorkDir, linkName).toUri().getPath()); } if (!localArchives.isEmpty()) { job.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.join(localArchives, ',')); } } // Update the configuration object with localized files. URI[] cacheFiles = DistributedCache.getCacheFiles(job); if (cacheFiles != null) { List<String> localFiles = new ArrayList<String>(); for (int i = 0; i < cacheFiles.length; ++i) { URI u = cacheFiles[i]; Path p = new Path(u); Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment()); String linkName = name.toUri().getPath(); localFiles.add(new Path(localWorkDir, linkName).toUri().getPath()); } if (!localFiles.isEmpty()) { job.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.join(localFiles, ',')); } } }
From source file:org.apache.tez.mapreduce.task.MRRuntimeTask.java
License:Apache License
/** * Set up the DistributedCache related configs to make * {@link DistributedCache#getLocalCacheFiles(Configuration)} and * {@link DistributedCache#getLocalCacheArchives(Configuration)} working. * /*ww w . jav a2s. c o m*/ * @param job * @throws IOException */ private static void setupDistributedCacheConfig(final JobConf job) throws IOException { String localWorkDir = (job.get(TezJobConfig.TASK_LOCAL_RESOURCE_DIR)); // ^ ^ all symlinks are created in the current work-dir // Update the configuration object with localized archives. URI[] cacheArchives = DistributedCache.getCacheArchives(job); if (cacheArchives != null) { List<String> localArchives = new ArrayList<String>(); for (int i = 0; i < cacheArchives.length; ++i) { URI u = cacheArchives[i]; Path p = new Path(u); Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment()); String linkName = name.toUri().getPath(); localArchives.add(new Path(localWorkDir, linkName).toUri().getPath()); } if (!localArchives.isEmpty()) { job.set(MRJobConfig.CACHE_LOCALARCHIVES, StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()]))); } } // Update the configuration object with localized files. URI[] cacheFiles = DistributedCache.getCacheFiles(job); if (cacheFiles != null) { List<String> localFiles = new ArrayList<String>(); for (int i = 0; i < cacheFiles.length; ++i) { URI u = cacheFiles[i]; Path p = new Path(u); Path name = new Path((null == u.getFragment()) ? p.getName() : u.getFragment()); String linkName = name.toUri().getPath(); localFiles.add(new Path(localWorkDir, linkName).toUri().getPath()); } if (!localFiles.isEmpty()) { job.set(MRJobConfig.CACHE_LOCALFILES, StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()]))); } } }