List of usage examples for org.apache.hadoop.mapred.JobConf.getJar()
public String getJar()
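Before the project examples below, here is a minimal, self-contained sketch of how getJar() is typically paired with JobConf.setJar(String) or JobConf.setJarByClass(Class). The class name and jar path are illustrative only: getJar() returns the user job jar recorded in the configuration, or null if none has been set or resolved.

import org.apache.hadoop.mapred.JobConf;

public class GetJarExample {
    public static void main(String[] args) {
        // Read back a jar that was set explicitly (the path is illustrative).
        JobConf conf = new JobConf();
        conf.setJar("/tmp/my-job.jar");
        System.out.println(conf.getJar()); // prints /tmp/my-job.jar

        // Or let Hadoop resolve the jar containing a given class.
        // getJar() returns null if no jar was set and the class was not
        // loaded from a jar (e.g. when running against a classes/ directory).
        JobConf byClass = new JobConf();
        byClass.setJarByClass(GetJarExample.class);
        String jar = byClass.getJar();
        System.out.println(jar != null ? jar : "no containing jar found");
    }
}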
From source file:it.crs4.pydoop.pipes.Submitter.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    cli.addOption("input", false, "input path to the maps", "path");
    cli.addOption("output", false, "output path from the reduces", "path");

    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    //cli.addArgument("javareader", false, "is the RecordReader in Java");
    cli.addOption("map", false, "java classname of Mapper", "class");
    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("reduce", false, "java classname of Reducer", "class");
    cli.addOption("writer", false, "java classname of OutputFormat", "class");
    cli.addOption("program", false, "URI to application executable", "class");
    cli.addOption("reduces", false, "number of reduces", "num");
    cli.addOption("jobconf", false,
            "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
    cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
    Parser parser = cli.createParser();
    try {
        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());

        JobConf job = new JobConf(getConf());

        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            setIsJavaRecordReader(job, true);
            job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
        }
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(job, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(job, true);
            job.setMapperClass(getClass(results, "map", job, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(job, true);
            job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            setIsJavaRecordWriter(job, true);
            job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
        }
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
            }
        }
        if (results.hasOption("program")) {
            setExecutable(job, results.getOptionValue("program"));
        }
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                String[] keyValSplit = keyVal.split("=");
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            final URL[] urls = new URL[] {
                    FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURL() };
            // FindBugs complains that creating a URLClassLoader should be
            // in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            job.setClassLoader(loader);
        }
        runJob(job);
        return 0;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }
}
From source file:org.apache.oozie.action.hadoop.TestMapReduceActionExecutor.java
License:Apache License
@SuppressWarnings("unchecked") public void testSetupMethods() throws Exception { MapReduceActionExecutor ae = new MapReduceActionExecutor(); assertEquals(Arrays.asList(StreamingMain.class), ae.getLauncherClasses()); Element actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "<configuration>" + "<property><name>mapred.input.dir</name><value>IN</value></property>" + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>" + "</map-reduce>"); XConfiguration protoConf = new XConfiguration(); protoConf.set(WorkflowAppService.HADOOP_USER, getTestUser()); WorkflowJobBean wf = createBaseWorkflow(protoConf, "mr-action"); WorkflowActionBean action = (WorkflowActionBean) wf.getActions().get(0); action.setType(ae.getType());//from ww w . ja v a2 s. c o m Context context = new Context(wf, action); Configuration conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals("IN", conf.get("mapred.input.dir")); JobConf launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf); assertEquals(false, launcherJobConf.getBoolean("mapreduce.job.complete.cancel.delegation.tokens", true)); assertEquals(true, conf.getBoolean("mapreduce.job.complete.cancel.delegation.tokens", false)); // Enable uber jars to test that MapReduceActionExecutor picks up the oozie.mapreduce.uber.jar property correctly Services serv = Services.get(); boolean originalUberJarDisabled = serv.getConf().getBoolean("oozie.action.mapreduce.uber.jar.enable", false); serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", true); actionXml = createUberJarActionXML(getNameNodeUri() + "/app/job.jar", ""); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals(getNameNodeUri() + "/app/job.jar", conf.get("oozie.mapreduce.uber.jar")); // absolute path with namenode launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf); assertEquals(getNameNodeUri() + "/app/job.jar", launcherJobConf.getJar()); // same for launcher conf actionXml = createUberJarActionXML("/app/job.jar", ""); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals(getNameNodeUri() + "/app/job.jar", conf.get("oozie.mapreduce.uber.jar")); // absolute path without namenode launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf); assertEquals(getNameNodeUri() + "/app/job.jar", launcherJobConf.getJar()); // same for launcher conf actionXml = createUberJarActionXML("job.jar", ""); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals(getFsTestCaseDir() + "/job.jar", conf.get("oozie.mapreduce.uber.jar")); // relative path launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf); assertEquals(getFsTestCaseDir() + "/job.jar", launcherJobConf.getJar()); // same for launcher actionXml = createUberJarActionXML("job.jar", "<streaming></streaming>"); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals("", conf.get("oozie.mapreduce.uber.jar")); // ignored for streaming launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, 
actionXml, conf); assertNull(launcherJobConf.getJar()); // same for launcher conf (not set) actionXml = createUberJarActionXML("job.jar", "<pipes></pipes>"); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals("", conf.get("oozie.mapreduce.uber.jar")); // ignored for pipes launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf); assertNull(launcherJobConf.getJar()); // same for launcher conf (not set) actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "</map-reduce>"); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertNull(conf.get("oozie.mapreduce.uber.jar")); // doesn't resolve if not set launcherJobConf = ae.createLauncherConf(getFileSystem(), context, action, actionXml, conf); assertNull(launcherJobConf.getJar()); // same for launcher conf // Disable uber jars to test that MapReduceActionExecutor won't allow the oozie.mapreduce.uber.jar property serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", false); try { actionXml = createUberJarActionXML(getNameNodeUri() + "/app/job.jar", ""); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); fail("ActionExecutorException expected because uber jars are disabled"); } catch (ActionExecutorException aee) { assertEquals("MR003", aee.getErrorCode()); assertEquals(ActionExecutorException.ErrorType.ERROR, aee.getErrorType()); assertTrue(aee.getMessage().contains("oozie.action.mapreduce.uber.jar.enable")); assertTrue(aee.getMessage().contains("oozie.mapreduce.uber.jar")); } serv.getConf().setBoolean("oozie.action.mapreduce.uber.jar.enable", originalUberJarDisabled); actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "<streaming>" + "<mapper>M</mapper>" + "<reducer>R</reducer>" + "<record-reader>RR</record-reader>" + "<record-reader-mapping>RRM1=1</record-reader-mapping>" + "<record-reader-mapping>RRM2=2</record-reader-mapping>" + "<env>e=E</env>" + "<env>ee=EE</env>" + "</streaming>" + "<configuration>" + "<property><name>mapred.input.dir</name><value>IN</value></property>" + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>" + "</map-reduce>"); conf = ae.createBaseHadoopConf(context, actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals("M", conf.get("oozie.streaming.mapper")); assertEquals("R", conf.get("oozie.streaming.reducer")); assertEquals("RR", conf.get("oozie.streaming.record-reader")); assertEquals("2", conf.get("oozie.streaming.record-reader-mapping.size")); assertEquals("2", conf.get("oozie.streaming.env.size")); actionXml = XmlUtils.parseXml("<map-reduce>" + "<job-tracker>" + getJobTrackerUri() + "</job-tracker>" + "<name-node>" + getNameNodeUri() + "</name-node>" + "<pipes>" + "<map>M</map>" + "<reduce>R</reduce>" + "<inputformat>IF</inputformat>" + "<partitioner>P</partitioner>" + "<writer>W</writer>" + "<program>PP</program>" + "</pipes>" + "<configuration>" + "<property><name>mapred.input.dir</name><value>IN</value></property>" + "<property><name>mapred.output.dir</name><value>OUT</value></property>" + "</configuration>" + "</map-reduce>"); conf = ae.createBaseHadoopConf(context, 
actionXml); ae.setupActionConf(conf, context, actionXml, getFsTestCaseDir()); assertEquals("M", conf.get("oozie.pipes.map")); assertEquals("R", conf.get("oozie.pipes.reduce")); assertEquals("IF", conf.get("oozie.pipes.inputformat")); assertEquals("P", conf.get("oozie.pipes.partitioner")); assertEquals("W", conf.get("oozie.pipes.writer")); assertEquals(getFsTestCaseDir() + "/PP", conf.get("oozie.pipes.program")); }
From source file:org.kitesdk.apps.cli.commands.InstallCommand.java
License:Apache License
private static final List<File> getLibraryJars() {

    // Current implementation assumes that library files
    // are in the same directory, so locate it and
    // include it in the project library.

    // This is ugly, using the jobConf logic to identify the containing
    // JAR. There should be a better way to do this.
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(InstallCommand.class);
    String containingJar = jobConf.getJar();

    File file = new File(containingJar).getParentFile();

    File[] jarFiles = file.listFiles();

    return Arrays.asList(jarFiles);
}
From source file:org.kitesdk.apps.spark.spi.scheduled.SparkJobManager.java
License:Apache License
@Override
public void writeOozieActionBlock(XMLWriter writer, Schedule schedule) {
    writer.startElement("spark");
    writer.addAttribute("xmlns", "uri:oozie:spark-action:0.1");
    element(writer, "job-tracker", "${jobTracker}");
    element(writer, "name-node", "${nameNode}");

    // TODO: the job-xml should probably be job-specific configuration.
    // element(writer, "job-xml", "${appConfigPath}");

    // Make the nominal time visible to the workflow action.
    writer.startElement("configuration");

    // Use the spark and hive sharelibs since many actions use both.
    property(writer, "oozie.action.sharelib.for.spark", "spark,hive2");

    property(writer, "kiteAppRoot", "${kiteAppRoot}");

    OozieScheduling.writeJobConfiguration(writer, schedule, context.getHadoopConf());

    writer.endElement(); // configuration

    element(writer, "master", "yarn-cluster");
    element(writer, "name", schedule.getName());
    element(writer, "class", SparkScheduledJobMain.class.getCanonicalName());

    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(schedule.getJobClass());
    String containingJar = jobConf.getJar();

    String jarName = containingJar != null
            ? "${kiteAppRoot}/lib/" + new File(containingJar).getName()
            : "";

    element(writer, "jar", jarName);

    element(writer, "spark-opts", getSparkConfString(schedule));

    element(writer, "arg", schedule.getJobClass().getName());

    writer.endElement(); // spark
}
From source file:org.kitesdk.apps.spark.spi.scheduled.SparkJobManager.java
License:Apache License
private static final List<File> getLibraryJars() {

    // Current implementation assumes that library files
    // are in the same directory, so locate it and
    // include it in the project library.

    // This is ugly, using the jobConf logic to identify the containing
    // JAR. There should be a better way to do this.
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(SchedulableJob.class);
    String containingJar = jobConf.getJar();

    if (containingJar == null)
        return Collections.emptyList();

    File file = new File(containingJar).getParentFile();

    File[] jarFiles = file.listFiles();

    return Arrays.asList(jarFiles);
}
From source file:org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java
License:Apache License
private static final List<File> getLibraryJars() {

    // Current implementation assumes that library files
    // are in the same directory, so locate it and
    // include it in the project library.

    // This is ugly, using the jobConf logic to identify the containing
    // JAR. There should be a better way to do this.
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(StreamingJob.class);
    String containingJar = jobConf.getJar();

    if (containingJar == null)
        return Collections.emptyList();

    File file = new File(containingJar).getParentFile();

    File[] jarFiles = file.listFiles();

    return Arrays.asList(jarFiles);
}
From source file:org.kitesdk.apps.spark.spi.streaming.SparkStreamingJobManager.java
License:Apache License
@Override
public void start(FileSystem fs, Path appRoot) {
    JobConf jobConf = new JobConf();
    jobConf.setJarByClass(SparkStreamingJobMain.class);
    String containingJar = jobConf.getJar();

    Path libPath = new Path(appRoot, "lib");
    Path jarPath = new Path(libPath, new File(containingJar).getName());
    jarPath = fs.makeQualified(jarPath);

    SparkLauncher launcher = new SparkLauncher();

    launcher.setMainClass(SparkStreamingJobMain.class.getName());
    launcher.setAppResource(jarPath.toString());
    launcher.setMaster("yarn-cluster");

    try {
        // Add the library JARs from HDFS so we don't need to reload
        // them separately into Spark.
        FileStatus[] libJars = fs.listStatus(libPath);
        for (FileStatus jar : libJars) {
            launcher.addJar(jar.getPath().toString());
        }

        // Add the sharelib JARs, since they are not visible to Spark otherwise.
        List<Path> shareLibJars = ShareLibs.jars(sparkJobContext.getHadoopConf(), "hive2");
        for (Path sharelibJar : shareLibJars) {
            launcher.addJar(fs.makeQualified(sharelibJar).toString());
        }
    } catch (IOException e) {
        throw new AppException(e);
    }

    launcher.addAppArgs(appRoot.toString(), description.getJobName());

    // Explicitly set the metastore URI to be usable in the job.
    launcher.setConf("spark.hadoop.hive.metastore.uris",
            sparkJobContext.getHadoopConf().get("hive.metastore.uris"));

    // Add the Avro classes.
    List<Schema> schemas = JobReflection.getSchemas(job);
    StringBuilder avroClassesArg = new StringBuilder();
    avroClassesArg.append("-D").append(KryoAvroRegistrator.KITE_AVRO_CLASSES).append("=");

    boolean first = true;
    for (Schema schema : schemas) {
        if (!first) {
            avroClassesArg.append(",");
        }
        avroClassesArg.append(SpecificData.get().getClass(schema).getName());
        first = false;
    }

    launcher.setConf("spark.driver.extraJavaOptions", avroClassesArg.toString());
    launcher.setConf("spark.executor.extraJavaOptions", avroClassesArg.toString());

    try {
        Process process = launcher.launch();

        // Redirect the spark-submit output to be visible to the reader.
        Thread stdoutThread = writeOutput(process.getInputStream(), System.out);
        Thread stderrThread = writeOutput(process.getErrorStream(), System.err);

        int result = process.waitFor();

        stdoutThread.join();
        stderrThread.join();

        if (result != 0) {
            throw new AppException("spark-submit returned error status: " + result);
        }
    } catch (IOException e) {
        throw new AppException(e);
    } catch (InterruptedException e) {
        throw new AppException(e);
    }
}
From source file:tap.CommandOptions.java
License:Apache License
/**
 * @param pipeline
 * @param conf
 * @param parser
 * @param e
 */
private void handleCmdLineException(Tap pipeline, JobConf conf, CmdLineParser parser, CmdLineException e) {
    String jobName = pipeline.getName();
    if (jobName == null) {
        jobName = "yourJob";
    }
    String jarName = conf.getJar();
    if (jarName == null) {
        jarName = "yourJar";
    }
    String cmd = "hadoop jar " + jarName + " " + jobName;

    System.err.println(e.getMessage());
    System.err.println("Usage: " + cmd + " [options...] arguments...");
    parser.printUsage(System.err);
    System.err.println();

    // print option sample. This is useful some time
    System.err.println(" Example: " + cmd + " " + parser.printExample(ALL));
}
From source file:voldemort.store.readonly.mr.azkaban.AbstractHadoopJob.java
License:Apache License
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    // set custom class loader with custom find resource strategy.

    conf.setJobName(getId());
    conf.setMapperClass(mapperClass);
    conf.setReducerClass(reducerClass);

    String hadoop_ugi = _props.getString("hadoop.job.ugi", null);
    if (hadoop_ugi != null) {
        conf.set("hadoop.job.ugi", hadoop_ugi);
    }

    if (_props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        info("Running locally, no hadoop jar set.");
    } else {
        setClassLoaderAndJar(conf, getClass());
        info("Setting hadoop jar file for class:" + getClass() + " to " + conf.getJar());
        info("*************************************************************************");
        info(" Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ") ");
        info("*************************************************************************");
    }

    // set JVM options if present
    if (_props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", _props.getString("mapred.child.java.opts"));
        info("mapred.child.java.opts set to " + _props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (_props.containsKey("input.paths")) {
        List<String> inputPaths = _props.getStringList("input.paths");
        if (inputPaths.size() == 0)
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        for (String path : inputPaths) {
            // Implied stuff, but good implied stuff
            if (path.endsWith(LATEST_SUFFIX)) {
                FileSystem fs = FileSystem.get(conf);

                PathFilter filter = new PathFilter() {

                    @Override
                    public boolean accept(Path arg0) {
                        return !arg0.getName().startsWith("_") && !arg0.getName().startsWith(".");
                    }
                };

                String latestPath = path.substring(0, path.length() - LATEST_SUFFIX.length());
                FileStatus[] statuses = fs.listStatus(new Path(latestPath), filter);

                Arrays.sort(statuses);

                path = statuses[statuses.length - 1].getPath().toString();
                System.out.println("Using latest folder: " + path);
            }
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (_props.containsKey("output.path")) {
        String location = _props.get("output.path");
        if (location.endsWith("#CURRENT")) {
            DateTimeFormatter format = DateTimeFormat.forPattern(COMMON_FILE_DATE_PATTERN);
            String destPath = format.print(new DateTime());
            location = location.substring(0, location.length() - "#CURRENT".length()) + destPath;
            System.out.println("Store location set to " + location);
        }

        FileOutputFormat.setOutputPath(conf, new Path(location));
        // For testing purpose only remove output file if exists
        if (_props.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // Adds External jars to hadoop classpath
    String externalJarList = _props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            info("Adding extenral jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf);
        }
    }

    // Adds distributed cache files
    String cacheFileList = _props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache archives
    String archiveFileList = _props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    String hadoopCacheJarDir = _props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (int i = 0; i < status.length; ++i) {
                    if (!status[i].isDir()) {
                        Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName());
                        info("Adding Jar to Distributed Cache Archive File:" + path);

                        DistributedCache.addFileToClassPath(path, conf);
                    }
                }
            } else {
                info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    // May want to add this to HadoopUtils, but will await refactoring
    for (String key : getProps().keySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }

    HadoopUtils.setPropsInJob(conf, getProps());
    return conf;
}