List of usage examples for org.apache.hadoop.mapred JobConf getJar
public String getJar()
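getJar() returns the path of the jar file configured for the job (in the classic mapred API this is backed by the mapred.jar property), or null when no jar has been set through setJar or setJarByClass. A minimal sketch of that round trip; the jar path below is a hypothetical placeholder:

import org.apache.hadoop.mapred.JobConf;

public class GetJarDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        System.out.println(conf.getJar());         // null - no jar configured yet

        conf.setJar("/opt/jobs/wordcount.jar");    // hypothetical path
        System.out.println(conf.getJar());         // /opt/jobs/wordcount.jar
    }
}

The usage examples below show the same accessor in real projects: logging the submitted jar, falling back when it is null, and deriving paths relative to it.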
From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java
License:Apache License
@SuppressWarnings("rawtypes") public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass) throws IOException, URISyntaxException { JobConf conf = new JobConf(); // set custom class loader with custom find resource strategy. conf.setJobName(getJobName());//from w ww . ja v a2s . c om conf.setMapperClass(mapperClass); if (reducerClass != null) { conf.setReducerClass(reducerClass); } if (props.getBoolean("is.local", false)) { conf.set("mapred.job.tracker", "local"); conf.set("fs.default.name", "file:///"); conf.set("mapred.local.dir", "/tmp/map-red"); logger.info("Running locally, no hadoop jar set."); } else { HadoopUtils.setClassLoaderAndJar(conf, getClass()); logger.info("Setting hadoop jar file for class:" + getClass() + " to " + conf.getJar()); logger.info("*************************************************************************"); logger.info( " Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ") "); logger.info("*************************************************************************"); } // set JVM options if present if (props.containsKey("mapred.child.java.opts")) { conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts")); logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts")); } // set input and output paths if they are present if (props.containsKey("input.paths")) { List<String> inputPaths = props.getStringList("input.paths"); if (inputPaths.size() == 0) throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'"); for (String path : inputPaths) { HadoopUtils.addAllSubPaths(conf, new Path(path)); } } if (props.containsKey("output.path")) { String location = props.get("output.path"); FileOutputFormat.setOutputPath(conf, new Path(location)); // For testing purpose only remove output file if exists if (props.getBoolean("force.output.overwrite", false)) { FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf); fs.delete(FileOutputFormat.getOutputPath(conf), true); } } // Adds External jars to hadoop classpath String externalJarList = props.getString("hadoop.external.jarFiles", null); if (externalJarList != null) { FileSystem fs = FileSystem.get(conf); String[] jarFiles = externalJarList.split(","); for (String jarFile : jarFiles) { logger.info("Adding extenral jar File:" + jarFile); DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs); } } // Adds distributed cache files String cacheFileList = props.getString("hadoop.cache.files", null); if (cacheFileList != null) { String[] cacheFiles = cacheFileList.split(","); for (String cacheFile : cacheFiles) { logger.info("Adding Distributed Cache File:" + cacheFile); DistributedCache.addCacheFile(new URI(cacheFile), conf); } } // Adds distributed cache files String archiveFileList = props.getString("hadoop.cache.archives", null); if (archiveFileList != null) { String[] archiveFiles = archiveFileList.split(","); for (String archiveFile : archiveFiles) { logger.info("Adding Distributed Cache Archive File:" + archiveFile); DistributedCache.addCacheArchive(new URI(archiveFile), conf); } } String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null); if (hadoopCacheJarDir != null) { FileSystem fs = FileSystem.get(conf); if (fs != null) { FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir)); if (status != null) { for (int i = 0; i < status.length; ++i) { if (!status[i].isDir()) { Path path = new Path(hadoopCacheJarDir, 
status[i].getPath().getName()); logger.info("Adding Jar to Distributed Cache Archive File:" + path); DistributedCache.addFileToClassPath(path, conf, fs); } } } else { logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty."); } } else { logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist"); } } for (String key : getProps().getKeySet()) { String lowerCase = key.toLowerCase(); if (lowerCase.startsWith(HADOOP_PREFIX)) { String newKey = key.substring(HADOOP_PREFIX.length()); conf.set(newKey, getProps().get(key)); } } HadoopUtils.setPropsInJob(conf, getProps()); // put in tokens if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) { conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION)); } return conf; }
From source file:colossal.pipe.BaseOptions.java
License:Apache License
public int parse(ColPipe pipeline, String... args) {
    JobConf conf = pipeline.getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    CmdLineParser parser = new CmdLineParser(this);
    try {
        parser.parseArgument(otherArgs);
    } catch (CmdLineException e) {
        String jobName = pipeline.getName();
        if (jobName == null) {
            jobName = "yourJob";
        }
        String jarName = conf.getJar();
        if (jarName == null) {
            jarName = "yourJar";
        }
        String cmd = "hadoop jar " + jarName + " " + jobName;
        System.err.println(e.getMessage());
        System.err.println("Usage: " + cmd + " [options...] arguments...");
        parser.printUsage(System.err);
        System.err.println();

        // print an option sample - this is sometimes useful
        System.err.println(" Example: " + cmd + " " + parser.printExample(ALL));
        return 1;
    }
    return 0;
}
From source file:com.asakusafw.runtime.stage.launcher.LauncherOptionsParserTest.java
License:Apache License
/**
 * w/ libjars.
 * @throws Exception if failed
 */
@Test
public void w_libjars() throws Exception {
    File lib = putFile("dummy.jar");
    LauncherOptions options = parse(new String[] {
            MockTool.class.getName(),
            LauncherOptionsParser.KEY_ARG_LIBRARIES, lib.getPath(),
    });
    assertClasspath(options.getApplicationClassLoader().getURLs(), "testing");
    assertThat(lib, is(inClasspath(options.getApplicationClassLoader().getURLs())));
    assertClasspath(GenericOptionsParser.getLibJars(conf), "testing");
    assertThat(lib, is(inClasspath(GenericOptionsParser.getLibJars(conf))));
    JobConf jc = new JobConf(conf);
    assertThat(jc.getJar(), is(nullValue()));
}
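The final assertion holds because wrapping an existing Configuration in a JobConf copies its settings but never invents a jar. A minimal sketch of that copy behavior, assuming a hypothetical property key:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class JobConfCopyDemo {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("some.custom.key", "value");          // hypothetical setting

        JobConf jc = new JobConf(conf);
        System.out.println(jc.get("some.custom.key")); // value - settings are copied
        System.out.println(jc.getJar());               // null - no jar was ever set
    }
}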
From source file:com.google.mr4c.hadoop.MR4CMRJob.java
License:Open Source License
public void updateFrom(JobConf jobConf) {
    MR4CConfig bbConf = new MR4CConfig(false);
    bbConf.initStandardCategories();

    // pull in all mr4c namespaced properties from the job conf;
    // some of these will be overridden by hadoop properties in the job conf
    bbConf.importProperties(jobConf);

    // Don't pick up cluster or env vars - only exported
    setMR4CJar(bbConf, jobConf.getJar());
    importProperty(bbConf, jobConf, Category.HADOOP, HadoopConfig.PROP_TASKS, PROP_TASKS);
    S3Credentials cred = S3Credentials.extractFrom(jobConf);
    if (cred != null) {
        cred.applyTo(bbConf);
    }
    importProperty(bbConf, jobConf, Category.CUSTOM, CustomConfig.PROP_JOBID,
            m_onCluster ? PROP_MAPRED_JOBID : PROP_LAUNCHER_JOBID);
    importProperty(bbConf, jobConf, Category.CUSTOM, CustomConfig.PROP_TASKID,
            m_onCluster ? PROP_MAPRED_TASKID : PROP_LAUNCHER_TASKID);

    if ((isRemote(m_config) || isRemote(bbConf)) && m_onCluster) {
        // don't want to pick these up from the job submission environment
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_EXE_CONF);
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_LOG4J_CONF);
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_LIB_PATH);
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_ROOT_DIR);
        clearProperty(bbConf, Category.HADOOP, HadoopConfig.PROP_MR4C_JAR);
        bbConf.getCategory(Category.CORE).setProperty(CoreConfig.PROP_EXE_CONF, REMOTE_EXE_CONF);
    }

    // finally have what we want, apply to config
    m_config.importProperties(CollectionUtils.toMap(bbConf.getProperties()).entrySet());
}
From source file:com.google.mr4c.hadoop.MR4CMRJobTest.java
License:Open Source License
@Test
public void testExport() throws Exception {
    JobConf jobConf = newJobConf();
    m_sourceMRJob.applyTo(jobConf);
    assertEquals(m_jar, jobConf.getJar());
    Cluster cluster = Cluster.extractFromConfig(jobConf);
    assertEquals(m_cluster, cluster);
    assertEquals("5", jobConf.get(MR4CMRJob.PROP_TASKS));
}
From source file:com.ibm.jaql.util.ClassLoaderMgr.java
License:Apache License
private JarOutputStream getJarOutputStream() {
    // If we have an existing jar stream, just use it
    if (extendedJarStream != null) {
        return extendedJarStream;
    }

    // Otherwise create a new jar and an output stream to which new jars
    // can be appended
    File baseJar = null;
    if (extendedJarPath != null) {
        baseJar = extendedJarPath;
    } else {
        JobConf job = new JobConf();
        job.setJarByClass(JaqlUtil.class);
        String original = job.getJar();
        if (original != null) {
            baseJar = new File(original);
        }
    }

    // Create new temp jaql file
    File tmpDir = new File(System.getProperty("java.io.tmpdir") + File.separator + "jaql_" + System.nanoTime());
    tmpDir.mkdir();
    // TODO: figure out why this causes occasional thread dumps on linux
    //tmpDir.deleteOnExit();
    extendedJarPath = new File(tmpDir.getAbsoluteFile() + File.separator + "jaql.jar");
    BaseUtil.LOG.info("creating new jaql.jar: " + extendedJarPath + ", starting from: " + baseJar);

    // Copy files over into new file
    try {
        JarOutputStream jout = null;
        if (baseJar != null) {
            JarInputStream jin = new JarInputStream(new FileInputStream(baseJar));
            FileOutputStream fout = new FileOutputStream(extendedJarPath);
            Manifest man = jin.getManifest();
            jout = man == null ? new JarOutputStream(fout) : new JarOutputStream(fout, man);
            copyJarFile(jin, jout);
        } else {
            jout = new JarOutputStream(new FileOutputStream(extendedJarPath));
        }
        extendedJarStream = jout;
    } catch (IOException e) {
        BaseUtil.LOG.error("Error creating jar: " + e);
        throw new RuntimeException(e);
    }
    return extendedJarStream;
}
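The setJarByClass/getJar pair above is a common idiom for locating the jar that contains a given class. A minimal sketch of the same pattern in isolation (ClassJarLocator and findContainingJar are illustrative names, not part of the Jaql source):

import org.apache.hadoop.mapred.JobConf;

public class ClassJarLocator {
    // Returns the path of the jar containing clazz, or null when the class
    // is loaded from a directory (e.g. an unpacked IDE build)
    public static String findContainingJar(Class<?> clazz) {
        JobConf probe = new JobConf();
        probe.setJarByClass(clazz);
        return probe.getJar();
    }
}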
From source file:com.mellanox.hadoop.mapred.MapOutputLocation.java
License:Apache License
protected void configureClasspath(JobConf conf) throws IOException {
    // get the task and the current classloader which will become the parent
    Task task = reduceTask;
    ClassLoader parent = conf.getClassLoader();

    // get the work directory which holds the elements we are dynamically
    // adding to the classpath
    File workDir = new File(task.getJobFile()).getParentFile();
    ArrayList<URL> urllist = new ArrayList<URL>();

    // add the jars and directories to the classpath
    String jar = conf.getJar();
    if (jar != null) {
        File jobCacheDir = new File(new Path(jar).getParent().toString());
        File[] libs = new File(jobCacheDir, "lib").listFiles();
        if (libs != null) {
            for (int i = 0; i < libs.length; i++) {
                urllist.add(libs[i].toURL());
            }
        }
        urllist.add(new File(jobCacheDir, "classes").toURL());
        urllist.add(jobCacheDir.toURL());
    }
    urllist.add(workDir.toURL());

    // create a new classloader with the old classloader as its parent
    // then set that classloader as the one used by the current jobconf
    URL[] urls = urllist.toArray(new URL[urllist.size()]);
    URLClassLoader loader = new URLClassLoader(urls, parent);
    conf.setClassLoader(loader);
}
From source file:com.xiaoxiaomo.mr.utils.kafka.HadoopJob.java
License:Apache License
public int run(String[] args) throws Exception {
    CommandLineParser parser = new PosixParser();
    Options options = buildOptions();
    CommandLine cmd = parser.parse(options, args);

    if (cmd.hasOption("h") || cmd.getArgs().length == 0) {
        printHelpAndExit(options);
    }
    String hdfsPath = cmd.getArgs()[0];

    Configuration conf = getConf();
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);

    if (cmd.hasOption("topics")) {
        LOG.info("Using topics: " + cmd.getOptionValue("topics"));
        KafkaInputFormat.configureKafkaTopics(conf, cmd.getOptionValue("topics"));
    } else {
        printHelpAndExit(options);
    }

    KafkaInputFormat.configureZkConnection(conf, cmd.getOptionValue("zk-connect", "localhost:2181"));
    if (cmd.hasOption("consumer-group")) {
        CheckpointManager.configureUseZooKeeper(conf, cmd.getOptionValue("consumer-group", "dev-hadoop-loader"));
    }

    if (cmd.getOptionValue("autooffset-reset") != null) {
        KafkaInputFormat.configureAutoOffsetReset(conf, cmd.getOptionValue("autooffset-reset"));
    }

    JobConf jobConf = new JobConf(conf);
    if (cmd.hasOption("remote")) {
        String ip = cmd.getOptionValue("remote");
        LOG.info("Default file system: hdfs://" + ip + ":8020/");
        jobConf.set("fs.defaultFS", "hdfs://" + ip + ":8020/");
        LOG.info("Remote jobtracker: " + ip + ":8021");
        jobConf.set("mapred.job.tracker", ip + ":8021");
    }

    Path jarTarget = new Path(
            getClass().getProtectionDomain().getCodeSource().getLocation() + "../kafka-hadoop-loader.jar");
    if (new File(jarTarget.toUri()).exists()) {
        // running from the IDE or via maven
        jobConf.setJar(jarTarget.toUri().getPath());
        LOG.info("Using target jar: " + jarTarget.toString());
    } else {
        // running from a jar, remotely or locally
        jobConf.setJarByClass(getClass());
        LOG.info("Using parent jar: " + jobConf.getJar());
    }

    Job job = Job.getInstance(jobConf, "kafka.hadoop.loader");
    job.setInputFormatClass(KafkaInputFormat.class);
    job.setMapperClass(HadoopJobMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    MultiOutputFormat.setOutputPath(job, new Path(hdfsPath));
    MultiOutputFormat.setCompressOutput(job, cmd.getOptionValue("compress-output", "on").equals("on"));

    LOG.info("Output hdfs location: {}", hdfsPath);
    LOG.info("Output hdfs compression: {}", MultiOutputFormat.getCompressOutput(job));

    return job.waitForCompletion(true) ? 0 : -1;
}
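The jar-selection branch above, explicit setJar when a freshly built artifact exists on disk and setJarByClass otherwise, is worth isolating. A minimal sketch under the same assumptions (JarResolver is an illustrative name):

import java.io.File;

import org.apache.hadoop.mapred.JobConf;

public class JarResolver {
    // Prefer an explicitly built jar when present; otherwise let Hadoop
    // locate whichever jar contains the job class.
    public static void resolveJar(JobConf jobConf, Class<?> jobClass, String builtJarPath) {
        File built = new File(builtJarPath);
        if (built.exists()) {
            jobConf.setJar(built.getPath());      // explicit path wins
        } else {
            jobConf.setJarByClass(jobClass);      // derived from the classpath
        }
        System.out.println("Submitting with jar: " + jobConf.getJar());
    }
}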
From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java
License:Apache License
public void configure(JobConf job) {
    try {
        String argv = getPipeCommand(job);

        joinDelay_ = job.getLong("stream.joindelay.milli", 0);

        job_ = job;
        fs_ = FileSystem.get(job_);

        nonZeroExitIsFailure_ = job_.getBoolean("stream.non.zero.exit.is.failure", true);

        doPipe_ = getDoPipe();
        if (!doPipe_)
            return;

        setStreamJobDetails(job);

        String[] argvSplit = splitArgs(argv);
        String prog = argvSplit[0];
        File currentDir = new File(".").getAbsoluteFile();
        if (new File(prog).isAbsolute()) {
            // we don't own it. Hope it is executable
        } else {
            // Try to find the executable in the unpacked job JAR and make it
            // absolute if present. Otherwise, leave it relative to be resolved
            // against PATH
            File jarDir = new File(job.getJar()).getParentFile();
            File progFile = new File(jarDir, argvSplit[0]);
            if (progFile.isFile()) {
                progFile.setExecutable(true);
                argvSplit[0] = progFile.getAbsolutePath();
            }
        }

        logprintln("PipeMapRed exec " + Arrays.asList(argvSplit));

        Hashtable<String, String> childEnv = new Hashtable<String, String>();
        addJobConfToEnvironment(job_, childEnv);
        addEnvironment(childEnv, job_.get("stream.addenvironment"));
        // add TMPDIR environment variable with the value of java.io.tmpdir
        envPut(childEnv, "TMPDIR", System.getProperty("java.io.tmpdir"));

        // Start the process
        ProcessBuilder builder = new ProcessBuilder(argvSplit);
        // The process' environment initially inherits all vars from the parent --
        // only setting those we add/override
        builder.environment().putAll(childEnv);
        // Set the working directory to the job jar's directory
        // This is a bad idea... fix this.
        builder.directory(new File(job.getJar()).getParentFile());
        sim = builder.start();

        clientOut_ = new DataOutputStream(new BufferedOutputStream(sim.getOutputStream(), BUFFER_SIZE));
        clientIn_ = new DataInputStream(new BufferedInputStream(sim.getInputStream(), BUFFER_SIZE));
        clientErr_ = new DataInputStream(new BufferedInputStream(sim.getErrorStream()));
        startTime_ = System.currentTimeMillis();

        errThread_ = new MRErrorThread();
        errThread_.start();
    } catch (Exception e) {
        logStackTrace(e);
        LOG.error("configuration exception", e);
        throw new RuntimeException("configuration exception", e);
    }
}
From source file:edu.uci.ics.hyracks.hadoop.compat.driver.CompatibilityLayer.java
License:Apache License
private String getApplicationNameForHadoopJob(JobConf jobConf) {
    String jar = jobConf.getJar();
    if (jar != null) {
        return jar.substring(jar.lastIndexOf("/") >= 0 ? jar.lastIndexOf("/") + 1 : 0);
    } else {
        return "" + System.currentTimeMillis();
    }
}
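Since getJar() returns a full path, display names usually strip the directory part. A tiny equivalent using java.nio (AppNameDemo is a placeholder name):

import java.nio.file.Paths;

public class AppNameDemo {
    // Derive a short application name from a jar path, falling back to a
    // timestamp when no jar is set, mirroring the method above
    static String appName(String jar) {
        return jar == null
                ? String.valueOf(System.currentTimeMillis())
                : Paths.get(jar).getFileName().toString();
    }

    public static void main(String[] args) {
        System.out.println(appName("/opt/jobs/wordcount.jar")); // wordcount.jar
    }
}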