Example usage for org.apache.hadoop.mapred JobConf getJar

List of usage examples for org.apache.hadoop.mapred JobConf getJar

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.getJar().

Prototype

public String getJar() 

Document

Get the user jar for the map-reduce job.
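
A minimal, self-contained sketch (not taken from the projects listed below) of how getJar() is typically paired with setJarByClass; the class name GetJarExample is made up for illustration, and getJar() returns null when no job jar has been set:

import org.apache.hadoop.mapred.JobConf;

public class GetJarExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Register the jar that contains this class as the job jar
        conf.setJarByClass(GetJarExample.class);
        // getJar() returns the jar path, or null if no jar could be located
        System.out.println("Job jar: " + conf.getJar());
    }
}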

Usage

From source file:azkaban.jobtype.javautils.AbstractHadoopJob.java

License:Apache License

@SuppressWarnings("rawtypes")
public JobConf createJobConf(Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass)
        throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    // set custom class loader with custom find resource strategy.

    conf.setJobName(getJobName());
    conf.setMapperClass(mapperClass);
    if (reducerClass != null) {
        conf.setReducerClass(reducerClass);
    }

    if (props.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");

        logger.info("Running locally, no hadoop jar set.");
    } else {
        HadoopUtils.setClassLoaderAndJar(conf, getClass());
        logger.info("Setting hadoop jar file for class:" + getClass() + "  to " + conf.getJar());
        logger.info("*************************************************************************");
        logger.info(
                "          Running on Real Hadoop Cluster(" + conf.get("mapred.job.tracker") + ")           ");
        logger.info("*************************************************************************");
    }

    // set JVM options if present
    if (props.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", props.getString("mapred.child.java.opts"));
        logger.info("mapred.child.java.opts set to " + props.getString("mapred.child.java.opts"));
    }

    // set input and output paths if they are present
    if (props.containsKey("input.paths")) {
        List<String> inputPaths = props.getStringList("input.paths");
        if (inputPaths.size() == 0)
            throw new IllegalArgumentException("Must specify at least one value for property 'input.paths'");
        for (String path : inputPaths) {
            HadoopUtils.addAllSubPaths(conf, new Path(path));
        }
    }

    if (props.containsKey("output.path")) {
        String location = props.get("output.path");
        FileOutputFormat.setOutputPath(conf, new Path(location));

        // For testing purposes only: remove the output file if it exists
        if (props.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // Adds External jars to hadoop classpath
    String externalJarList = props.getString("hadoop.external.jarFiles", null);
    if (externalJarList != null) {
        FileSystem fs = FileSystem.get(conf);
        String[] jarFiles = externalJarList.split(",");
        for (String jarFile : jarFiles) {
            logger.info("Adding extenral jar File:" + jarFile);
            DistributedCache.addFileToClassPath(new Path(jarFile), conf, fs);
        }
    }

    // Adds distributed cache files
    String cacheFileList = props.getString("hadoop.cache.files", null);
    if (cacheFileList != null) {
        String[] cacheFiles = cacheFileList.split(",");
        for (String cacheFile : cacheFiles) {
            logger.info("Adding Distributed Cache File:" + cacheFile);
            DistributedCache.addCacheFile(new URI(cacheFile), conf);
        }
    }

    // Adds distributed cache archives
    String archiveFileList = props.getString("hadoop.cache.archives", null);
    if (archiveFileList != null) {
        String[] archiveFiles = archiveFileList.split(",");
        for (String archiveFile : archiveFiles) {
            logger.info("Adding Distributed Cache Archive File:" + archiveFile);
            DistributedCache.addCacheArchive(new URI(archiveFile), conf);
        }
    }

    String hadoopCacheJarDir = props.getString("hdfs.default.classpath.dir", null);
    if (hadoopCacheJarDir != null) {
        FileSystem fs = FileSystem.get(conf);
        if (fs != null) {
            FileStatus[] status = fs.listStatus(new Path(hadoopCacheJarDir));

            if (status != null) {
                for (int i = 0; i < status.length; ++i) {
                    if (!status[i].isDir()) {
                        Path path = new Path(hadoopCacheJarDir, status[i].getPath().getName());
                        logger.info("Adding Jar to Distributed Cache Archive File:" + path);

                        DistributedCache.addFileToClassPath(path, conf, fs);
                    }
                }
            } else {
                logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " is empty.");
            }
        } else {
            logger.info("hdfs.default.classpath.dir " + hadoopCacheJarDir + " filesystem doesn't exist");
        }
    }

    for (String key : getProps().getKeySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, getProps().get(key));
        }
    }

    HadoopUtils.setPropsInJob(conf, getProps());

    // put in tokens
    if (System.getenv(HADOOP_TOKEN_FILE_LOCATION) != null) {
        conf.set(MAPREDUCE_JOB_CREDENTIALS_BINARY, System.getenv(HADOOP_TOKEN_FILE_LOCATION));
    }

    return conf;
}

From source file:colossal.pipe.BaseOptions.java

License:Apache License

public int parse(ColPipe pipeline, String... args) {
    JobConf conf = pipeline.getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    CmdLineParser parser = new CmdLineParser(this);

    try {
        parser.parseArgument(otherArgs);
    } catch (CmdLineException e) {
        String jobName = pipeline.getName();
        if (jobName == null) {
            jobName = "yourJob";
        }
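        // getJar() returns null when no job jar has been set on this JobConf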
        String jarName = conf.getJar();
        if (jarName == null) {
            jarName = "yourJar";
        }
        String cmd = "hadoop jar " + jarName + " " + jobName;
        System.err.println(e.getMessage());
        System.err.println("Usage: " + cmd + " [options...] arguments...");
        parser.printUsage(System.err);
        System.err.println();

        // print an option sample; this is sometimes useful
        System.err.println("  Example: " + cmd + " " + parser.printExample(ALL));
        return 1;
    }
    return 0;
}

From source file:com.asakusafw.runtime.stage.launcher.LauncherOptionsParserTest.java

License:Apache License

/**
 * w/ libjars.
 * @throws Exception if failed
 */
@Test
public void w_libjars() throws Exception {
    File lib = putFile("dummy.jar");
    LauncherOptions options = parse(
            new String[] { MockTool.class.getName(), LauncherOptionsParser.KEY_ARG_LIBRARIES, lib.getPath(), });
    assertClasspath(options.getApplicationClassLoader().getURLs(), "testing");
    assertThat(lib, is(inClasspath(options.getApplicationClassLoader().getURLs())));

    assertClasspath(GenericOptionsParser.getLibJars(conf), "testing");
    assertThat(lib, is(inClasspath(GenericOptionsParser.getLibJars(conf))));

    JobConf jc = new JobConf(conf);
    assertThat(jc.getJar(), is(nullValue()));
}

From source file:com.google.mr4c.hadoop.MR4CMRJob.java

License:Open Source License

public void updateFrom(JobConf jobConf) {

    MR4CConfig bbConf = new MR4CConfig(false);
    bbConf.initStandardCategories();

    // pull in all mr4c namespaced properties from the job conf
    // some of these will be overridden by hadoop properties in the job conf
    bbConf.importProperties(jobConf);

    // Don't pick up cluster or env vars - only exported

    setMR4CJar(bbConf, jobConf.getJar());
    importProperty(bbConf, jobConf, Category.HADOOP, HadoopConfig.PROP_TASKS, PROP_TASKS);

    S3Credentials cred = S3Credentials.extractFrom(jobConf);
    if (cred != null) {
        cred.applyTo(bbConf);
    }

    importProperty(bbConf, jobConf, Category.CUSTOM, CustomConfig.PROP_JOBID,
            m_onCluster ? PROP_MAPRED_JOBID : PROP_LAUNCHER_JOBID);
    importProperty(bbConf, jobConf, Category.CUSTOM, CustomConfig.PROP_TASKID,
            m_onCluster ? PROP_MAPRED_TASKID : PROP_LAUNCHER_TASKID);

    if ((isRemote(m_config) || isRemote(bbConf)) && m_onCluster) {
        // don't want to pick these up from the job submission environment
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_EXE_CONF);
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_LOG4J_CONF);
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_LIB_PATH);
        clearProperty(bbConf, Category.CORE, CoreConfig.PROP_ROOT_DIR);
        clearProperty(bbConf, Category.HADOOP, HadoopConfig.PROP_MR4C_JAR);
        bbConf.getCategory(Category.CORE).setProperty(CoreConfig.PROP_EXE_CONF, REMOTE_EXE_CONF);
    }

    // finally have what we want, apply to config
    m_config.importProperties(CollectionUtils.toMap(bbConf.getProperties()).entrySet());

}

From source file:com.google.mr4c.hadoop.MR4CMRJobTest.java

License:Open Source License

@Test
public void testExport() throws Exception {
    JobConf jobConf = newJobConf();
    m_sourceMRJob.applyTo(jobConf);
    assertEquals(m_jar, jobConf.getJar());
    Cluster cluster = Cluster.extractFromConfig(jobConf);
    assertEquals(m_cluster, cluster);
    assertEquals("5", jobConf.get(MR4CMRJob.PROP_TASKS));
}

From source file:com.ibm.jaql.util.ClassLoaderMgr.java

License:Apache License

private JarOutputStream getJarOutputStream() {
    //If we have an existing jar stream just use it
    if (extendedJarStream != null) {
        return extendedJarStream;
    }

    //Otherwise create a new jar and output stream to which new jars
    //can be appended
    File baseJar = null;
    if (extendedJarPath != null) {
        baseJar = extendedJarPath;
    } else {
        JobConf job = new JobConf();
        job.setJarByClass(JaqlUtil.class);
        String original = job.getJar();
        if (original != null) {
            baseJar = new File(original);
        }
    }

    //Create new temp jaql file
    File tmpDir = new File(System.getProperty("java.io.tmpdir") + File.separator + "jaql_" + System.nanoTime());
    tmpDir.mkdir();
    // TODO: figure out why this causes occasional thread dumps on linux
    //tmpDir.deleteOnExit(); 

    extendedJarPath = new File(tmpDir.getAbsoluteFile() + File.separator + "jaql.jar");
    BaseUtil.LOG.info("creating new jaql.jar: " + extendedJarPath + ", starting from: " + baseJar);
    //Copy files over into new file
    try {
        JarOutputStream jout = null;
        if (baseJar != null) {
            JarInputStream jin = new JarInputStream(new FileInputStream(baseJar));
            FileOutputStream fout = new FileOutputStream(extendedJarPath);
            Manifest man = jin.getManifest();
            jout = man == null ? new JarOutputStream(fout) : new JarOutputStream(fout, man);
            copyJarFile(jin, jout);
        } else {
            jout = new JarOutputStream(new FileOutputStream(extendedJarPath));
        }
        extendedJarStream = jout;
    } catch (IOException e) {
        BaseUtil.LOG.error("Error creating jar: " + e);
        throw new RuntimeException(e);
    }

    return extendedJarStream;
}

From source file:com.mellanox.hadoop.mapred.MapOutputLocation.java

License:Apache License

protected void configureClasspath(JobConf conf) throws IOException {

    // get the task and the current classloader which will become the parent
    Task task = reduceTask;
    ClassLoader parent = conf.getClassLoader();

    // get the work directory which holds the elements we are dynamically
    // adding to the classpath
    File workDir = new File(task.getJobFile()).getParentFile();
    ArrayList<URL> urllist = new ArrayList<URL>();

    // add the jars and directories to the classpath
    String jar = conf.getJar();
    if (jar != null) {
        File jobCacheDir = new File(new Path(jar).getParent().toString());

        File[] libs = new File(jobCacheDir, "lib").listFiles();
        if (libs != null) {
            for (int i = 0; i < libs.length; i++) {
                urllist.add(libs[i].toURL());
            }
        }
        urllist.add(new File(jobCacheDir, "classes").toURL());
        urllist.add(jobCacheDir.toURL());

    }
    urllist.add(workDir.toURL());

    // create a new classloader with the old classloader as its parent
    // then set that classloader as the one used by the current jobconf
    URL[] urls = urllist.toArray(new URL[urllist.size()]);
    URLClassLoader loader = new URLClassLoader(urls, parent);
    conf.setClassLoader(loader);
}

From source file:com.xiaoxiaomo.mr.utils.kafka.HadoopJob.java

License:Apache License

public int run(String[] args) throws Exception {
    CommandLineParser parser = new PosixParser();
    Options options = buildOptions();
    CommandLine cmd = parser.parse(options, args);

    if (cmd.hasOption("h") || cmd.getArgs().length == 0) {
        printHelpAndExit(options);
    }

    String hdfsPath = cmd.getArgs()[0];
    Configuration conf = getConf();
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);

    if (cmd.hasOption("topics")) {
        LOG.info("Using topics: " + cmd.getOptionValue("topics"));
        KafkaInputFormat.configureKafkaTopics(conf, cmd.getOptionValue("topics"));
    } else {
        printHelpAndExit(options);
    }

    KafkaInputFormat.configureZkConnection(conf, cmd.getOptionValue("zk-connect", "localhost:2181"));
    if (cmd.hasOption("consumer-group")) {
        CheckpointManager.configureUseZooKeeper(conf,
                cmd.getOptionValue("consumer-group", "dev-hadoop-loader"));
    }

    if (cmd.getOptionValue("autooffset-reset") != null) {
        KafkaInputFormat.configureAutoOffsetReset(conf, cmd.getOptionValue("autooffset-reset"));
    }

    JobConf jobConf = new JobConf(conf);
    if (cmd.hasOption("remote")) {
        String ip = cmd.getOptionValue("remote");
        LOG.info("Default file system: hdfs://" + ip + ":8020/");
        jobConf.set("fs.defaultFS", "hdfs://" + ip + ":8020/");
        LOG.info("Remote jobtracker: " + ip + ":8021");
        jobConf.set("mapred.job.tracker", ip + ":8021");
    }

    Path jarTarget = new Path(
            getClass().getProtectionDomain().getCodeSource().getLocation() + "../kafka-hadoop-loader.jar");

    if (new File(jarTarget.toUri()).exists()) {
        // running from IDE/ as maven
        jobConf.setJar(jarTarget.toUri().getPath());
        LOG.info("Using target jar: " + jarTarget.toString());
    } else {
        // running from jar remotely or locally
        jobConf.setJarByClass(getClass());
        LOG.info("Using parent jar: " + jobConf.getJar());
    }

    Job job = Job.getInstance(jobConf, "kafka.hadoop.loader");

    job.setInputFormatClass(KafkaInputFormat.class);
    job.setMapperClass(HadoopJobMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(MultiOutputFormat.class);
    job.setNumReduceTasks(0);

    MultiOutputFormat.setOutputPath(job, new Path(hdfsPath));
    MultiOutputFormat.setCompressOutput(job, cmd.getOptionValue("compress-output", "on").equals("on"));

    LOG.info("Output hdfs location: {}", hdfsPath);
    LOG.info("Output hdfs compression: {}", MultiOutputFormat.getCompressOutput(job));

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:edu.stolaf.cs.wmrserver.streaming.PipeMapRed.java

License:Apache License

public void configure(JobConf job) {
    try {
        String argv = getPipeCommand(job);

        joinDelay_ = job.getLong("stream.joindelay.milli", 0);

        job_ = job;
        fs_ = FileSystem.get(job_);

        nonZeroExitIsFailure_ = job_.getBoolean("stream.non.zero.exit.is.failure", true);

        doPipe_ = getDoPipe();
        if (!doPipe_)
            return;

        setStreamJobDetails(job);

        String[] argvSplit = splitArgs(argv);
        String prog = argvSplit[0];
        File currentDir = new File(".").getAbsoluteFile();
        if (new File(prog).isAbsolute()) {
            // we don't own it. Hope it is executable
        } else {
            // Try to find executable in unpacked job JAR and make absolute if
            // present. Otherwise, leave it as relative to be resolved against PATH
            File jarDir = new File(job.getJar()).getParentFile();
            File progFile = new File(jarDir, argvSplit[0]);
            if (progFile.isFile()) {
                progFile.setExecutable(true);
                argvSplit[0] = progFile.getAbsolutePath();
            }
        }

        logprintln("PipeMapRed exec " + Arrays.asList(argvSplit));
        Hashtable<String, String> childEnv = new Hashtable();
        addJobConfToEnvironment(job_, childEnv);
        addEnvironment(childEnv, job_.get("stream.addenvironment"));
        // add TMPDIR environment variable with the value of java.io.tmpdir
        envPut(childEnv, "TMPDIR", System.getProperty("java.io.tmpdir"));

        // Start the process
        ProcessBuilder builder = new ProcessBuilder(argvSplit);
        // The process' environment initially inherits all vars from the parent --
        // only setting those we add/override
        builder.environment().putAll(childEnv);
        // Set the working directory to the job jars directory
        // This is a bad idea... fix this.
        builder.directory(new File(job.getJar()).getParentFile());
        sim = builder.start();

        clientOut_ = new DataOutputStream(new BufferedOutputStream(sim.getOutputStream(), BUFFER_SIZE));
        clientIn_ = new DataInputStream(new BufferedInputStream(sim.getInputStream(), BUFFER_SIZE));
        clientErr_ = new DataInputStream(new BufferedInputStream(sim.getErrorStream()));
        startTime_ = System.currentTimeMillis();

        errThread_ = new MRErrorThread();
        errThread_.start();
    } catch (Exception e) {
        logStackTrace(e);
        LOG.error("configuration exception", e);
        throw new RuntimeException("configuration exception", e);
    }
}

From source file:edu.uci.ics.hyracks.hadoop.compat.driver.CompatibilityLayer.java

License:Apache License

private String getApplicationNameForHadoopJob(JobConf jobConf) {
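    // Use the job jar's file name as the application name; fall back to a timestamp when no jar is set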
    String jar = jobConf.getJar();
    if (jar != null) {
        return jar.substring(jar.lastIndexOf("/") >= 0 ? jar.lastIndexOf("/") + 1 : 0);
    } else {
        return "" + System.currentTimeMillis();
    }
}