Example usage for org.apache.hadoop.mapreduce Job setJarByClass

List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setJarByClass.

Prototype

public void setJarByClass(Class<?> cls) 

Source Link

Document

Set the Jar by finding where a given class came from.

Usage

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Builds a map-only Hadoop job: the number of reduce tasks is set to zero and the job's
 * output key/value classes are the mapper's. The job is created from a copy of
 * {@code conf}. Does not set the name on the job.
 *
 * @param inputPath    the input path, stored under {@code mapred.input.dir}
 * @param outputPath   the output path, stored under {@code mapred.output.dir}
 * @param inputFormat  the input format class
 * @param mapper       the mapper class; must not be the base {@code Mapper} class, because it
 *                     is also used to locate the job jar
 * @param mapperKey    the key class emitted by the mapper (also the job output key class)
 * @param mapperValue  the value class emitted by the mapper (also the job output value class)
 * @param outputFormat the output format class
 * @param conf         the configuration to copy into the new job
 * @return the configured job
 * @throws IOException if the job cannot be created
 * @throws IllegalStateException if {@code mapper} is the base {@code Mapper} class
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    final Job mapOnlyJob = new Job(new Configuration(conf));
    final Configuration settings = mapOnlyJob.getConfiguration();

    // The mapper class is what identifies the jar to ship, so the base Mapper is rejected.
    final boolean isBaseMapper = mapper.equals(Mapper.class);
    if (isBaseMapper) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapOnlyJob.setJarByClass(mapper);

    // Input side
    mapOnlyJob.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map side; with no reducers the mapper's types are also the job's output types
    mapOnlyJob.setMapperClass(mapper);
    mapOnlyJob.setMapOutputKeyClass(mapperKey);
    mapOnlyJob.setMapOutputValueClass(mapperValue);
    mapOnlyJob.setOutputKeyClass(mapperKey);
    mapOnlyJob.setOutputValueClass(mapperValue);
    settings.setBoolean("mapred.compress.map.output", true);
    mapOnlyJob.setNumReduceTasks(0);

    // Output side
    mapOnlyJob.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return mapOnlyJob;
}

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Builds a Hadoop job with both a map and a reduce phase. The job name is left unset.
 *
 * <p>The jar shipped with the job is located from the reducer class, unless the reducer is the
 * base {@link org.apache.hadoop.mapreduce.Reducer}, in which case the mapper class is used.
 *
 * @param inputPath the input {@link org.apache.hadoop.fs.Path}
 * @param outputPath the output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat the {@link org.apache.hadoop.mapreduce.InputFormat} class
 * @param mapper the {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey the {@link org.apache.hadoop.io.Writable} key class emitted by the mapper;
 *                  may be null if the mapper is a no-op, in which case it is not set
 * @param mapperValue the {@link org.apache.hadoop.io.Writable} value class emitted by the mapper;
 *                    may be null if the mapper is a no-op, in which case it is not set
 * @param reducer the {@link org.apache.hadoop.mapreduce.Reducer} class to use
 * @param reducerKey the key class emitted by the reducer
 * @param reducerValue the value class emitted by the reducer
 * @param outputFormat the {@link org.apache.hadoop.mapreduce.OutputFormat} class
 * @param conf the {@link org.apache.hadoop.conf.Configuration} to create the job from
 * @return the configured {@link org.apache.hadoop.mapreduce.Job}
 * @throws IOException if the job cannot be created
 * @throws IllegalStateException if both {@code mapper} and {@code reducer} are the base classes
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    final Job mapReduceJob = new Job(conf);
    final Configuration settings = mapReduceJob.getConfiguration();

    // Pick the class used to locate the job jar: reducer first, mapper as the fallback.
    final Class<?> jarAnchor;
    if (!reducer.equals(Reducer.class)) {
        jarAnchor = reducer;
    } else if (!mapper.equals(Mapper.class)) {
        jarAnchor = mapper;
    } else {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapReduceJob.setJarByClass(jarAnchor);

    // Input side
    mapReduceJob.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map side; the intermediate types are optional
    mapReduceJob.setMapperClass(mapper);
    if (mapperKey != null) {
        mapReduceJob.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        mapReduceJob.setMapOutputValueClass(mapperValue);
    }

    // Map-output compression is not forced on here; the setting is left disabled:
    //    jobConf.setBoolean("mapred.compress.map.output", true);

    // Reduce side
    mapReduceJob.setReducerClass(reducer);
    mapReduceJob.setOutputKeyClass(reducerKey);
    mapReduceJob.setOutputValueClass(reducerValue);

    // Output side
    mapReduceJob.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return mapReduceJob;
}

From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java

License:Open Source License

/**
 * Configures and runs the daily score-info job for a single statistics date.
 *
 * <p>The date is resolved from {@code args} via {@code DateUtil.getFilterDate} and stored in the
 * configuration under {@code STAT_DAY}. The score-info input path is read from
 * {@code SCORE_INFO_INPUT_PATH} and registered as a sequence-file input when it exists.
 *
 * @param args the command-line arguments, passed to the configuration/login helper and to the
 *             date resolver
 * @return 0 if the job completed successfully; 1 if the statistics date could not be resolved
 *         or the job failed
 * @throws Exception if configuration, path checks, or job submission fail
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf());

    // Resolve the statistics date; without one there is nothing to run.
    String statDate = DateUtil.getFilterDate(args);
    if (statDate == null) {
        // Report failure through the return code instead of System.exit(1), so the caller
        // (e.g. ToolRunner) decides how to terminate and the JVM is not killed from inside run().
        logger.error("Unable to determine the statistics date from the arguments");
        return 1;
    }

    conf.set(STAT_DAY, statDate);

    // Create the job, named after the statistics date
    Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate);
    job.setJarByClass(ScoreInfoDayJob.class);
    String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH);
    Path scoreInfoPath = new Path(scoreInfoInput);

    // Only referenced by the disabled account/phone mapping input below
    String acctPhoneMapInfoInput = conf.get(ACCT_PHONE_MAP_INPUT_PATH);
    Path accPhoneMapInfoPath = new Path(acctPhoneMapInfoInput);

    // Register the score-info input if the path exists
    if (FileSystemUtil.exists(scoreInfoPath)) {
        MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class,
                ScoreInfoDayMapper.class);
        logger.info("SocreInfoPath is {}", scoreInfoInput);
    } else {
        // NOTE(review): the job is still submitted when the input is missing — confirm intended
        logger.error("Path [{}] not exist!", scoreInfoInput);
    }

    // Account/phone mapping input (currently disabled)
    //        if (FileSystemUtil.exists(accPhoneMapInfoPath)) {
    //            MultipleInputs.addInputPath(job, accPhoneMapInfoPath, TextInputFormat.class,
    //                    AcctPhoneMapper.class);
    //            logger.info("AccPhoneMapInfoPath is " + acctPhoneMapInfoInput);
    //        } else {
    //            logger.error("Path [{}] not exist!", acctPhoneMapInfoInput);
    //        }

    // Job output settings; the reducer count defaults to 40 when REDUCE_NUMBER is not configured
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScoreInfo.class);
    job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40));
    job.setOutputFormatClass(NullOutputFormat.class);

    // NOTE(review): with the table reducer wiring below disabled, no reducer class is set in
    // this method and NullOutputFormat is the output format — confirm this is intended.
    //        TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2,
    //                ScoreInfoDayReducer.class, job);

    return (job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MaraAnnotationUtil.java

License:Apache License

/**
 * Applies the settings declared by a field's {@code JobInfo} annotation to a job: the job
 * name, the number of reducers, and (when no jar is configured yet) the job jar.
 *
 * @param job      the job to configure
 * @param jobField the field to retrieve annotations from
 * @param driver   the driver bean
 * @param context  the tool context
 * @throws ToolException if any issue is encountered through reflection or expression evaluation
 */
public void configureJobFromField(Job job, Field jobField, Object driver, AnnotatedToolContext context)
        throws ToolException {

    final JobInfo info = jobField.getAnnotation(JobInfo.class);

    // Name precedence: annotation value, then annotation name, then the driver's default id.
    String jobName = info.value();
    if (StringUtils.isBlank(jobName)) {
        jobName = info.name();
    }
    if (StringUtils.isBlank(jobName)) {
        jobName = defaultDriverIdForClass(driver.getClass());
    }
    jobName = (String) ExpressionEvaluator.instance().evaluate(driver, context, jobName);
    job.setJobName(jobName);

    // "-1" means the annotation does not specify a reducer count.
    final String reducerSpec = info.numReducers();
    final boolean reducersSpecified = !reducerSpec.equals("-1");
    if (reducersSpecified && NumberUtils.isNumber(reducerSpec)) {
        job.setNumReduceTasks(Integer.valueOf(reducerSpec));
    } else if (reducersSpecified) {
        // Anything non-numeric is treated as an expression to evaluate.
        final Object evaluated = ExpressionEvaluator.instance().evaluate(driver, context, reducerSpec);
        if (evaluated != null) {
            job.setNumReduceTasks((Integer) evaluated);
        }
    }

    // We can override (the runjob script does) which jar to use instead of using running driver class
    final String presetJar = job.getConfiguration().get("mapred.jar");
    if (StringUtils.isBlank(presetJar)) {
        job.setJarByClass(driver.getClass());
    }

    handleJobFieldAnnotations(job, jobField, info);
}

From source file:com.conversantmedia.mapreduce.tool.BaseTool.java

License:Apache License

/**
 * Runs the tool's full lifecycle: creates a context, parses the arguments, initializes the
 * driver and the job, optionally dumps the configuration, launches the job, archives the
 * inputs on success, and cleans up. Listeners are notified at each stage.
 *
 * @param args the command-line arguments, parsed by the context
 * @return 0 for a dry run; otherwise the return code held by the context
 * @throws Exception declared by the signature; exceptions raised inside the lifecycle are
 *                   caught and reported to listeners rather than propagated
 */
@Override
public int run(String[] args) throws Exception {
    final T context = newContext();
    // Forward the context's own option/command-line callbacks to this tool's listeners.
    context.setContextListener(new ToolContextListener() {
        @Override
        public void afterInitOptions(Options options) throws Exception {
            notifyListeners(Event.AFTER_INIT_CLI_OPTIONS, context, options);
        }

        @Override
        public void afterParseCommandLine(CommandLine commandLine) throws Exception {
            notifyListeners(Event.AFTER_PARSE_CLI, context, commandLine);
        }
    });

    context.setDriverClass(this.getClass());

    try {
        // Register ourselves as a listener
        this.addListener(this);

        context.parseFromArgs(args);

        // Useful info
        logger().info(context.toString());

        // Perform any specific initialization tasks
        notifyListeners(Event.BEFORE_INIT_DRIVER, context, null);
        initInternal(context);

        // Notify any listeners before initializing job
        notifyListeners(Event.BEFORE_INIT_JOB, context, null);

        // Initialize our job
        Job job = initJob(context);
        // We can override (the runjob script does) which jar to use instead of using running driver class
        if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) {
            logger().info("Setting job jar by class [" + this.getClass() + "]");
            job.setJarByClass(this.getClass());
        }
        context.setJob(job);

        // Post-initialization routines
        jobPostInit(context);

        // Optionally print the context and the job configuration
        if (context.isDumpConfig()) {
            Console.out(context.toString());
            dumpConfig(job.getConfiguration());
        }

        // A dry run stops here: the job is configured but never launched, and the
        // clean-up and BEFORE_EXIT steps below are skipped.
        if (context.isDryRun()) {
            Console.out("Dry run only. Job will not be executed.");
            return 0;
        }

        // Launches the job
        launchJob(context, job);

        // Now move our input to archive, but only after a successful run with an archive configured
        if (context.getReturnCode() == 0 && context.getArchive() != null) {
            archiveInputs(context);
        }

        // Clean up our job
        cleanUp(context);

        notifyListeners(Event.BEFORE_EXIT, context, null);
    } catch (ParseException pe) {
        // Output a more "friendly" message
        // NOTE(review): showHelpAndExit presumably terminates the JVM with status 1 — confirm
        context.showHelpAndExit(context.initOptions(), 1, pe.getMessage());
    } catch (Exception e) {
        // The failure is logged and reported to listeners but not rethrown.
        // NOTE(review): the value returned below comes from the context — confirm it is
        // non-zero when an exception lands here.
        logger().error("Problem running tool: " + e.getMessage(), e);
        notifyListeners(Event.EXCEPTION, context, e);
    }

    return context.getReturnCode();
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * Runs the job against a single ZIP file ("zip-01.zip" under the input path).
 *
 * Expected result: success
 *
 * @throws IOException if the job cannot be created or submitted
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be found
 */
public void testSingle() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testSingle()                    ==");
    LOG.info("============================================================");

    // Job identity plus the map and reduce implementations
    final Job zipJob = new Job(conf);
    zipJob.setJobName(this.getClass().getSimpleName());
    zipJob.setJarByClass(this.getClass());
    zipJob.setMapperClass(MyMapper.class);
    zipJob.setReducerClass(MyReducer.class);

    // ZIP archives in, plain text out
    zipJob.setInputFormatClass(ZipFileInputFormat.class);
    zipJob.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    zipJob.setOutputKeyClass(Text.class);
    zipJob.setOutputValueClass(IntWritable.class);

    // One archive as input, a dedicated output directory
    final Path source = new Path(inputPath, "zip-01.zip");
    final Path destination = new Path(workingPath, "Output_Single");
    ZipFileInputFormat.setInputPaths(zipJob, source);
    TextOutputFormat.setOutputPath(zipJob, destination);

    // The job is expected to complete successfully
    final boolean succeeded = zipJob.waitForCompletion(true);
    assertTrue(succeeded);
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a Path containing files that will cause the Job to fail
 * /*from  w ww  . java  2 s.  c o  m*/
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultiple()                  ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test is identical to testMultiple() however the ZipFileInputFormat is set to
 * be lenient, errors that cause testMultiple() to fail will be quietly ignored here.
 *
 * The lenient flag is set through a static call, so it is switched back off when the
 * test finishes to keep it from leaking into tests that expect strict behaviour.
 *
 * Expected result: success
 *
 * @throws IOException if the job cannot be created or submitted
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be found
 */
public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultipleLenient()           ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // ZIP archives in, plain text out
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Leniency is not tied to this job instance, so it must be undone afterwards.
    ZipFileInputFormat.setLenient(true);
    try {
        ZipFileInputFormat.setInputPaths(job, inputPath);
        TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient"));

        // With leniency on, the bad files are skipped and the job succeeds
        assertTrue(job.waitForCompletion(true));
    } finally {
        // Assumes non-lenient is the default (testMultiple relies on strict failure) — TODO confirm
        ZipFileInputFormat.setLenient(false);
    }
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * Runs the job against an encrypted archive. ZipInputStream doesn't support encrypted
 * entries, thus this will fail.
 *
 * Expected result: failure
 *
 * @throws IOException if the job cannot be created or submitted
 * @throws InterruptedException if the thread is interrupted while waiting for the job
 * @throws ClassNotFoundException if a job class cannot be found
 */
public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testEncryptedZip()              ==");
    LOG.info("============================================================");

    // Job identity plus the map and reduce implementations
    final Job zipJob = new Job(conf);
    zipJob.setJobName(this.getClass().getSimpleName());
    zipJob.setJarByClass(this.getClass());
    zipJob.setMapperClass(MyMapper.class);
    zipJob.setReducerClass(MyReducer.class);

    // ZIP archives in, plain text out
    zipJob.setInputFormatClass(ZipFileInputFormat.class);
    zipJob.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    zipJob.setOutputKeyClass(Text.class);
    zipJob.setOutputValueClass(IntWritable.class);

    // The encrypted archive as the only input, a dedicated output directory
    final Path source = new Path(inputPath, "encrypted.zip");
    final Path destination = new Path(workingPath, "Output_Encrypted");
    ZipFileInputFormat.setInputPaths(zipJob, source);
    TextOutputFormat.setOutputPath(zipJob, destination);

    // The job is expected to fail
    final boolean succeeded = zipJob.waitForCompletion(true);
    assertFalse(succeeded);
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test explicitly tries to read a file containing random noise as a ZIP file,
 * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is
 * encountered./*from  w  w w  .  ja  va2  s.com*/
 * 
 * Expected result: (quiet) failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testNonZipData()                ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData"));

    //
    assertTrue(job.waitForCompletion(true));
}