Example usage for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

On this page you can find example usages of the setJarByClass method of org.apache.hadoop.mapreduce.Job.

Prototype

public void setJarByClass(Class<?> cls)

Source Link

Document

Set the Jar by finding where a given class came from.

Usage

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Creates a map-only Hadoop job (the number of reduce tasks is set to zero).
 * Does not set the name on the job.
 *
 * <p>The job is created from a new {@link Configuration} constructed from {@code conf}.
 * The mapper key/value classes are used both as the map output classes and as the
 * job output classes, and "mapred.compress.map.output" is set to {@code true}.
 *
 * @param inputPath    the input path, stored under "mapred.input.dir"
 * @param outputPath   the output path, stored under "mapred.output.dir"
 * @param inputFormat  the input format class
 * @param mapper       the mapper class; also used to locate the job jar
 * @param mapperKey    the key class emitted by the mapper
 * @param mapperValue  the value class emitted by the mapper
 * @param outputFormat the output format class
 * @param conf         the configuration the job's configuration is derived from
 * @return the configured job
 * @throws IOException if there is a problem with the IO
 * @throws IllegalStateException if {@code mapper} is the {@code Mapper} class itself
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf)
        throws IOException {

    final Job mapOnlyJob = new Job(new Configuration(conf));
    final Configuration settings = mapOnlyJob.getConfiguration();

    // The Mapper class itself is rejected as the class used to locate the jar.
    final boolean isStockMapper = mapper.equals(Mapper.class);
    if (isStockMapper) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapOnlyJob.setJarByClass(mapper);

    // Input side.
    settings.set("mapred.input.dir", inputPath.toString());
    mapOnlyJob.setInputFormatClass(inputFormat);

    // Map side: no reducers, so the map output types double as the job output types.
    mapOnlyJob.setNumReduceTasks(0);
    mapOnlyJob.setMapperClass(mapper);
    mapOnlyJob.setMapOutputKeyClass(mapperKey);
    mapOnlyJob.setOutputKeyClass(mapperKey);
    mapOnlyJob.setMapOutputValueClass(mapperValue);
    mapOnlyJob.setOutputValueClass(mapperValue);
    settings.setBoolean("mapred.compress.map.output", true);

    // Output side.
    settings.set("mapred.output.dir", outputPath.toString());
    mapOnlyJob.setOutputFormatClass(outputFormat);

    return mapOnlyJob;
}

From source file:com.cloudy.mapred.base.JobUtil.java

License:Apache License

/**
 * Create a map and reduce Hadoop job.  Does not set the name on the job.
 *
 * <p>The job jar is located from the reducer class, unless the reducer is the
 * {@link org.apache.hadoop.mapreduce.Reducer} class itself, in which case the mapper class
 * is used instead. Map-output compression ("mapred.compress.map.output") is not set by
 * this method.
 *
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class.  If the Mapper is a no-op,
 *                  this value may be null
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.  If the Mapper is a no-op,
 *                    this value may be null
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 * @throws IllegalStateException if both {@code mapper} and {@code reducer} are the base
 *                               {@code Mapper} / {@code Reducer} classes
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 * org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer,
        Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue,
        Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException {

    final Job mapReduceJob = new Job(conf);
    final Configuration settings = mapReduceJob.getConfiguration();

    // Pick the class used to locate the jar: the reducer first, the mapper as fallback.
    final Class<?> jarAnchor;
    if (!reducer.equals(Reducer.class)) {
        jarAnchor = reducer;
    } else if (!mapper.equals(Mapper.class)) {
        jarAnchor = mapper;
    } else {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    mapReduceJob.setJarByClass(jarAnchor);

    // Input side.
    settings.set("mapred.input.dir", inputPath.toString());
    mapReduceJob.setInputFormatClass(inputFormat);

    // Map side: the output key/value classes are only set when they were supplied.
    mapReduceJob.setMapperClass(mapper);
    if (mapperKey != null) {
        mapReduceJob.setMapOutputKeyClass(mapperKey);
    }
    if (mapperValue != null) {
        mapReduceJob.setMapOutputValueClass(mapperValue);
    }

    // Reduce side.
    mapReduceJob.setReducerClass(reducer);
    mapReduceJob.setOutputKeyClass(reducerKey);
    mapReduceJob.setOutputValueClass(reducerValue);

    // Output side.
    settings.set("mapred.output.dir", outputPath.toString());
    mapReduceJob.setOutputFormatClass(outputFormat);

    return mapReduceJob;
}

From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java

License:Open Source License

/**
 * Builds and runs the daily score-info MapReduce job.
 *
 * <p>The statistics date is obtained from {@code DateUtil.getFilterDate(args)} and stored in the
 * configuration under {@code STAT_DAY}. The score-info input path (configuration key
 * {@code SCORE_INFO_INPUT_PATH}) is registered as a sequence-file input handled by
 * {@code ScoreInfoDayMapper}. The account/phone mapping input and the HBase table reducer
 * are not wired in by this method.
 *
 * @param args command-line arguments, passed to the configuration and date helpers
 * @return 0 when the job completes successfully; 1 when the statistics date cannot be
 *         determined, the score-info input path does not exist, or the job fails
 * @throws Exception if configuration, job setup or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf());

    // Resolve the statistics date. Failure is reported through the return code rather than
    // System.exit so that the caller decides how the process terminates.
    String statDate = DateUtil.getFilterDate(args);
    if (statDate == null) {
        logger.error("Unable to determine the statistics date from the arguments");
        return 1;
    }
    conf.set(STAT_DAY, statDate);

    // Create the job.
    Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate);
    job.setJarByClass(ScoreInfoDayJob.class);

    // Register the score-info input. It is the only input of this job, so a missing path
    // is a failure: submitting the job without any input path cannot succeed.
    String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH);
    Path scoreInfoPath = new Path(scoreInfoInput);
    if (!FileSystemUtil.exists(scoreInfoPath)) {
        logger.error("Path [{}] not exist!", scoreInfoInput);
        return 1;
    }
    MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class,
            ScoreInfoDayMapper.class);
    logger.info("SocreInfoPath is {}", scoreInfoInput);

    // Remaining job settings. No reducer class is set here and NullOutputFormat is used
    // as the job's output format.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(ScoreInfo.class);
    job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40));
    job.setOutputFormatClass(NullOutputFormat.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MaraAnnotationUtil.java

License:Apache License

/**
 * Applies the {@code JobInfo} annotation found on {@code jobField} to {@code job}: the job
 * name, the number of reduce tasks and (unless "mapred.jar" is already set in the job's
 * configuration) the job jar, then delegates to {@code handleJobFieldAnnotations}.
 *
 * @param job      the job
 * @param jobField the field to retrieve annotations from
 * @param driver   the driver bean
 * @param context  the tool context
 * @throws ToolException if any issue is encountered through reflection or expression evaluation
 */
public void configureJobFromField(Job job, Field jobField, Object driver, AnnotatedToolContext context)
        throws ToolException {

    final JobInfo info = jobField.getAnnotation(JobInfo.class);

    // Job name: value() wins, then name(), then the default id derived from the driver class.
    String jobName = info.value();
    if (StringUtils.isBlank(jobName)) {
        jobName = info.name();
    }
    if (StringUtils.isBlank(jobName)) {
        jobName = defaultDriverIdForClass(driver.getClass());
    }
    final Object evaluatedName = ExpressionEvaluator.instance().evaluate(driver, context, jobName);
    job.setJobName((String) evaluatedName);

    // Reducer count: "-1" leaves the job untouched; a numeric string is used directly,
    // anything else is evaluated as an expression.
    final String reducerSpec = info.numReducers();
    final boolean reducersSpecified = !reducerSpec.equals("-1");
    if (reducersSpecified) {
        if (!NumberUtils.isNumber(reducerSpec)) {
            final Object evaluated = ExpressionEvaluator.instance().evaluate(driver, context, reducerSpec);
            if (evaluated != null) {
                job.setNumReduceTasks((Integer) evaluated);
            }
        } else {
            job.setNumReduceTasks(Integer.parseInt(reducerSpec));
        }
    }

    // We can override (the runjob script does) which jar to use instead of using running driver class
    final String presetJar = job.getConfiguration().get("mapred.jar");
    if (StringUtils.isBlank(presetJar)) {
        job.setJarByClass(driver.getClass());
    }

    handleJobFieldAnnotations(job, jobField, info);
}

From source file:com.conversantmedia.mapreduce.tool.BaseTool.java

License:Apache License

/**
 * Drives the tool: creates a context, parses the arguments into it, initializes the
 * job, launches it and performs the post-run steps, notifying listeners at each stage.
 *
 * <p>When the context reports a dry run, the job is initialized but not launched and 0 is
 * returned. A {@code ParseException} is handed to {@code context.showHelpAndExit} with
 * status 1. Any other exception is logged and forwarded to listeners as
 * {@code Event.EXCEPTION}; it is not rethrown from that handler.
 *
 * @param args the command-line arguments, parsed by the context
 * @return 0 for a dry run, otherwise {@code context.getReturnCode()}
 * @throws Exception declared by the signature; exceptions raised inside the try block are
 *                   handled by the catch clauses below
 */
@Override
public int run(String[] args) throws Exception {
    final T context = newContext();
    // Relay the context's option-initialization and command-line-parsing callbacks to the
    // tool's listeners.
    context.setContextListener(new ToolContextListener() {
        @Override
        public void afterInitOptions(Options options) throws Exception {
            notifyListeners(Event.AFTER_INIT_CLI_OPTIONS, context, options);
        }

        @Override
        public void afterParseCommandLine(CommandLine commandLine) throws Exception {
            notifyListeners(Event.AFTER_PARSE_CLI, context, commandLine);
        }
    });

    context.setDriverClass(this.getClass());

    try {
        // Register ourselves as a listener
        this.addListener(this);

        context.parseFromArgs(args);

        // Useful info
        logger().info(context.toString());

        // Perform any specific initialization tasks
        notifyListeners(Event.BEFORE_INIT_DRIVER, context, null);
        initInternal(context);

        // Notify any listeners before initializing job
        notifyListeners(Event.BEFORE_INIT_JOB, context, null);

        // Initialize our job
        Job job = initJob(context);
        // We can override (the runjob script does) which jar to use instead of using running driver class
        if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) {
            logger().info("Setting job jar by class [" + this.getClass() + "]");
            job.setJarByClass(this.getClass());
        }
        context.setJob(job);

        // Post-initialization routines
        jobPostInit(context);

        // Optionally print the context and the job configuration before running
        if (context.isDumpConfig()) {
            Console.out(context.toString());
            dumpConfig(job.getConfiguration());
        }

        // A dry run stops here: the job is fully initialized but never launched
        if (context.isDryRun()) {
            Console.out("Dry run only. Job will not be executed.");
            return 0;
        }

        // Launches the job
        launchJob(context, job);

        // Now move our input to archive
        // (only when the return code is 0 and an archive is configured on the context)
        if (context.getReturnCode() == 0 && context.getArchive() != null) {
            archiveInputs(context);
        }

        // Clean up our job
        cleanUp(context);

        notifyListeners(Event.BEFORE_EXIT, context, null);
    } catch (ParseException pe) {
        // Output a more "friendly" message
        context.showHelpAndExit(context.initOptions(), 1, pe.getMessage());
    } catch (Exception e) {
        // Log the failure and let listeners react to it.
        // NOTE(review): the value returned on this path is whatever the context holds;
        // confirm that a failure sets a non-zero return code on the context.
        logger().error("Problem running tool: " + e.getMessage(), e);
        notifyListeners(Event.EXCEPTION, context, e);
    }

    return context.getReturnCode();
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * Runs the job against a single archive, "zip-01.zip", under the input path and writes
 * to "Output_Single" under the working path.
 *
 * Expected result: success
 *
 * @throws IOException declared by the job API calls
 * @throws InterruptedException declared by the job API calls
 * @throws ClassNotFoundException declared by the job API calls
 */
public void testSingle() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testSingle()                    ==");
    LOG.info("============================================================");

    // Job identity and the map/reduce classes
    final Job zipJob = new Job(conf);
    final Class<?> testClass = getClass();
    zipJob.setJobName(testClass.getSimpleName());
    zipJob.setJarByClass(testClass);
    zipJob.setMapperClass(MyMapper.class);
    zipJob.setReducerClass(MyReducer.class);

    // Input and output formats
    zipJob.setInputFormatClass(ZipFileInputFormat.class);
    zipJob.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    zipJob.setOutputKeyClass(Text.class);
    zipJob.setOutputValueClass(IntWritable.class);

    // Input archive and output directory
    final Path singleArchive = new Path(inputPath, "zip-01.zip");
    ZipFileInputFormat.setInputPaths(zipJob, singleArchive);
    final Path resultDir = new Path(workingPath, "Output_Single");
    TextOutputFormat.setOutputPath(zipJob, resultDir);

    // The job must complete successfully
    final boolean succeeded = zipJob.waitForCompletion(true);
    assertTrue(succeeded);
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test operates on a Path containing files that will cause the Job to fail
 * /*from  w ww  . java  2 s.  c o  m*/
 * Expected result: failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testMultiple()                  ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, inputPath);
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple"));

    //
    assertFalse(job.waitForCompletion(true));
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * Same setup as testMultiple(), except that the ZipFileInputFormat is switched to lenient
 * mode, so the errors that cause testMultiple() to fail will be quietly ignored here.
 * Output goes to "Output_MultipleLenient" under the working path.
 *
 * Expected result: success
 *
 * @throws IOException declared by the job API calls
 * @throws InterruptedException declared by the job API calls
 * @throws ClassNotFoundException declared by the job API calls
 */
public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testMultipleLenient()           ==");
    LOG.info("============================================================");

    // Job identity and the map/reduce classes
    final Job zipJob = new Job(conf);
    final Class<?> testClass = getClass();
    zipJob.setJobName(testClass.getSimpleName());
    zipJob.setJarByClass(testClass);
    zipJob.setMapperClass(MyMapper.class);
    zipJob.setReducerClass(MyReducer.class);

    // Input and output formats
    zipJob.setInputFormatClass(ZipFileInputFormat.class);
    zipJob.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    zipJob.setOutputKeyClass(Text.class);
    zipJob.setOutputValueClass(IntWritable.class);

    // Enable lenient mode, then point the job at the whole input path
    ZipFileInputFormat.setLenient(true);
    ZipFileInputFormat.setInputPaths(zipJob, inputPath);
    final Path resultDir = new Path(workingPath, "Output_MultipleLenient");
    TextOutputFormat.setOutputPath(zipJob, resultDir);

    // The job must complete successfully
    final boolean succeeded = zipJob.waitForCompletion(true);
    assertTrue(succeeded);
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * Runs the job against "encrypted.zip". ZipInputStream doesn't support encrypted entries,
 * thus this will fail. Output goes to "Output_Encrypted" under the working path.
 *
 * Expected result: failure
 *
 * @throws IOException declared by the job API calls
 * @throws InterruptedException declared by the job API calls
 * @throws ClassNotFoundException declared by the job API calls
 */
public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException {
    LOG.info("============================================================");
    LOG.info("==                Running testEncryptedZip()              ==");
    LOG.info("============================================================");

    // Job identity and the map/reduce classes
    final Job zipJob = new Job(conf);
    final Class<?> testClass = getClass();
    zipJob.setJobName(testClass.getSimpleName());
    zipJob.setJarByClass(testClass);
    zipJob.setMapperClass(MyMapper.class);
    zipJob.setReducerClass(MyReducer.class);

    // Input and output formats
    zipJob.setInputFormatClass(ZipFileInputFormat.class);
    zipJob.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    zipJob.setOutputKeyClass(Text.class);
    zipJob.setOutputValueClass(IntWritable.class);

    // Encrypted archive in, dedicated output directory out
    final Path encryptedArchive = new Path(inputPath, "encrypted.zip");
    ZipFileInputFormat.setInputPaths(zipJob, encryptedArchive);
    final Path resultDir = new Path(workingPath, "Output_Encrypted");
    TextOutputFormat.setOutputPath(zipJob, resultDir);

    // The job is expected to fail
    final boolean succeeded = zipJob.waitForCompletion(true);
    assertFalse(succeeded);
}

From source file:com.cotdp.hadoop.ZipFileTest.java

License:Apache License

/**
 * This test explicitly tries to read a file containing random noise as a ZIP file,
 * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is
 * encountered./*from  w  w w  .  ja  va2  s.com*/
 * 
 * Expected result: (quiet) failure
 * 
 * @throws IOException 
 * @throws InterruptedException 
 * @throws ClassNotFoundException 
 */
public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException

{
    LOG.info("============================================================");
    LOG.info("==                Running testNonZipData()                ==");
    LOG.info("============================================================");

    // Standard stuff
    Job job = new Job(conf);
    job.setJobName(this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // 
    job.setInputFormatClass(ZipFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // The output files will contain "Word [TAB] Count"
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    //
    ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat"));
    TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData"));

    //
    assertTrue(job.waitForCompletion(true));
}