List of usage examples for org.apache.hadoop.mapreduce Job setJarByClass
public void setJarByClass(Class<?> cls)
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/**
 * Create a map-only Hadoop job (the number of reduce tasks is set to zero).
 * Does not set the name on the job.
 *
 * <p>The job is built from a copy of {@code conf}. The mapper key/value classes are
 * registered both as the map output classes and as the job output classes, and
 * {@code mapred.compress.map.output} is switched on.
 *
 * @param inputPath    the input path, stored under {@code mapred.input.dir}
 * @param outputPath   the output path, stored under {@code mapred.output.dir}
 * @param inputFormat  the input format class
 * @param mapper       the mapper class; also used to locate the job jar
 * @param mapperKey    the key class emitted by the mapper
 * @param mapperValue  the value class emitted by the mapper
 * @param outputFormat the output format class
 * @param conf         the configuration the job's own configuration is copied from
 * @return the configured job
 * @throws IOException if the job cannot be created
 * @throws IllegalStateException if {@code mapper} is the base {@code Mapper} class,
 *         because no user jar can be derived from it
 */
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat,
        Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey,
        Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat,
        Configuration conf) throws IOException {
    Job job = new Job(new Configuration(conf));
    Configuration settings = job.getConfiguration();

    // The jar is located through the mapper, so the framework's own base class is useless here.
    boolean mapperIsBaseClass = mapper.equals(Mapper.class);
    if (mapperIsBaseClass) {
        throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);

    // Input side.
    job.setInputFormatClass(inputFormat);
    settings.set("mapred.input.dir", inputPath.toString());

    // Map side; with no reducers the map output types are also the job output types.
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(mapperKey);
    job.setMapOutputValueClass(mapperValue);
    job.setOutputKeyClass(mapperKey);
    job.setOutputValueClass(mapperValue);
    settings.setBoolean("mapred.compress.map.output", true);
    job.setNumReduceTasks(0);

    // Output side.
    job.setOutputFormatClass(outputFormat);
    settings.set("mapred.output.dir", outputPath.toString());

    return job;
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/** * Create a map and reduce Hadoop job. Does not set the name on the job. * @param inputPath The input {@link org.apache.hadoop.fs.Path} * @param outputPath The output {@link org.apache.hadoop.fs.Path} * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat} * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op, * this value may be null * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op, * this value may be null * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use * @param reducerKey The reducer key class. * @param reducerValue The reducer value class. * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}. * @param conf The {@link org.apache.hadoop.conf.Configuration} to use. * @return The {@link org.apache.hadoop.mapreduce.Job}. * @throws IOException if there is a problem with the IO. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, * org.apache.hadoop.conf.Configuration) *//* w ww . j a va 2s . c o m*/ public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? 
extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } // jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java
License:Open Source License
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf()); // ?()//from www. jav a 2 s .co m String statDate = DateUtil.getFilterDate(args); if (statDate == null) { System.exit(1); } conf.set(STAT_DAY, statDate); // ?job Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate); job.setJarByClass(ScoreInfoDayJob.class); String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH); Path scoreInfoPath = new Path(scoreInfoInput); String acctPhoneMapInfoInput = conf.get(ACCT_PHONE_MAP_INPUT_PATH); Path accPhoneMapInfoPath = new Path(acctPhoneMapInfoInput); // ? if (FileSystemUtil.exists(scoreInfoPath)) { MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class, ScoreInfoDayMapper.class); logger.info("SocreInfoPath is " + scoreInfoInput); } else { logger.error("Path [{}] not exist!", scoreInfoInput); } // ?? // if (FileSystemUtil.exists(accPhoneMapInfoPath)) { // MultipleInputs.addInputPath(job, accPhoneMapInfoPath, TextInputFormat.class, // AcctPhoneMapper.class); // logger.info("AccPhoneMapInfoPath is " + acctPhoneMapInfoInput); // } else { // logger.error("Path [{}] not exist!", acctPhoneMapInfoInput); // } // job job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(ScoreInfo.class); job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40)); job.setOutputFormatClass(NullOutputFormat.class); // TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2, // ScoreInfoDayReducer.class, job); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MaraAnnotationUtil.java
License:Apache License
/** * * @param job the job * @param jobField the field to retrieve annotations from * @param driver the driver bean * @param context the tool context * @throws ToolException if any issue is encountered through reflection or expression evaluation *//*from w w w. j a v a 2 s .c o m*/ public void configureJobFromField(Job job, Field jobField, Object driver, AnnotatedToolContext context) throws ToolException { JobInfo jobInfo = jobField.getAnnotation(JobInfo.class); String name = StringUtils.isBlank(jobInfo.value()) ? jobInfo.name() : jobInfo.value(); if (StringUtils.isBlank(name)) { name = defaultDriverIdForClass(driver.getClass()); } name = (String) ExpressionEvaluator.instance().evaluate(driver, context, name); job.setJobName(name); if (!jobInfo.numReducers().equals("-1")) { if (NumberUtils.isNumber(jobInfo.numReducers())) { job.setNumReduceTasks(Integer.valueOf(jobInfo.numReducers())); } else { Object reducerValue = ExpressionEvaluator.instance().evaluate(driver, context, jobInfo.numReducers()); if (reducerValue != null) { job.setNumReduceTasks((Integer) reducerValue); } } } // We can override (the runjob script does) which jar to use instead of using running driver class if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) { job.setJarByClass(driver.getClass()); } handleJobFieldAnnotations(job, jobField, jobInfo); }
From source file:com.conversantmedia.mapreduce.tool.BaseTool.java
License:Apache License
@Override public int run(String[] args) throws Exception { final T context = newContext(); context.setContextListener(new ToolContextListener() { @Override/*from w w w. j av a 2 s . c om*/ public void afterInitOptions(Options options) throws Exception { notifyListeners(Event.AFTER_INIT_CLI_OPTIONS, context, options); } @Override public void afterParseCommandLine(CommandLine commandLine) throws Exception { notifyListeners(Event.AFTER_PARSE_CLI, context, commandLine); } }); context.setDriverClass(this.getClass()); try { // Register ourselves as a listener this.addListener(this); context.parseFromArgs(args); // Useful info logger().info(context.toString()); // Perform any specific initialization tasks notifyListeners(Event.BEFORE_INIT_DRIVER, context, null); initInternal(context); // Notify any listeners before initializing job notifyListeners(Event.BEFORE_INIT_JOB, context, null); // Initialize our job Job job = initJob(context); // We can override (the runjob script does) which jar to use instead of using running driver class if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) { logger().info("Setting job jar by class [" + this.getClass() + "]"); job.setJarByClass(this.getClass()); } context.setJob(job); // Post-initialization routines jobPostInit(context); if (context.isDumpConfig()) { Console.out(context.toString()); dumpConfig(job.getConfiguration()); } if (context.isDryRun()) { Console.out("Dry run only. 
Job will not be executed."); return 0; } // Launches the job launchJob(context, job); // Now move our input to archive if (context.getReturnCode() == 0 && context.getArchive() != null) { archiveInputs(context); } // Clean up our job cleanUp(context); notifyListeners(Event.BEFORE_EXIT, context, null); } catch (ParseException pe) { // Output a more "friendly" message context.showHelpAndExit(context.initOptions(), 1, pe.getMessage()); } catch (Exception e) { logger().error("Problem running tool: " + e.getMessage(), e); notifyListeners(Event.EXCEPTION, context, e); } return context.getReturnCode(); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a single file// w w w . j a v a 2 s .c o m * * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testSingle() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testSingle() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "zip-01.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Single")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test operates on a Path containing files that will cause the Job to fail * /*from w ww . java 2 s. c o m*/ * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultiple() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultiple() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Multiple")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test is identical to testMultiple() however the ZipFileInputFormat is set to * be lenient, errors that cause testMultiple() to fail will be quietly ignored here. * //from w w w .jav a 2s. c o m * Expected result: success * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testMultipleLenient() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testMultipleLenient() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setLenient(true); ZipFileInputFormat.setInputPaths(job, inputPath); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_MultipleLenient")); // assertTrue(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * ZipInputStream doesn't support encrypted entries thus this will fail. * //from ww w . j a v a 2 s. c o m * Expected result: failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testEncryptedZip() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testEncryptedZip() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "encrypted.zip")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_Encrypted")); // assertFalse(job.waitForCompletion(true)); }
From source file:com.cotdp.hadoop.ZipFileTest.java
License:Apache License
/** * This test explicitly tries to read a file containing random noise as a ZIP file, * the expected result is a quiet failure. The Job shouldn't fail if non-ZIP data is * encountered./*from w w w . ja va2 s.com*/ * * Expected result: (quiet) failure * * @throws IOException * @throws InterruptedException * @throws ClassNotFoundException */ public void testNonZipData() throws IOException, ClassNotFoundException, InterruptedException { LOG.info("============================================================"); LOG.info("== Running testNonZipData() =="); LOG.info("============================================================"); // Standard stuff Job job = new Job(conf); job.setJobName(this.getClass().getSimpleName()); job.setJarByClass(this.getClass()); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); // job.setInputFormatClass(ZipFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // The output files will contain "Word [TAB] Count" job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // ZipFileInputFormat.setInputPaths(job, new Path(inputPath, "random.dat")); TextOutputFormat.setOutputPath(job, new Path(workingPath, "Output_NonZipData")); // assertTrue(job.waitForCompletion(true)); }