Usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(new Configuration(conf)); Configuration jobConf = job.getConfiguration(); if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }//from w w w . ja va 2 s . c o m job.setJarByClass(mapper); job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); job.setMapOutputKeyClass(mapperKey); job.setMapOutputValueClass(mapperValue); job.setOutputKeyClass(mapperKey); job.setOutputValueClass(mapperValue); jobConf.setBoolean("mapred.compress.map.output", true); job.setNumReduceTasks(0); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.cmcc.hy.bigdata.weijifen.jobs.hubei.score.ScoreInfoDayJob.java
License:Open Source License
@Override public int run(String[] args) throws Exception { // TODO Auto-generated method stub Configuration conf = ConfigurationUtil.loginAuthentication(args, SEPCIFIC_CONFIG_NAME, getConf()); // ?()/*from w w w .j av a 2s .c om*/ String statDate = DateUtil.getFilterDate(args); if (statDate == null) { System.exit(1); } conf.set(STAT_DAY, statDate); // ?job Job job = Job.getInstance(conf, JOB_NAME + ":" + statDate); job.setJarByClass(ScoreInfoDayJob.class); String scoreInfoInput = conf.get(SCORE_INFO_INPUT_PATH); Path scoreInfoPath = new Path(scoreInfoInput); String acctPhoneMapInfoInput = conf.get(ACCT_PHONE_MAP_INPUT_PATH); Path accPhoneMapInfoPath = new Path(acctPhoneMapInfoInput); // ? if (FileSystemUtil.exists(scoreInfoPath)) { MultipleInputs.addInputPath(job, scoreInfoPath, SequenceFileInputFormat.class, ScoreInfoDayMapper.class); logger.info("SocreInfoPath is " + scoreInfoInput); } else { logger.error("Path [{}] not exist!", scoreInfoInput); } // ?? // if (FileSystemUtil.exists(accPhoneMapInfoPath)) { // MultipleInputs.addInputPath(job, accPhoneMapInfoPath, TextInputFormat.class, // AcctPhoneMapper.class); // logger.info("AccPhoneMapInfoPath is " + acctPhoneMapInfoInput); // } else { // logger.error("Path [{}] not exist!", acctPhoneMapInfoInput); // } // job job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(ScoreInfo.class); job.setNumReduceTasks(conf.getInt(REDUCE_NUMBER, 40)); job.setOutputFormatClass(NullOutputFormat.class); // TableMapReduceUtil.initTableReducerJob(HBaseTableSchema.USER_INFO_TABLE2, // ScoreInfoDayReducer.class, job); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.conversantmedia.mapreduce.tool.annotation.handler.MaraAnnotationUtil.java
License:Apache License
/** * * @param job the job * @param jobField the field to retrieve annotations from * @param driver the driver bean * @param context the tool context * @throws ToolException if any issue is encountered through reflection or expression evaluation *///from w w w . java 2 s .c om public void configureJobFromField(Job job, Field jobField, Object driver, AnnotatedToolContext context) throws ToolException { JobInfo jobInfo = jobField.getAnnotation(JobInfo.class); String name = StringUtils.isBlank(jobInfo.value()) ? jobInfo.name() : jobInfo.value(); if (StringUtils.isBlank(name)) { name = defaultDriverIdForClass(driver.getClass()); } name = (String) ExpressionEvaluator.instance().evaluate(driver, context, name); job.setJobName(name); if (!jobInfo.numReducers().equals("-1")) { if (NumberUtils.isNumber(jobInfo.numReducers())) { job.setNumReduceTasks(Integer.valueOf(jobInfo.numReducers())); } else { Object reducerValue = ExpressionEvaluator.instance().evaluate(driver, context, jobInfo.numReducers()); if (reducerValue != null) { job.setNumReduceTasks((Integer) reducerValue); } } } // We can override (the runjob script does) which jar to use instead of using running driver class if (StringUtils.isBlank(job.getConfiguration().get("mapred.jar"))) { job.setJarByClass(driver.getClass()); } handleJobFieldAnnotations(job, jobField, jobInfo); }
From source file:com.cqx.mr.MRSearchAuto.java
public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException { long startTime; long endTime; Configuration conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "node2,node3,node4"); conf.set("fs.default.name", "hdfs://node1"); conf.set("mapred.job.tracker", "node1:54311"); /*/*from www .j a v a 2 s . c o m*/ * ?map */ conf.set("search.license", "C87310"); conf.set("search.color", "10"); conf.set("search.direction", "2"); Job job = new Job(conf, "MRSearchHBase"); System.out.println("search.license: " + conf.get("search.license")); job.setNumReduceTasks(0); job.setJarByClass(MRSearchAuto.class); Scan scan = new Scan(); scan.addFamily(FAMILY_NAME); byte[] startRow = Bytes.toBytes("2011010100000"); byte[] stopRow; switch (numOfDays) { case 1: stopRow = Bytes.toBytes("2011010200000"); break; case 10: stopRow = Bytes.toBytes("2011011100000"); break; case 30: stopRow = Bytes.toBytes("2011020100000"); break; case 365: stopRow = Bytes.toBytes("2012010100000"); break; default: stopRow = Bytes.toBytes("2011010101000"); } // ?key scan.setStartRow(startRow); scan.setStopRow(stopRow); TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class, ImmutableBytesWritable.class, Text.class, job); Path outPath = new Path("searchresult"); HDFS_File file = new HDFS_File(); file.DelFile(conf, outPath.getName(), true); // FileOutputFormat.setOutputPath(job, outPath);// startTime = System.currentTimeMillis(); job.waitForCompletion(true); endTime = System.currentTimeMillis(); System.out.println("Time used: " + (endTime - startTime)); System.out.println("startRow:" + Text.decode(startRow)); System.out.println("stopRow: " + Text.decode(stopRow)); }
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityPairs.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException { Job job = new Job(conf, "pair wise count"); job.setJarByClass(ConditionalProbabilityPairs.class); job.setMapperClass(TokenizerMapper.class); //job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(ProbDistPartitioner.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(5); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job;/*from www. ja va 2 s . c o m*/ }
From source file:com.daleway.training.hadoop.condprob.ConditionalProbabilityStripes.java
License:Apache License
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException { Job job = new Job(conf, "pair wise count"); job.setJarByClass(ConditionalProbabilityStripes.class); job.setMapperClass(TokenizerMapper.class); // job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(ProbDistPartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(5); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); return job;/*from ww w . ja va 2 s . c o m*/ }
From source file:com.daleway.training.hadoop.pagerank.PageRankSecondarySort.java
License:Apache License
/**
 * Creates the secondary-sort job (still named "pair wise count"): {@code TokenizerMapper}
 * emits {@code LongWritable}/{@code Text} pairs, keys are ordered with
 * {@code LongWritable.DecreasingComparator}, and a single {@code IntSumReducer} task with at
 * most one attempt consumes them.
 *
 * @param conf       configuration the job is created from
 * @param inputPath  input location added to the job
 * @param outputPath output location of the job
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created or the paths cannot be registered
 */
public static Job createJob(Configuration conf, String inputPath, String outputPath) throws IOException {
    final Job sortJob = new Job(conf, "pair wise count");
    sortJob.setJarByClass(PageRankSecondarySort.class);

    // Processing classes.
    sortJob.setMapperClass(TokenizerMapper.class);
    sortJob.setReducerClass(IntSumReducer.class);

    // Intermediate types and their sort order.
    sortJob.setMapOutputKeyClass(LongWritable.class);
    sortJob.setMapOutputValueClass(Text.class);
    sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);

    // One reducer, one attempt.
    sortJob.setMaxReduceAttempts(1);
    sortJob.setNumReduceTasks(1);

    FileInputFormat.addInputPath(sortJob, new Path(inputPath));
    FileOutputFormat.setOutputPath(sortJob, new Path(outputPath));
    return sortJob;
}
From source file:com.datasalt.pangool.tuplemr.MapOnlyJobBuilder.java
License:Apache License
public Job createJob() throws IOException, TupleMRException, URISyntaxException { // perform a deep copy of the configuration this.conf = new Configuration(this.conf); String uniqueName = UUID.randomUUID().toString() + '.' + "out-format.dat"; try {//from ww w . ja v a2 s. c o m InstancesDistributor.distribute(outputFormat, uniqueName, conf); instanceFilesCreated.add(uniqueName); } catch (URISyntaxException e1) { throw new TupleMRException(e1); } Job job; if (jobName == null) { job = new Job(conf); } else { job = new Job(conf, jobName); } job.setNumReduceTasks(0); job.getConfiguration().set(ProxyOutputFormat.PROXIED_OUTPUT_FORMAT_CONF, uniqueName); job.setOutputFormatClass(ProxyOutputFormat.class); if (outputKeyClass == null) { throw new TupleMRException("Output spec must be defined, use setOutput()"); } job.setOutputKeyClass(outputKeyClass); job.setOutputValueClass(outputValueClass); FileOutputFormat.setOutputPath(job, outputPath); Input lastInput = null; for (Input input : multipleInputs.getMultiInputs()) { if (input.inputProcessor == null) { input.inputProcessor = mapOnlyMapper; if (input.inputProcessor == null) { throw new TupleMRException("Either mapOnlyMapper property or full Input spec must be set."); } } lastInput = input; } if (lastInput == null) { throw new TupleMRException("At least one input must be specified"); } job.setJarByClass((jarByClass != null) ? jarByClass : lastInput.inputProcessor.getClass()); instanceFilesCreated.addAll(multipleInputs.configureJob(job)); instanceFilesCreated.addAll(namedOutputs.configureJob(job)); return job; }
From source file:com.datasalt.pangool.tuplemr.mapred.TestCombiner.java
License:Apache License
/**
 * Runs the combiner job with a single reducer over one input line and checks the per-word
 * counts written to the reducer's output file.
 */
@Test
public void test() throws TupleMRException, IOException, InterruptedException, ClassNotFoundException {
    final Configuration conf = getConf();
    final String input = "combiner-input";
    final String output = "combiner-output";

    withInput(input, writable("hola don pepito hola don jose"));

    final TupleMRBuilder jobBuilder = new TestCombiner().getBuilder(conf, input, output);
    try {
        final Job job = jobBuilder.createJob();
        job.setNumReduceTasks(1);
        assertRun(job);
    } finally {
        // Instance files are cleaned up whether or not the run succeeded.
        jobBuilder.cleanUpInstanceFiles();
    }

    // With one reducer, all results are expected in a single part file.
    final String reducerOutput = output + "/part-r-00000";
    withOutput(reducerOutput, writable("don"), writable(2));
    withOutput(reducerOutput, writable("hola"), writable(2));
    withOutput(reducerOutput, writable("jose"), writable(1));
    withOutput(reducerOutput, writable("pepito"), writable(1));

    trash(input);
    trash(output);
}
From source file:com.datasalt.pangool.tuplemr.mapred.TestForProfiling.java
License:Apache License
public void runningIdentityJob(boolean withNulls) throws IOException, ClassNotFoundException, InterruptedException, TupleMRException { Configuration conf = getConf(); String input = TestTupleMRJob.class + "-input"; String output = TestTupleMRJob.class + "-output"; Schema schema = SCHEMA;//from w w w. ja v a2 s . co m if (withNulls) { decorateWithNullables(schema); } ITuple tuple = new Tuple(schema); for (int i = 0; i < NUM_ROWS_TO_GENERATE; i++) { withTupleInput(input, fillTuple(true, tuple)); } TupleMRBuilder builder = new TupleMRBuilder(getConf(), "test"); builder.addTupleInput(new Path(input), new IdentityTupleMapper()); builder.setTupleReducer(new IdentityTupleReducer()); builder.addIntermediateSchema(schema); builder.setGroupByFields(schema.getField(0).getName()); builder.setTupleOutput(new Path(output), schema); Job job = builder.createJob(); try { job.setNumReduceTasks(1); assertRun(job); } finally { builder.cleanUpInstanceFiles(); } trash(input); trash(output); }