List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
From source file:com.rockstor.compact.RecoveryTool.java
License:Apache License
/**
 * Creates and configures the recovery job.
 *
 * <p>The job is map-only (zero reduce tasks), reads its input through
 * {@code CompactDirInputFormat}, runs {@code RecoveryMapper} and uses
 * {@code NullOutputFormat} as its output format.
 *
 * @param conf configuration the job is created from
 * @return the configured, not yet submitted job
 * @throws IOException if the job cannot be created
 */
private Job createSubmittableJob(Configuration conf) throws IOException {
    final Job recoveryJob = new Job(conf, NAME);
    recoveryJob.setJarByClass(RecoveryTool.class);

    // Map side: input format, mapper and the (empty) map output types.
    recoveryJob.setInputFormatClass(CompactDirInputFormat.class);
    recoveryJob.setMapperClass(RecoveryMapper.class);
    recoveryJob.setMapOutputKeyClass(NullWritable.class);
    recoveryJob.setMapOutputValueClass(NullWritable.class);

    // No reduce phase.
    recoveryJob.setNumReduceTasks(0);
    recoveryJob.setOutputFormatClass(NullOutputFormat.class);

    final String message = "init job " + NAME + " OK!";
    LOG.info(message);
    return recoveryJob;
}
From source file:com.sa.npopa.samples.hbase.FindBadMOBReferences.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0];/*w w w . ja v a 2s . c om*/ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(FindBadMOBReferences.class); Scan scan = new Scan(); scan.setCacheBlocks(false); scan.setBatch(10); scan.setAttribute(MobConstants.MOB_SCAN_RAW, Bytes.toBytes(Boolean.TRUE)); scan.setAttribute(MobConstants.MOB_SCAN_REF_ONLY, Bytes.toBytes(Boolean.TRUE)); scan.addFamily(Bytes.toBytes("J")); //scan.setRowPrefixFilter(Bytes.toBytes("a00")); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); // job.setOutputFormatClass(NullOutputFormat.class); TableMapReduceUtil.initTableMapperJob(tableName, scan, FindBadMOBReferencesMapper.class, Text.class, Text.class, job); //job.setNumReduceTasks(0); job.setReducerClass(FindBadMOBReferencesReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(1); FileOutputFormat.setOutputPath(job, new Path("/tmp/out")); return job; }
From source file:com.sa.npopa.samples.hbase.myMR.java
License:Apache License
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0];/*from w w w . java2s. co m*/ Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(myMR.class); Scan scan = new Scan(); scan.setCacheBlocks(false); scan.setBatch(10); //scan.setFilter(new FirstKeyOnlyFilter()); //need to find another filter like key only. scan.setFilter(new KeyOnlyFilter()); job.setOutputFormatClass(NullOutputFormat.class); TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); job.setNumReduceTasks(0); return job; }
From source file:com.sa.npopa.samples.hbase.RowCounter.java
License:Apache License
/** * Sets up the actual job./* ww w. j a v a2s. co m*/ * * @param conf The current configuration. * @param args The command line parameters. * @return The newly created job. * @throws IOException When setting up the job fails. */ public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException { String tableName = args[0]; String startKey = null; String endKey = null; long startTime = 0; long endTime = 0; StringBuilder sb = new StringBuilder(); final String rangeSwitch = "--range="; final String startTimeArgKey = "--starttime="; final String endTimeArgKey = "--endtime="; final String expectedCountArg = "--expected-count="; // First argument is table name, starting from second for (int i = 1; i < args.length; i++) { if (args[i].startsWith(rangeSwitch)) { String[] startEnd = args[i].substring(rangeSwitch.length()).split(",", 2); if (startEnd.length != 2 || startEnd[1].contains(",")) { printUsage("Please specify range in such format as \"--range=a,b\" " + "or, with only one boundary, \"--range=,b\" or \"--range=a,\""); return null; } startKey = startEnd[0]; endKey = startEnd[1]; continue; } if (args[i].startsWith(startTimeArgKey)) { startTime = Long.parseLong(args[i].substring(startTimeArgKey.length())); continue; } if (args[i].startsWith(endTimeArgKey)) { endTime = Long.parseLong(args[i].substring(endTimeArgKey.length())); continue; } if (args[i].startsWith(expectedCountArg)) { conf.setLong(EXPECTED_COUNT_KEY, Long.parseLong(args[i].substring(expectedCountArg.length()))); continue; } // if no switch, assume column names sb.append(args[i]); sb.append(" "); } if (endTime < startTime) { printUsage("--endtime=" + endTime + " needs to be greater than --starttime=" + startTime); return null; } Job job = Job.getInstance(conf, conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName)); job.setJarByClass(RowCounter.class); Scan scan = new Scan(); scan.setCacheBlocks(false); if (startKey != null && !startKey.equals("")) { 
scan.setStartRow(Bytes.toBytes(startKey)); } if (endKey != null && !endKey.equals("")) { scan.setStopRow(Bytes.toBytes(endKey)); } if (sb.length() > 0) { for (String columnName : sb.toString().trim().split(" ")) { String family = StringUtils.substringBefore(columnName, ":"); String qualifier = StringUtils.substringAfter(columnName, ":"); if (StringUtils.isBlank(qualifier)) { scan.addFamily(Bytes.toBytes(family)); } else { scan.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier)); } } } scan.setFilter(new FirstKeyOnlyFilter()); scan.setTimeRange(startTime, endTime == 0 ? HConstants.LATEST_TIMESTAMP : endTime); job.setOutputFormatClass(NullOutputFormat.class); TableMapReduceUtil.initTableMapperJob(tableName, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); job.setNumReduceTasks(0); return job; }
From source file:com.sanjay.mapreduce.SiCombiner.java
License:Apache License
/**
 * Command line entry point: configures and runs the "word count" job.
 *
 * <p>After generic Hadoop options are stripped, every remaining argument
 * except the last is an input path and the last one is the output path.
 * The JVM exits with 0 when the job completes successfully, 1 when it does
 * not, and 2 when fewer than two paths are supplied.
 *
 * @param args command line arguments
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] paths = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (paths.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }

    final Job wordCount = new Job(conf, "word count");
    wordCount.setJarByClass(SiCombiner.class);

    // Processing pipeline: mapper -> combiner -> partitioner -> reducer.
    wordCount.setMapperClass(TokenizerMapper.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);
    wordCount.setPartitionerClass(WordPartitioner.class);

    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);
    wordCount.setNumReduceTasks(5);

    // All but the last remaining argument are inputs; the last is the output.
    final int outputIndex = paths.length - 1;
    for (int idx = 0; idx < outputIndex; ++idx) {
        FileInputFormat.addInputPath(wordCount, new Path(paths[idx]));
    }
    FileOutputFormat.setOutputPath(wordCount, new Path(paths[outputIndex]));

    final boolean completed = wordCount.waitForCompletion(true);
    if (completed) {
        System.exit(0);
    } else {
        System.exit(1);
    }
}
From source file:com.savy3.nonequijoin.MapOutputSampler.java
License:Apache License
/**
 * Driver for the sampler from the command line. Configures a {@link Job}
 * and calls {@link #writePartitionFile}.
 *
 * <p>Recognised options:
 * <ul>
 *   <li>{@code -r <reduces>} - number of reduce tasks (must end up greater than one)</li>
 *   <li>{@code -inFormat <class>} - input format class name</li>
 *   <li>{@code -keyClass <class>} - map output key class name</li>
 *   <li>{@code -splitSample <numSamples> <maxSplits>}</li>
 *   <li>{@code -splitRandom <pcnt> <numSamples> <maxSplits>}</li>
 *   <li>{@code -splitInterval <pcnt> <maxSplits>}</li>
 * </ul>
 * A non-positive {@code maxSplits} is replaced by {@code Integer.MAX_VALUE}.
 * All other arguments are input paths, except the last one, which is the
 * partition file to write. When no sampler option is given a
 * {@code RandomSampler(0.1, 10000, 10)} is used.
 *
 * @param args command line arguments
 * @return {@code 0} on success, otherwise the value of {@code printUsage()}
 * @throws Exception if job setup or sampling fails
 */
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (maxSplits <= 0) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                System.out.println("Random sampling");
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (maxSplits <= 0) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (maxSplits <= 0) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // ++i ran past the end of args while reading an option's value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }

    System.out.println("before paths");
    // The last positional argument is the partition file; the rest are inputs.
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    // Kept for callers that read the setting back from the tool's configuration.
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    // The Job holds its own copy of the configuration taken at construction
    // time, so the partition file must also be set there; otherwise
    // writePartitionFile(job, ...) never sees the requested location.
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    MapOutputSampler.<K, V>writePartitionFile(job, sampler);

    return 0;
}
From source file:com.sematext.hbase.hut.RollbackUpdatesMrJob.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * <p>{@code args[0]} is the table name. {@code args[1]} and {@code args[2]},
 * when present, are parsed as the start and end time; they default to
 * {@code 0} and {@code Long.MAX_VALUE}. Map-side speculative execution is
 * switched off in {@code conf} before the job is created.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    final String table = args[0];
    final String jobName = NAME + "_" + table;

    conf.set("mapred.map.tasks.speculative.execution", "false");

    final Job rollbackJob = new Job(conf, jobName);
    rollbackJob.setJobName(jobName);
    rollbackJob.setJarByClass(RollbackUpdatesMapper.class);

    // TODO: Allow passing filter and subset of rows/columns.
    final Scan scan = new Scan();

    // Optional arguments.
    final long startTime;
    if (args.length > 1) {
        startTime = Long.parseLong(args[1]);
    } else {
        startTime = 0L;
    }
    final long endTime;
    if (args.length > 2) {
        endTime = Long.parseLong(args[2]);
    } else {
        endTime = Long.MAX_VALUE;
    }

    // TODO: consider using scan.setTimeRange() for limiting scanned data range. It may
    //       not be good way to do if tss are artificial in HutPuts though
    rollbackJob.getConfiguration().set(
            RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MIN_TIME_ATTR, String.valueOf(startTime));
    rollbackJob.getConfiguration().set(
            RollbackUpdatesMapper.HUT_ROLLBACK_UPDATE_MAX_TIME_ATTR, String.valueOf(endTime));

    // NOTE(review): the filter is given (endTime, startTime) in that order - confirm
    // against the HutWriteTimeRowsFilter constructor.
    scan.setFilter(new HutWriteTimeRowsFilter(endTime, startTime));
    scan.setCacheBlocks(false);
    // TODO: allow user change using job params
    scan.setCaching(512);
    // NOTE(review): block caching was already disabled above; this call repeats it.
    scan.setCacheBlocks(false);

    LOG.info("Using scan: " + scan);

    // TODO: allow better limiting of data to be fetched
    final String scanFamily = conf.get(TableInputFormat.SCAN_COLUMN_FAMILY);
    if (scanFamily != null) {
        scan.addFamily(Bytes.toBytes(scanFamily));
    }

    final String startDescription = startTime + " (" + new Date(startTime) + ")";
    final String endDescription = endTime + " (" + new Date(endTime) + ")";
    LOG.info("starttime (inclusive): " + startDescription + ", endtime (inclusive): " + endDescription);

    TableMapReduceUtil.initTableMapperJob(table, scan, RollbackUpdatesMapper.class, null, null, rollbackJob);
    TableMapReduceUtil.initTableReducerJob(table, null, rollbackJob);

    // No reducers. Just write straight to output files.
    rollbackJob.setNumReduceTasks(0);
    return rollbackJob;
}
From source file:com.sematext.hbase.hut.UpdatesProcessingMrJob.java
License:Apache License
/** * Use this before submitting a TableMap job. It will appropriately set up * the job./*from w w w . j a va 2s . co m*/ * * @param table The table name. * @param scan The scan with the columns to scan. * @param up update processor implementation * @param job The job configuration. * @throws java.io.IOException When setting up the job fails. */ @SuppressWarnings("unchecked") public static void initJob(String table, Scan scan, UpdateProcessor up, Job job) throws IOException { TableMapReduceUtil.initTableMapperJob(table, scan, UpdatesProcessingMapper.class, null, null, job); job.setJarByClass(UpdatesProcessingMrJob.class); job.setOutputFormatClass(NullOutputFormat.class); job.setNumReduceTasks(0); job.getConfiguration().set(UpdatesProcessingMapper.HTABLE_NAME_ATTR, table); job.getConfiguration().set(UpdatesProcessingMapper.HUT_PROCESSOR_CLASS_ATTR, up.getClass().getName()); job.getConfiguration().set(UpdatesProcessingMapper.HUT_PROCESSOR_DETAILS_ATTR, convertUpdateProcessorToString(up)); job.getConfiguration().set("mapred.map.tasks.speculative.execution", "false"); // TODO: explain }
From source file:com.sematext.hbase.wd.RowKeyDistributorTestBase.java
License:Apache License
private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue, int seekIntervalMinValue, int seekIntervalMaxValue) throws IOException, InterruptedException, ClassNotFoundException { int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue, seekIntervalMinValue, seekIntervalMaxValue); // Reading data Configuration conf = testingUtility.getConfiguration(); Job job = new Job(conf, "testMapReduceInternal()-Job"); job.setJarByClass(this.getClass()); TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class, ImmutableBytesWritable.class, Result.class, job); // Substituting standard TableInputFormat which was set in TableMapReduceUtil.initTableMapperJob(...) job.setInputFormatClass(WdTableInputFormat.class); keyDistributor.addInfo(job.getConfiguration()); job.setOutputFormatClass(NullOutputFormat.class); job.setNumReduceTasks(0); boolean succeeded = job.waitForCompletion(true); Assert.assertTrue(succeeded);/*from w w w . j a va 2 s. c om*/ long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue(); Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords); }
From source file:com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi/* w w w . ja v a 2 s .co m*/ */ public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(conf); //setup job conf job.setJobName(jobName); job.setJarByClass(QuasiMonteCarlo.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(BooleanWritable.class); job.setOutputValueClass(LongWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(QmcMapper.class); job.setReducerClass(QmcReducer.class); job.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. job.setSpeculativeExecution(false); //setup input/output directories final Path inDir = new Path(tmpDir, "in"); final Path outDir = new Path(tmpDir, "out"); FileInputFormat.setInputPaths(job, inDir); FileOutputFormat.setOutputPath(job, outDir); final FileSystem fs = FileSystem.get(conf); if (fs.exists(tmpDir)) { fs.delete(tmpDir, true); // throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) // + " already exists. 
Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } // try { //generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); job.submit(); // final double duration = (System.currentTimeMillis() - startTime)/1000.0; // System.out.println("Job Finished in " + duration + " seconds"); return job.getJobID(); // } finally { // fs.delete(tmpDir, true); // } }