Usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
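Before the collected examples, here is a minimal, self-contained sketch of the typical call pattern (the job name, input/output arguments, and key/value types are illustrative assumptions, not taken from any source file listed below). setNumReduceTasks must be called while the job is still being configured; once the job has been submitted it is no longer in the DEFINE state and the call throws IllegalStateException. Passing 0 makes the job map-only, so the shuffle and reduce phases are skipped entirely.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetNumReduceTasksSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "setNumReduceTasks-sketch");
        job.setJarByClass(SetNumReduceTasksSketch.class);

        // No mapper or reducer classes are set, so the identity Mapper and Reducer apply;
        // the point here is only where setNumReduceTasks fits into job configuration.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        // Must be called before the job is submitted; afterwards the Job is no longer
        // in the DEFINE state and setNumReduceTasks throws IllegalStateException.
        job.setNumReduceTasks(4);
        // job.setNumReduceTasks(0) would instead make this a map-only job: no shuffle,
        // no reducers, and mapper output goes straight to the OutputFormat.

        FileInputFormat.setInputPaths(job, new Path(args[0]));  // input directory
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // must not already exist

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The collected examples below show the same call in real projects: 0 reducers for map-only transformations, 1 reducer for global sorting or aggregation, and values derived at runtime (for example from the number of output shards or an input-size estimate).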
From source file: com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileJob.java
License: Apache License

/**
 * The driver for the MapReduce job.
 *
 * @param conf configuration
 * @param inputDirAsString input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws java.io.IOException if something went wrong
 * @throws java.net.URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString)
        throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);

    job.setJarByClass(CombineSequenceFileJob.class);
    job.setJobName("seqfilecombiner");
    job.setNumReduceTasks(0);
    // job.setMapperClass(IdentityMapper.class);
    job.setInputFormatClass(CombineSequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputDirAsString);
    FileOutputFormat.setOutputPath(job, new Path(outputDirAsString));

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    boolean jobResult = job.waitForCompletion(true);

    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    return jobResult;
}
From source file: com.alexholmes.json.mapreduce.ExampleJob.java
License: Apache License

/**
 * The MapReduce driver - setup and launch the job.
 *
 * @param args the command-line arguments
 * @return the process exit code
 * @throws Exception if something goes wrong
 */
public int run(final String[] args) throws Exception {
    String input = args[0];
    String output = args[1];

    Configuration conf = super.getConf();

    writeInput(conf, new Path(input));

    Job job = new Job(conf);
    job.setJarByClass(ExampleJob.class);
    job.setMapperClass(Map.class);

    job.setNumReduceTasks(0);

    Path outputPath = new Path(output);

    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, outputPath);

    // use the JSON input format
    job.setInputFormatClass(MultiLineJsonInputFormat.class);

    // specify the JSON attribute name which is used to determine which
    // JSON elements are supplied to the mapper
    MultiLineJsonInputFormat.setInputJsonMember(job, "colorName");

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file: com.antbrains.crf.hadoop.CalcFeatureWeights.java
License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }
    if (startStep <= 2) { // sort
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }
}
From source file: com.architecting.ch07.MapReduceIndexerTool.java
License: Apache License

/** API for Java clients; visible for testing; may become a public API eventually */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.

    if (!waitForCompletion(job, true)) {
        return -1; // job failed
    }

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}
From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License: Apache License

/**
 * Test for map only job.
 * @throws Exception if failed
 */
@Test
public void map_only() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(SimpleMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "Hello, world!");

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));

    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(trimHead(read(outputDir)), is(set("Hello, world!")));
}
From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License: Apache License

/**
 * Test for map-reduce job.
 * @throws Exception if failed
 */
@Test
public void map_reduce() throws Exception {
    Job job = newJob();
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(Text.Comparator.class);
    job.setGroupingComparatorClass(Text.Comparator.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, new String[] { "a b c d", "a a b c", "c", });

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));

    assertThat(new SimpleJobRunner().run(job), is(true));
    assertThat(toMap(read(outputDir)), is(map(new String[] { "a", "3", "b", "2", "c", "3", "d", "1", })));
}
From source file: com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License: Apache License

/**
 * Test for wrong job.
 * @throws Exception if failed
 */
@Test
public void exception() throws Exception {
    Job job = newJob();
    job.setJobName("w/ exception");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(InvalidMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    File inputDir = folder.newFolder();
    File inputFile = new File(inputDir, "input.txt");
    write(inputFile, "testing");

    File outputDir = folder.newFolder();
    outputDir.delete();

    FileInputFormat.setInputPaths(job, new Path(inputFile.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(outputDir.toURI()));

    assertThat(new SimpleJobRunner().run(job), is(false));
}
From source file: com.asakusafw.runtime.stage.AbstractStageClient.java
License: Apache License

@SuppressWarnings("rawtypes")
private void configureShuffle(Job job, VariableTable variables) {
    Class<? extends Reducer> reducer = getReducerClassOrNull();
    if (reducer != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Reducer: {0}", reducer.getName())); //$NON-NLS-1$
        }
        job.setReducerClass(reducer);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Reducer: N/A"); //$NON-NLS-1$
        }
        job.setNumReduceTasks(0);
        return;
    }
    Class<? extends Writable> outputKeyClass = or(getShuffleKeyClassOrNull(), NullWritable.class);
    Class<? extends Writable> outputValueClass = or(getShuffleValueClassOrNull(), NullWritable.class);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Shuffle: key={0}, value={1}", //$NON-NLS-1$
                outputKeyClass.getName(), outputValueClass.getName()));
    }
    job.setMapOutputKeyClass(outputKeyClass);
    job.setMapOutputValueClass(outputValueClass);
    Class<? extends Reducer> combiner = getCombinerClassOrNull();
    if (combiner != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Combiner: {0}", combiner.getName())); //$NON-NLS-1$
        }
        job.setCombinerClass(combiner);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Combiner: N/A"); //$NON-NLS-1$
        }
    }
    Class<? extends Partitioner> partitioner = getPartitionerClassOrNull();
    if (partitioner != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Partitioner: {0}", partitioner.getName())); //$NON-NLS-1$
        }
        job.setPartitionerClass(partitioner);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Partitioner: DEFAULT"); //$NON-NLS-1$
        }
    }
    Class<? extends RawComparator> groupingComparator = getGroupingComparatorClassOrNull();
    if (groupingComparator != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("GroupingComparator: {0}", groupingComparator.getName())); //$NON-NLS-1$
        }
        job.setGroupingComparatorClass(groupingComparator);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("GroupingComparator: DEFAULT"); //$NON-NLS-1$
        }
    }
    Class<? extends RawComparator> sortComparator = getSortComparatorClassOrNull();
    if (sortComparator != null) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("SortComparator: {0}", sortComparator.getName())); //$NON-NLS-1$
        }
        job.setSortComparatorClass(sortComparator);
    } else {
        if (LOG.isDebugEnabled()) {
            LOG.debug("SortComparator: DEFAULT"); //$NON-NLS-1$
        }
    }
}
From source file: com.asakusafw.runtime.stage.optimizer.ReducerSimplifierConfigurator.java
License: Apache License

@Override
public void configure(Job job) throws IOException, InterruptedException {
    int count = job.getNumReduceTasks();
    if (count <= TASKS_TINY) {
        return;
    }
    Configuration conf = job.getConfiguration();
    long limit = conf.getLong(KEY_TINY_LIMIT, -1L);
    if (limit < 0L) {
        if (LOG.isDebugEnabled()) {
            LOG.debug(MessageFormat.format("Reducer simplifier is disabled for tiny inputs: {0}", //$NON-NLS-1$
                    job.getJobName()));
        }
        return;
    }
    long estimated = StageInputDriver.estimateInputSize(job);
    if (LOG.isDebugEnabled()) {
        LOG.debug(MessageFormat.format("Reducer simplifier: job={0}, tiny-limit={1}, estimated={2}", //$NON-NLS-1$
                job.getJobName(), limit, estimated));
    }
    if (estimated < 0L || estimated > limit) {
        return;
    }
    LOG.info(MessageFormat.format("The number of reduce task ({0}) is configured: {1}->{2}",
            job.getJobName(), job.getNumReduceTasks(), TASKS_TINY));
    job.setNumReduceTasks(TASKS_TINY);
}
From source file: com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License: Apache License

private void updateTable() throws IOException, InterruptedException {
    Job job = newJob();
    List<StageInput> inputList = new ArrayList<>();
    inputList.add(new StageInput(storage.getHeadContents("*").toString(), TemporaryInputFormat.class,
            TableJoinBaseMapper.class));
    inputList.add(new StageInput(storage.getPatchContents("*").toString(), TemporaryInputFormat.class,
            TableJoinPatchMapper.class));
    StageInputDriver.set(job, inputList);
    StageResourceDriver.add(job, storage.getPatchContents("*").toString(), TableJoinBaseMapper.RESOURCE_KEY);
    job.setInputFormatClass(StageInputFormat.class);
    job.setMapperClass(StageInputMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(modelClass);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(modelClass);

    TemporaryOutputFormat.setOutputPath(job, getNextDirectory());
    job.setOutputFormatClass(TemporaryOutputFormat.class);
    job.getConfiguration().setClass("mapred.output.committer.class", LegacyBridgeOutputCommitter.class,
            org.apache.hadoop.mapred.OutputCommitter.class);

    job.setNumReduceTasks(0);

    LOG.info(MessageFormat.format("applying patch (table join): {0} / {1} -> {2}",
            storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
    try {
        boolean succeed = job.waitForCompletion(true);
        LOG.info(MessageFormat.format("applied patch (table join): succeed={0}, {1} / {2} -> {3}", succeed,
                storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        if (succeed == false) {
            throw new IOException(MessageFormat.format("failed to apply patch (table join): {0} / {1} -> {2}",
                    storage.getPatchContents("*"), storage.getHeadContents("*"), getNextContents()));
        }
    } catch (ClassNotFoundException e) {
        throw new IOException(e);
    }
    putMeta();
}