Usage examples for `org.apache.hadoop.mapreduce.Job#setJobName(String)`.
public void setJobName(String name) throws IllegalStateException
From source file:code.DemoWordCount.java
License:Apache License
/** * Runs this tool.//from www. j a v a 2 s.co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + DemoWordCount.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); Job job = Job.getInstance(conf); job.setJobName(DemoWordCount.class.getSimpleName()); job.setJarByClass(DemoWordCount.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileJob.java
License:Apache License
/** * The driver for the MapReduce job./*from w w w . j a v a2s . c o m*/ * * @param conf configuration * @param inputDirAsString input directory in CSV-form * @param outputDirAsString output directory * @return true if the job completed successfully * @throws java.io.IOException if something went wrong * @throws java.net.URISyntaxException if a URI wasn't correctly formed */ public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException { Job job = new Job(conf); job.setJarByClass(CombineSequenceFileJob.class); job.setJobName("seqfilecombiner"); job.setNumReduceTasks(0); // job.setMapperClass(IdentityMapper.class); job.setInputFormatClass(CombineSequenceFileInputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); FileInputFormat.setInputPaths(job, inputDirAsString); FileOutputFormat.setOutputPath(job, new Path(outputDirAsString)); Date startTime = new Date(); System.out.println("Job started: " + startTime); boolean jobResult = job.waitForCompletion(true); Date endTime = new Date(); System.out.println("Job ended: " + endTime); System.out.println("The job took " + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds."); return jobResult; }
From source file:com.app.hadoopexample.MaxTemperatureDriver.java
/**
 * Configures and submits the max-temperature MapReduce job.
 *
 * <p>Fixes over the original: the caller-supplied arguments are no longer
 * unconditionally shadowed by hard-coded demo paths; the job is submitted
 * exactly once (the original called {@code waitForCompletion} twice — the
 * second call would throw {@code IllegalStateException} if ever reached);
 * and {@code System.exit} is no longer used inside {@code Tool.run}, which
 * would prevent ToolRunner callers from cleaning up.
 *
 * @param arg expected: [0] input path, [1] output path; when absent, the
 *            original hard-coded demo paths are used as a fallback
 * @return 0 on success, 1 on job failure, -1 on bad usage
 */
public int run(String[] arg) throws Exception {
    String[] args;
    if (arg != null && arg.length == 2) {
        args = arg;
    } else if (arg == null || arg.length == 0) {
        // Backward-compatible demo fallback (the original always used these).
        args = new String[] { "C:/Hadoop/input/LICENSE.txt", "C:/Hadoop/output/LICENSE.txt" };
    } else {
        System.err.println("Usage: MaxTemperatureDriver <input path> <outputpath>");
        return -1;
    }

    Job job = Job.getInstance(); // new Job() is deprecated
    job.setJarByClass(MaxTemperatureDriver.class);
    job.setJobName("Max Temperature");

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Submit exactly once and propagate the result as an exit code.
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}
From source file:com.architecting.ch07.MapReduceIndexerTool.java
License:Apache License
/**
 * API for Java clients; visible for testing; may become a public API eventually.
 *
 * <p>Reads HBase rows via a table-input mapper, builds Solr documents, writes
 * them through {@code SolrOutputFormat}, normalizes the output shard directory
 * names, and optionally "goes live" by merging the shards into a running
 * SolrCloud cluster.
 *
 * @param options parsed tool options (input table, output dir, ZK host, …)
 * @return 0 on success, -1 on any failure
 * @throws Exception if job setup or execution fails
 */
int run(Options options) throws Exception {
    // Fail fast on MR1's LocalJobRunner: it lacks the distributed-cache
    // feature that --files / --libjars (and the log4j push below) rely on.
    if (getConf().getBoolean("isMR1", false)
            && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool
    // also see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // also see https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    // NOTE(review): hard-coded developer-machine path — verify before reuse.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    // Clear any previous run's output; abort if the delete fails.
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;

    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>

    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);
    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    // Speculative execution would produce duplicate Solr writes.
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.

    if (!waitForCompletion(job, true)) {
        return -1;// job failed
    }

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            // "-m".length() == 2 also strips the "-r" variant's two chars.
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }
    ; // (stray empty statement in the original — harmless)

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    // Optionally merge the freshly built shards into the live Solr cluster.
    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}
From source file:com.asakusafw.runtime.mapreduce.simple.SimpleJobRunnerTest.java
License:Apache License
/**
 * Test for wrong job.
 * @throws Exception if failed
 */
@Test
public void exception() throws Exception {
    // Prepare a one-record input file and a not-yet-existing output dir.
    File in = folder.newFolder();
    File source = new File(in, "input.txt");
    write(source, "testing");
    File out = folder.newFolder();
    out.delete();

    // Configure a map-only job whose mapper always fails.
    Job job = newJob();
    job.setJobName("w/ exception");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(InvalidMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, new Path(source.toURI()));
    FileOutputFormat.setOutputPath(job, new Path(out.toURI()));

    // The runner must report failure rather than throw.
    assertThat(new SimpleJobRunner().run(job), is(false));
}
From source file:com.asakusafw.runtime.stage.AbstractStageClient.java
License:Apache License
private void configureJobInfo(Job job, VariableTable variables) { Class<?> clientClass = getClass(); String operationId = getOperationId(); if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Hadoop Job Client: {0}", clientClass.getName())); //$NON-NLS-1$ }// w w w . j a v a2s . com String jar = job.getConfiguration().get(PROP_APPLICATION_JAR); if (jar == null || (job.getConfiguration() instanceof JobConf) == false) { job.setJarByClass(clientClass); } else { ((JobConf) job.getConfiguration()).setJar(jar); } if (LOG.isDebugEnabled()) { LOG.debug(MessageFormat.format("Hadoop Job Name: {0}", operationId)); //$NON-NLS-1$ } job.setJobName(operationId); }
From source file:com.asakusafw.runtime.stage.inprocess.InProcessStageConfiguratorTest.java
License:Apache License
/**
 * Creates a fresh job named "testing", assuming no job runner is configured.
 */
private Job newJob() {
    try {
        Job result = JobCompatibility.newJob(new ConfigurationProvider().newInstance());
        // Skip the test entirely if a job runner is already configured.
        Assume.assumeThat(result.getConfiguration().get(StageConstants.PROP_JOB_RUNNER), is(nullValue()));
        result.setJobName("testing");
        return result;
    } catch (IOException e) {
        // Treat setup I/O problems as "assumption failed", not test failure.
        Assume.assumeNoException(e);
        throw new AssertionError(e);
    }
}
From source file:com.asakusafw.thundergate.runtime.cache.mapreduce.CacheBuildClient.java
License:Apache License
/**
 * Creates the cache-build job for the current table, stamped with an
 * invalidation timestamp.
 */
private Job newJob() throws IOException {
    Job result = Job.getInstance(getConf());
    result.setJobName("TGC-CREATE-" + tableName);
    Invalidation.setupInvalidationTimestamp(result.getConfiguration(), tableName);
    return result;
}
From source file:com.awcoleman.BouncyCastleGenericCDRHadoop.BasicDriverMapReduce.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { System.out.println("Missing input and output filenames. Exiting."); System.exit(1);//from w w w. j av a2 s. c o m } Job job = new Job(super.getConf()); job.setJarByClass(BasicDriverMapReduce.class); job.setJobName("BasicDriver1"); job.setMapperClass(BasicMapper.class); job.setReducerClass(BasicReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setInputFormatClass(RawFileAsBinaryInputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.awcoleman.BouncyCastleGenericCDRHadoopWithWritable.BasicDriverMapReduce.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { System.out.println("Missing input and output filenames. Exiting."); System.exit(1);//from w w w.jav a 2s . c om } @SuppressWarnings("deprecation") Job job = new Job(super.getConf()); job.setJarByClass(BasicDriverMapReduce.class); job.setJobName("BasicDriverMapReduce"); job.setMapperClass(BasicMapper.class); job.setReducerClass(BasicReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(CallDetailRecord.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); job.setInputFormatClass(RawFileAsBinaryInputFormat.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }