List of usage examples for org.apache.hadoop.mapreduce.Job.setJobName
public void setJobName(String name) throws IllegalStateException
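Before the project examples, here is a minimal self-contained sketch of where setJobName fits in a driver. The SetJobNameExample class and the identity mapper/reducer wiring are illustrative only, not taken from any of the projects below. The name must be set while the job is still being defined; calling setJobName after the job has been submitted raises the IllegalStateException declared in the signature.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);

        // setJobName only works while the job is still in the DEFINE state;
        // calling it after submit()/waitForCompletion() throws IllegalStateException.
        job.setJobName("identity-copy: " + args[0]);

        job.setJarByClass(SetJobNameExample.class);

        // Identity Mapper/Reducer so the example compiles without custom classes.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The job name set here is what appears in the ResourceManager/JobTracker UI, so the examples below typically build it from the tool's class name plus its input path.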
From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(
            OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:cascading.flow.hadoop.MapReduceFlowPlatformTest.java
License:Open Source License
@Test
public void testCascade() throws IOException {
    getPlatform().copyFromLocal(inputFileApache);

    // Setup two standard cascading flows that will generate the input for the first MapReduceFlow
    Tap source1 = new Hfs(new TextLine(new Fields("offset", "line")), remove(inputFileApache, false));
    String sinkPath4 = getOutputPath("flow4");
    Tap sink1 = new Hfs(new TextLine(new Fields("offset", "line")), remove(sinkPath4, true), SinkMode.REPLACE);
    Flow firstFlow = getPlatform().getFlowConnector(getProperties()).connect(source1, sink1, new Pipe("first-flow"));

    String sinkPath5 = getOutputPath("flow5");
    Tap sink2 = new Hfs(new TextLine(new Fields("offset", "line")), remove(sinkPath5, true), SinkMode.REPLACE);
    Flow secondFlow = getPlatform().getFlowConnector(getProperties()).connect(sink1, sink2, new Pipe("second-flow"));

    JobConf defaultConf = HadoopPlanner.createJobConf(getProperties());

    JobConf firstConf = new JobConf(defaultConf);
    firstConf.setJobName("first-mr");

    firstConf.setOutputKeyClass(LongWritable.class);
    firstConf.setOutputValueClass(Text.class);

    firstConf.setMapperClass(IdentityMapper.class);
    firstConf.setReducerClass(IdentityReducer.class);

    firstConf.setInputFormat(TextInputFormat.class);
    firstConf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(firstConf, new Path(remove(sinkPath5, true)));
    String sinkPath1 = getOutputPath("flow1");
    FileOutputFormat.setOutputPath(firstConf, new Path(remove(sinkPath1, true)));

    Flow firstMR = new MapReduceFlow(firstConf, true);

    JobConf secondConf = new JobConf(defaultConf);
    secondConf.setJobName("second-mr");

    secondConf.setOutputKeyClass(LongWritable.class);
    secondConf.setOutputValueClass(Text.class);

    secondConf.setMapperClass(IdentityMapper.class);
    secondConf.setReducerClass(IdentityReducer.class);

    secondConf.setInputFormat(TextInputFormat.class);
    secondConf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(secondConf, new Path(remove(sinkPath1, true)));
    String sinkPath2 = getOutputPath("flow2");
    FileOutputFormat.setOutputPath(secondConf, new Path(remove(sinkPath2, true)));

    Flow secondMR = new MapReduceFlow(secondConf, true);

    Job job = new Job(defaultConf);
    job.setJobName("third-mr");

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(org.apache.hadoop.mapreduce.Mapper.class);
    job.setReducerClass(org.apache.hadoop.mapreduce.Reducer.class);

    job.setInputFormatClass(org.apache.hadoop.mapreduce.lib.input.TextInputFormat.class);
    job.setOutputFormatClass(org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.class);
    job.getConfiguration().set("mapred.mapper.new-api", "true");
    job.getConfiguration().set("mapred.reducer.new-api", "true");

    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(remove(sinkPath2, true)));
    String sinkPath3 = getOutputPath("flow3");
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(remove(sinkPath3, true)));

    Flow thirdMR = new MapReduceFlow(new JobConf(job.getConfiguration()), true);

    CascadeConnector cascadeConnector = new CascadeConnector();

    // pass out of order
    Cascade cascade = cascadeConnector.connect(firstFlow, secondFlow, thirdMR, firstMR, secondMR);

    cascade.complete();

    validateLength(new Hfs(new TextLine(), sinkPath3).openForRead(new HadoopFlowProcess(defaultConf)), 10);
}
From source file:cc.slda.AnnotateDocuments.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    options.addOption(OptionBuilder.withArgName(PCUTOFF).hasArg()
            .withDescription("probability of topic assignment").create(PCUTOFF));
    options.addOption(OptionBuilder.withArgName(INDEX).hasArg()
            .withDescription("path to data directory containing term and title indices").create(INDEX));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(INDEX)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String indexPath = cmdline.getOptionValue(INDEX);
    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    float cutoff = 0.9f;
    if (cmdline.hasOption(PCUTOFF)) {
        cutoff = Float.parseFloat(cmdline.getOptionValue(PCUTOFF));
    }

    LOG.info("Tool: " + AnnotateDocuments.class.getSimpleName());
    LOG.info(" - indices path: " + indexPath);
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);
    LOG.info(" - log(probCutoff): " + Math.log(cutoff));

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Job job = Job.getInstance(conf);
    job.setJobName(AnnotateDocuments.class.getSimpleName());
    job.setJarByClass(AnnotateDocuments.class);

    String termIndex = indexPath + Path.SEPARATOR + TERM;
    String titleIndex = indexPath + Path.SEPARATOR + TITLE;

    Path termIndexPath = new Path(termIndex);
    Path titleIndexPath = new Path(titleIndex);

    Preconditions.checkArgument(fs.exists(termIndexPath), "Missing term index files... " + termIndexPath);
    DistributedCache.addCacheFile(termIndexPath.toUri(), job.getConfiguration());
    Preconditions.checkArgument(fs.exists(titleIndexPath), "Missing title index files... " + titleIndexPath);
    DistributedCache.addCacheFile(titleIndexPath.toUri(), job.getConfiguration());

    job.setNumReduceTasks(reduceTasks);
    conf.setFloat(PCUTOFF, cutoff);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(HMapSIW.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(HMapSIW.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.DBReader.java
public static void main(String[] args) throws Exception {
    Path crawlPath = new Path("task2");
    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path output = new Path("output");

    Configuration config = CrawlerConfiguration.create();
    FileSystem fs = FileSystem.get(config);
    if (fs.exists(output)) {
        fs.delete(output);
    }

    Job job = new Job(config);
    job.setJobName("dbreader " + crawlPath.toString());
    job.setMapperClass(DBReaderMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, output);
    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Generator.java
public static String generate(Path crawlPath, Configuration conf) throws Exception {
    SegmentUtil.initSegments(crawlPath, conf);
    String segmentName = SegmentUtil.createSegment(crawlPath, conf);

    Path currentPath = new Path(crawlPath, "crawldb/current");
    Path generatePath = new Path(crawlPath, "segments/" + segmentName + "/generate");

    Job job = new Job(conf);
    job.setJobName("generate " + crawlPath.toString());
    job.setJarByClass(Generator.class);
    job.setReducerClass(GeneratorReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, currentPath);
    FileOutputFormat.setOutputPath(job, generatePath);
    job.waitForCompletion(true);

    long count = job.getCounters().findCounter("generator", "count").getValue();
    System.out.println("total generate:" + count);

    if (count == 0) {
        return null;
    } else {
        return segmentName;
    }
}
From source file:cn.edu.hfut.dmic.webcollector.crawldb.Merge.java
public static void merge(Path crawlPath, Path[] mergePaths, Configuration conf, String jobName)
        throws Exception {
    Job job = new Job(conf);
    job.setJobName(jobName + " " + crawlPath.toString());
    job.setJarByClass(Merge.class);
    // job.getConfiguration().set("mapred", "/home/hu/mygit/WebCollector2/WebCollectorCluster/target/WebCollectorCluster-2.0.jar");
    Path crawldbPath = new Path(crawlPath, "crawldb");
    Path newdb = new Path(crawldbPath, "new");
    Path currentdb = new Path(crawldbPath, "current");

    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(currentdb)) {
        FileInputFormat.addInputPath(job, currentdb);
    }
    if (fs.exists(newdb)) {
        fs.delete(newdb);
    }
    for (Path mergePath : mergePaths) {
        FileInputFormat.addInputPath(job, mergePath);
    }
    FileOutputFormat.setOutputPath(job, newdb);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(CrawlDatum.class);
    job.setMapperClass(MergeMap.class);
    job.setReducerClass(MergeReduce.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.waitForCompletion(true);
}
From source file:cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.java
public static void fetch(Path crawlPath, String segmentName, Configuration conf) throws Exception {
    Path segmentPath = new Path(crawlPath, "segments/" + segmentName);
    Path generatePath = new Path(segmentPath, "generate");

    Job job = new Job(conf);
    job.setJobName("fetch " + crawlPath.toString());
    job.setJarByClass(Fetcher.class);
    job.setReducerClass(FetcherReducer.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(FetcherOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(CrawlDatum.class);
    FileInputFormat.addInputPath(job, generatePath);
    FileOutputFormat.setOutputPath(job, segmentPath);
    job.waitForCompletion(true);
}
From source file:cn.itcast.hadoop.mr.wordcount.DBCountPageView.java
License:Apache License
@Override
// Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    // Default MySQL driver class and connection URL; both can be overridden from the command line.
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    // Initialize the database using the chosen driver class and URL.
    initialize(driverClassName, url);

    // Hadoop configuration.
    Configuration conf = getConf();

    // Configure the JDBC connection for the job.
    DBConfiguration.configureDB(conf, driverClassName, url);

    // Create the job, then set its name and jar.
    Job job = Job.getInstance(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);

    // Mapper, combiner, and reducer classes.
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    // Database input:
    // setInput(Job job, Class<? extends DBWritable> inputClass, String tableName, String conditions, String orderBy, String... fieldNames)
    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames);

    // Database output (analogous to FileOutputFormat.setOutputPath for file-based jobs).
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    // Map output key/value types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    // Reduce output key/value types.
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
From source file:cn.lhfei.hadoop.ch02.MaxTemperature.java
License:Apache License
public static void main(String[] args) {
    log.debug("Logging ... ");

    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }

    try {
        Job job = new Job();
        job.setJarByClass(MaxTemperature.class);
        job.setJobName("Max temperature");

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        /*FileInputFormat.addInputPath(job, new Path(INPUT));
        FileOutputFormat.setOutputPath(job, new Path(OUTPUT));*/

        job.setMapperClass(MaxTemperatureMapper.class);
        job.setReducerClass(MaxTemperatureReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (IllegalStateException e) {
        log.error(e.getMessage(), e);
    } catch (IllegalArgumentException e) {
        log.error(e.getMessage(), e);
    } catch (ClassNotFoundException e) {
        log.error(e.getMessage(), e);
    } catch (IOException e) {
        log.error(e.getMessage(), e);
    } catch (InterruptedException e) {
        log.error(e.getMessage(), e);
    }
}
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java
License:Apache License
@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);
    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(), context.getPlugins(), context.getPluginInstantiator());
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence no need to create new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace user's Mapper & Reducer's with our wrappers in job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce container processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar
            // The MapReduceContainerLauncher inside the launcher.jar will create a MapReduceClassLoader and launch
            // the actual MapReduce AM/Task from that
            // We explicitly localize the mr-framework, but do not use it with the classpath
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start long-running tx to be used by mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember tx, so that we can re-use it in mapreduce tasks
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();

            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        cleanupTask.run();
        throw t;
    }
}