List of usage examples for org.apache.hadoop.mapred JobConf setJobName
public void setJobName(String name)
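For orientation before the full examples below, here is a minimal, hypothetical sketch of the usual call pattern with the old mapred API: the name is set on the JobConf before the job is submitted through JobClient, and it is the label that shows up in the cluster UI and job history. The driver, mapper, and reducer class names and the input/output paths are placeholders, not taken from any example on this page.

// Minimal sketch (hypothetical class names and paths), old org.apache.hadoop.mapred API
JobConf conf = new JobConf(MyDriver.class);    // MyDriver is a placeholder driver class
conf.setJobName("my-example-job");             // name shown in the cluster UI and job history
conf.setMapperClass(MyMapper.class);           // placeholder mapper
conf.setReducerClass(MyReducer.class);         // placeholder reducer
conf.setOutputKeyClass(Text.class);
conf.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(conf, new Path("in"));
FileOutputFormat.setOutputPath(conf, new Path("out"));
JobClient.runJob(conf);                        // submit and block until completion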
From source file:com.linkedin.mapred.AbstractAvroJob.java
License:Open Source License
/**
 * Sets up various standard settings in the JobConf. You probably don't want to mess with this.
 *
 * @return A configured JobConf.
 * @throws IOException
 * @throws URISyntaxException
 */
protected JobConf createJobConf() throws IOException, URISyntaxException {
    JobConf conf = new JobConf();
    conf.setJobName(getJobId());
    conf.setInputFormat(AvroInputFormat.class);
    conf.setOutputFormat(AvroOutputFormat.class);
    AvroOutputFormat.setDeflateLevel(conf, 9);

    String hadoop_ugi = _config.getString("hadoop.job.ugi", null);
    if (hadoop_ugi != null) {
        conf.set("hadoop.job.ugi", hadoop_ugi);
    }
    if (_config.getBoolean("is.local", false)) {
        conf.set("mapred.job.tracker", "local");
        conf.set("fs.default.name", "file:///");
        conf.set("mapred.local.dir", "/tmp/map-red");
        _log.info("Running locally, no hadoop jar set.");
    }

    // set JVM options if present
    if (_config.containsKey("mapred.child.java.opts")) {
        conf.set("mapred.child.java.opts", _config.getString("mapred.child.java.opts"));
        _log.info("mapred.child.java.opts set to " + _config.getString("mapred.child.java.opts"));
    }

    if (_config.containsKey(INPUT_PATHS)) {
        List<String> inputPathnames = _config.getStringList(INPUT_PATHS);
        for (String pathname : inputPathnames) {
            AvroUtils.addAllSubPaths(conf, new Path(pathname));
        }
        AvroJob.setInputSchema(conf, AvroUtils.getAvroInputSchema(conf));
    }

    if (_config.containsKey(OUTPUT_PATH)) {
        Path path = new Path(_config.get(OUTPUT_PATH));
        AvroOutputFormat.setOutputPath(conf, path);
        if (_config.getBoolean("force.output.overwrite", false)) {
            FileSystem fs = FileOutputFormat.getOutputPath(conf).getFileSystem(conf);
            fs.delete(FileOutputFormat.getOutputPath(conf), true);
        }
    }

    // set all hadoop configs
    for (String key : _config.keySet()) {
        String lowerCase = key.toLowerCase();
        if (lowerCase.startsWith(HADOOP_PREFIX)) {
            String newKey = key.substring(HADOOP_PREFIX.length());
            conf.set(newKey, _config.get(key));
        }
    }
    return conf;
}
From source file:com.liveramp.hank.hadoop.HadoopDomainBuilder.java
License:Apache License
public static void main(String[] args) throws IOException, InvalidConfigurationException { CommandLineChecker.check(args,/*from w w w .j av a 2s . co m*/ new String[] { "domain name", "config path", "jobjar", "input path", "output_path" }, HadoopDomainBuilder.class); String domainName = args[0]; CoordinatorConfigurator configurator = new YamlCoordinatorConfigurator(args[1]); String jobJar = args[2]; String inputPath = args[3]; String outputPath = args[4]; DomainBuilderProperties properties = new DomainBuilderProperties(domainName, configurator) .setOutputPath(outputPath); JobConf conf = new JobConf(); conf.setJar(jobJar); conf.setJobName(HadoopDomainBuilder.class.getSimpleName() + " Domain " + domainName + ", Output path: " + outputPath); HadoopDomainBuilder builder = new HadoopDomainBuilder(conf, inputPath, SequenceFileInputFormat.class, DomainBuilderMapperDefault.class); LOG.info("Building Hank domain " + domainName + " from input " + inputPath + " and coordinator configuration " + configurator); // TODO: Create DomainVersionProperties throw new NotImplementedException("TODO: Create DomainVersionProperties"); // builder.buildHankDomain(properties, null); }
From source file:com.liveramp.hank.hadoop.HadoopDomainCompactor.java
License:Apache License
public static void main(String[] args) throws IOException, InvalidConfigurationException { CommandLineChecker.check(args, new String[] { "domain name", "version to compact number", "mapred.task.timeout", "config path", "jobjar" }, HadoopDomainCompactor.class); String domainName = args[0];// w w w.j a v a 2 s . c o m Integer versionToCompactNumber = Integer.valueOf(args[1]); Integer mapredTaskTimeout = Integer.valueOf(args[2]); CoordinatorConfigurator configurator = new YamlCoordinatorConfigurator(args[3]); String jobJar = args[4]; DomainCompactorProperties properties = new DomainCompactorProperties(domainName, versionToCompactNumber, configurator); JobConf conf = new JobConf(); conf.setJar(jobJar); conf.set("mapred.task.timeout", mapredTaskTimeout.toString()); conf.setJobName(HadoopDomainCompactor.class.getSimpleName() + " Domain " + domainName + ", Version " + versionToCompactNumber); HadoopDomainCompactor compactor = new HadoopDomainCompactor(conf); LOG.info("Compacting Hank domain " + domainName + " version " + versionToCompactNumber + " with coordinator configuration " + configurator); compactor.buildHankDomain(properties, new IncrementalDomainVersionProperties.Base("Version " + versionToCompactNumber + " compacted")); }
From source file:com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java
License:Apache License
public static JobConf createDataJoinJob(String args[]) throws IOException {
    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormat(inputFormat);
    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);
    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);
    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}
From source file:com.me.neu.Popular_question.Runner.java
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(Runner.class); conf.setJobName("pop-ques"); conf.setMapperClass(Mapper1.class); // conf.setOutputKeyComparatorClass(DescendingIntComparable.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setReducerClass(Reducer1.class); // take the input and output from the command line FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);// www. ja v a 2 s . co m }
From source file:com.me.neu.popular_tag_year.Runner.java
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(Runner.class); conf.setJobName("tag-year"); conf.setMapperClass(Mapper1.class); // conf.setOutputKeyComparatorClass(DescendingIntComparable.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setReducerClass(Reducer1.class); // take the input and output from the command line FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from w w w .j ava 2s .c o m*/ }
From source file:com.me.neu.stackoverflow.Runner.java
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(Runner.class); conf.setJobName("tag-reco"); conf.setMapperClass(Mapper1.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setReducerClass(Reducer1.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/* w w w . j a v a2s . c o m*/ }
From source file:com.mh2c.WikipediaDumpLoaderDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { // arg checks JobConf conf = new JobConf(getClass()); conf.setJobName("WP dump loader"); // Set the mapper class, but skip the reduce phase conf.setMapperClass(WikipediaDumpLoaderMapper.class); conf.setNumReduceTasks(0);//from w ww. ja v a 2 s . c om // The object key/value pairs are text conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); // Stream XML into the job conf.setInputFormat(StreamInputFormat.class); StreamInputFormat.addInputPath(conf, new Path(args[0])); // Use the XML record reader, with each page as one record conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader"); conf.set("stream.recordreader.begin", "<page>"); conf.set("stream.recordreader.end", "</page>"); // Emit sequence files conf.setOutputFormat(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf); return 0; }
From source file:com.mh2c.WikipediaWordCountDriver.java
License:Apache License
@Override public int run(String[] args) throws Exception { // arg checks JobConf conf = new JobConf(getClass()); conf.setJobName("WP word count"); // Set the mapper and reducer classes, and use the reducer as a combiner conf.setMapperClass(WikipediaWordCountMapper.class); conf.setReducerClass(WikipediaWordCountReducer.class); conf.setCombinerClass(WikipediaWordCountReducer.class); // The object key/value pairs are text words and integer counts conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); // Read in sequence files conf.setInputFormat(SequenceFileInputFormat.class); SequenceFileInputFormat.addInputPath(conf, new Path(args[0])); // Emit ordinary text files conf.setOutputFormat(TextOutputFormat.class); TextOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);/*from www . j a v a 2s . c o m*/ return 0; }
From source file:com.mycompany.app.TestStagingDirectoryPermissions.java
License:Apache License
@Test
public void perms() throws IOException, InterruptedException {
    MiniDFSCluster minidfs = null;
    FileSystem fs = null;
    MiniMRClientCluster minimr = null;
    try {
        Configuration conf = new Configuration(true);
        conf.set("fs.permission.umask-mode", "0077");
        minidfs = new MiniDFSCluster.Builder(conf).build();
        minidfs.waitActive();

        fs = minidfs.getFileSystem();
        conf.set(FileSystem.FS_DEFAULT_NAME_KEY, fs.getUri().toString());
        Path p = path("/in");
        fs.mkdirs(p);

        FSDataOutputStream os = fs.create(new Path(p, "input.txt"));
        os.write("hello!".getBytes("UTF-8"));
        os.close();

        String user = UserGroupInformation.getCurrentUser().getUserName();
        Path home = new Path("/User/" + user);
        fs.mkdirs(home);
        minimr = MiniMRClientClusterFactory.create(this.getClass(), 1, conf);
        JobConf job = new JobConf(minimr.getConfig());

        job.setJobName("PermsTest");
        JobClient client = new JobClient(job);
        FileInputFormat.addInputPath(job, p);
        FileOutputFormat.setOutputPath(job, path("/out"));
        job.setInputFormat(TextInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setMapperClass(MySleepMapper.class);

        job.setNumReduceTasks(1);
        RunningJob submittedJob = client.submitJob(job);

        // Sleep for a bit to let localization finish
        System.out.println("Sleeping...");
        Thread.sleep(3 * 1000l);
        System.out.println("Done sleeping...");
        assertFalse(UserGroupInformation.isSecurityEnabled());

        Path stagingRoot = path("/tmp/hadoop-yarn/staging/" + user + "/.staging/");
        assertTrue(fs.exists(stagingRoot));
        assertEquals(1, fs.listStatus(stagingRoot).length);
        Path staging = fs.listStatus(stagingRoot)[0].getPath();
        Path jobXml = path(staging + "/job.xml");
        assertTrue(fs.exists(jobXml));

        FileStatus fileStatus = fs.getFileStatus(jobXml);
        System.out.println("job.xml permission = " + fileStatus.getPermission());
        assertTrue(fileStatus.getPermission().getOtherAction().implies(FsAction.READ));
        assertTrue(fileStatus.getPermission().getGroupAction().implies(FsAction.READ));

        submittedJob.waitForCompletion();
    } finally {
        if (minimr != null) {
            minimr.stop();
        }
        if (fs != null) {
            fs.close();
        }
        if (minidfs != null) {
            minidfs.shutdown(true);
        }
    }
}