List of usage examples for the setJobName method of org.apache.hadoop.mapreduce.Job
public void setJobName(String name) throws IllegalStateException
From source file:io.bfscan.clueweb12.BuildVByteDocVectors.java
License:Apache License
/** * Runs this tool./*from w w w . j a va 2s .c om*/ */ @SuppressWarnings("static-access") public int run(String[] args) throws Exception { Options options = new Options(); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION)); options.addOption( OptionBuilder.withArgName("path").hasArg().withDescription("dictionary").create(DICTIONARY_OPTION)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(REDUCERS_OPTION)); options.addOption(OptionBuilder.withArgName("string " + AnalyzerFactory.getOptions()).hasArg() .withDescription("preprocessing").create(PREPROCESSING)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION) || !cmdline.hasOption(DICTIONARY_OPTION) || !cmdline.hasOption(PREPROCESSING)) { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String input = cmdline.getOptionValue(INPUT_OPTION); String output = cmdline.getOptionValue(OUTPUT_OPTION); String dictionary = cmdline.getOptionValue(DICTIONARY_OPTION); String preprocessing = cmdline.getOptionValue(PREPROCESSING); Job job = Job.getInstance(getConf()); job.setJobName(BuildVByteDocVectors.class.getSimpleName() + ":" + input); job.setJarByClass(BuildVByteDocVectors.class); LOG.info("Tool name: " + BuildVByteDocVectors.class.getSimpleName()); LOG.info(" - input: " + input); 
LOG.info(" - output: " + output); LOG.info(" - dictionary: " + dictionary); LOG.info(" - preprocessing: " + preprocessing); if (cmdline.hasOption(REDUCERS_OPTION)) { int numReducers = Integer.parseInt(cmdline.getOptionValue(REDUCERS_OPTION)); LOG.info(" - reducers: " + numReducers); job.setNumReduceTasks(numReducers); } else { job.setNumReduceTasks(0); } FileInputFormat.setInputPaths(job, input); FileOutputFormat.setOutputPath(job, new Path(output)); job.getConfiguration().set(DICTIONARY_OPTION, dictionary); job.getConfiguration().set(PREPROCESSING, preprocessing); job.setInputFormatClass(ClueWeb12InputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(BytesWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BytesWritable.class); job.setMapperClass(MyMapper.class); FileSystem.get(getConf()).delete(new Path(output), true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:io.covert.binary.analysis.BinaryAnalysisJob.java
License:Apache License
/**
 * Configures and runs the map-only binary analysis job.
 *
 * @param args expects exactly two entries: the input directory and the
 *             output directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    // NOTE(review): usage(...) presumably reports the problem and terminates;
    // confirm, since execution continues to args[0] otherwise.
    if (args.length != 2) {
        usage("");
    }

    final String inputDir = args[0];
    final String outputDir = args[1];

    final Configuration conf = getConf();
    for (String setting : requiredSettings) {
        if (conf.get(setting) == null) {
            usage("Missing required setting: " + setting);
        }
    }

    final String jobName = BinaryAnalysisJob.class.getName() + " inDir=" + inputDir + ", outDir=" + outputDir;

    final Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(getClass());

    // Map-only: no reduce phase is configured.
    job.setMapperClass(BinaryAnalysisMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Sequence files in, sequence files out.
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, new Path(inputDir));
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.covert.dns.collection.CollectionJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 4) { usage(""); }//from w w w. j a v a 2 s.c om String dclass = args[0]; String types = args[1]; String inDir = args[2]; String outDir = args[3]; Configuration conf = getConf(); if (conf.get("dns.collection.num.resolvers") == null) conf.setInt("dns.collection.num.resolvers", 50); if (conf.get("dns.collection.nameservers") == null) conf.set("dns.collection.nameservers", "127.0.0.1"); Job job = new Job(conf); job.setJobName(CollectionJob.class.getSimpleName() + ": types=" + types + ", dclass=" + dclass + " inDir=" + inDir + ", outDir=" + outDir + ", resolvers=" + conf.get("dns.collection.nameservers")); job.setJarByClass(getClass()); job.setMapperClass(CollectionMapper.class); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormatClass(DnsRequestInputFormat.class); DnsRequestInputFormat.setInputPaths(job, new Path(inDir)); DnsRequestInputFormat.configure(job, dclass.toUpperCase(), Arrays.asList(types.split(",")), Arrays.asList("")); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outDir)); SequenceFileOutputFormat.setCompressOutput(job, true); job.submit(); int retVal = job.waitForCompletion(true) ? 
0 : 1; CounterGroup counters = job.getCounters().getGroup(CollectionMapper.RESOLVER_GROUP); Counter constructMessageMS = counters.findCounter(CollectionMapper.CONSTRUCT_MESSAGE_MS); Counter parseResponseMS = counters.findCounter(CollectionMapper.PARSE_RESPONSE_MS); Counter performRequestMS = counters.findCounter(CollectionMapper.PERFORM_REQUEST_MS); Counter totalRequestHandlingMS = counters.findCounter(CollectionMapper.TOTAL_REQUEST_HANDLING_MS); Log.info("Total ConstructMessage percent: " + (double) (constructMessageMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue())); Log.info("Total ParseResponse percent: " + (double) (parseResponseMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue())); Log.info("Total PerformRequest percent: " + (double) (performRequestMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue())); return retVal; }
From source file:io.covert.dns.extract.ExtractorJob.java
License:Apache License
/**
 * Configures and runs the extraction job, which applies a JEXL expression in
 * the mapper and reduces with {@code UniqueKeyOnlyReducer}.
 *
 * @param args expects exactly three entries: the JEXL expression, the input
 *             directory and the output directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    // NOTE(review): usage(...) presumably reports the problem and terminates;
    // confirm, since execution continues to args[0] otherwise.
    if (args.length != 3) {
        usage("");
    }

    final String jexlExpression = args[0];
    final String inputDir = args[1];
    final String outputDir = args[2];

    // The expression must be in the configuration before the Job is created from it.
    final Configuration conf = getConf();
    conf.set(ExtractorMapper.EXTRACTOR_JEXL_EXPRESSION, jexlExpression);

    final String jobName = ExtractorJob.class.getSimpleName() + ": inDir=" + inputDir + ", outDir=" + outputDir
            + ", expression=[" + jexlExpression + "]";

    final Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(getClass());

    job.setMapperClass(ExtractorMapper.class);
    job.setReducerClass(UniqueKeyOnlyReducer.class);

    // Use as many reduce tasks as the cluster reports task trackers.
    final int taskTrackers = new JobClient(new JobConf(conf)).getClusterStatus().getTaskTrackers();
    job.setNumReduceTasks(taskTrackers);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.covert.dns.filtering.FilterJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 3) { usage(""); }// www .j a v a 2 s. com String filter = args[0]; String inDir = args[1]; String outDir = args[2]; Configuration conf = getConf(); conf.set(FilterMapper.FILTER_JEXL_EXPRESSION, filter); Job job = new Job(conf); job.setJobName(FilterJob.class.getSimpleName() + ": inDir=" + inDir + ", outDir=" + outDir + ", filter=[" + filter + "]"); job.setJarByClass(getClass()); job.setMapperClass(FilterMapper.class); job.setReducerClass(Reducer.class); // Identity Reduce... job.setNumReduceTasks(new JobClient(new JobConf(conf)).getClusterStatus().getTaskTrackers()); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(SequenceFileInputFormat.class); FileInputFormat.setInputPaths(job, new Path(inDir)); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outDir)); SequenceFileOutputFormat.setCompressOutput(job, true); job.submit(); int retVal = job.waitForCompletion(true) ? 0 : 1; return retVal; }
From source file:io.covert.dns.geo.GeoJob.java
License:Apache License
/**
 * Configures and runs the map-only geo job, passing the two MaxMind database
 * file locations to the tasks through the configuration.
 *
 * @param args expects exactly four entries: the geo database file, the ASN
 *             database file, the input directory and the output directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    // NOTE(review): usage(...) presumably reports the problem and terminates;
    // confirm, since execution continues to args[0] otherwise.
    if (args.length != 4) {
        usage("");
    }

    final String geoDbFile = args[0];
    final String asnDbFile = args[1];
    final String inputDir = args[2];
    final String outputDir = args[3];

    // Both database locations must be set before the Job is created from the configuration.
    final Configuration conf = getConf();
    conf.set("maxmind.geo.database.file", geoDbFile);
    conf.set("maxmind.asn.database.file", asnDbFile);

    final String jobName = GeoJob.class.getSimpleName() + ": dbfile=" + geoDbFile + ", asnDB=" + asnDbFile
            + " inDir=" + inputDir + ", outDir=" + outputDir;

    final Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(getClass());

    // Map-only: no reduce phase is configured.
    job.setMapperClass(GeoMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inputDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outputDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.covert.dns.parse.ParseJob.java
License:Apache License
/**
 * Configures and runs the parse job, which maps with {@code ParseMapper} and
 * reduces with {@code UniqueKeyOnlyReducer}.
 *
 * @param args the input directory followed by the output directory; any
 *             further entries are ignored
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    // Fail with a descriptive message instead of a bare
    // ArrayIndexOutOfBoundsException when arguments are missing.
    if (args.length < 2) {
        throw new IllegalArgumentException(
                "Expected arguments: <inDir> <outDir>, but got " + args.length + " argument(s)");
    }

    String inDir = args[0];
    String outDir = args[1];

    Configuration conf = getConf();

    Job job = new Job(conf);
    job.setJobName(ParseJob.class.getSimpleName() + ": inDir=" + inDir + ", outDir=" + outDir);
    job.setJarByClass(getClass());

    job.setMapperClass(ParseMapper.class);
    job.setReducerClass(UniqueKeyOnlyReducer.class);
    // Use as many reduce tasks as the cluster reports task trackers.
    job.setNumReduceTasks(new JobClient(new JobConf(conf)).getClusterStatus().getTaskTrackers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, new Path(inDir));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.covert.dns.storage.StorageJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { String inDir = args[0];/*from w ww .j a va2 s.co m*/ Configuration conf = getConf(); Job job = new Job(conf); job.setJarByClass(getClass()); job.setJobName(StorageJob.class.getSimpleName() + ": inDir=" + inDir); job.setMapperClass(StorageMapper.class); job.setNumReduceTasks(0); job.setInputFormatClass(SequenceFileInputFormat.class); FileInputFormat.setInputPaths(job, new Path(inDir)); // This job doesn't write output via Hadoop, it uses the configured storage modules job.setOutputFormatClass(NullOutputFormat.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(NullWritable.class); job.submit(); return job.waitForCompletion(true) ? 0 : 1; }
From source file:io.covert.util.FileFormatToConverterJob.java
License:Apache License
/**
 * Configures and runs the map-only conversion job over text input. The job
 * produces no Hadoop output (it uses {@code NullOutputFormat}).
 *
 * <p>If {@code stream.process.command} is not set in the configuration, it
 * is defaulted to {@code /opt/decompress.sh}.
 *
 * @param args expects exactly one entry: the input directory
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job submission or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    // NOTE(review): usage(...) presumably reports the problem and terminates;
    // confirm, since execution continues to args[0] otherwise.
    if (args.length != 1) {
        usage("");
    }

    final String inputDir = args[0];

    final Configuration conf = getConf();
    final boolean commandMissing = conf.get("stream.process.command") == null;
    if (commandMissing) {
        conf.set("stream.process.command", "/opt/decompress.sh");
    }

    final String jobName = FileFormatToConverterJob.class.getName() + " inDir=" + inputDir;

    final Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(getClass());

    // Map-only: no reduce phase is configured.
    job.setMapperClass(ConvertMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.setInputPaths(job, new Path(inputDir));

    job.setOutputFormatClass(NullOutputFormat.class);

    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:io.fluo.stress.trie.Init.java
License:Apache License
/**
 * Runs a job that reads sequence files from {@code input}, reduces them with
 * {@code UniqueReducer} and writes sequence files to {@code tmp}.
 *
 * @param input path added as the job's sequence-file input
 * @param tmp   path the job's sequence-file output is written to
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
private int unique(Path input, Path tmp) throws Exception {
    final Job uniqueJob = Job.getInstance(getConf());
    uniqueJob.setJarByClass(Init.class);
    uniqueJob.setJobName(Init.class.getName() + "_unique");

    // Input side.
    uniqueJob.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(uniqueJob, input);

    // No mapper is set explicitly; only the reducer is configured.
    uniqueJob.setReducerClass(UniqueReducer.class);

    // Output side.
    uniqueJob.setOutputKeyClass(LongWritable.class);
    uniqueJob.setOutputValueClass(NullWritable.class);
    uniqueJob.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(uniqueJob, tmp);

    if (uniqueJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}