List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
From source file:it.crs4.seal.tsv_sort.TsvSort.java
License:Apache License
public int run(String[] args) throws Exception { LOG.info("starting"); TsvSortOptionParser parser = new TsvSortOptionParser(); parser.parse(getConf(), args);/*from w w w . j av a 2 s . c om*/ LOG.info("Using " + parser.getNReduceTasks() + " reduce tasks"); Job job = new Job(getConf()); job.setJobName("TsvSort " + parser.getInputPaths().get(0)); job.setJarByClass(TsvSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(TsvInputFormat.class); job.setOutputFormatClass(TextValueOutputFormat.class); job.setPartitionerClass(TotalOrderPartitioner.class); // output path FileOutputFormat.setOutputPath(job, parser.getOutputPath()); FileSystem fs = parser.getOutputPath().getFileSystem(job.getConfiguration()); /* * * Pick a random name for the partition file in the same directory as the * output path. So, TsvSort /user/me/input /user/me/output * results in the partition file being placed in /user/me/_partition.lst.12340921387402174 * * Why not place it directly in the input path? * * We wouldn't be able to run two sorts on the same data at the same time. * We've received complaints about this in the past, so it has been a * limit in practice. * * Why not place it directly in the output path? * * We'd have to create the output path before the output format did. * For this to work we'd have to disable the FileOutputFormat's default check * that verifies that the output directory doesn't exist. This means that we'd * need some other way to ensure that we're not writing to the same path where * some other job wrote. 
*/ Path partitionFile; Random rnd = new Random(); do { partitionFile = new Path(parser.getOutputPath().getParent(), String.format("_partition.lst.%012d", Math.abs(rnd.nextLong()))); } while (fs.exists(partitionFile)); // this is still subject to a race condition between it and another instance of this program partitionFile = partitionFile.makeQualified(fs); LOG.info("partition file path: " + partitionFile); URI partitionUri = new URI(partitionFile.toString() + "#" + PARTITION_SYMLINK); LOG.debug("partitionUri for distributed cache: " + partitionUri); // input paths for (Path p : parser.getInputPaths()) TsvInputFormat.addInputPath(job, p); LOG.info("sampling input"); TextSampler.writePartitionFile(new TsvInputFormat(), job, partitionFile); LOG.info("created partitions"); try { DistributedCache.addCacheFile(partitionUri, job.getConfiguration()); DistributedCache.createSymlink(job.getConfiguration()); int retcode = job.waitForCompletion(true) ? 0 : 1; LOG.info("done"); return retcode; } finally { LOG.debug("deleting partition file " + partitionFile); fs.delete(partitionFile, false); } }
From source file:job.ItemPDFJob.java
License:Open Source License
/**
 * Configures and runs the "ItemPDF Job" MapReduce job.
 *
 * <p>The input path is taken from {@code GLOBALS.getTRAIN_FILE_NAME()} and the output
 * path from {@code GLOBALS.getOUTPUT1()}; the split token from
 * {@code GLOBALS.getSPLIT_TOKEN()} is stored in the configuration under {@code "split"}.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 if the job completes successfully, 1 if it fails
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    String trainFile = GLOBALS.getTRAIN_FILE_NAME();
    String outputDir = GLOBALS.getOUTPUT1();

    // Stored in conf before the Job is constructed from it.
    conf.set("split", GLOBALS.getSPLIT_TOKEN());

    Job job = new Job(conf);
    job.setJobName("ItemPDF Job");
    job.setJarByClass(ItemPDFJob.class);

    job.setMapperClass(ItemProfileMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(BooleanWritable.class);
    job.setReducerClass(ItemProfileReducer.class);

    FileInputFormat.setInputPaths(job, new Path(trainFile));
    FileOutputFormat.setOutputPath(job, new Path(outputDir));

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}
From source file:job.UserProfileJob.java
License:Open Source License
/** * il metodo run prende in input gli argomenti passati da riga di comando e setta i parametri d'esecuzione del job mapreduce * fornisce in output 1 in caso di successo del processo, 0 in caso di fallimento * @param args [] la prima cella di memoria dell'array contiene l'input path, * la seconda l'output path per l'esecuzione ItemPDFJob la terza l'output path per l'esecuzione di UserProfileJob, * la quarta e' opzionale e contiene il valore K con cui eseguire il processo * @return lo stato di completamento del job, 0 se fallisce l'esecuzione 1 se l'esecuzione termina con successo * @throws Exception/*from w w w.ja v a 2 s . c o m*/ */ @Override public int run(String[] args) throws Exception { String input = GLOBALS.getTRAIN_FILE_NAME(); String output = GLOBALS.getOUTPUT2(); conf.set("k", GLOBALS.getK() + ""); conf.set("output1", GLOBALS.getOUTPUT1()); Job pr = new Job(conf); pr.setJarByClass(UserProfileJob.class); pr.setJobName("UserProfile Job"); FileInputFormat.setInputPaths(pr, new Path(input)); FileOutputFormat.setOutputPath(pr, new Path(output)); pr.setMapperClass(UserProfileMapper.class); pr.setReducerClass(UserProfileReducer.class); pr.setMapOutputKeyClass(IntWritable.class); pr.setMapOutputValueClass(PointWritable.class); boolean success = pr.waitForCompletion(true); return success ? 0 : 1; }
From source file:layer.AutoCoder.java
License:Apache License
/** * Runs this tool./*ww w . j ava2 s. c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath0 = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath0); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); initialParameters(conf); for (int iterations = 1; iterations < GlobalUtil.NUM_LAYER + 1; iterations++) { LOG.info("** Layer: " + iterations); try { Job job = Job.getInstance(conf); job.setJobName(AutoCoder.class.getSimpleName()); job.setJarByClass(AutoCoder.class); // set the path of the information of k clusters in this iteration job.getConfiguration().set("sidepath", inputPath0 + "/side_output"); job.getConfiguration().setInt("layer_ind", iterations); job.setNumReduceTasks(reduceTasks); String inputPath = inputPath0 + "/train"; dataShuffle(); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(ModelNode.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(ModelNode.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); prepareNextIteration(inputPath0, outputPath, iterations, conf, reduceTasks); } catch (Exception exp) { exp.printStackTrace(); } } return 0; }
From source file:mapreducemaxstock.MapReduceMaxStock.java
/** * @param args the command line arguments *//*from w w w . j av a 2s . c o m*/ public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Invalid parameters:input_path output_path"); System.exit(-1); } //Create MapReduce job Job job = new Job(); job.setJarByClass(MapReduceMaxStock.class); job.setJobName("MaxStockPrice"); //Specify input and output paths FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //Specify input and output formats job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); //Specify mapper and reducer classes job.setMapperClass(StockPriceMapper.class); job.setReducerClass(StockPriceReducer.class); //Specify Key,Value types job.setOutputKeyClass(Text.class); job.setOutputValueClass(FloatWritable.class); //submit job boolean jobStatus = job.waitForCompletion(true); System.exit(jobStatus ? 0 : 1); }
From source file:mil.nga.giat.geowave.mapreduce.copy.StoreCopyJobRunner.java
License:Open Source License
/** * Main method to execute the MapReduce analytic. */// w w w . j av a 2s. c om public int runJob() throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = super.getConf(); if (conf == null) { conf = new Configuration(); setConf(conf); } GeoWaveConfiguratorBase.setRemoteInvocationParams(options.getHdfsHostPort(), options.getJobTrackerOrResourceManHostPort(), conf); final Job job = Job.getInstance(conf); job.setJarByClass(this.getClass()); job.setJobName(jobName); job.setMapperClass(StoreCopyMapper.class); job.setReducerClass(StoreCopyReducer.class); job.setInputFormatClass(GeoWaveInputFormat.class); job.setOutputFormatClass(GeoWaveOutputFormat.class); job.setMapOutputKeyClass(GeoWaveInputKey.class); job.setMapOutputValueClass(ObjectWritable.class); job.setOutputKeyClass(GeoWaveOutputKey.class); job.setOutputValueClass(Object.class); job.setNumReduceTasks(options.getNumReducers()); GeoWaveInputFormat.setMinimumSplitCount(job.getConfiguration(), options.getMinSplits()); GeoWaveInputFormat.setMaximumSplitCount(job.getConfiguration(), options.getMaxSplits()); GeoWaveInputFormat.setStoreOptions(job.getConfiguration(), inputStoreOptions); GeoWaveOutputFormat.setStoreOptions(job.getConfiguration(), outputStoreOptions); final AdapterIndexMappingStore adapterIndexMappingStore = inputStoreOptions .createAdapterIndexMappingStore(); try (CloseableIterator<DataAdapter<?>> adapterIt = inputStoreOptions.createAdapterStore().getAdapters()) { while (adapterIt.hasNext()) { DataAdapter<?> dataAdapter = adapterIt.next(); LOGGER.debug("Adding adapter to output config: " + StringUtils.stringFromBinary(dataAdapter.getAdapterId().getBytes())); GeoWaveOutputFormat.addDataAdapter(job.getConfiguration(), dataAdapter); final AdapterToIndexMapping mapping = adapterIndexMappingStore .getIndicesForAdapter(dataAdapter.getAdapterId()); JobContextAdapterIndexMappingStore.addAdapterToIndexMapping(job.getConfiguration(), mapping); } } try (CloseableIterator<Index<?, 
?>> indexIt = inputStoreOptions.createIndexStore().getIndices()) { while (indexIt.hasNext()) { Index<?, ?> index = indexIt.next(); if (index instanceof PrimaryIndex) { LOGGER.debug("Adding index to output config: " + StringUtils.stringFromBinary(index.getId().getBytes())); GeoWaveOutputFormat.addIndex(job.getConfiguration(), (PrimaryIndex) index); } } } boolean retVal = false; try { retVal = job.waitForCompletion(true); } catch (final IOException ex) { LOGGER.error("Error waiting for store copy job: ", ex); } return retVal ? 0 : 1; }
From source file:minor_MapReduce.C4_5.java
License:Open Source License
private static void summarizeData() throws Exception { Job job = Job.getInstance(); job.setJarByClass(C4_5.class); job.setJobName("C4.5_summarizeData"); FileInputFormat.addInputPath(job, input_path); FileOutputFormat.setOutputPath(job, summarized_data_path); job.setMapperClass(SummarizeMapper.class); job.setReducerClass(SummarizeReducer.class); job.setOutputKeyClass(TextArrayWritable.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.waitForCompletion(false);// w w w . ja va 2s. c om /* Store it locally */ Option optPath = SequenceFile.Reader.file(new Path(summarized_data_path.toString() + "/part-r-00000")); SequenceFile.Reader reader = new SequenceFile.Reader(new Configuration(), optPath); TextArrayWritable key = new TextArrayWritable(); IntWritable val = new IntWritable(); summarized_data = new HashMap<String[], Integer>(); while (reader.next(key, val)) { summarized_data.put(key.toStrings(), val.get()); } reader.close(); }
From source file:model.AutoCoder.java
License:Apache License
/** * Runs this tool./*from w w w. j a v a 2s. c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT) + "/part*"; String outputPath = cmdline.getOptionValue(OUTPUT); //String inputPath = "mingled_v2/part*"; //String outputPath = "output"; String dataPath = cmdline.getOptionValue(INPUT) + "/common"; int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? 
Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool: " + AutoCoder.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - number of reducers: " + reduceTasks); Configuration conf = getConf(); initialParameters(conf); conf.set("dataPath", dataPath); conf.set("mapreduce.map.memory.mb", "2048"); conf.set("mapreduce.map.java.opts", "-Xmx2048m"); conf.set("mapreduce.reduce.memory.mb", "2048"); conf.set("mapreduce.reduce.java.opts", "-Xmx2048m"); Job job = Job.getInstance(conf); job.setJobName(AutoCoder.class.getSimpleName()); job.setJarByClass(AutoCoder.class); // set the path of the information of k clusters in this iteration job.getConfiguration().set("sidepath", inputPath + "/side_output"); job.setNumReduceTasks(reduceTasks); dataShuffle(); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); FileInputFormat.setMaxInputSplitSize(job, 1000 * 1024 * 1024); FileInputFormat.setMinInputSplitSize(job, 1000 * 1024 * 1024); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(ModelNode.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(SuperModel.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); //prepareNextIteration(inputPath0, outputPath,iterations,conf,reduceTasks); return 0; }
From source file:my.mahout.AbstractJob.java
License:Apache License
protected Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, String jobname) throws IOException { Job job = HadoopUtil.prepareJob(inputPath, outputPath, inputFormat, mapper, mapperKey, mapperValue, outputFormat, getConf());//w w w . ja va 2 s . c o m String name = jobname != null ? jobname : HadoopUtil.getCustomJobName(getClass().getSimpleName(), job, mapper, Reducer.class); job.setJobName(name); return job; }