List of usage examples for org.apache.hadoop.mapred.JobConf.setNumReduceTasks
public void setNumReduceTasks(int n)
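Before the project-specific examples below, here is a minimal, self-contained sketch of where the call typically sits in an old-API (org.apache.hadoop.mapred) driver. The driver class name, the identity mapper/reducer, and the command-line input/output paths are illustrative placeholders, not taken from any of the source files on this page. Passing 0 makes the job map-only, 1 funnels all output into a single part file, and larger values spread the reduce work across that many tasks.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumReduceTasksExample.class);
        conf.setJobName("set-num-reduce-tasks-example");

        // Identity mapper/reducer keep the sketch runnable; substitute real classes.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // The call this page documents: request four reduce tasks.
        // Use 0 for a map-only job, or 1 to force a single output file.
        conf.setNumReduceTasks(4);

        // Input and output paths are taken from the command line (placeholders).
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}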
From source file:dinocode.SpeciesGraphBuilder.java
public static void main(String[] args) throws Exception {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Page-rank Species Graph Builder");

    final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/InputFiles/species_medium.txt";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    //conf.setOutputKeyClass(Text.class);
    //conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesGraphBuilderMapperd.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    //conf.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    //conf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
    conf.setReducerClass(SpeciesGraphBuilderReducerd.class);
    //conf.setCombinerClass(SpeciesGraphBuilderReducer.class);
    //conf.setInputPath(new Path("graph1"));
    //conf.setOutputPath(new Path("graph2"));

    // take the input and output from the command line
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result/part-00000";
    for (int i = 0; i < 500; i++) {
        client = new JobClient();
        conf = new JobConf(SpeciesDriver.class);
        conf.setJobName("Species Iter");

        int count = i + 1;
        outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result" + count;

        conf.setNumReduceTasks(5);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(conf, new Path(inFiles));
        FileOutputFormat.setOutputPath(conf, new Path(outFiles));
        conf.setMapperClass(SpeciesIterMapper2d.class);
        conf.setReducerClass(SpeciesIterReducer2d.class);
        conf.setCombinerClass(SpeciesIterReducer2d.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
        inFiles = outFiles;
    }

    // Viewer
    client = new JobClient();
    conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Species Viewer");
    conf.setOutputKeyClass(FloatWritable.class);
    conf.setOutputValueClass(Text.class);

    inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/Result500/part-00000";
    outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outputFiles/ResultFinal";
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    conf.setMapperClass(SpeciesViewerMapperd.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    client.setConf(conf);
    try {
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);

    // Options
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            // Print property and exit
            if ("-property".equals(args[i])) {
                String prop = jobConf.get(args[i + 1]);
                System.out.println(prop);
                System.exit(0);
            // # of Maps
            } else if ("-m".equals(args[i])) {
                this.num_of_maps = Integer.parseInt(args[++i]);
            // # of Reduces
            } else if ("-r".equals(args[i])) {
                this.num_of_reduces = Integer.parseInt(args[++i]);
            // Enable debug
            } else if ("-debug".equals(args[i])) {
                this.debug = true;
            // Enable single output file for results
            } else if ("-combine".equals(args[i])) {
                this.combine = true;
            // Tell jobs to compress their intermediate output files
            } else if ("-compress".equals(args[i])) {
                this.compress = true;
            // We're using TupleWritable (which has to be in a SequenceFile)
            } else if ("-tuple".equals(args[i])) {
                this.tuple_data = true;
                this.sequence_file = true;
            // Use SequenceFiles for initial input
            } else if ("-sequence".equals(args[i])) {
                this.sequence_file = true;
            // Recursively load directories
            } else if ("-recursive-dirs".equals(args[i])) {
                this.load_directories = true;
            // Job Basename
            } else if ("-basename".equals(args[i])) {
                this.job_name = args[++i];
            // Misc. Properties
            } else if ("-D".equals(args[i].substring(0, 2))) {
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    // Set these flags so the jobs know about them
    if (this.getSequenceFile())
        this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData())
        this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug())
        this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    // Input Paths
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                //int limit = 10;
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                    //if (limit-- <= 0) break;
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println("ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    // Output Paths
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    if (this.num_of_maps >= 0)
        jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0)
        jobConf.setNumReduceTasks(this.num_of_reduces);

    // Set all properties
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public void runCombine() throws Exception {
    if (this.last_job == null) {
        throw new NullPointerException("ERROR: Last job is Null");
    }

    JobConf job = new JobConf(this.conf, this.benchmarkClass);
    job.setJobName((this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName()) + ".combine");
    job.setMapperClass(IdentityMapper.class);
    job.setNumMapTasks(0);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(1); // this is needed to get a single output file

    // Input
    FileInputFormat.setInputPaths(job, FileOutputFormat.getOutputPath(this.last_job));
    job.setInputFormat(KeyValueTextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job,
            new Path(FileOutputFormat.getOutputPath(this.last_job).toString() + "/combine"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    JobConf real_last_job = this.last_job;
    this.runJob(job);
    this.last_job = real_last_job;
    return;
}
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark1.java
License:Open Source License
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    JobConf job = base.getJobConf();
    job.setInputFormat(base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    if (base.getTupleData()) {
        job.setMapperClass(Benchmark1.TupleMap.class);
    } else {
        job.setMapperClass(Benchmark1.TextMap.class);
    }
    //job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(0);

    try {
        base.runJob(job);
        if (base.getCombine())
            base.runCombine();
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    }
    return 0;
}
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java
License:Open Source License
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    // -------------------------------------------
    // Phase #1
    // -------------------------------------------
    JobConf p1_job = base.getJobConf();
    p1_job.setJobName(p1_job.getJobName() + ".Phase1");
    Path p1_output = new Path(base.getOutputPath().toString() + "/phase1");
    FileOutputFormat.setOutputPath(p1_job, p1_output);

    // Make sure we have our properties
    String required[] = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE };
    for (String req : required) {
        if (!base.getOptions().containsKey(req)) {
            System.err.println("ERROR: The property '" + req + "' is not set");
            System.exit(1);
        }
    } // FOR

    p1_job.setInputFormat(base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p1_job.setOutputFormat(SequenceFileOutputFormat.class);
    p1_job.setOutputKeyClass(Text.class);
    p1_job.setOutputValueClass(Text.class);
    p1_job.setMapperClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class);
    p1_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class);
    p1_job.setCompressMapOutput(base.getCompress());

    // -------------------------------------------
    // Phase #2
    // -------------------------------------------
    JobConf p2_job = base.getJobConf();
    p2_job.setJobName(p2_job.getJobName() + ".Phase2");
    p2_job.setInputFormat(base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p2_job.setOutputFormat(SequenceFileOutputFormat.class);
    p2_job.setOutputKeyClass(Text.class);
    p2_job.setOutputValueClass(Text.class);
    p2_job.setMapperClass(IdentityMapper.class);
    p2_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class);
    p2_job.setCompressMapOutput(base.getCompress());
    p2_job.setNumMapTasks(60);

    // -------------------------------------------
    // Phase #3
    // -------------------------------------------
    JobConf p3_job = base.getJobConf();
    p3_job.setJobName(p3_job.getJobName() + ".Phase3");
    p3_job.setNumReduceTasks(1);
    p3_job.setInputFormat(base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    p3_job.setOutputKeyClass(Text.class);
    p3_job.setOutputValueClass(Text.class);
    //p3_job.setMapperClass(Phase3Map.class);
    p3_job.setMapperClass(IdentityMapper.class);
    p3_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class);

    // Execute #1
    base.runJob(p1_job);

    // Execute #2
    Path p2_output = new Path(base.getOutputPath().toString() + "/phase2");
    FileOutputFormat.setOutputPath(p2_job, p2_output);
    FileInputFormat.setInputPaths(p2_job, p1_output);
    base.runJob(p2_job);

    // Execute #3
    Path p3_output = new Path(base.getOutputPath().toString() + "/phase3");
    FileOutputFormat.setOutputPath(p3_job, p3_output);
    FileInputFormat.setInputPaths(p3_job, p2_output);
    base.runJob(p3_job);

    // There does need to be a combine
    if (base.getCombine())
        base.runCombine();

    return 0;
}
From source file:edu.brown.cs.mapreduce.demo.OrderSum.java
License:Open Source License
/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(this.getConf(), OrderSum.class);
    conf.setJobName(OrderSum.class.getSimpleName());

    // Input File Format
    conf.setInputFormat(KeyValueTextInputFormat.class);

    // Output Key/Value Types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    // Map/Reduce Classes
    conf.setMapperClass(OrderSum.OrderSumMapper.class);
    conf.setReducerClass(OrderSum.OrderSumReducer.class);

    // Input/Output Paths (HDFS)
    FileInputFormat.setInputPaths(conf, "/demo/input/");
    FileOutputFormat.setOutputPath(conf, new Path("/demo/output/"));

    /***** Additional Features *****/

    // Compression
    //conf.setCompressMapOutput(true);

    // Combine
    //conf.setCombinerClass(OrderSum.OrderSumReducer.class);

    // Create a single output file
    conf.setNumReduceTasks(1);

    // Pass search date on command-line
    /* uncomment configure!
    if (args.length == 1) {
        conf.set("edu.brown.cs.pavlo.search_date", args[0]);
    }*/

    // Bombs away!
    JobClient.runJob(conf);
    return 0;
}
From source file:edu.iu.wordcount.CollectiveWordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "collective word count");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumReduceTasks(0);
    job.setJarByClass(CollectiveWordCount.class);
    job.setMapperClass(WordCountMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:edu.ldzm.analysis.AnalysisSummary.java
License:Apache License
/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AnalysisSummary.class);
    conf.setJobName("analysis_summery");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    boolean param = false;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param = true;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    if (!param) {
        System.out.println("-l namelist.txt");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:edu.ldzm.average.AverageResponseTime.java
License:Apache License
/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AverageResponseTime.class);
    conf.setJobName("average_response_time");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    int param = 0;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param++;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else if ("-i".equals(args[i])) {
                param++;
                conf.setInt("INTERVAL_TIME", Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    if (param != 2) {
        System.out.println("-l -i?");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir,
        Path inputPath)
        throws ValidationException, NotFoundException, CompilationException, InternalException {
    // Generate job output path
    Path outputDir = new Path(_homeDir, "out");
    Path outputPath;
    try {
        FileSystem fs = outputDir.getFileSystem(new Configuration());
        outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not construct output path.", ex);
    }

    JobConf conf = new JobConf();
    conf.setJobName(request.getName());

    // Set mapper and number of tasks if specified
    StreamJob.setStreamMapper(conf, mapperFile.toString());
    if (request.isSetMapTasks())
        conf.setNumMapTasks(request.getMapTasks());

    // Set reducer and number of tasks if specified
    StreamJob.setStreamReducer(conf, reducerFile.toString());
    if (request.isSetReduceTasks())
        conf.setNumReduceTasks(request.getReduceTasks());

    // Create and set job JAR, including necessary files
    ArrayList<String> jarFiles = new ArrayList<String>();
    jarFiles.add(packageDir.toString());
    String jarPath;
    try {
        jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not create job jar.", ex);
    }
    if (jarPath != null)
        conf.setJar(jarPath);

    // TODO: This is a hack. Rewrite streaming to use DistributedCache.
    //conf.setPattern("mapreduce.job.jar.unpack.pattern",
    //        Pattern.compile(".*"));

    // Set I/O formats and paths
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Use numeric sort if appropriate
    conf.setBoolean(CONF_NUMERIC, request.isNumericSort());
    if (request.isNumericSort()) {
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldComparatorOptions("-n");
        conf.setKeyFieldPartitionerOptions("-n");
    }

    // Set other job information
    conf.set(CONF_USER, request.getUser());
    conf.set(CONF_LANGUAGE, request.getLanguage());
    conf.set(CONF_MAPPER, request.getMapper());
    conf.set(CONF_REDUCER, request.getReducer());

    // Attempt to submit the job
    RunningJob job;
    try {
        JobClient client = new JobClient(new JobConf());
        job = client.submitJob(conf);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.", ex);
    }

    try {
        SubmissionDatabase.setSubmitted(submissionID);
        SubmissionDatabase.setHadoopID(submissionID, job.getID().toString());
    } catch (SQLException ex) {
        throw JobServiceHandler.wrapException("Could not update submission in database.", ex);
    }
}