List of usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks
public void setNumReduceTasks(int tasks) throws IllegalStateException
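Before the per-project listings, here is a minimal, self-contained driver sketch for orientation (not taken from any source file below; the class name, job name, and reducer count are illustrative). setNumReduceTasks must be called before the job is submitted; calling it on a job that is already running throws IllegalStateException, and passing 0 makes the job map-only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ReduceCountExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "reduce count example");
        job.setJarByClass(ReduceCountExample.class);
        // The base Mapper/Reducer classes pass records through unchanged,
        // so this job simply re-partitions its input across the reducers.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        // Must be set before submission; 0 would make the job map-only.
        job.setNumReduceTasks(4);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}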
From source file:com.yahoo.semsearch.fastlinking.utils.RunFELOntheGrid.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf); // Job job = Job.getInstance(conf);
    job.setJarByClass(RunFELOntheGrid.class);
    // Process custom command-line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Specify various job-specific parameters
    job.setJobName("Entity Linker");
    job.setNumReduceTasks(100);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(FELMapper.class);
    job.setReducerClass(FELReducer.class);
    job.setCombinerClass(FELReducer.class);
    job.waitForCompletion(true);
    return 0;
}
From source file:com.yahoo.semsearch.fastlinking.w2v.EntityEmbeddings.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf); // Job job = Job.getInstance(conf);
    job.setJarByClass(EntityEmbeddings.class);
    // Process custom command-line options
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // Specify various job-specific parameters
    job.setJobName("Entity embeddings");
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(EntityEMapper.class);
    job.setReducerClass(EntityEReducer.class);
    job.setCombinerClass(EntityEReducer.class);
    job.waitForCompletion(true);
    return 0;
}
From source file:com.yahoo.ycsb.bulk.hbase.BulkDataGeneratorJob.java
License:Apache License
/**
 * Parameters for the bulk loader specified through the config file:
 *
 * - prefix for the row keys
 * - range start
 * - range end (inclusive)
 * - num splits (or number of partitions)
 * - user
 * - password
 * - table
 *
 * For the accepted default options
 * @see org.apache.hadoop.util.Tool#run(java.lang.String[])
 */
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Util.printArgs("run", args, System.err);
    printKeyValues(conf, ARG_KEYS, System.err);

    if (args.length > 1 || (args.length == 1 && "-help".compareToIgnoreCase(args[0]) == 0)) {
        System.err.println("Usage: " + this.getClass().getName()
                + " input_path [generic options] [input_paths...] output_path");
        GenericOptionsParser.printGenericCommandUsage(System.err);
        return 1;
    }

    // Time the run
    long startTime = System.currentTimeMillis();

    String workdir;
    if (args.length == 1) {
        /* override workdir in the config if it is specified on the command line */
        conf.set(ARG_KEY_OUTDIR, args[0]);
        workdir = args[0];
    }
    workdir = conf.get(ARG_KEY_OUTDIR);
    if (workdir == null) {
        System.err.println("No output directory specified");
        return 1;
    }

    /* Initialize job, check parameters and decide which mapper to use */
    Job job = new Job(conf, conf.get(ARG_KEY_JOBNAME, "YCSB KV data generator"));

    /* these settings are the same (i.e., fixed) independent of the parameters */
    job.setJarByClass(this.getClass());
    // job.setInputFormatClass(TextInputFormat.class);
    job.setInputFormatClass(NLineInputFormat.class);

    /* these settings should depend on the type of output file */
    job.setOutputFormatClass(HFileOutputFormat.class);
    /* not sure the next two are needed */
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    this.createInputFile(job, workdir);

    HFileOutputFormat.setOutputPath(job, new Path(workdir + "/files"));

    /* depending on whether the keys need to be sorted and hashed,
     * decide which mapper and reducer to use */
    boolean hashKeys = conf.getBoolean(ARG_KEY_HASH_KEYS, false);
    boolean sortKeys = conf.getBoolean(ARG_KEY_SORTKEYS, true);

    /* get splits file name: side effect -> this may generate a splits file */
    String splitsfile = this.getSplitsFile(job, workdir);

    if (sortKeys && hashKeys) { /* do a full map-reduce job */
        job.setMapperClass(RowGeneratorMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setPartitionerClass(RangePartitioner.class);

        if (splitsfile == null) {
            /* Auto-generate the splits file either from:
             * - the input key ranges
             * - the current table splits */
            throw new InvalidInputException("No splits specified");
        }

        /* Set splits file */
        RangePartitioner.setSplitFile(job, splitsfile);

        /* Add reducer (based on mapper code) */
        job.setReducerClass(RowGeneratorReduce.class);

        /* the number of reducers is dependent on the number of partitions */
        int numReduce = conf.getInt(ARG_KEY_NUMREDUCE, 1);
        job.setNumReduceTasks(numReduce);
    } else { /* perform a map-only job */
        job.setMapperClass(RowGeneratorMapOnly.class);
        /* map output key and value types are the same as for the job */
        job.setMapOutputKeyClass(job.getOutputKeyClass());
        job.setMapOutputValueClass(job.getOutputValueClass());
        job.setNumReduceTasks(0);
    }

    job.waitForCompletion(true);
    // JobClient.runJob(conf);

    SimpleDateFormat df = new SimpleDateFormat("yyyy.MM.dd HH:mm:ss.SSS z");
    SimpleDateFormat ddf = new SimpleDateFormat("HH:mm:ss.SSS");
    ddf.setTimeZone(TimeZone.getTimeZone("UTC"));
    long endTime = System.currentTimeMillis();
    System.out.println("Start time (ms): " + df.format(new Date(startTime)) + " -- " + startTime);
    System.out.println("End time (ms): " + df.format(new Date(endTime)) + " -- " + endTime);
    System.out.println("Elapsed time (ms): " + ddf.format(endTime - startTime) + " -- " + (endTime - startTime));
    return 0;
}
From source file:com.yourcompany.hadoop.mapreduce.aggregate.UnionDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job();
    parseArguements(args, job);
    job.setJarByClass(UnionDriver.class);

    // Mapper Class
    job.setMapperClass(UnionMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Reducer Task
    job.setNumReduceTasks(1);

    // Run a Hadoop Job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.yourcompany.hadoop.mapreduce.KoreanWordcountDriver.java
License:Apache License
private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-exactMatch".equals(args[i])) {
            job.getConfiguration().set("exactMatch", args[++i]);
        } else if ("-bigrammable".equals(args[i])) {
            job.getConfiguration().set("bigrammable", args[++i]);
        } else if ("-hasOrigin".equals(args[i])) {
            job.getConfiguration().set("hasOrigin", args[++i]);
        } else if ("-originCNoun".equals(args[i])) {
            job.getConfiguration().set("originCNoun", args[++i]);
        } else if ("-reducer".equals(args[i])) {
            job.setNumReduceTasks(Integer.parseInt(args[++i]));
        } else if ("-minSupport".equals(args[i])) {
            job.getConfiguration().set("minSupport", args[++i]);
        }
    }
}
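In this driver the reducer count is supplied on the command line via the -reducer flag. An illustrative invocation (the jar name and paths are placeholders, not taken from the project):

hadoop jar korean-wordcount.jar com.yourcompany.hadoop.mapreduce.KoreanWordcountDriver -input /data/in -output /data/out -reducer 8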
From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java
License:Apache License
public int run(String[] args) throws Exception {
    Job job = new Job();
    parseArguements(args, job);
    job.setJarByClass(LexicalAnalyzerDriver.class);

    // Mapper & Reducer Class
    job.setMapperClass(LexicalAnalyzerMapper.class);

    // Mapper Output Key & Value Type after Hadoop 0.20
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);

    // Run a Hadoop Job
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.yourcompany.hadoop.mapreduce.lexical.LexicalAnalyzerDriver.java
License:Apache License
private void parseArguements(String[] args, Job job) throws IOException {
    for (int i = 0; i < args.length; ++i) {
        if ("-input".equals(args[i])) {
            FileInputFormat.addInputPaths(job, args[++i]);
        } else if ("-output".equals(args[i])) {
            FileOutputFormat.setOutputPath(job, new Path(args[++i]));
        } else if ("-indexmode".equals(args[i])) {
            job.getConfiguration().set("indexmode", args[++i]);
        } else if ("-reducer".equals(args[i])) {
            job.setNumReduceTasks(Integer.parseInt(args[++i]));
        }
    }
}
From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java
License:Apache License
/**
 * Ensures that the given number of reduce tasks for the given job
 * configuration does not exceed the number of regions for the given table.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void limitNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    if (job.getNumReduceTasks() > regions)
        job.setNumReduceTasks(regions);
}
From source file:com.yunrang.hadoop.app.utils.CustomizedUtil.java
License:Apache License
/**
 * Sets the number of reduce tasks for the given job configuration to the
 * number of regions the given table has.
 *
 * @param table The table to get the region count for.
 * @param job The current job to adjust.
 * @throws IOException When retrieving the table details fails.
 */
public static void setNumReduceTasks(String table, Job job) throws IOException {
    HTable outputTable = new HTable(job.getConfiguration(), table);
    int regions = outputTable.getRegionsInfo().size();
    job.setNumReduceTasks(regions);
}
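A brief usage sketch for the two helpers above (the configuration, job name, table name, and requested reducer count are illustrative assumptions; note also that HTable and getRegionsInfo() belong to the old HBase 0.90-era client API and were removed in later HBase releases):

// Illustrative fragment: cap or match the reducer count to the
// output table's region count before submitting the job.
Configuration conf = HBaseConfiguration.create();
Job job = new Job(conf, "write to hbase");          // placeholder job name
job.setNumReduceTasks(64);                          // requested count
CustomizedUtil.limitNumReduceTasks("mytable", job); // lowered if the table has fewer regions
// or, to match the region count exactly:
CustomizedUtil.setNumReduceTasks("mytable", job);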
From source file:connected.components.HashToMin.java
License:Open Source License
protected Job jobConfig() throws IOException {
    Job job = new Job(new Configuration(), "iteration");
    job.setJarByClass(HashToMin.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    return job;
}