List of usage examples for org.apache.hadoop.mapred.JobConf#getNumReduceTasks()
public int getNumReduceTasks()
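getNumReduceTasks() returns the number of reduce tasks configured for the job, i.e. the value of mapred.reduce.tasks, which defaults to 1 when unset. A recurring idiom in the examples below is to treat a count of 0 as a map-only job. The following minimal sketch shows that check; the class name NumReduceTasksExample and the reducer count of 4 are illustrative, not taken from any of the examples:

import org.apache.hadoop.mapred.JobConf;

public class NumReduceTasksExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.setNumReduceTasks(4); // getNumReduceTasks() would return 1 if this were never set

        int numReduces = conf.getNumReduceTasks();
        if (numReduces == 0) {
            // A map-only job: map output is written directly by the output format.
            System.out.println("map-only job");
        } else {
            System.out.println("job will run with " + numReduces + " reduce tasks");
        }
    }
}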
From source file: org.gridgain.grid.kernal.processors.hadoop.GridHadoopUtils.java
License: Open Source License
/**
 * Creates JobInfo from Hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws GridException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws GridException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces,
            props);
}
From source file: org.macau.util.FuzzyJoinDriver.java
License: Apache License
/**
 * Runs the job and prints its basic information: the start time, the finish
 * time and the running time (finish time - start time).
 *
 * @param job the job to run
 * @throws IOException
 */
public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Jobs:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + "  Properties:  {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n                ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println(
            "The job took " + (endTime.getTime() - startTime.getTime()) / 1000.0f + " seconds.");
}
From source file: org.mitre.bio.mapred.TotalSequenceLength.java
License: Open Source License
/**
 * Init the job with the given parameters and run it.
 *
 * @param jobConf the hadoop job configuration
 * @param input   input {@link SequenceFile} path
 * @param output  output path (this will contain ONE part with the length)
 * @return zero if successful
 * @throws java.lang.Exception
 */
public int initJob(JobConf jobConf, String input, String output, boolean cleanLogs) throws Exception {
    JobConf conf = new JobConf(jobConf, TotalSequenceLength.class);
    conf.setJobName("TotalSequenceLength");

    // We can only handle one reducer.
    if (conf.getNumReduceTasks() != 1) {
        conf.setNumReduceTasks(1);
        LOG.info("Setting number of reducers to ONE!");
    }

    SequenceFileInputFormat.setInputPaths(conf, new Path(input));
    conf.setInputFormat(SequenceFileInputFormat.class);

    conf.setMapperClass(SequenceMapClass.class);
    conf.setOutputKeyClass(IntWritable.class); // map output key class
    conf.setOutputValueClass(IntWritable.class); // map output value class

    conf.setCombinerClass(LengthReduceClass.class);
    conf.setReducerClass(LengthReduceClass.class);

    FileOutputFormat.setOutputPath(conf, new Path(output));

    JobClient.runJob(conf);

    if (cleanLogs) {
        LOG.info("removing log directory");
        Path path = new Path(output, "_logs");
        FileSystem fs = path.getFileSystem(jobConf);
        fs.delete(path, true);
    }
    return 0;
}
From source file: org.pentaho.hadoop.mapreduce.MRUtil.java
License: Apache License
public static void passInformationToTransformation(final VariableSpace variableSpace, final JobConf job) {
    if (variableSpace != null && job != null) {
        variableSpace.setVariable("Internal.Hadoop.NumMapTasks", Integer.toString(job.getNumMapTasks()));
        variableSpace.setVariable("Internal.Hadoop.NumReduceTasks", Integer.toString(job.getNumReduceTasks()));
        String taskId = job.get("mapred.task.id");
        variableSpace.setVariable("Internal.Hadoop.TaskId", taskId);

        // TODO: Verify if the string range holds true for all Hadoop distributions.
        // Extract the node number from the task ID.
        // The consensus currently is that it's the part after the last underscore.
        //
        // Examples:
        //   job_201208090841_9999
        //   job_201208090841_10000
        //
        String nodeNumber;
        if (Const.isEmpty(taskId)) {
            nodeNumber = "0";
        } else {
            int lastUnderscoreIndex = taskId.lastIndexOf("_");
            if (lastUnderscoreIndex >= 0) {
                nodeNumber = taskId.substring(lastUnderscoreIndex + 1);
            } else {
                nodeNumber = "0";
            }
        }

        // Get rid of leading zeroes in the node number.
        variableSpace.setVariable("Internal.Hadoop.NodeNumber", Integer.toString(Integer.valueOf(nodeNumber)));
    }
}
From source file: org.smartfrog.services.hadoop.mapreduce.terasort.TeraInputFormat.java
License: Apache License
/**
 * Use the input splits to take samples of the input and generate sample keys.
 * By default reads 100,000 keys from 10 locations in the input, sorts them and
 * picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf     the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraConstants.SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
From source file: tap.core.MapperBridge.java
License: Apache License
@SuppressWarnings("unchecked") @Override/* w ww .j av a 2 s . c o m*/ public void configure(JobConf conf) { this.mapper = ReflectionUtils.newInstance(conf.getClass(Phase.MAPPER, TapMapper.class, TapMapper.class), conf); this.isMapOnly = conf.getNumReduceTasks() == 0; try { determineInputFormat(conf); determineOutputFormat(conf); this.groupBy = conf.get(Phase.GROUP_BY); this.sortBy = conf.get(Phase.SORT_BY); } catch (Exception e) { if (e instanceof RuntimeException) throw (RuntimeException) e; throw new RuntimeException(e); } mapper.setConf(conf); mapper.init(conf.get("map.input.file")); }