List of usage examples for org.apache.hadoop.mapred.JobConf.set
public void set(String name, String value)
Sets the value of the name property.
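Before the project-specific examples below, here is a minimal, self-contained sketch of the typical set/get round-trip on a JobConf. The SetExample class and the "my.app.threshold" key are illustrative placeholders, not taken from the source files listed on this page:

import org.apache.hadoop.mapred.JobConf;

public class SetExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetExample.class);
        conf.setJobName("set-example");

        // set(name, value) stores a plain string property in the job configuration;
        // typed helpers such as setInt wrap the same underlying mechanism.
        conf.set("mapred.task.timeout", "600000");
        conf.setInt("my.app.threshold", 5); // arbitrary, application-defined key

        // Properties are read back with get(name) or a typed getter with a default.
        String timeout = conf.get("mapred.task.timeout");
        int threshold = conf.getInt("my.app.threshold", 0);
        System.out.println("timeout=" + timeout + ", threshold=" + threshold);
    }
}

The examples that follow use the same call to tune framework properties (timeouts, block size, task counts) and to pass application parameters (hyperparameters, URLs) down to mappers and reducers.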
From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.SimCounts1WithFeatures.java
License:Apache License
@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {
    JobConf conf = HadoopUtil.generateJobConf(args);

    /* set the newly defined types to be used */
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    if (args.length > 3) {
        conf.setInt("threshold", Integer.parseInt(args[3]));
    }

    /* number of milliseconds before killing a non-responding task */
    conf.set("mapred.task.timeout", "600000");

    /* change the block size to 128 MB */
    conf.set("dfs.block.size", "134217728");

    /* set the maximum number of tasks per node */
    int maptasks = 200;

    /*
     * Number of map tasks to deploy on each machine. 0.5 to 2 * (cores/node)
     */
    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);

    /*
     * The default number of map tasks per job. Typically set to a prime
     * several times greater than the number of available hosts.
     */
    conf.set("mapred.map.tasks", "" + maptasks);

    int reducetasks = 20;
    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "4000");
    conf.set("mapred.job.reduce.memory.mb", "4000");

    conf.set("dfs.replication", "1");

    /* reduce I/O load */
    conf.set("mapred.child.java.opts", "-Xmx1400M");
    conf.set("io.sort.mb", "300");
    conf.set("io.sort.factor", "30");

    JobClient.runJob(conf);
}
From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.SimCountsLog.java
License:Apache License
/**
 * The reducer step will sum all float values, i.e. the
 * weight for any (word1,word2) pair sharing a feature.
 */
public static void main(String[] args) throws Exception {
    JobConf conf = HadoopUtil.generateJobConf(args);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(FloatWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(DoubleSumReducer.class);
    conf.setReducerClass(DoubleSumReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /* number of milliseconds before killing a non-responding task */
    conf.set("mapred.task.timeout", "600000");

    /* change the block size to 128 MB */
    conf.set("dfs.block.size", "134217728");

    /* set the maximum number of tasks per node */
    int maptasks = 100;

    /* Number of map tasks to deploy on each machine. 0.5 to 2 * (cores/node) */
    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);

    /*
     * The default number of map tasks per job. Typically set to a prime
     * several times greater than the number of available hosts.
     */
    conf.set("mapred.map.tasks", "" + maptasks);

    int reducetasks = 100;
    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);

    JobClient.runJob(conf);
}
From source file:junto.algorithm.parallel.AdsorptionHadoop.java
License:Apache License
public static void main(String[] args) throws Exception {
    Hashtable config = ConfigReader.read_config(args);

    String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern");
    String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base");
    int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters"));

    String currInputFilePat = baseInputFilePat;
    String currOutputFilePat = "";
    for (int iter = 1; iter <= numIterations; ++iter) {
        JobConf conf = new JobConf(AdsorptionHadoop.class);
        conf.setJobName("adsorption_hadoop");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(Map.class);
        // conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // hyperparameters
        conf.set("mu1", Defaults.GetValueOrDie(config, "mu1"));
        conf.set("mu2", Defaults.GetValueOrDie(config, "mu2"));
        conf.set("mu3", Defaults.GetValueOrDie(config, "mu3"));
        conf.set("keepTopKLabels",
                Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"),
                        Integer.toString(Integer.MAX_VALUE)));

        if (iter > 1) {
            // output from the last iteration is the input for the current iteration
            currInputFilePat = currOutputFilePat + "/*";
        }
        FileInputFormat.setInputPaths(conf, new Path(currInputFilePat));

        currOutputFilePat = baseOutputFilePat + "_" + iter;
        FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat));

        JobClient.runJob(conf);
    }
}
From source file:junto.algorithm.parallel.LP_ZGL_Hadoop.java
License:Apache License
public static void main(String[] args) throws Exception {
    Hashtable config = ConfigReader.read_config(args);

    String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern");
    String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base");
    int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters"));

    String currInputFilePat = baseInputFilePat;
    String currOutputFilePat = "";
    for (int iter = 1; iter <= numIterations; ++iter) {
        JobConf conf = new JobConf(LP_ZGL_Hadoop.class);
        conf.setJobName("lp_zgl_hadoop");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(LP_ZGL_Map.class);
        // conf.setCombinerClass(LP_ZGL_Reduce.class);
        conf.setReducerClass(LP_ZGL_Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // hyperparameters
        conf.set("mu1", Defaults.GetValueOrDie(config, "mu1"));
        conf.set("mu2", Defaults.GetValueOrDie(config, "mu2"));
        conf.set("keepTopKLabels",
                Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"),
                        Integer.toString(Integer.MAX_VALUE)));

        if (iter > 1) {
            // output from the last iteration is the input for the current iteration
            currInputFilePat = currOutputFilePat + "/*";
        }
        FileInputFormat.setInputPaths(conf, new Path(currInputFilePat));

        currOutputFilePat = baseOutputFilePat + "_" + iter;
        FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat));

        JobClient.runJob(conf);
    }
}
From source file:junto.algorithm.parallel.MADHadoop.java
License:Apache License
public static void main(String[] args) throws Exception {
    Hashtable config = ConfigReader.read_config(args);

    String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern");
    String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base");
    int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters"));
    int numReducers = Defaults.GetValueOrDefault((String) config.get("num_reducers"), 10);

    String currInputFilePat = baseInputFilePat;
    String currOutputFilePat = "";
    for (int iter = 1; iter <= numIterations; ++iter) {
        JobConf conf = new JobConf(MADHadoop.class);
        conf.setJobName("mad_hadoop");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(MADHadoopMap.class);
        // conf.setCombinerClass(MADHadoopReduce.class);
        conf.setReducerClass(MADHadoopReduce.class);
        conf.setNumReduceTasks(numReducers);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // hyperparameters
        conf.set("mu1", Defaults.GetValueOrDie(config, "mu1"));
        conf.set("mu2", Defaults.GetValueOrDie(config, "mu2"));
        conf.set("mu3", Defaults.GetValueOrDie(config, "mu3"));
        conf.set("keepTopKLabels",
                Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"),
                        Integer.toString(Integer.MAX_VALUE)));

        if (iter > 1) {
            // output from the last iteration is the input for the current iteration
            currInputFilePat = currOutputFilePat + "/*";
        }
        FileInputFormat.setInputPaths(conf, new Path(currInputFilePat));

        currOutputFilePat = baseOutputFilePat + "_iter_" + iter;
        FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat));

        JobClient.runJob(conf);
    }
}
From source file:kafka.etl.impl.DataGenerator.java
License:Apache License
protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);

    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);

    KafkaETLRequest request = new KafkaETLRequest(_topic,
            "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);

    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());
    byte[] bytes = request.toString().getBytes("UTF-8");

    KafkaETLKey dummyKey = new KafkaETLKey();

    SequenceFile.setCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath,
            KafkaETLKey.class, BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}
From source file:kafka.etl.tweet.producer.TweetProducer.java
License:Apache License
protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    java.util.Date date = new java.util.Date();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);

    Calendar cal = Calendar.getInstance();

    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);

    KafkaETLRequest request = new KafkaETLRequest(_topic,
            "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);

    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());
    byte[] bytes = request.toString().getBytes("UTF-8");

    KafkaETLKey dummyKey = new KafkaETLKey();

    SequenceFile.setDefaultCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath,
            KafkaETLKey.class, BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}
From source file:mapreduce.DosAttack.java
License:Apache License
private void issue() throws IOException {
    LOG.info("Starting DOS on url[{}] with clients[{}]", wsURL, numMappers);
    DosMapper.init(wsURL);

    JobConf job = new JobConf(DosAttack.class);
    job.setJarByClass(DosAttack.class);
    job.setJobName("DOS Attack");
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(DosMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumMapTasks(numMappers);
    job.setInt(NUM_MAPPERS_KEY, numMappers);
    job.setInt(NUM_REQUESTS_KEY, numRequests);
    job.set(TARGET_URL_KEY, wsURL);

    JobClient.runJob(job);
}