List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
From source file:SleepJob.java
License:Apache License
public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount, long reduceSleepTime, int reduceSleepCount) { JobConf job = new JobConf(getConf(), SleepJob.class); job.setNumMapTasks(numMapper);/*w ww .j a va2s . co m*/ job.setNumReduceTasks(numReducer); job.setMapperClass(SleepJob.class); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(NullWritable.class); job.setReducerClass(SleepJob.class); job.setOutputFormat(NullOutputFormat.class); job.setInputFormat(SleepInputFormat.class); job.setPartitionerClass(SleepJob.class); job.setSpeculativeExecution(false); FileInputFormat.addInputPath(job, new Path("ignored")); job.setLong("sleep.job.map.sleep.time", mapSleepTime); job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime); job.setInt("sleep.job.map.sleep.count", mapSleepCount); job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount); return job; }
From source file:update_sentiment.java
License:LGPL
public static void runjob(String input, String output) throws Exception { JobConf conf = new JobConf(update_sentiment.class); conf.setJobName("Update_Sentiment_Train"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); FileOutputFormat.setOutputPath(conf, new Path(output)); JobClient.runJob(conf);/*from w ww. j a v a2s . co m*/ }
From source file:BMTKeyValueLoader.java
License:Apache License
public int run(String[] args) { JobConf conf = new JobConf(getConf(), CassandraTableLoader.class); GenericOptionsParser parser = new GenericOptionsParser(conf, args); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setJobName("BMTKeyValueLoader"); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { other_args.add(args[i]);/* w w w . j av a2s . c o m*/ } FileInputFormat.setInputPaths(conf, new Path(other_args.get(0))); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); try { JobClient.runJob(conf); } catch (IOException e) { throw new RuntimeException(e); } return 0; }
From source file:HoopRemoteTask.java
License:Open Source License
/** * *///from ww w. j a v a 2 s . co m public static void main(String args[]) throws Exception { // run the HoopLink constructor; We need this to have a global settings registry @SuppressWarnings("unused") HoopLink link = new HoopLink(); dbg("main ()"); showTimeStamp(); /** * I've taken out the statistics portion since it relies on code that isn't distributed * The next version will have this solved. I might try the solution in: * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments * Although chances are I will switch to using Hoop to collect much better performance and distribution * statistics. See Hoop.java for more information */ HoopPerformanceMeasure metrics = new HoopPerformanceMeasure(); metrics.setMarker("main"); HoopLink.metrics.getDataSet().add(metrics); if (parseArgs(args) == false) { usage(); return; } if (HoopLink.postonly == true) { postOnly(); return; } if (HoopLink.task.equals("none") == true) { dbg("No task defined, please use the commandline option -task <task>"); return; } dbg("Starting system ..."); HoopRemoteTask driver = new HoopRemoteTask(); if (HoopLink.useHadoop == false) { dbg("Starting built-in mapper ..."); driver.indexDocuments(); } else { dbg("Starting hadoop job ..."); Configuration conf = new Configuration(); // TRANSFER SETTHoopGS FROM HoopLink to Configuration!!! transferConf(conf); // Now we're feeling much better HoopRemoteTask.hdfs = FileSystem.get(conf); if (HoopLink.dbglocal == true) { dbg("Enabling local debugging ..."); conf.set("mapred.job.tracker", "local"); } else dbg("Disabling local debugging"); JobConf job = new JobConf(conf, HoopRemoteTask.class); job.setJobName(driver.getClassName()); driver.setJob(job); @SuppressWarnings("unused") String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); job.setJarByClass(HoopRemoteTask.class); if (HoopLink.task.equals("invert") == true) { dbg("Configuring job for invert task ..."); job.setReducerClass(HoopInvertedListReducer.class); job.setMapperClass(HoopInvertedListMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); } if (HoopLink.task.equals("wordcount") == true) { dbg("Configuring job for wordcount task ..."); job.setReducerClass(HoopWordCountReducer.class); job.setMapperClass(HoopWordCountMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); } dbg("Using input path: " + HoopLink.datapath); dbg("Using output path: " + HoopLink.outputpath); FileInputFormat.addInputPath(job, new Path(HoopLink.datapath)); FileOutputFormat.setOutputPath(job, new Path(HoopLink.outputpath)); job.setInputFormat(HoopWholeFileInputFormat.class); if ((HoopLink.shardcreate.equals("mos") == true) && (HoopLink.nrshards > 1)) { dbg("Setting output to sharded output streams class ..."); job.setOutputFormat(HoopShardedOutputFormat.class); } else job.setOutputFormat(TextOutputFormat.class); /** * Temporarily commented out for testing purposes */ //job.setPartitionerClass (HoopPartitioner.class); driver.register("Main"); JobClient.runJob(job); postProcess(conf); } showTimeStamp(); metrics.closeMarker(); long timeTaken = metrics.getYValue(); //long timeTaken=metrics.getMarkerRaw (); metrics.printMetrics(timeTaken); driver.unregister(); /** * I've taken out the statistics portion since it relies on code that isn't distributed * The next version will have this solved. I might try the solution in: * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments * Although chances are I will switch to using Hoop to collect much better performance and distribution * statistics. See Hoop.java for more information */ //stats.calcStatistics(); //dbg (stats.printStatistics()); }
From source file:NaivePageRank.java
License:Apache License
public static void main(String[] args) throws Exception { int iteration = -1; String inputPath = args[0];// w w w . ja v a 2 s .co m String outputPath = args[1]; int specIteration = 0; if (args.length > 2) { specIteration = Integer.parseInt(args[2]); } int numNodes = 100000; if (args.length > 3) { numNodes = Integer.parseInt(args[3]); } int numReducers = 32; if (args.length > 4) { numReducers = Integer.parseInt(args[4]); } System.out.println("specified iteration: " + specIteration); long start = System.currentTimeMillis(); /** * job to count out-going links for each url */ JobConf conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Count"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(CountMapper.class); conf.setReducerClass(CountReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/count")); conf.setNumReduceTasks(numReducers); JobClient.runJob(conf); /******************** Initial Rank Assignment Job ***********************/ conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Initialize"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(InitialRankAssignmentMapper.class); conf.setReducerClass(InitialRankAssignmentReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration)); conf.setNumReduceTasks(numReducers); // conf.setIterative(false); JobClient.runJob(conf); iteration++; do { /****************** Join Job ********************************/ conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Join"); conf.setOutputKeyClass(Text.class); // conf.setOutputValueClass(Text.class); conf.setMapperClass(ComputeRankMap.class); conf.setReducerClass(ComputeRankReduce.class); conf.setMapOutputKeyClass(TextPair.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setPartitionerClass(FirstPartitioner.class); conf.setOutputKeyComparatorClass(KeyComparator.class); conf.setOutputValueGroupingComparator(GroupComparator.class); // relation table FileInputFormat.setInputPaths(conf, new Path(inputPath)); // rank table FileInputFormat.addInputPath(conf, new Path(outputPath + "/i" + (iteration - 1))); // count table FileInputFormat.addInputPath(conf, new Path(outputPath + "/count")); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration)); conf.setNumReduceTasks(numReducers); JobClient.runJob(conf); iteration++; /******************** Rank Aggregate Job ***********************/ conf = new JobConf(NaivePageRank.class); conf.setJobName("PageRank-Aggregate"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(Text.class); conf.setMapperClass(RankAggregateMapper.class); conf.setReducerClass(RankAggregateReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(outputPath + "/i" + (iteration - 1))); FileOutputFormat.setOutputPath(conf, new Path(outputPath + "/i" + iteration)); conf.setNumReduceTasks(numReducers); conf.setInt("haloop.num.nodes", numNodes); JobClient.runJob(conf); iteration++; } while (iteration < 2 * specIteration); long end = System.currentTimeMillis(); System.out.println("running time " + (end - start) / 1000 + "s"); }
From source file:IndexWords.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length < 2) { return -1; }/*from ww w . java 2s . c om*/ checkWords = new String[args.length - 2]; int numIter = 5; Path input = new Path(args[0]); for (int i = 0; i < numIter; i++) { JobConf conf = new JobConf(getConf(), IndexWords.class); conf.setJobName("indexwords"); conf.setInputFormat(KeyValueTextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); FileInputFormat.setInputPaths(conf, input); FileOutputFormat.setOutputPath(conf, new Path(args[1] + Integer.toString(i))); RunningJob rj = JobClient.runJob(conf); input = new Path(args[1] + Integer.toString(i)); double resVal = rj.getCounters().getCounter(RecordCounters.RESIDUAL_COUNTER) * 1.0 / 10000; System.out.println(N + " " + (resVal / (1.0 * N))); if (resVal / (1.0 * N) < 0.001) break; } return 0; }
From source file:ClimateData.java
License:Open Source License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(ClimateData.class); conf.setJobName("climatedata"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(args[0])); FileOutputFormat.setOutputPath(conf, new Path(args[1])); JobClient.runJob(conf);//from ww w .j av a 2 s . co m }
From source file:FormatStorage2ColumnStorageMR.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) throws Exception { if (args.length != 2) { System.out.println("FormatStorage2ColumnStorageMR <input> <output>"); System.exit(-1);/* ww w .j av a 2 s. c o m*/ } JobConf conf = new JobConf(FormatStorageMR.class); conf.setJobName("FormatStorage2ColumnStorageMR"); conf.setNumMapTasks(1); conf.setNumReduceTasks(4); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Unit.Record.class); conf.setMapperClass(FormatStorageMapper.class); conf.setReducerClass(ColumnStorageReducer.class); conf.setInputFormat(FormatStorageInputFormat.class); conf.set("mapred.output.compress", "flase"); Head head = new Head(); initHead(head); head.toJobConf(conf); FileInputFormat.setInputPaths(conf, args[0]); Path outputPath = new Path(args[1]); FileOutputFormat.setOutputPath(conf, outputPath); FileSystem fs = outputPath.getFileSystem(conf); fs.delete(outputPath, true); JobClient jc = new JobClient(conf); RunningJob rj = null; rj = jc.submitJob(conf); String lastReport = ""; SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS"); long reportTime = System.currentTimeMillis(); long maxReportInterval = 3 * 1000; while (!rj.isComplete()) { try { Thread.sleep(1000); } catch (InterruptedException e) { } int mapProgress = Math.round(rj.mapProgress() * 100); int reduceProgress = Math.round(rj.reduceProgress() * 100); String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%"; if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) { String output = dateFormat.format(Calendar.getInstance().getTime()) + report; System.out.println(output); lastReport = report; reportTime = System.currentTimeMillis(); } } System.exit(0); }
From source file:BP.java
License:Apache License
protected JobConf configUpdateMessage() throws Exception { final JobConf conf = new JobConf(getConf(), BP.class); conf.set("nstate", "" + nstate); conf.set("compat_matrix_str", "" + edge_potential_str); conf.setJobName("BP_Update_message"); conf.setMapperClass(MapUpdateMessage.class); conf.setReducerClass(RedUpdateMessage.class); fs.delete(message_next_path, true);//from w w w . j ava 2s. c om FileInputFormat.setInputPaths(conf, message_cur_path, prior_path); FileOutputFormat.setOutputPath(conf, message_next_path); conf.setNumReduceTasks(nreducer); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return conf; }
From source file:BP.java
License:Apache License
protected JobConf configCheckErr() throws Exception { final JobConf conf = new JobConf(getConf(), BP.class); conf.set("nstate", "" + nstate); conf.setJobName("BP_Check Err"); fs.delete(check_error_path, true);/*from ww w .j av a 2 s. co m*/ conf.setMapperClass(MapCheckErr.class); conf.setReducerClass(RedCheckErr.class); FileInputFormat.setInputPaths(conf, message_cur_path, message_next_path); FileOutputFormat.setOutputPath(conf, check_error_path); conf.setNumReduceTasks(nreducer); conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(Text.class); return conf; }