List of usage examples for org.apache.hadoop.mapred JobConf setNumReduceTasks
public void setNumReduceTasks(int n)
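Before the project-specific examples below, here is a minimal, self-contained sketch of how setNumReduceTasks is typically called when configuring a job with the old org.apache.hadoop.mapred API. It is illustrative only: the class name, the "input"/"output" paths, and the choice of four reduce tasks are assumptions, and Hadoop's IdentityMapper/IdentityReducer stand in for real map and reduce logic so the sketch compiles on its own.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumReduceTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf(SetNumReduceTasksExample.class);
        jobConf.setJobName("setNumReduceTasks-example");

        // Illustrative paths; replace with real HDFS locations.
        FileInputFormat.addInputPath(jobConf, new Path("input"));
        FileOutputFormat.setOutputPath(jobConf, new Path("output"));

        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setOutputFormat(TextOutputFormat.class);

        // Pass-through mapper/reducer shipped with Hadoop, used here only
        // so the sketch has no project-specific dependencies.
        jobConf.setMapperClass(IdentityMapper.class);
        jobConf.setReducerClass(IdentityReducer.class);

        // TextInputFormat produces LongWritable/Text pairs and the identity
        // classes pass them through unchanged.
        jobConf.setOutputKeyClass(LongWritable.class);
        jobConf.setOutputValueClass(Text.class);

        // Request four reduce tasks; 0 would make this a map-only job.
        jobConf.setNumReduceTasks(4);

        JobClient.runJob(jobConf);
    }
}

The count passed to setNumReduceTasks controls how many reduce tasks, and therefore output partitions, the job produces; passing 0 turns the job into a map-only job, as in the WebTableJob example further down.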
From source file:org.cloudata.examples.upload.partitionjob.PartitionJob.java
License:Apache License
public boolean runJob(String inputPath, String tableName, int numOfTablets) throws IOException {
    JobConf jobConf = new JobConf(PartitionJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    FileSystem fs = FileSystem.get(jobConf);
    // remove any previous log-count output for this table
    FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);

    jobConf.setJobName("PartitionJob_" + tableName + "(" + new Date() + ")");
    jobConf.set("cloudata.numOfTablets", String.valueOf(numOfTablets));
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    String clientOpt = jobConf.get("mapred.child.java.opts");
    if (clientOpt == null) {
        clientOpt = "";
    }
    jobConf.set("mapred.child.java.opts", clientOpt + " -Duser.name=" + System.getProperty("user.name"));

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(PartitionMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/partitionJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(PartitionReducer.class);
    // single reduce task
    jobConf.setNumReduceTasks(1);
    //</Reduce>

    try {
        RunningJob job = JobClient.runJob(jobConf);
        return job.isSuccessful();
    } finally {
        FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.upload.partitionjob.UploadJob.java
License:Apache License
public void runJob(String inputPath, String tableName) throws IOException {
    JobConf jobConf = new JobConf(UploadJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);
    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    // KeyRangePartitioner reads the target table name from AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    CloudataConf conf = new CloudataConf();
    CTable ctable = CTable.openTable(conf, tableName);
    TabletInfo[] tabletInfos = ctable.listTabletInfos();

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(UploadReducer.class);
    jobConf.setReduceSpeculativeExecution(false);
    jobConf.setMaxReduceAttempts(0);
    // one reduce task per tablet
    jobConf.setNumReduceTasks(tabletInfos.length);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.upload.SimpleUploaderMapReduce.java
License:Apache License
public void run(String[] args) throws IOException {
    if (args.length < 3) {
        System.out.println("Usage: java SimpleUploaderMapReduce <input path> <table name> <# reduce>");
        System.exit(0);
    }

    Path inputPath = new Path(args[0]);
    String tableName = args[1];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableSchema = new TableSchema(tableName);
        tableSchema.addColumn("Col1");

        Row.Key[] rowKeys = new Row.Key[20];
        for (int i = 0; i < 10; i++) {
            rowKeys[i] = new Row.Key("-0" + i);
        }
        for (int i = 1; i < 10; i++) {
            rowKeys[9 + i] = new Row.Key("0" + i);
        }
        rowKeys[19] = Row.Key.MAX_KEY;

        CTable.createTable(nconf, tableSchema, rowKeys);
    }

    JobConf jobConf = new JobConf(HdfsToCloudataMapReduce.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    FileInputFormat.addInputPath(jobConf, inputPath);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(SimpleUploaderMapper.class);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    // </MAP>

    // <REDUCE>
    FileOutputFormat.setOutputPath(jobConf, new Path("SimpleUploaderMapReduce_" + System.currentTimeMillis()));
    jobConf.setReducerClass(SimpleUploaderReducer.class);
    jobConf.setNumReduceTasks(Integer.parseInt(args[2]));
    jobConf.setMaxReduceAttempts(0);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(FileOutputFormat.getOutputPath(jobConf), true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.web.DocFreqJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java DocFreqJob <num of repeats> docFreq [#reduce]");
        System.exit(0);
    }

    JobConf jobConf = new JobConf(DocFreqJob.class);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 0) {
        maxReduce = Integer.parseInt(options[0]);
    }

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TermUploadJob.TERM_TABLE)) {
        TableSchema temrTableInfo = new TableSchema(TermUploadJob.TERM_TABLE, "Test",
                TermUploadJob.TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, temrTableInfo);
    }

    Path tempOutputPath = new Path("DocFreqJob_" + System.currentTimeMillis());
    jobConf.setJobName("DocFreqJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(DocFreqMap.class);
    jobConf.setInputFormat(AbstractTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(AbstractTabletInputFormat.INPUT_COLUMN_LIST,
            WebTableJob.WEB_TABLE_COLUMNS[1] + "," + WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(DocFreqReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(maxReduce);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.examples.web.TermGlobalJob.java
License:Apache License
public void exec() throws Exception {
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, GLOBAL_TABLE)) {
        TableSchema globalTableInfo = new TableSchema(GLOBAL_TABLE, "Test", GLOBAL_TABLE_COLUMNS);
        CTable.createTable(nconf, globalTableInfo);
    }

    Path tempOutputPath = new Path("globalTableInfo" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("TermGlobalJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermGlobalMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST, WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermGlobalReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setMaxReduceAttempts(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.examples.web.TermUploadJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermUploadJob <num of repeats> termUpload <inputPath> [#reduce]");
        System.exit(0);
    }

    JobConf jobConf = new JobConf(TermUploadJob.class);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 1) {
        maxReduce = Integer.parseInt(options[1]);
    }

    jobConf.setInt("mapred.task.timeout", 60 * 60 * 1000);

    FileSystem fs = FileSystem.get(jobConf);

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TERM_TABLE)) {
        // create TERM_TABLE, deriving tablet boundaries from the pre-computed partition info
        Path path = new Path("blogdata/tmp/weight");
        FileStatus[] paths = fs.listStatus(path);
        if (paths == null || paths.length == 0) {
            LOG.error("No Partition info:" + path);
            return;
        }

        SortedSet<Text> terms = new TreeSet<Text>();
        Text text = new Text();
        for (FileStatus eachPath : paths) {
            CloudataLineReader reader = new CloudataLineReader(fs.open(eachPath.getPath()));
            while (true) {
                int length = reader.readLine(text);
                if (length <= 0) {
                    break;
                }
                terms.add(new Text(text));
            }
        }

        int temrsPerTablet = terms.size() / (maxReduce - 1);
        int count = 0;
        List<Row.Key> rowKeys = new ArrayList<Row.Key>();
        for (Text term : terms) {
            count++;
            if (count == temrsPerTablet) {
                rowKeys.add(new Row.Key(term.getBytes()));
                count = 0;
            }
        }
        rowKeys.add(Row.Key.MAX_KEY);

        TableSchema temrTableInfo = new TableSchema(TERM_TABLE, "Test", TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, temrTableInfo, rowKeys.toArray(new Row.Key[] {}));
    }

    CTable termTable = CTable.openTable(nconf, TERM_TABLE);
    TabletInfo[] tabletInfos = termTable.listTabletInfos();

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());
    jobConf.setJobName("TermUploadJob" + "(" + new Date() + ")");

    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(TermUploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, TERM_TABLE);
    jobConf.setPartitionerClass(WebKeyRangePartitioner.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermUploadReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(tabletInfos.length);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    // note: this second call overrides the tabletInfos.length value set just above
    jobConf.setNumReduceTasks(maxReduce);
    jobConf.setMaxReduceAttempts(0);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    fs.delete(tempOutputPath);
}
From source file:org.cloudata.examples.web.TermWeightJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermWeightJob <num of repeats> termWeight <outputPath> [noGlobal]");
        System.exit(0);
    }

    if (options.length == 1 || !options[1].equals("noGlobal")) {
        TermGlobalJob termGlobalJob = new TermGlobalJob();
        termGlobalJob.exec();
    }

    Path outputPath = new Path(options[0]);

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("TermWeightJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermWeightMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST,
            WebTableJob.WEB_TABLE_COLUMNS[1] + "," + WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermWeightReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    //jobConf.setMaxReduceAttempts(0);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks();
    jobConf.setNumReduceTasks(maxReduce);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    //FileSystem fs = FileSystem.get(jobConf);
    //fs.delete(outputPath);
}
From source file:org.cloudata.examples.web.TermWeightJobOnline.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermWeightJobOnline <num of repeats> termWeightOnline [noGlobal]");
        System.exit(0);
    }

    if (options.length == 1 || !options[1].equals("noGlobal")) {
        TermGlobalJob termGlobalJob = new TermGlobalJob();
        termGlobalJob.exec();
    }

    Path outputPath = new Path("TermWeightJobOnline_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(TermWeightJobOnline.class);
    jobConf.setJobName("TermWeightJobOnline" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TermWeightMap.class);
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.set(DefaultTabletInputFormat.INPUT_TABLE, WebTableJob.WEB_TABLE);
    jobConf.set(DefaultTabletInputFormat.INPUT_COLUMN_LIST,
            WebTableJob.WEB_TABLE_COLUMNS[1] + "," + WebTableJob.WEB_TABLE_COLUMNS[2]);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermWeightReduceOnline.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, outputPath);
    //jobConf.setMaxReduceAttempts(0);
    JobClient jobClinet = new JobClient(jobConf);
    int maxReduce = jobClinet.getClusterStatus().getMaxReduceTasks();
    jobConf.setNumReduceTasks(maxReduce);
    //</REDUCE>

    //Run Job
    try {
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(outputPath, true);
    }
}
From source file:org.cloudata.examples.web.WebTableJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TestWebPage <num of repeats> webtable <inputPath>");
        System.exit(0);
    }

    // create WEB_TABLE if it does not already exist
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, WEB_TABLE)) {
        TableSchema webTableInfo = new TableSchema(WEB_TABLE, "Test", WEB_TABLE_COLUMNS);
        webTableInfo.setNumOfVersion(2);
        CTable.createTable(nconf, webTableInfo);
    }

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());

    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("WebTableJob" + "(" + new Date() + ")");

    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(WebTableMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    // map-only job: no reduce phase
    jobConf.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.examples.weblink.ScanJob.java
License:Apache License
public void run(String[] args) throws IOException {
    if (args.length < 1) {
        System.out.println("Usage: java ScanJob <table name>");
        System.exit(0);
    }

    String tableName = args[0];

    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table: " + tableName);
        System.exit(0);
    }

    JobConf jobConf = new JobConf(UploadJob.class);
    jobConf.setJobName("CloudataExamles.weblink.ScanJob_" + new Date());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    // <MAP>
    jobConf.setInputFormat(DefaultTabletInputFormat.class);
    jobConf.setMapperClass(ScanJobMapper.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.set(AbstractTabletInputFormat.INPUT_COLUMN_LIST, "url");
    // </MAP>

    // <REDUCE>
    FileOutputFormat.setOutputPath(jobConf, new Path("CloudataExamles_WebScanJob_" + System.currentTimeMillis()));
    jobConf.setReducerClass(ScanJobReducer.class);
    jobConf.setNumReduceTasks(1);
    // </REDUCE>

    try {
        JobClient.runJob(jobConf);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}