List of usage examples for org.apache.hadoop.mapred JobConf setMaxMapAttempts
public void setMaxMapAttempts(int n)
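JobConf.setMaxMapAttempts(n) sets the maximum number of attempts the framework makes per map task before marking the task (and the job) as failed; it backs the mapred.map.max.attempts property, which defaults to 4 in the classic MapReduce API. The examples below set it to 0, usually together with setMapSpeculativeExecution(false), so that non-idempotent upload and scan jobs fail fast instead of retrying. A minimal, self-contained sketch of the same fail-fast pattern (the job name and paths are illustrative, not taken from the sources below):

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class FailFastJob {
    public static void main(String[] args) throws IOException {
        JobConf jobConf = new JobConf(FailFastJob.class);
        jobConf.setJobName("FailFastJob(" + new Date() + ")");

        FileInputFormat.addInputPath(jobConf, new Path(args[0]));
        FileOutputFormat.setOutputPath(jobConf, new Path(args[1]));
        jobConf.setInputFormat(TextInputFormat.class);
        jobConf.setMapperClass(IdentityMapper.class);

        jobConf.setMapSpeculativeExecution(false); // no duplicate speculative attempts
        jobConf.setMaxMapAttempts(1);              // one attempt only: any map failure fails the job

        jobConf.setNumReduceTasks(0);              // map-only job

        JobClient.runJob(jobConf);
    }
}

Passing 0, as the snippets below do, has the same practical effect: the first failed attempt already meets the limit, so nothing is retried.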
From source file:org.cloudata.core.testjob.tera.TeraScanJob.java
License:Apache License
public void runJob(String tableName) throws IOException {
    JobConf jobConf = new JobConf(TeraScanJob.class);
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table:" + tableName);
        System.exit(0);
    }
    Path tempOutputPath = new Path("TeraScanJob" + System.currentTimeMillis());
    jobConf.setJobName("TeraScanJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TeraScanMap.class);
    jobConf.setInputFormat(TeraScanJobTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    // jobConf.setReducerClass(DocFreqReduce.class);
    // jobConf.setOutputKeyClass(Text.class);
    // jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    // Run the job
    JobClient.runJob(jobConf);

    // Delete the temporary output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
}
From source file:org.cloudata.examples.upload.partitionjob.UploadJob.java
License:Apache License
public void runJob(String inputPath, String tableName) throws IOException {
    JobConf jobConf = new JobConf(UploadJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);
    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    // Set the target table for AbstractTabletInputFormat; rows are routed by KeyRangePartitioner
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    CloudataConf conf = new CloudataConf();
    CTable ctable = CTable.openTable(conf, tableName);
    TabletInfo[] tabletInfos = ctable.listTabletInfos();

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(UploadReducer.class);
    jobConf.setReduceSpeculativeExecution(false);
    jobConf.setMaxReduceAttempts(0);
    // One reduce task per tablet
    jobConf.setNumReduceTasks(tabletInfos.length);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.examples.web.TermUploadJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TermUploadJob <num of repeats> termUpload <inputPath> [#reduce]");
        System.exit(0);
    }
    JobConf jobConf = new JobConf(TermUploadJob.class);
    JobClient jobClient = new JobClient(jobConf);
    int maxReduce = jobClient.getClusterStatus().getMaxReduceTasks() * 2;
    if (options.length > 1) {
        maxReduce = Integer.parseInt(options[1]);
    }
    jobConf.setInt("mapred.task.timeout", 60 * 60 * 1000);

    FileSystem fs = FileSystem.get(jobConf);
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, TERM_TABLE)) {
        // Create the term table, partitioned from the precomputed weight files
        Path path = new Path("blogdata/tmp/weight");
        FileStatus[] paths = fs.listStatus(path);
        if (paths == null || paths.length == 0) {
            LOG.error("No Partition info:" + path);
            return;
        }
        SortedSet<Text> terms = new TreeSet<Text>();
        Text text = new Text();
        for (FileStatus eachPath : paths) {
            CloudataLineReader reader = new CloudataLineReader(fs.open(eachPath.getPath()));
            while (true) {
                int length = reader.readLine(text);
                if (length <= 0) {
                    break;
                }
                terms.add(new Text(text));
            }
        }
        int termsPerTablet = terms.size() / (maxReduce - 1);
        int count = 0;
        List<Row.Key> rowKeys = new ArrayList<Row.Key>();
        for (Text term : terms) {
            count++;
            if (count == termsPerTablet) {
                rowKeys.add(new Row.Key(term.getBytes()));
                count = 0;
            }
        }
        rowKeys.add(Row.Key.MAX_KEY);

        TableSchema termTableInfo = new TableSchema(TERM_TABLE, "Test", TERM_TABLE_COLUMNS);
        CTable.createTable(nconf, termTableInfo, rowKeys.toArray(new Row.Key[] {}));
    }
    CTable termTable = CTable.openTable(nconf, TERM_TABLE);
    TabletInfo[] tabletInfos = termTable.listTabletInfos();

    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());
    jobConf.setJobName("TermUploadJob" + "(" + new Date() + ")");
    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(TermUploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, TERM_TABLE);
    jobConf.setPartitionerClass(WebKeyRangePartitioner.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setReducerClass(TermUploadReduce.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setNumReduceTasks(tabletInfos.length);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(maxReduce); // overrides the tabletInfos.length setting above
    jobConf.setMaxReduceAttempts(0);
    //</REDUCE>

    // Run the job
    JobClient.runJob(jobConf);
    fs.delete(tempOutputPath);
}
From source file:org.cloudata.examples.web.WebTableJob.java
License:Apache License
public void exec(String[] options) throws Exception {
    if (options.length < 1) {
        System.out.println("Usage: java TestWebPage <num of repeats> webtable <inputPath>");
        System.exit(0);
    }
    // Create the WebTable if it does not already exist
    CloudataConf nconf = new CloudataConf();
    if (!CTable.existsTable(nconf, WEB_TABLE)) {
        TableSchema webTableInfo = new TableSchema(WEB_TABLE, "Test", WEB_TABLE_COLUMNS);
        webTableInfo.setNumOfVersion(2);
        CTable.createTable(nconf, webTableInfo);
    }
    Path tempOutputPath = new Path("WebTableJob_" + System.currentTimeMillis());
    JobConf jobConf = new JobConf(WebTableJob.class);
    jobConf.setJobName("WebTableJob" + "(" + new Date() + ")");
    FileInputFormat.addInputPath(jobConf, new Path(options[0]));

    //<MAP>
    jobConf.setMapperClass(WebTableMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    // Map-only job
    jobConf.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);

    // Run the job
    JobClient.runJob(jobConf);

    // Delete the temporary output path
    FileSystem fs = FileSystem.get(jobConf);
    fs.delete(tempOutputPath, true);
}
From source file:org.cloudata.util.upload.UploadUtil.java
License:Apache License
private void doHadoopUpload(CloudataConf conf) throws IOException {
    if (!CTable.existsTable(conf, tableName)) {
        throw new IOException("No table:" + tableName);
    }
    JobConf jobConf = new JobConf(UploadUtil.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);
    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    // Set the target table for AbstractTabletInputFormat
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set("uploadJob.delim", delim);

    String columnStr = "";
    for (String eachColumn : columns) {
        columnStr += eachColumn + ",";
    }
    jobConf.set("uploadJob.columns", columnStr);

    String fieldNumStr = "";
    for (int eachField : fieldNums) {
        fieldNumStr += eachField + ",";
    }
    jobConf.set("uploadJob.fieldNums", fieldNumStr);
    jobConf.setBoolean("uploadJob.keyValuePair", keyValuePair);

    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.terrier.utility.io.TestHadoopPlugin.java
License:Mozilla Public License
protected void checkTwoJC(JobConf jc1, JobConf jc2) {
    jc1.set(RANDOM_PROPERTY, "notnull");
    jc1.setMaxMapAttempts(3014);
    // Changes made to jc1 must not leak into jc2
    assertNull(jc2.get(RANDOM_PROPERTY, null));
    assertNotSame(3014, jc2.getMaxMapAttempts());
}