List of usage examples for org.apache.hadoop.mapred JobConf setMapSpeculativeExecution
public void setMapSpeculativeExecution(boolean speculativeExecution)
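This setter turns speculative execution of map tasks on or off for a job submitted through the old org.apache.hadoop.mapred API. Below is a minimal, self-contained sketch of disabling it; the driver class, mapper, and input/output paths are illustrative placeholders, not taken from the examples that follow:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;

public class SpeculativeExecutionExample {
    public static void main(String[] args) throws Exception {
        JobConf jobConf = new JobConf(SpeculativeExecutionExample.class);
        jobConf.setJobName("DisableMapSpeculation");

        // Hypothetical input/output locations for illustration only.
        FileInputFormat.addInputPath(jobConf, new Path("/tmp/example-input"));
        FileOutputFormat.setOutputPath(jobConf, new Path("/tmp/example-output"));

        jobConf.setMapperClass(IdentityMapper.class);
        jobConf.setNumReduceTasks(0);

        // Turn off speculative execution for map tasks so each input split is
        // processed exactly once; this matters when mappers have external side
        // effects (e.g. writing to a table or external store).
        jobConf.setMapSpeculativeExecution(false);

        JobClient.runJob(jobConf);
    }
}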
From source file:org.apache.sysml.runtime.matrix.ReblockMR.java
License:Apache License
public static JobReturn runJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos, long[] rlens,
        long[] clens, int[] brlens, int[] bclens, long[] nnz, String instructionsInMapper,
        String reblockInstructions, String otherInstructionsInReducer, int numReducers, int replication,
        boolean jvmReuse, byte[] resultIndexes, String[] outputs, OutputInfo[] outputInfos) throws Exception {
    JobConf job = new JobConf(ReblockMR.class);
    job.setJobName("Reblock-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    //(internally used input converters: text2bc for text, identity for binary inputs)
    MRJobConfiguration.setUpMultipleInputsReblock(job, realIndexes, inputs, inputInfos, brlens, bclens);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens, nnz);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

    //set up unary instructions that will perform in the mapper
    MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setReblockInstructions(job, reblockInstructions);

    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

    //set up the replication factor for the results
    job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

    //disable automatic task timeouts and speculative task execution
    job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
    job.setMapSpeculativeExecution(false);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    //set up custom map/reduce configurations
    DMLConfig config = ConfigurationManager.getDMLConfig();
    MRJobConfiguration.setupCustomMRConfigurations(job, config);

    //enable jvm reuse (based on SystemML configuration)
    if (jvmReuse)
        job.setNumTasksToExecutePerJvm(-1);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
            instructionsInMapper, reblockInstructions, null, otherInstructionsInReducer, resultIndexes);

    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
            instructionsInMapper, reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes,
            mapoutputIndexes, false);

    MatrixCharacteristics[] stats = ret.stats;

    //set up the number of reducers (according to output size)
    int numRed = determineNumReducers(rlens, clens, nnz, config.getIntValue(DMLConfig.NUM_REDUCERS),
            ret.numReducerGroups);
    job.setNumReduceTasks(numRed);

    //setup in-memory reduce buffers budget (reblock reducer doesn't need much memory)
    //job.set(MRConfigurationNames.MR_REDUCE_INPUT_BUFFER_PERCENT, "0.70");

    //print the complete instruction
    if (LOG.isTraceEnabled())
        inst.printCompleteMRJobInstruction(stats);

    //update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
            true);

    //configure mapper and the mapper output key value pairs
    job.setMapperClass(ReblockMapper.class);
    job.setMapOutputKeyClass(MatrixIndexes.class); //represent key offsets for block
    job.setMapOutputValueClass(TaggedAdaptivePartialBlock.class); //binary cell/block

    //configure reducer
    job.setReducerClass(ReblockReducer.class);

    //By default, the job executes in "cluster" mode.
    //Determine if we can optimize and run it in "local" mode.
    //At this point, both reblock_binary and reblock_text are similar.
    MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
    for (int i = 0; i < inputs.length; i++) {
        inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
    }

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    RunningJob runjob = JobClient.runJob(job);

    //process different counters
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        //number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        //System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file:org.cloudata.core.testjob.performance.ManyTableJob.java
License:Apache License
public static void getData(CloudataConf conf, Path keyPath) throws IOException {
    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", conf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    Path tempOutputPath = new Path("ManyTableJob_Get_" + System.currentTimeMillis());

    jobConf.setJobName("ManyTableJob_Get_" + "(" + new Date() + ")");
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTableGetMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, keyPath);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.core.testjob.performance.ManyTableJob.java
License:Apache License
public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.core.testjob.performance.TestMultiThreadCTable.java
License:Apache License
public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();
    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);

    //<MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.core.testjob.tera.TeraJob.java
License:Apache License
public void runJob(String tableName, int numOfTablets, int dataLength, int totalGb, String keyOutputPath)
        throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraJob.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        TableSchema tableInfo = new TableSchema(tableName, "Test");
        tableInfo.addColumn(new ColumnInfo("Col1"));
        tableInfo.addColumn(new ColumnInfo("Col2", TableSchema.CACHE_TYPE));
        tableInfo.addColumn(new ColumnInfo("Col3"));
        CTable.createTable(nconf, tableInfo);
    }

    jobConf.setJobName("TeraOnlineJob" + "(" + new Date() + ")");

    long rowsPerTask = ((((long) totalGb) * 1024L * 1024L * 1024L) / ((long) dataLength)) / (long) numOfTablets;

    jobConf.setInt("teraJob.dataLength", dataLength);
    jobConf.setLong("teraJob.rowsPerTask", rowsPerTask);
    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    FileOutputFormat.setOutputPath(jobConf, new Path(keyOutputPath));

    //<MAP>
    jobConf.setMapperClass(TeraOnlineMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);
    jobConf.setNumMapTasks(numOfTablets);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.core.testjob.tera.TeraReadJob.java
License:Apache License
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        System.out.println("Usage: java TeraReadJob <table name> <keyOutputPath>");
        System.exit(0);
    }

    String tableName = args[0];
    String keyOutputPath = args[1];

    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TeraReadJob.class);
    jobConf.set("user.name", nconf.getUserId());

    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("Error: No table " + tableName);
        System.exit(0);
    }

    Path tempOutputPath = new Path("TeraReadJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraReadJob" + "(" + new Date() + ")");
    jobConf.set("TeraReadJob.tableName", tableName);
    TextOutputFormat.setOutputPath(jobConf, tempOutputPath);

    //<MAP>
    jobConf.setMapperClass(TeraReadMap.class);
    jobConf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(jobConf, new Path(keyOutputPath));
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.core.testjob.tera.TeraScanJob.java
License:Apache License
public void runJob(String tableName) throws IOException {
    JobConf jobConf = new JobConf(TeraScanJob.class);

    CloudataConf nconf = new CloudataConf();

    if (!CTable.existsTable(nconf, tableName)) {
        System.out.println("No table:" + tableName);
        System.exit(0);
    }

    Path tempOutputPath = new Path("TeraScanJob" + System.currentTimeMillis());

    jobConf.setJobName("TeraScanJob" + "(" + new Date() + ")");

    //<MAP>
    jobConf.setMapperClass(TeraScanMap.class);
    jobConf.setInputFormat(TeraScanJobTabletInputFormat.class);
    jobConf.set(AbstractTabletInputFormat.INPUT_TABLE, tableName);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    //jobConf.setReducerClass(DocFreqReduce.class);
    //jobConf.setOutputKeyClass(Text.class);
    //jobConf.setOutputValueClass(Text.class);
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    //Run Job
    JobClient.runJob(jobConf);

    //delete temp output path
    FileSystem fs = FileSystem.get(jobConf);
    FileUtil.delete(fs, tempOutputPath, true);
}
From source file:org.cloudata.examples.upload.partitionjob.UploadJob.java
License:Apache License
public void runJob(String inputPath, String tableName) throws IOException {
    JobConf jobConf = new JobConf(UploadJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    //KeyRangePartitioner
    //AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    CloudataConf conf = new CloudataConf();
    CTable ctable = CTable.openTable(conf, tableName);
    TabletInfo[] tabletInfos = ctable.listTabletInfos();

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    jobConf.setPartitionerClass(KeyRangePartitioner.class);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(UploadReducer.class);
    jobConf.setReduceSpeculativeExecution(false);
    jobConf.setMaxReduceAttempts(0);
    //one reduce task per tablet
    jobConf.setNumReduceTasks(tabletInfos.length);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.util.upload.UploadUtil.java
License:Apache License
private void doHadoopUpload(CloudataConf conf) throws IOException {
    if (!CTable.existsTable(conf, tableName)) {
        throw new IOException("No table:" + tableName);
    }

    JobConf jobConf = new JobConf(UploadUtil.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    jobConf.setJobName("UploadJob_" + tableName + "(" + new Date() + ")");

    //KeyRangePartitioner
    //AbstractTabletInputFormat.OUTPUT_TABLE
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    //<Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.set("uploadJob.delim", delim);

    String columnStr = "";
    for (String eachColumn : columns) {
        columnStr += eachColumn + ",";
    }
    jobConf.set("uploadJob.columns", columnStr);

    String fieldNumStr = "";
    for (int eachField : fieldNums) {
        fieldNumStr += eachField + ",";
    }
    jobConf.set("uploadJob.fieldNums", fieldNumStr);
    jobConf.setBoolean("uploadJob.keyValuePair", keyValuePair);

    jobConf.setMapperClass(UploadMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</Map>

    //<Reduce>
    Path tempOutputPath = new Path("temp/uploadJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setNumReduceTasks(0);
    //</Reduce>

    try {
        JobClient.runJob(jobConf);
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}