List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks
public void setNumMapTasks(int n)
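For orientation before the per-project examples below, here is a minimal, self-contained sketch of calling setNumMapTasks when configuring an old-API (org.apache.hadoop.mapred) job. Note that the value is only a hint to the framework: the InputFormat's split computation decides how many map tasks actually run. The job name, paths, and the choice of identity mapper/reducer here are illustrative, not taken from the examples below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class NumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf job = new JobConf(NumMapTasksExample.class);
        job.setJobName("num-map-tasks-example");

        // Identity mapper/reducer simply pass (LongWritable offset, Text line) pairs through.
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // A hint only: with the default TextInputFormat the number of splits,
        // and therefore the number of map tasks, is derived from the input size.
        job.setNumMapTasks(8);
        job.setNumReduceTasks(2);

        JobClient.runJob(job);
    }
}

The examples that follow show the common real-world patterns: deriving the hint from cluster capacity, from a command-line flag, or from the number of generated input records.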
From source file:org.apache.sysml.runtime.matrix.CleanupMR.java
License:Apache License
public static boolean runJob(DMLConfig conf) throws Exception {
    boolean ret = false;

    try {
        JobConf job;
        job = new JobConf(CleanupMR.class);
        job.setJobName("Cleanup-MR");

        //set up SystemML local tmp dir
        String dir = conf.getTextValue(DMLConfig.LOCAL_TMP_DIR);
        MRJobConfiguration.setSystemMLLocalTmpDir(job, dir);

        //set mappers, reducers
        int numNodes = InfrastructureAnalyzer.getRemoteParallelNodes();
        job.setMapperClass(CleanupMapper.class); //map-only
        job.setNumMapTasks(numNodes); //numMappers
        job.setNumReduceTasks(0);

        //set input/output format, input path
        String inFileName = conf.getTextValue(DMLConfig.SCRATCH_SPACE) + "/cleanup_tasks";
        job.setInputFormat(NLineInputFormat.class);
        job.setOutputFormat(NullOutputFormat.class);

        Path path = new Path(inFileName);
        FileInputFormat.setInputPaths(job, path);
        writeCleanupTasksToFile(path, numNodes);

        //disable automatic task timeouts and speculative task exec
        job.setInt(MRConfigurationNames.MR_TASK_TIMEOUT, 0);
        job.setMapSpeculativeExecution(false);

        // execute the MR job
        RunningJob runjob = JobClient.runJob(job);
        ret = runjob.isSuccessful();
    } catch (Exception ex) {
        //don't raise an exception, just gracefully log an error message
        LOG.error("Failed to run cleanup MR job. ", ex);
    }

    return ret;
}
From source file:org.apache.sysml.runtime.matrix.DataGenMR.java
License:Apache License
/**
 * <p>Starts a Rand MapReduce job which will produce one or more random objects.</p>
 *
 * @param inst MR job instruction
 * @param dataGenInstructions array of data gen instructions
 * @param instructionsInMapper instructions in mapper
 * @param aggInstructionsInReducer aggregate instructions in reducer
 * @param otherInstructionsInReducer other instructions in reducer
 * @param numReducers number of reducers
 * @param replication file replication
 * @param resultIndexes result indexes for each random object
 * @param dimsUnknownFilePrefix file path prefix when dimensions unknown
 * @param outputs output file for each random object
 * @param outputInfos output information for each random object
 * @return matrix characteristics for each random object
 * @throws Exception if Exception occurs
 */
public static JobReturn runJob(MRJobInstruction inst, String[] dataGenInstructions, String instructionsInMapper,
        String aggInstructionsInReducer, String otherInstructionsInReducer, int numReducers, int replication,
        byte[] resultIndexes, String dimsUnknownFilePrefix, String[] outputs, OutputInfo[] outputInfos)
        throws Exception {
    JobConf job = new JobConf(DataGenMR.class);
    job.setJobName("DataGen-MR");

    //whether use block representation or cell representation
    MRJobConfiguration.setMatrixValueClass(job, true);

    byte[] realIndexes = new byte[dataGenInstructions.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    String[] inputs = new String[dataGenInstructions.length];
    InputInfo[] inputInfos = new InputInfo[dataGenInstructions.length];
    long[] rlens = new long[dataGenInstructions.length];
    long[] clens = new long[dataGenInstructions.length];
    int[] brlens = new int[dataGenInstructions.length];
    int[] bclens = new int[dataGenInstructions.length];

    FileSystem fs = FileSystem.get(job);
    String dataGenInsStr = "";
    int numblocks = 0;
    int maxbrlen = -1, maxbclen = -1;
    double maxsparsity = -1;

    for (int i = 0; i < dataGenInstructions.length; i++) {
        dataGenInsStr = dataGenInsStr + Lop.INSTRUCTION_DELIMITOR + dataGenInstructions[i];

        MRInstruction mrins = MRInstructionParser.parseSingleInstruction(dataGenInstructions[i]);
        MRINSTRUCTION_TYPE mrtype = mrins.getMRInstructionType();
        DataGenMRInstruction genInst = (DataGenMRInstruction) mrins;

        rlens[i] = genInst.getRows();
        clens[i] = genInst.getCols();
        brlens[i] = genInst.getRowsInBlock();
        bclens[i] = genInst.getColsInBlock();

        maxbrlen = Math.max(maxbrlen, brlens[i]);
        maxbclen = Math.max(maxbclen, bclens[i]);

        if (mrtype == MRINSTRUCTION_TYPE.Rand) {
            RandInstruction randInst = (RandInstruction) mrins;
            inputs[i] = LibMatrixDatagen.generateUniqueSeedPath(genInst.getBaseDir());
            maxsparsity = Math.max(maxsparsity, randInst.getSparsity());

            PrintWriter pw = null;
            try {
                pw = new PrintWriter(fs.create(new Path(inputs[i])));

                //for obj reuse and preventing repeated buffer re-allocations
                StringBuilder sb = new StringBuilder();

                //seed generation
                Well1024a bigrand = LibMatrixDatagen.setupSeedsForRand(randInst.getSeed());
                LongStream nnz = LibMatrixDatagen.computeNNZperBlock(rlens[i], clens[i], brlens[i], bclens[i],
                        randInst.getSparsity());
                PrimitiveIterator.OfLong nnzIter = nnz.iterator();
                for (long r = 0; r < rlens[i]; r += brlens[i]) {
                    long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));
                    for (long c = 0; c < clens[i]; c += bclens[i]) {
                        long curBlockColSize = Math.min(bclens[i], (clens[i] - c));
                        sb.append((r / brlens[i]) + 1);
                        sb.append(',');
                        sb.append((c / bclens[i]) + 1);
                        sb.append(',');
                        sb.append(curBlockRowSize);
                        sb.append(',');
                        sb.append(curBlockColSize);
                        sb.append(',');
                        sb.append(nnzIter.nextLong());
                        sb.append(',');
                        sb.append(bigrand.nextLong());
                        pw.println(sb.toString());
                        sb.setLength(0);
                        numblocks++;
                    }
                }
            } finally {
                IOUtilFunctions.closeSilently(pw);
            }

            inputInfos[i] = InputInfo.TextCellInputInfo;
        } else if (mrtype == MRINSTRUCTION_TYPE.Seq) {
            SeqInstruction seqInst = (SeqInstruction) mrins;
            inputs[i] = genInst.getBaseDir() + System.currentTimeMillis() + ".seqinput";
            maxsparsity = 1.0; //always dense

            double from = seqInst.fromValue;
            double to = seqInst.toValue;
            double incr = seqInst.incrValue;

            //handle default 1 to -1 for special case of from>to
            incr = LibMatrixDatagen.updateSeqIncr(from, to, incr);

            // Correctness checks on (from, to, incr)
            boolean neg = (from > to);
            if (incr == 0)
                throw new DMLRuntimeException("Invalid value for \"increment\" in seq().");

            if (neg != (incr < 0))
                throw new DMLRuntimeException("Wrong sign for the increment in a call to seq()");

            // Compute the number of rows in the sequence
            long numrows = UtilFunctions.getSeqLength(from, to, incr);
            if (rlens[i] > 0) {
                if (numrows != rlens[i])
                    throw new DMLRuntimeException(
                            "Unexpected error while processing sequence instruction. Expected number of rows does not match given number: "
                                    + rlens[i] + " != " + numrows);
            } else {
                rlens[i] = numrows;
            }
            if (clens[i] > 0 && clens[i] != 1)
                throw new DMLRuntimeException(
                        "Unexpected error while processing sequence instruction. Number of columns ("
                                + clens[i] + ") must be equal to 1.");
            else
                clens[i] = 1;

            PrintWriter pw = null;
            try {
                pw = new PrintWriter(fs.create(new Path(inputs[i])));
                StringBuilder sb = new StringBuilder();

                double temp = from;
                double block_from, block_to;
                for (long r = 0; r < rlens[i]; r += brlens[i]) {
                    long curBlockRowSize = Math.min(brlens[i], (rlens[i] - r));

                    // block (bid_i,bid_j) generates a sequence from the interval
                    // [block_from, block_to] (inclusive of both end points of the interval)
                    long bid_i = ((r / brlens[i]) + 1);
                    long bid_j = 1;
                    block_from = temp;
                    block_to = temp + (curBlockRowSize - 1) * incr;
                    temp = block_to + incr; // next block starts from here

                    sb.append(bid_i);
                    sb.append(',');
                    sb.append(bid_j);
                    sb.append(',');
                    sb.append(block_from);
                    sb.append(',');
                    sb.append(block_to);
                    sb.append(',');
                    sb.append(incr);

                    pw.println(sb.toString());
                    sb.setLength(0);
                    numblocks++;
                }
            } finally {
                IOUtilFunctions.closeSilently(pw);
            }

            inputInfos[i] = InputInfo.TextCellInputInfo;
        } else {
            throw new DMLRuntimeException("Unexpected Data Generation Instruction Type: " + mrtype);
        }
    }
    dataGenInsStr = dataGenInsStr.substring(1); //remove the first ","

    RunningJob runjob;
    MatrixCharacteristics[] stats;
    try {
        //set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

        //set up the input files and their format information
        MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
                ConvertTarget.BLOCK);

        //set up the dimensions of input matrices
        MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);
        MRJobConfiguration.setDimsUnknownFilePrefix(job, dimsUnknownFilePrefix);

        //set up the block size
        MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

        //set up the rand Instructions
        MRJobConfiguration.setRandInstructions(job, dataGenInsStr);

        //set up unary instructions that will perform in the mapper
        MRJobConfiguration.setInstructionsInMapper(job, instructionsInMapper);

        //set up the aggregate instructions that will happen in the combiner and reducer
        MRJobConfiguration.setAggregateInstructions(job, aggInstructionsInReducer);

        //set up the instructions that will happen in the reducer, after the aggregation instructions
        MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

        //set up the replication factor for the results
        job.setInt(MRConfigurationNames.DFS_REPLICATION, replication);

        //set up map/reduce memory configurations (if in AM context)
        DMLConfig config = ConfigurationManager.getDMLConfig();
        DMLAppMasterUtils.setupMRJobRemoteMaxMemory(job, config);

        //set up custom map/reduce configurations
        MRJobConfiguration.setupCustomMRConfigurations(job, config);

        //determine degree of parallelism (nmappers: 1<=n<=capacity)
        //TODO use maxsparsity whenever we have a way of generating sparse rand data
        int capacity = InfrastructureAnalyzer.getRemoteParallelMapTasks();
        long dfsblocksize = InfrastructureAnalyzer.getHDFSBlockSize();

        //correction max number of mappers on yarn clusters
        if (InfrastructureAnalyzer.isYarnEnabled())
            capacity = (int) Math.max(capacity, YarnClusterAnalyzer.getNumCores());
        int nmapers = Math
                .max(Math.min((int) (8 * maxbrlen * maxbclen * (long) numblocks / dfsblocksize), capacity), 1);
        job.setNumMapTasks(nmapers);

        //set up what matrices are needed to pass from the mapper to reducer
        HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes,
                dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, otherInstructionsInReducer,
                resultIndexes);

        MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes,
                dataGenInsStr, instructionsInMapper, null, aggInstructionsInReducer, null,
                otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
        stats = ret.stats;

        //set up the number of reducers
        MRJobConfiguration.setNumReducers(job, ret.numReducerGroups, numReducers);

        // print the complete MRJob instruction
        if (LOG.isTraceEnabled())
            inst.printCompleteMRJobInstruction(stats);

        // Update resultDimsUnknown based on computed "stats"
        byte[] resultDimsUnknown = new byte[resultIndexes.length];
        for (int i = 0; i < resultIndexes.length; i++) {
            if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
                resultDimsUnknown[i] = (byte) 1;
            } else {
                resultDimsUnknown[i] = (byte) 0;
            }
        }

        boolean mayContainCtable = instructionsInMapper.contains("ctabletransform")
                || instructionsInMapper.contains("groupedagg");

        //set up the multiple output files, and their format information
        MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos,
                true, mayContainCtable);

        // configure mapper and the mapper output key value pairs
        job.setMapperClass(DataGenMapper.class);
        if (numReducers == 0) {
            job.setMapOutputKeyClass(Writable.class);
            job.setMapOutputValueClass(Writable.class);
        } else {
            job.setMapOutputKeyClass(MatrixIndexes.class);
            job.setMapOutputValueClass(TaggedMatrixBlock.class);
        }

        //set up combiner
        if (numReducers != 0 && aggInstructionsInReducer != null && !aggInstructionsInReducer.isEmpty())
            job.setCombinerClass(GMRCombiner.class);

        //configure reducer
        job.setReducerClass(GMRReducer.class);
        //job.setReducerClass(PassThroughReducer.class);

        // By default, the job executes in "cluster" mode.
        // Determine if we can optimize and run it in "local" mode.
        MatrixCharacteristics[] inputStats = new MatrixCharacteristics[inputs.length];
        for (int i = 0; i < inputs.length; i++) {
            inputStats[i] = new MatrixCharacteristics(rlens[i], clens[i], brlens[i], bclens[i]);
        }

        //set unique working dir
        MRJobConfiguration.setUniqueWorkingDir(job);

        runjob = JobClient.runJob(job);

        /* Process different counters */
        Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
        for (int i = 0; i < resultIndexes.length; i++) {
            // number of non-zeros
            stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        }

        String dir = dimsUnknownFilePrefix + "/" + runjob.getID().toString() + "_dimsFile";
        stats = MapReduceTool.processDimsFiles(dir, stats);
        MapReduceTool.deleteFileIfExistOnHDFS(dir);
    } finally {
        for (String input : inputs)
            MapReduceTool.deleteFileIfExistOnHDFS(new Path(input), job);
    }

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}
From source file:org.apache.tez.mapreduce.examples.MapredWordCount.java
License:Apache License
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), MapredWordCount.class);
    conf.setJobName("wordcount");
    LOG.info("Running WordCount job using mapred apis");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            LOG.error("Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            LOG.error("Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        LOG.error("Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:org.archive.hadoop.jobs.ArchiveFileExtractor.java
License:Apache License
/**
 * Run the job.
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 1;
    }

    // Create a job configuration
    JobConf job = new JobConf(getConf());

    // Job name uses output dir to help identify it to the operator.
    job.setJobName("Archive File Extractor");

    // This is a map-only job, no reducers.
    job.setNumReduceTasks(0);

    // turn off speculative execution
    job.setBoolean("mapred.map.tasks.speculative.execution", false);

    // set timeout to a high value - 20 hours
    job.setInt("mapred.task.timeout", 72000000);

    //tolerate task exceptions
    job.setBoolean("soft", false);

    int arg = 0;
    int numMaps = 10;

    String DEFAULT_WARC_PATTERN = "software: %s Extractor\r\n" + "format: WARC File Format 1.0\r\n"
            + "conformsTo: http://bibnum.bnf.fr/WARC/WARC_ISO_28500_version1_latestdraft.pdf\r\n"
            + "publisher: Internet Archive\r\n" + "created: %s\r\n\r\n";

    String warcHeaderString = String.format(DEFAULT_WARC_PATTERN, IAUtils.COMMONS_VERSION,
            DateUtils.getLog17Date(System.currentTimeMillis()));

    while (arg < args.length - 1) {
        if (args[arg].equals("-soft")) {
            job.setBoolean("soft", true);
            arg++;
        } else if (args[arg].equals("-mappers")) {
            arg++;
            numMaps = Integer.parseInt(args[arg]);
            job.setNumMapTasks(numMaps);
            arg++;
        } else if (args[arg].equals("-timestamp14")) {
            arg++;
            String timestamp14 = DateUtils.get14DigitDate(DateUtils.parse14DigitDate(args[arg]));
            job.set("timestamp14", timestamp14);
            arg++;
        } else if (args[arg].equals("-warc-header-local-file")) {
            arg++;
            File f = new File(args[arg]);
            FileInputStream fis = new FileInputStream(f);
            warcHeaderString = IOUtils.toString(fis, "UTF-8");
            arg++;
        } else if (args[arg].equals("-hmacname")) {
            arg++;
            String hmacName = args[arg];
            job.set("hmacName", hmacName);
            arg++;
        } else if (args[arg].equals("-hmacsignature")) {
            arg++;
            String hmacSignature = args[arg];
            job.set("hmacSignature", hmacSignature);
            arg++;
        } else if (args[arg].equals("-timeout")) {
            arg++;
            int taskTimeout = Integer.parseInt(args[arg]);
            job.setInt("mapred.task.timeout", taskTimeout);
            arg++;
        } else if (args[arg].equals("-failpct")) {
            arg++;
            int failPct = Integer.parseInt(args[arg]);
            job.setInt("mapred.max.map.failures.percent", failPct);
            arg++;
        } else {
            break;
        }
    }
    job.set("warcHeaderString", warcHeaderString);

    if (args.length - 2 != arg) {
        printUsage();
        return 1;
    }

    Path inputPath = new Path(args[arg]);
    arg++;

    String outputDir = args[arg];
    arg++;

    job.set("outputDir", outputDir);
    Path outputPath = new Path(outputDir);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(ArchiveFileExtractorMapper.class);
    job.setJarByClass(ArchiveFileExtractor.class);

    TextInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Run the job!
    RunningJob rj = JobClient.runJob(job);
    if (!rj.isSuccessful()) {
        LOG.error("FAILED: " + rj.getID());
        return 2;
    }
    return 0;
}
From source file:org.archive.wayback.hadoop.CDXSort.java
License:Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    boolean compressOutput = false;
    boolean dereferenceInputs = false;
    boolean canonicalize = false;
    boolean funkyInput = false;

    JobConf jobConf = new JobConf(getConf(), CDXSort.class);
    jobConf.setJobName("cdxsort");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("--compress-output".equals(args[i])) {
                compressOutput = true;
            } else if ("--funky-input".equals(args[i])) {
                funkyInput = true;
            } else if ("--dereference-inputs".equals(args[i])) {
                dereferenceInputs = true;
            } else if ("--canonicalize".equals(args[i])) {
                canonicalize = true;
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Make sure there are exactly 3 parameters left: split input output
    if (otherArgs.size() != 3) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 3.");
        return printUsage();
    }
    String splitPath = otherArgs.get(0);
    String inputPath = otherArgs.get(1);
    String outputPath = otherArgs.get(2);

    // load the split file, find and set the number of reduces
    AlphaPartitioner partitioner = new AlphaPartitioner();
    File localSplitFile = new File(splitPath);
    FileInputStream fis = new FileInputStream(localSplitFile);
    InputStreamReader isr = new InputStreamReader(fis, ByteOp.UTF8);
    BufferedReader bis = new BufferedReader(isr);
    // try {
    // partitioner.loadBoundaries(bis);
    // } catch (IOException except) {
    // System.err.println("ERROR: Problem loading file " + splitPath);
    // return printUsage(); // exits
    // }
    // jobConf.setNumReduceTasks(partitioner.getNumPartitions());
    //
    // // copy the split file into the FS, add to the DistributedCache:
    //// AlphaPartitioner.setPartitionFile(jobConf, localSplitFile);
    // AlphaPartitioner.setSplitCache(jobConf, localSplitFile);
    // System.err.println("uploaded split file to FS and DistributedCache");
    //
    // // Set job configs:
    // jobConf.setInputFormat(TextInputFormat.class);
    //
    // jobConf.setOutputFormat(TextOutputFormat.class);
    // if (canonicalize) {
    // jobConf.setMapperClass(CDXCanonicalizerMapClass.class);
    // } else {
    // jobConf.setMapperClass(CDXMapClass.class);
    // }
    // jobConf.setOutputKeyClass(Text.class);
    // jobConf.setOutputValueClass(Text.class);
    // jobConf.set("mapred.textoutputformat.separator", " ");
    // jobConf.setPartitionerClass(AlphaPartitioner.class);

    int inputCount = 0;
    // Set job input:
    if (dereferenceInputs) {
        // SO SLOW... can't add one at a time...
        // FileReader is2 = new FileReader(new File(inputPath));
        // BufferedReader bis2 = new BufferedReader(is2);
        // while (true) {
        // String line = bis2.readLine();
        // if (line == null) {
        // break;
        // }
        // FileInputFormat.addInputPath(jobConf, new Path(line));
        // inputCount++;
        // System.err.println("Added path(" + inputCount + "): " + line);
        // }

        // PASS 2:
        // FileReader is2 = new FileReader(new File(inputPath));
        // BufferedReader bis2 = new BufferedReader(is2);
        // ArrayList<String> list = new ArrayList<String>();
        //
        // while (true) {
        // String line = bis2.readLine();
        // if (line == null) {
        // break;
        // }
        // list.add(line);
        // inputCount++;
        // }
        // Path arr[] = new Path[list.size()];
        // for(int i=0; i < list.size(); i++) {
        // arr[i] = new Path(list.get(i));
        // }
        // FileInputFormat.setInputPaths(jobConf, arr);

        // PASS 3:
        if (funkyInput) {
            jobConf.setMapperClass(FunkyDeReffingCDXCanonicalizerMapClass.class);
        } else {
            jobConf.setMapperClass(DeReffingCDXCanonicalizerMapClass.class);
        }
        FileInputFormat.setInputPaths(jobConf, new Path(inputPath));
        inputCount = 1;
    } else {
        FileInputFormat.setInputPaths(jobConf, new Path(inputPath));
        inputCount = 1;
    }

    // Set job output:
    FileOutputFormat.setOutputPath(jobConf, new Path(outputPath));
    if (compressOutput) {
        FileOutputFormat.setCompressOutput(jobConf, true);
        FileOutputFormat.setOutputCompressorClass(jobConf, GzipCodec.class);
    }

    // System.out.println("Running on " + cluster.getTaskTrackers()
    // + " nodes, processing " + inputCount + " files/directories"
    // + " into " + outputPath + " with "
    // + partitioner.getNumPartitions() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:org.asayler.WikiTitleCount.java
License:Apache License
/**
 * The main driver for the wikititlecount map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WikiTitleCount.class);
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();

    int num_maps = 1;
    int num_reducers = 1;

    conf.setJobName("wikititlecount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    /** Set Default Mappers */
    num_maps = (int) (cluster.getMaxMapTasks());

    /** Set Default Reducers */
    num_reducers = (int) (cluster.getMaxReduceTasks() * 0.9);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            other_args.add(args[i]);
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    /* Set Mappers and Reducer */
    conf.setNumMapTasks(num_maps);
    conf.setNumReduceTasks(num_reducers);

    JobClient.runJob(conf);
    return 0;
}
From source file:org.asayler.WikiTitleSort.java
License:Apache License
/**
 * The main driver for the wikititlesort map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WikiTitleSort.class);
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();

    int num_maps = 1;
    final int num_reducers = 1;

    conf.setJobName("wikititlesort");

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    /** Set Default Mappers */
    num_maps = (int) (cluster.getMaxMapTasks());

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            other_args.add(args[i]);
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    /* Set Mappers and Reducer */
    conf.setNumMapTasks(num_maps);
    conf.setNumReduceTasks(num_reducers);

    JobClient.runJob(conf);
    return 0;
}
From source file:org.cloudata.core.PerformanceTest.java
License:Apache License
private void runNIsMoreThanOne(final String cmd) throws IOException {
    checkTable();

    // Run a mapreduce job. Run as many maps as asked-for clients.
    // Before we start up the job, write out an input file with an instruction
    // per client regarding which row they are to start on.
    Path inputDir = writeInputFile(this.conf);
    this.conf.set(EvaluationMapTask.CMD_KEY, cmd);
    JobConf job = new JobConf(this.conf, this.getClass());
    FileInputFormat.addInputPath(job, inputDir);
    job.setInputFormat(TextInputFormat.class);
    job.setJobName("Cloudata Performance Evaluation");
    job.setMapperClass(EvaluationMapTask.class);
    job.setMaxMapAttempts(1);
    job.setMaxReduceAttempts(1);
    job.setNumMapTasks(this.N * 10); // Ten maps per client.
    job.setNumReduceTasks(1);
    job.setOutputFormat(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(inputDir, "outputs"));
    JobClient.runJob(job);
}
From source file:org.cloudata.core.testjob.performance.ManyTableJob.java
License:Apache License
public static Path putData() throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(ManyTableJob.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);
    jobConf.setJobName("ManyTableJob_Put" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path("ManyTableJob_KEY_" + System.currentTimeMillis());
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    //<MAP>
    jobConf.setMapperClass(ManyTablePutMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(numOfTables);
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        //delete temp output path
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.cloudata.core.testjob.performance.TestMultiThreadCTable.java
License:Apache License
public static Path putData(String outputDir) throws IOException {
    CloudataConf nconf = new CloudataConf();

    JobConf jobConf = new JobConf(TestMultiThreadCTable.class);
    jobConf.set("user.name", nconf.getUserId());
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);
    jobConf.setJobName("TestMultiThreadNTable_" + "(" + new Date() + ")");

    jobConf.setLong("mapred.task.timeout", 30 * 60 * 1000);

    Path outputPath = new Path(outputDir);
    FileOutputFormat.setOutputPath(jobConf, outputPath);

    JobClient jobClient = new JobClient();
    int numOfRowPerMap = 100000 / jobClient.getClusterStatus().getMaxMapTasks();
    jobConf.setInt("numOfRowPerMap", numOfRowPerMap);

    //<MAP>
    jobConf.setMapperClass(PutDataMap.class);
    jobConf.setInputFormat(SimpleInputFormat.class);
    jobConf.setNumMapTasks(jobClient.getClusterStatus().getMaxMapTasks());
    jobConf.setMapSpeculativeExecution(false);
    jobConf.setMaxMapAttempts(0);
    //</MAP>

    //<REDUCE>
    jobConf.setNumReduceTasks(0);
    //</REDUCE>

    try {
        //Run Job
        JobClient.runJob(jobConf);
        return outputPath;
    } finally {
        FileSystem fs = FileSystem.get(jobConf);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}