List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks
public void setNumMapTasks(int n)
From source file:org.weikey.terasort.TeraSort.java
License:Apache License
@SuppressWarnings("deprecation") public int run(String[] args) throws Exception { LOG.info("starting"); JobConf job = (JobConf) getConf(); SortConfig sortConfig = new SortConfig(job); // if (args.length >= 3) { // job.setNumReduceTasks(Integer.valueOf(args[2])); // if (args.length >= 4) { // sortConfig.setStartKey(Integer.valueOf(args[3])); // if (args.length >= 5) { // sortConfig.setFieldSeparator(args[4]); // }//from w w w. ja v a2s . c o m // } // } Integer numMapTasks = null; Integer numReduceTasks = null; List<String> otherArgs = new ArrayList<String>(); boolean createLzopIndex = false; for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { job.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { job.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-f".equals(args[i]) || "--ignore-case".equals(args[i])) { sortConfig.setIgnoreCase(true); } else if ("-u".equals(args[i]) || "--unique".equals(args[i])) { sortConfig.setUnique(true); } else if ("-k".equals(args[i]) || "--key".equals(args[i])) { String[] parts = StringUtils.split(args[++i], ","); sortConfig.setStartKey(Integer.valueOf(parts[0])); if (parts.length > 1) { sortConfig.setEndKey(Integer.valueOf(parts[1])); } } else if ("-t".equals(args[i]) || "--field-separator".equals(args[i])) { sortConfig.setFieldSeparator(args[++i]); } else if ("--total-order".equals(args[i])) { double pcnt = Double.parseDouble(args[++i]); int numSamples = Integer.parseInt(args[++i]); int maxSplits = Integer.parseInt(args[++i]); if (0 >= maxSplits) { maxSplits = Integer.MAX_VALUE; } } else if ("--lzop-index".equals(args[i])) { createLzopIndex = true; } else { otherArgs.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); // exits } } // Make 
sure there are exactly 2 parameters left. if (otherArgs.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2."); return printUsage(); } Path inputDir = new Path(args[0]); inputDir = inputDir.makeQualified(inputDir.getFileSystem(job)); Path partitionFile = new Path(inputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); TeraInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormat(TeraInputFormat.class); job.setOutputFormat(TeraOutputFormat.class); job.setPartitionerClass(TotalOrderPartitioner.class); TeraInputFormat.writePartitionFile(job, partitionFile); DistributedCache.addCacheFile(partitionUri, job); DistributedCache.createSymlink(job); job.setInt("dfs.replication", 1); TeraOutputFormat.setFinalSync(job, true); JobClient.runJob(job); LOG.info("done"); return 0; }
From source file:PDI.Hadoop.Datamining.Tools.HistorianParser.java
/** * The main driver for historian map/reduce program. Invoke this method to * submit the map/reduce job.// w w w . j av a 2 s . c o m * * @throws IOException * When there is communication problems with the job tracker. */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), HistorianParser.class); JobClient jobClient = new JobClient(conf); List<String> sourcePaths = new ArrayList<String>(); String destPath = ""; String currentDate = DateUtils.getCurrentDateString(); String startTS = ""; String endTS = ""; String pointIDS = ""; String outputSize = ""; conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(StandardPointFile.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(ReduceClass.class); conf.setInputFormat(HistorianInputFormat.class); conf.set("compression", "no"); conf.set("filePrefix", "devarchive_archive_"); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else if ("-startTS".equals(args[i])) { conf.set("startTS", args[++i]); startTS = args[i]; } else if ("-endTS".equals(args[i])) { conf.set("endTS", args[++i]); endTS = args[i]; } else if ("-pointIDS".equals(args[i])) { conf.set("pointIDS", args[++i]); pointIDS = args[i]; } else if ("-outputMaxSize".equals(args[i])) { conf.set("outputSize", args[++i]); outputSize = args[i]; } else if ("-sourcePATH".equals(args[i])) { String sourcePath = "" + args[++i]; if (sourcePath.indexOf(',') == -1) { sourcePaths.add(sourcePath); } else { String[] paths = sourcePath.split(","); for (int ii = 0; ii < paths.length; ii++) { sourcePaths.add(paths[ii]); } } } else if ("-destPATH".equals(args[i])) { destPath = "" + args[++i] + "/"; } else if ("-compression".equals(args[i])) { conf.set("compression", args[++i]); } else if ("-filePrefix".equals(args[i])) { conf.set("filePrefix", args[++i]); } else if 
("-v".equals(args[i])) { pdi_showVersion(); return 0; } else if ("-verbose".equals(args[i])) { this.pdi_setVerbose(true); } else if ("-h".equals(args[i])) { return printUsage(); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Check for the user input parameters if ((0 == sourcePaths.size()) || destPath.equals("") || startTS.equals("") || endTS.equals("") || pointIDS.equals("") || outputSize.equals("") || (0 == conf.get("filePrefix").length())) { System.out.println("ERROR: Wrong input parameters."); return printUsage(); } String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS); String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS); System.out.println("-------------------------------------------------------"); System.out.println("jobName : " + currentDate); System.out.println("filePrefix : " + conf.get("filePrefix")); for (int i = 0; i < sourcePaths.size(); i++) { System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i)); } System.out.println("destPath : " + destPath); System.out.println("startTS : " + startTS + " (" + startTime + ")"); System.out.println("endTS : " + endTS + " (" + endTime + ")"); System.out.println("pointIDS : " + pointIDS); System.out.println("outputMaxSize: " + outputSize + " MB"); System.out.println("compression : " + conf.get("compression")); System.out.println("-------------------------------------------------------"); PathUtils utils = new PathUtils(this.pdi_isVerbose()); if (false == utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) { return -1; } // set output path to current time FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate)); // set jobName to current time // conf.setJobName(date.toString()); 
conf.setJobName(currentDate); JobClient.runJob(conf); // run the job // mergeAndCopyToLocal(conf, destPath); return 0; }
From source file:ronchy.BigramCount.java
License:Apache License
/** * Runs this tool.//from w ww . ja v a2s. c o m */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: BigramCount"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(BigramCount.class); conf.setJobName("BigramCount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:sa.edu.kaust.fwindex.BuildIntDocVectorsForwardIndex.java
License:Apache License
/**
 * Runs the tool that builds the forward index over the int doc vectors.
 *
 * <p>Expects two arguments: the int doc vectors path (input) and the forward
 * index path (output). Nothing is submitted when the input is missing or the
 * output already exists; both cases are logged and return 0.
 *
 * @param args the command-line arguments
 * @return -1 if the argument count is wrong, 0 otherwise
 * @throws Exception if file system access or the job fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    final String intDocVectorsPath = args[0];
    final String forwardIndexPath = args[1];

    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);
    final int mapTasks = 10;

    sLogger.info("Tool: BuildIntDocVectorsIndex");

    final Path inputPath = new Path(intDocVectorsPath);
    if (!fs.exists(inputPath)) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorsForwardIndex already exists: skipping!");
        return 0;
    }

    // The output location is passed to the tasks through the job
    // configuration; the job's own output format is NullOutputFormat.
    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildIntDocVectorsForwardIndex");

    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);
    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(TermDF.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);
    return 0;
}
From source file:sa.edu.kaust.twitter.index.BuildPostingsForwardIndex.java
License:Apache License
/** * Runs this tool.//from ww w.j av a 2s . c o m */ public int run(String[] args) throws Exception { if (args.length != 2) { printUsage(); return -1; } JobConf conf = new JobConf(BuildPostingsForwardIndex.class); FileSystem fs = FileSystem.get(conf); int mapTasks = 10; sLogger.info("Tool: PostingsForwardIndex"); String postingsPath = args[0]; String forwardIndexPath = args[1]; if (!fs.exists(new Path(postingsPath))) { sLogger.info("Error: IntDocVectors don't exist!"); return 0; } // delete the output directory if it exists already //FileSystem.get(conf).delete(new Path(forwardIndexPath), true); if (fs.exists(new Path(forwardIndexPath))) { sLogger.info("PostingsForwardIndex already exists: skipping!"); return 0; } conf.set("ForwardIndexPath", forwardIndexPath); conf.setJobName("BuildPostingsForwardIndex"); Path inputPath = new Path(postingsPath); FileInputFormat.setInputPaths(conf, inputPath); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(1); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapOutputKeyClass(Text.class); conf.setMapOutputValueClass(Text.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java
License:Apache License
/** * Runs this tool./*from w ww .ja v a2 s . c om*/ */ public int run(String[] args) throws Exception { if (args.length != 2) { printUsage(); return -1; } JobConf conf = new JobConf(BuildTweetsForwardIndex.class); FileSystem fs = FileSystem.get(conf); int mapTasks = 10; sLogger.info("Tool: TweetsForwardIndex"); String postingsPath = args[0]; String forwardIndexPath = args[1]; if (!fs.exists(new Path(postingsPath))) { sLogger.info("Error: IntDocVectors don't exist!"); return 0; } // delete the output directory if it exists already //FileSystem.get(conf).delete(new Path(forwardIndexPath), true); if (fs.exists(new Path(forwardIndexPath))) { sLogger.info("PostingsForwardIndex already exists: skipping!"); return 0; } conf.set("ForwardIndexPath", forwardIndexPath); conf.setJobName("BuildTweetsForwardIndex"); Path inputPath = new Path(postingsPath); FileInputFormat.setInputPaths(conf, inputPath); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(1); conf.set("mapred.child.java.opts", "-Xmx2048m"); conf.setInputFormat(SequenceFileInputFormat.class); conf.setMapOutputKeyClass(LongWritable.class); conf.setMapOutputValueClass(Text.class); conf.setOutputFormat(NullOutputFormat.class); conf.setMapRunnerClass(MyMapRunner.class); conf.setReducerClass(MyReducer.class); JobClient.runJob(conf); return 0; }
From source file:setest.FormatStorageMR.java
License:Open Source License
/**
 * Submits the FormatStorageMR job and prints its progress until it completes.
 *
 * <p>Expects two arguments: the input path and the output path. Any existing
 * output directory is deleted before submission. Progress is printed whenever
 * it changes, and at least every three seconds otherwise.
 *
 * @param args the command-line arguments: {@code <input> <output>}
 * @throws Exception if configuring, submitting or polling the job fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("FormatStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("FormatStorageMR");

    conf.setNumMapTasks(1);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageTestMapper.class);
    conf.setReducerClass(FormatStorageTestReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.setOutputFormat(FormatStorageOutputFormat.class);
    // Was misspelled "flase", which is not a valid boolean value.
    conf.set("mapred.output.compress", "false");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Remove any previous output so the job can write to the directory.
    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = jc.submitJob(conf);

    String lastReport = "";
    // "HH" is the 24-hour clock; the previous "hh" is 12-hour and, with no
    // AM/PM marker in the pattern, produced ambiguous timestamps.
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;

    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ignored) {
            // Deliberately ignored: keep polling until the job reports completion.
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%, reduce = " + reduceProgress + "%";

        // Print when the progress changed, or as a heartbeat after the maximum interval.
        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {
            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);
}
From source file:temp.WordCount.java
License:Apache License
/** * The main driver for word count map/reduce program. * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker.//from ww w. j a v a 2s. c om */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), WordCount.class); conf.setJobName("wordcount"); // the keys are words (strings) conf.setOutputKeyClass(Text.class); // the values are counts (ints) conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MapClass.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } conf.setInputPath(new Path(other_args.get(0))); conf.setOutputPath(new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }
From source file:TVA.Hadoop.MapReduce.Development.Test_RecordReader_Alt.java
/**
 * Configures and submits the Test_RecordReader_Alt map/reduce job.
 *
 * <p>Accepts {@code -m <maps>} and {@code -r <reduces>}; every other argument
 * is treated as positional, and exactly two must remain: the input path
 * followed by the output path.
 *
 * @param args the command-line arguments
 * @return 0 after the job has run, otherwise the result of {@code printUsage()}
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Test_RecordReader_Alt.class);
    conf.setJobName("Test_RecordReader_Alt");

    // Only the map output types are set; the job output key/value classes are left unset.
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    // NOTE(review): the connection string embeds a user name and password in
    // source; consider supplying it through external configuration instead.
    conf.set("gov.tva.mapreduce.AverageFrequency.connectionstring",
            "jdbc:sqlserver://rgocdsql:1433; databaseName=PhasorMeasurementData;user=NaspiApp;password=pw4site;");
    conf.set("gov.tva.mapreduce.AverageFrequency.HistorianID", "2");

    conf.setMapperClass(MapClass.class);
    // No combiner is configured for this job.
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(DatAware_InputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        final String option = args[i];
        try {
            if ("-m".equals(option)) {
                int maps = Integer.parseInt(args[++i]);
                conf.setNumMapTasks(maps);
            } else if ("-r".equals(option)) {
                int reduces = Integer.parseInt(args[++i]);
                conf.setNumReduceTasks(reduces);
            } else {
                other_args.add(option);
            }
        } catch (NumberFormatException except) {
            // i already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // i ran past the end, so args[i - 1] is the option missing its value.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Exactly two positional arguments are required.
    final int remaining = other_args.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    // Extension point: an additional positional parameter (for example an id
    // of other information to look up) could be checked here and stored in the
    // job configuration, e.g.
    // conf.set("gov.tva.AvgFreq.Company.ID", other_args.get(n));

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:TVA.Hadoop.Samples.TestRecordReader.java
/** * The main driver for word count map/reduce program. * Invoke this method to submit the map/reduce job. * @throws IOException When there is communication problems with the * job tracker./*from www. ja v a 2 s .co m*/ */ public int run(String[] args) throws Exception { JobConf conf = new JobConf(getConf(), TestRecordReader.class); conf.setJobName("TestRecordReader"); conf.setMapOutputKeyClass(IntWritable.class); conf.setMapOutputValueClass(StandardPointFile.class); conf.setMapperClass(MapClass.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(HistorianInputFormat.class); List<String> other_args = new ArrayList<String>(); for (int i = 0; i < args.length; ++i) { try { if ("-m".equals(args[i])) { conf.setNumMapTasks(Integer.parseInt(args[++i])); } else if ("-r".equals(args[i])) { conf.setNumReduceTasks(Integer.parseInt(args[++i])); } else { other_args.add(args[i]); } } catch (NumberFormatException except) { System.out.println("ERROR: Integer expected instead of " + args[i]); return printUsage(); } catch (ArrayIndexOutOfBoundsException except) { System.out.println("ERROR: Required parameter missing from " + args[i - 1]); return printUsage(); } } // Make sure there are exactly 2 parameters left. if (other_args.size() != 2) { System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2."); return printUsage(); } FileInputFormat.setInputPaths(conf, other_args.get(0)); FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1))); JobClient.runJob(conf); return 0; }