List of usage examples for org.apache.hadoop.mapred.JobConf.setNumMapTasks
public void setNumMapTasks(int n)
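In the classic mapred API this value is only a hint: the actual number of map tasks is determined by the InputFormat's input splits, whereas setNumReduceTasks is authoritative. Before the collected examples, here is a minimal, self-contained sketch of the usual call pattern; the class name, the input/output arguments, and the task count of 16 are illustrative assumptions, not taken from any of the projects below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class SetNumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumMapTasksExample.class);
        conf.setJobName("set-num-map-tasks-example");
        // A hint only: the InputFormat's split computation decides the real map count.
        conf.setNumMapTasks(16);
        conf.setNumReduceTasks(1);
        // Relies on the old API defaults (TextInputFormat, IdentityMapper,
        // IdentityReducer), so this runs as-is on any text input.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}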
From source file:edu.iu.sgd.SGDLauncher.java
License:Apache License
private Job configureSGDJob(Path inputDir, int r, double lambda, double epsilon, int numIterations,
        int trainRatio, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        Path modelDir, Path outputDir, String testFilePath, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setDouble(Constants.EPSILON, epsilon);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.TRAIN_RATIO, trainRatio);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);

    Job job = Job.getInstance(configuration, "sgd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb, e.g. 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts, e.g. -Xmx120000m -Xms120000m
    // Heap size is the larger of (mem - 5000) MB and 90% of mem.
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(SGDLauncher.class);
    job.setMapperClass(SGDCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.wdamds.MDSLauncher.java
License:Apache License
private Job prepareWDAMDSJob(int numMapTasks, Path dataDirPath, Path xFilePath, Path xOutFilePath,
        Path outDirPath, String idsFile, String labelsFile, double threshold, int d, double alpha, int n,
        int cgIter, int numThreads)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf(), "map-collective-wdamds");
    Configuration jobConfig = job.getConfiguration();
    FileInputFormat.setInputPaths(job, dataDirPath);
    FileOutputFormat.setOutputPath(job, outDirPath);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    // Load from HDFS (currently the x file is not actually read from HDFS)
    jobConfig.set(MDSConstants.X_FILE_PATH, xFilePath.toString());
    // Output to HDFS
    jobConfig.set(MDSConstants.X_OUT_FILE_PATH, xOutFilePath.toString());
    // Load from the shared file system
    jobConfig.set(MDSConstants.IDS_FILE, idsFile);
    // Load from the shared file system
    jobConfig.set(MDSConstants.LABELS_FILE, labelsFile);
    jobConfig.setDouble(MDSConstants.THRESHOLD, threshold);
    jobConfig.setInt(MDSConstants.D, d);
    jobConfig.setDouble(MDSConstants.ALPHA, alpha);
    jobConfig.setInt(MDSConstants.N, n);
    jobConfig.setInt(MDSConstants.CG_ITER, cgIter);
    jobConfig.setInt(MDSConstants.NUM_THREADS, numThreads);
    // Use a file-based input format
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(MDSLauncher.class);
    job.setMapperClass(WDAMDSMapper.class);
    // When using MultiFileInputFormat, remember to set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}
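Both edu.iu examples above target the Harp "map-collective" runtime (note jobConf.set("mapreduce.framework.name", "map-collective")), where each map task appears to host a long-running collective worker. In that setting the map-task count is chosen explicitly rather than derived from input splits, which is why both launchers call setNumMapTasks and the MDS source reminds itself to set it when using MultiFileInputFormat.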
From source file:edu.ldzm.analysis.AnalysisSummary.java
License:Apache License
/**
 * The main driver for the analysis summary map/reduce program. Invoke this
 * method to submit the map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AnalysisSummary.class);
    conf.setJobName("analysis_summary");

    // output keys and values are both Text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    boolean param = false;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param = true;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    if (!param) {
        System.out.println("ERROR: the -l <namelist> parameter is required.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:edu.ldzm.average.AverageResponseTime.java
License:Apache License
/**
 * The main driver for the average response time map/reduce program. Invoke
 * this method to submit the map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AverageResponseTime.class);
    conf.setJobName("average_response_time");

    // output keys and values are both Text
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    int param = 0;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param++;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else if ("-i".equals(args[i])) {
                param++;
                conf.setInt("INTERVAL_TIME", Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    if (param != 2) {
        System.out.println("ERROR: both the -l and -i parameters are required.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}
From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java
License:Apache License
public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir,
        Path inputPath)
        throws ValidationException, NotFoundException, CompilationException, InternalException {
    // Generate job output path
    Path outputDir = new Path(_homeDir, "out");
    Path outputPath;
    try {
        FileSystem fs = outputDir.getFileSystem(new Configuration());
        outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not construct output path.", ex);
    }

    JobConf conf = new JobConf();
    conf.setJobName(request.getName());

    // Set mapper and number of tasks if specified
    StreamJob.setStreamMapper(conf, mapperFile.toString());
    if (request.isSetMapTasks())
        conf.setNumMapTasks(request.getMapTasks());

    // Set reducer and number of tasks if specified
    StreamJob.setStreamReducer(conf, reducerFile.toString());
    if (request.isSetReduceTasks())
        conf.setNumReduceTasks(request.getReduceTasks());

    // Create and set job JAR, including necessary files
    ArrayList<String> jarFiles = new ArrayList<String>();
    jarFiles.add(packageDir.toString());
    String jarPath;
    try {
        jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not create job jar.", ex);
    }
    if (jarPath != null)
        conf.setJar(jarPath);

    // TODO: This is a hack. Rewrite streaming to use DistributedCache.
    //conf.setPattern("mapreduce.job.jar.unpack.pattern",
    //        Pattern.compile(".*"));

    // Set I/O formats and paths
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Use numeric sort if appropriate
    conf.setBoolean(CONF_NUMERIC, request.isNumericSort());
    if (request.isNumericSort()) {
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldComparatorOptions("-n");
        conf.setKeyFieldPartitionerOptions("-n");
    }

    // Set other job information
    conf.set(CONF_USER, request.getUser());
    conf.set(CONF_LANGUAGE, request.getLanguage());
    conf.set(CONF_MAPPER, request.getMapper());
    conf.set(CONF_REDUCER, request.getReducer());

    // Attempt to submit the job
    RunningJob job;
    try {
        JobClient client = new JobClient(new JobConf());
        job = client.submitJob(conf);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.",
                ex);
    }

    try {
        SubmissionDatabase.setSubmitted(submissionID);
        SubmissionDatabase.setHadoopID(submissionID, job.getID().toString());
    } catch (SQLException ex) {
        throw JobServiceHandler.wrapException("Could not update submission in database.", ex);
    }
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
private Object[] getInputSplits(JobConf conf, int desiredMaxMappers) throws Exception {
    Object[] splits = getInputSplits(conf);
    if (splits.length > desiredMaxMappers) {
        long totalInputSize = getInputSize(splits, conf);
        long goalSize = (totalInputSize / desiredMaxMappers);
        conf.setLong("mapred.min.split.size", goalSize);
        conf.setNumMapTasks(desiredMaxMappers);
        splits = getInputSplits(conf);
    }
    return splits;
}
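This example is a useful reminder that setNumMapTasks is only a hint in the old mapred API: to actually cap the mapper count, the code raises mapred.min.split.size so that the InputFormat produces at most desiredMaxMappers splits, and then sets the task-count hint to match before recomputing the splits.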
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
public static void main(JobConf job) throws IOException {
    int nBits /* D */, nFeatures /* K */, nReducers;
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    if (fs.exists(inputPath))
        fs.delete(inputPath, true);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    for (int i = 0; i < nReducers; i++)
        writer.append(new IntWritable(i), new IntWritable(i));
    writer.close();

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.umd.cloud9.collection.aquaint2.DemoCountAquaint2Documents.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    String mappingFile = args[2];
    int mapTasks = Integer.parseInt(args[3]);

    System.out.println("input dir: " + inputPath);
    System.out.println("output dir: " + outputPath);
    System.out.println("mapping file: " + mappingFile);
    System.out.println("number of mappers: " + mapTasks);

    JobConf conf = new JobConf(DemoCountAquaint2Documents.class);
    conf.setJobName("DemoCountAquaint2Documents");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(Aquaint2DocumentInputFormatOld.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setMapperClass(MyMapper.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    // clean up
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return 0;
}
From source file:edu.umd.cloud9.collection.aquaint2.NumberAquaint2Documents.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    String outputFile = args[2];
    int mapTasks = 10;

    LOG.info("Tool: " + NumberAquaint2Documents.class.getCanonicalName());
    LOG.info(" - Input path: " + inputPath);
    LOG.info(" - Output path: " + outputPath);
    LOG.info(" - Output file: " + outputFile);

    JobConf conf = new JobConf(NumberAquaint2Documents.class);
    conf.setJobName(NumberAquaint2Documents.class.getSimpleName());

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(Aquaint2DocumentInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    Aquaint2DocnoMapping.writeDocnoData(new Path(outputPath + "/part-00000"), new Path(outputFile),
            FileSystem.get(conf));

    return 0;
}
From source file:edu.umd.cloud9.collection.clue.ClueWarcForwardIndexBuilder.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("(required) collection path (must be block-compressed SequenceFiles)")
            .create(COLLECTION_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg()
            .withDescription("(required) output index path").create(INDEX_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(COLLECTION_OPTION) || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    JobConf conf = new JobConf(getConf(), ClueWarcForwardIndexBuilder.class);
    FileSystem fs = FileSystem.get(conf);

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexFile = cmdline.getOptionValue(INDEX_OPTION);

    LOG.info("Tool name: " + ClueWarcForwardIndexBuilder.class.getSimpleName());
    LOG.info(" - collection path: " + collectionPath);
    LOG.info(" - index file: " + indexFile);
    LOG.info("Note: This tool only works on block-compressed SequenceFiles!");

    Random random = new Random();
    Path outputPath = new Path(
            "tmp-" + ClueWarcForwardIndexBuilder.class.getSimpleName() + "-" + random.nextInt(10000));

    conf.setJobName(ClueWarcForwardIndexBuilder.class.getSimpleName() + ":" + collectionPath);
    conf.setNumMapTasks(100);
    conf.setNumReduceTasks(1);

    // Note: we have to add the files one by one; otherwise, SequenceFileInputFormat
    // thinks it's a MapFile.
    for (FileStatus status : fs.listStatus(new Path(collectionPath))) {
        FileInputFormat.addInputPath(conf, status.getPath());
    }

    FileOutputFormat.setOutputPath(conf, outputPath);
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(NoSplitSequenceFileInputFormat.class);
    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    fs.delete(outputPath, true);

    RunningJob job = JobClient.runJob(conf);

    Counters counters = job.getCounters();
    int blocks = (int) counters.findCounter(Blocks.Total).getCounter();
    LOG.info("number of blocks: " + blocks);

    LOG.info("Writing index file...");
    LineReader reader = new LineReader(fs.open(new Path(outputPath + "/part-00000")));
    FSDataOutputStream out = fs.create(new Path(indexFile), true);

    out.writeUTF(ClueWarcForwardIndex.class.getCanonicalName());
    out.writeUTF(collectionPath);
    out.writeInt(blocks);

    int cnt = 0;
    Text line = new Text();
    while (reader.readLine(line) > 0) {
        String[] arr = line.toString().split("\\s+");
        int docno = Integer.parseInt(arr[0]);
        int offset = Integer.parseInt(arr[1]);
        short fileno = Short.parseShort(arr[2]);

        out.writeInt(docno);
        out.writeInt(offset);
        out.writeShort(fileno);

        cnt++;
        if (cnt % 100000 == 0) {
            LOG.info(cnt + " blocks written");
        }
    }
    reader.close();
    out.close();

    if (cnt != blocks) {
        throw new RuntimeException("Error: mismatch in block count!");
    }

    fs.delete(outputPath, true);
    return 0;
}