List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks
public void setNumMapTasks(int n)
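Before the per-project examples below, here is a minimal, self-contained sketch of the call in a classic mapred-API driver. Note that setNumMapTasks is only a hint: the InputFormat's split computation ultimately decides how many map tasks run, whereas setNumReduceTasks is used exactly as given. The class name, job name, and command-line paths in this sketch are illustrative placeholders, not taken from any of the projects listed.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumMapTasksExample.class);
        conf.setJobName("setNumMapTasks-example"); // illustrative job name

        // Identity map/reduce over tab-separated key/value text keeps the sketch self-contained.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // A hint for the desired number of map tasks; the InputFormat's
        // split calculation may still produce a different count.
        conf.setNumMapTasks(8);
        // The reduce-task count, by contrast, is authoritative.
        conf.setNumReduceTasks(2);

        // Placeholder input/output paths supplied on the command line.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}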
From source file:de.l3s.streamcorpus.StreamCorpusIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    long time = System.currentTimeMillis();

    // For the moment: Hard-code the terrier home to quick test
    System.setProperty("terrier.home", "/home/tuan.tran/executable/StreamCorpusIndexer");

    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.debug("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return 0;
    } else if (args.length == 0) {
        logger.debug("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    }
    /*else {
        logger.fatal(usage());
        return 0;
    }*/

    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return 0;
    }

    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJarByClass(StreamCorpusIndexing.class);
    conf.setJobName("StreamCorpusIndexer: Terrier Indexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return 0;
    }

    // boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    boolean blockIndexing = true;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);

    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }

    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);

    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));

    // not sure if this is effective in YARN
    conf.setNumMapTasks(2000);

    // increase the heap usage
    conf.set("mapreduce.map.memory.mb", "6100");
    conf.set("mapred.job.map.memory.mb", "6100");
    conf.set("mapreduce.reduce.memory.mb", "6144");
    conf.set("mapred.job.reduce.memory.mb", "6144");
    conf.set("mapreduce.map.java.opts", "-Xmx6100m");
    conf.set("mapred.map.child.java.opts", "-Xmx6100m");
    conf.set("mapreduce.reduce.java.opts", "-Xmx6144m");
    conf.set("mapred.reduce.child.opts", "-Xmx6144m");
    // conf.setBoolean("mapred.used.genericoptionsparser", true);

    // This is the nasty thing in MapReduce v2 and YARN: They always prefer their ancient jars first.
    // Set this on to say you don't like it.
    conf.set("mapreduce.job.user.classpath.first", "true");

    // increase the yarn memory to 10 GB
    conf.set("yarn.nodemanager.resource.memory-mb", "12288");
    conf.set("yarn.nodemanager.resource.cpu-vcores", "16");
    conf.set("yarn.scheduler.minimum-allocation-mb", "4096");

    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }

    /*JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        e.printStackTrace();
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }*/

    //if (ranOK)
    //{
    System.out.println("Merging indices");
    if (!docPartitioned) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
    }
    Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
            docPartitioned ? numberOfReducers : 1, jf);
    //}
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
    return 0;
}
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);

    // Options
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            // Print property and exit
            if ("-property".equals(args[i])) {
                String prop = jobConf.get(args[i + 1]);
                System.out.println(prop);
                System.exit(0);
            // # of Maps
            } else if ("-m".equals(args[i])) {
                this.num_of_maps = Integer.parseInt(args[++i]);
            // # of Reduces
            } else if ("-r".equals(args[i])) {
                this.num_of_reduces = Integer.parseInt(args[++i]);
            // Enable debug
            } else if ("-debug".equals(args[i])) {
                this.debug = true;
            // Enable single output file for results
            } else if ("-combine".equals(args[i])) {
                this.combine = true;
            // Tell jobs to compress their intermediate output files
            } else if ("-compress".equals(args[i])) {
                this.compress = true;
            // We're using TupleWritable (which has to be in a SequenceFile)
            } else if ("-tuple".equals(args[i])) {
                this.tuple_data = true;
                this.sequence_file = true;
            // Use SequenceFiles for initial input
            } else if ("-sequence".equals(args[i])) {
                this.sequence_file = true;
            // Recursively load directories
            } else if ("-recursive-dirs".equals(args[i])) {
                this.load_directories = true;
            // Job Basename
            } else if ("-basename".equals(args[i])) {
                this.job_name = args[++i];
            // Misc. Properties
            } else if ("-D".equals(args[i].substring(0, 2))) {
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    // Set these flags so the jobs know about them
    if (this.getSequenceFile())
        this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData())
        this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug())
        this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    // Input Paths
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                //int limit = 10;
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                    //if (limit-- <= 0) break;
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println(
                "ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    // Output Paths
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    if (this.num_of_maps >= 0)
        jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0)
        jobConf.setNumReduceTasks(this.num_of_reduces);

    // Set all properties
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public void runCombine() throws Exception {
    if (this.last_job == null) {
        throw new NullPointerException("ERROR: Last job is Null");
    }

    JobConf job = new JobConf(this.conf, this.benchmarkClass);
    job.setJobName((this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName()) + ".combine");
    job.setMapperClass(IdentityMapper.class);
    job.setNumMapTasks(0);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(1); // this is needed to get a single output file

    // Input
    FileInputFormat.setInputPaths(job, FileOutputFormat.getOutputPath(this.last_job));
    job.setInputFormat(KeyValueTextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job,
            new Path(FileOutputFormat.getOutputPath(this.last_job).toString() + "/combine"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    JobConf real_last_job = this.last_job;
    this.runJob(job);
    this.last_job = real_last_job;
    return;
}
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java
License:Open Source License
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    // -------------------------------------------
    // Phase #1
    // -------------------------------------------
    JobConf p1_job = base.getJobConf();
    p1_job.setJobName(p1_job.getJobName() + ".Phase1");
    Path p1_output = new Path(base.getOutputPath().toString() + "/phase1");
    FileOutputFormat.setOutputPath(p1_job, p1_output);

    // Make sure we have our properties
    String required[] = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE };
    for (String req : required) {
        if (!base.getOptions().containsKey(req)) {
            System.err.println("ERROR: The property '" + req + "' is not set");
            System.exit(1);
        }
    } // FOR

    p1_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p1_job.setOutputFormat(SequenceFileOutputFormat.class);
    p1_job.setOutputKeyClass(Text.class);
    p1_job.setOutputValueClass(Text.class);
    p1_job.setMapperClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class);
    p1_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class);
    p1_job.setCompressMapOutput(base.getCompress());

    // -------------------------------------------
    // Phase #2
    // -------------------------------------------
    JobConf p2_job = base.getJobConf();
    p2_job.setJobName(p2_job.getJobName() + ".Phase2");
    p2_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p2_job.setOutputFormat(SequenceFileOutputFormat.class);
    p2_job.setOutputKeyClass(Text.class);
    p2_job.setOutputValueClass(Text.class);
    p2_job.setMapperClass(IdentityMapper.class);
    p2_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class);
    p2_job.setCompressMapOutput(base.getCompress());
    p2_job.setNumMapTasks(60);

    // -------------------------------------------
    // Phase #3
    // -------------------------------------------
    JobConf p3_job = base.getJobConf();
    p3_job.setJobName(p3_job.getJobName() + ".Phase3");
    p3_job.setNumReduceTasks(1);
    p3_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    p3_job.setOutputKeyClass(Text.class);
    p3_job.setOutputValueClass(Text.class);
    //p3_job.setMapperClass(Phase3Map.class);
    p3_job.setMapperClass(IdentityMapper.class);
    p3_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class);

    // Execute #1
    base.runJob(p1_job);

    // Execute #2
    Path p2_output = new Path(base.getOutputPath().toString() + "/phase2");
    FileOutputFormat.setOutputPath(p2_job, p2_output);
    FileInputFormat.setInputPaths(p2_job, p1_output);
    base.runJob(p2_job);

    // Execute #3
    Path p3_output = new Path(base.getOutputPath().toString() + "/phase3");
    FileOutputFormat.setOutputPath(p3_job, p3_output);
    FileInputFormat.setInputPaths(p3_job, p2_output);
    base.runJob(p3_job);

    // There does need to be a combine
    if (base.getCombine())
        base.runCombine();

    return 0;
}
From source file:edu.iu.benchmark.JobLauncher.java
License:Apache License
private Job configureBenchmarkJob(String cmd, int bytesPerPartition, int numPartitions, int numMappers,
        int numIterations, Path inputDirPath, Path outputDirPath) throws IOException, URISyntaxException {
    Job job = Job.getInstance(getConf(), "benchmark_job");
    FileInputFormat.setInputPaths(job, inputDirPath);
    FileOutputFormat.setOutputPath(job, outputDirPath);
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(JobLauncher.class);
    job.setMapperClass(BenchmarkMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMappers);
    job.setNumReduceTasks(0);
    jobConf.set(Constants.BENCHMARK_CMD, cmd);
    jobConf.setInt(Constants.BYTES_PER_PARTITION, bytesPerPartition);
    jobConf.setInt(Constants.NUM_PARTITIONS, numPartitions);
    jobConf.setInt(Constants.NUM_MAPPERS, numMappers);
    jobConf.setInt(Constants.NUM_ITERATIONS, numIterations);
    return job;
}
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks,
        int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath,
        Configuration configuration, int jobID) throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices);
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);
    Job job = Job.getInstance(configuration, "ccd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(CCDLauncher.class);
    job.setMapperClass(CCDMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.daal_cov.COVDaalLauncher.java
License:Apache License
private Job configureCOVJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    // configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    // configuration.setInt(Constants.BATCH_SIZE, batchSize);
    Job job = Job.getInstance(configuration, "cov_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(COVDaalLauncher.class);
    job.setMapperClass(COVDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_linreg.LinRegDaalLauncher.java
License:Apache License
private Job configureLinRegJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
        int batchSize, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir,
        Configuration configuration) throws IOException, URISyntaxException {
    configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setInt(Constants.BATCH_SIZE, batchSize);
    Job job = Job.getInstance(configuration, "linreg_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LinRegDaalLauncher.class);
    job.setMapperClass(LinRegDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_mom.MOMDaalLauncher.java
License:Apache License
private Job configureMOMJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    // configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    // configuration.setInt(Constants.BATCH_SIZE, batchSize);
    Job job = Job.getInstance(configuration, "mom_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(MOMDaalLauncher.class);
    job.setMapperClass(MOMDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_naive.NaiveDaalLauncher.java
License:Apache License
private Job configureNaiveJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
        int vecsize, int num_class, int num_test, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setInt(Constants.VECTOR_SIZE, vecsize);
    configuration.setInt(Constants.NUM_CLASS, num_class);
    configuration.setInt(Constants.NUM_TEST, num_test);
    Job job = Job.getInstance(configuration, "naive_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    // int xmx = (mem - 5000) > (mem * 0.5)
    //         ? (mem - 5000) : (int) Math.ceil(mem * 0.5);
    int xmx = (int) Math.ceil((mem - 5000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setInt("mapred.task.timeout", 1800000);
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(NaiveDaalLauncher.class);
    job.setMapperClass(NaiveDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}