List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks
public void setNumMapTasks(int n)
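Before the per-project examples below, here is a minimal, self-contained sketch of the call in a classic mapred-API driver. Note that setNumMapTasks is only a hint: the InputFormat's split computation ultimately decides how many map tasks run, whereas setNumReduceTasks is used exactly as given. The class name, job name, and command-line paths in this sketch are illustrative placeholders, not taken from any of the projects listed.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.KeyValueTextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class SetNumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumMapTasksExample.class);
        conf.setJobName("setNumMapTasks-example"); // illustrative job name

        // Identity map/reduce over tab-separated key/value text keeps the sketch self-contained.
        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // A hint for the desired number of map tasks; the InputFormat's
        // split calculation may still produce a different count.
        conf.setNumMapTasks(8);
        // The reduce-task count, by contrast, is authoritative.
        conf.setNumReduceTasks(2);

        // Placeholder input/output paths supplied on the command line.
        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}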
From source file:de.l3s.streamcorpus.StreamCorpusIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    long time = System.currentTimeMillis();

    // For the moment: Hard-code the terrier home to quick test
    System.setProperty("terrier.home", "/home/tuan.tran/executable/StreamCorpusIndexer");

    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.debug("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return 0;
    } else if (args.length == 0) {
        logger.debug("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    }
    /*else {
        logger.fatal(usage());
        return 0;
    }*/

    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return 0;
    }

    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJarByClass(StreamCorpusIndexing.class);
    conf.setJobName("StreamCorpusIndexer: Terrier Indexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return 0;
    }

    // boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    boolean blockIndexing = true;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);

    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }

    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);

    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));

    // not sure if this is effective in YARN
    conf.setNumMapTasks(2000);

    // increase the heap usage
    conf.set("mapreduce.map.memory.mb", "6100");
    conf.set("mapred.job.map.memory.mb", "6100");
    conf.set("mapreduce.reduce.memory.mb", "6144");
    conf.set("mapred.job.reduce.memory.mb", "6144");
    conf.set("mapreduce.map.java.opts", "-Xmx6100m");
    conf.set("mapred.map.child.java.opts", "-Xmx6100m");
    conf.set("mapreduce.reduce.java.opts", "-Xmx6144m");
    conf.set("mapred.reduce.child.opts", "-Xmx6144m");
    // conf.setBoolean("mapred.used.genericoptionsparser", true);

    // This is the nasty thing in MapReduce v2 and YARN: They always prefer their ancient jars first.
    // Set this on to say you don't like it.
    conf.set("mapreduce.job.user.classpath.first", "true");

    // increase the yarn memory to 10 GB
    conf.set("yarn.nodemanager.resource.memory-mb", "12288");
    conf.set("yarn.nodemanager.resource.cpu-vcores", "16");
    conf.set("yarn.scheduler.minimum-allocation-mb", "4096");

    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }

    /*JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        e.printStackTrace();
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }*/

    //if (ranOK)
    //{
    System.out.println("Merging indices");
    if (!docPartitioned) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
    }
    Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
            docPartitioned ? numberOfReducers : 1, jf);
    //}
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
    return 0;
}
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);

    // Options
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            // Print property and exit
            if ("-property".equals(args[i])) {
                String prop = jobConf.get(args[i + 1]);
                System.out.println(prop);
                System.exit(0);
            // # of Maps
            } else if ("-m".equals(args[i])) {
                this.num_of_maps = Integer.parseInt(args[++i]);
            // # of Reduces
            } else if ("-r".equals(args[i])) {
                this.num_of_reduces = Integer.parseInt(args[++i]);
            // Enable debug
            } else if ("-debug".equals(args[i])) {
                this.debug = true;
            // Enable single output file for results
            } else if ("-combine".equals(args[i])) {
                this.combine = true;
            // Tell jobs to compress their intermediate output files
            } else if ("-compress".equals(args[i])) {
                this.compress = true;
            // We're using TupleWritable (which has to be in a SequenceFile)
            } else if ("-tuple".equals(args[i])) {
                this.tuple_data = true;
                this.sequence_file = true;
            // Use SequenceFiles for initial input
            } else if ("-sequence".equals(args[i])) {
                this.sequence_file = true;
            // Recursively load directories
            } else if ("-recursive-dirs".equals(args[i])) {
                this.load_directories = true;
            // Job Basename
            } else if ("-basename".equals(args[i])) {
                this.job_name = args[++i];
            // Misc. Properties
            } else if ("-D".equals(args[i].substring(0, 2))) {
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    // Set these flags so the jobs know about them
    if (this.getSequenceFile())
        this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData())
        this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug())
        this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    // Input Paths
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                //int limit = 10;
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                    //if (limit-- <= 0) break;
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println(
                "ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    // Output Paths
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    if (this.num_of_maps >= 0)
        jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0)
        jobConf.setNumReduceTasks(this.num_of_reduces);

    // Set all properties
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public void runCombine() throws Exception {
    if (this.last_job == null) {
        throw new NullPointerException("ERROR: Last job is Null");
    }

    JobConf job = new JobConf(this.conf, this.benchmarkClass);
    job.setJobName((this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName()) + ".combine");
    job.setMapperClass(IdentityMapper.class);
    job.setNumMapTasks(0);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(1); // this is needed to get a single output file

    // Input
    FileInputFormat.setInputPaths(job, FileOutputFormat.getOutputPath(this.last_job));
    job.setInputFormat(KeyValueTextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job,
            new Path(FileOutputFormat.getOutputPath(this.last_job).toString() + "/combine"));
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    JobConf real_last_job = this.last_job;
    this.runJob(job);
    this.last_job = real_last_job;
    return;
}
From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java
License:Open Source License
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    // -------------------------------------------
    // Phase #1
    // -------------------------------------------
    JobConf p1_job = base.getJobConf();
    p1_job.setJobName(p1_job.getJobName() + ".Phase1");
    Path p1_output = new Path(base.getOutputPath().toString() + "/phase1");
    FileOutputFormat.setOutputPath(p1_job, p1_output);

    // Make sure we have our properties
    String required[] = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE };
    for (String req : required) {
        if (!base.getOptions().containsKey(req)) {
            System.err.println("ERROR: The property '" + req + "' is not set");
            System.exit(1);
        }
    } // FOR

    p1_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p1_job.setOutputFormat(SequenceFileOutputFormat.class);
    p1_job.setOutputKeyClass(Text.class);
    p1_job.setOutputValueClass(Text.class);
    p1_job.setMapperClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class);
    p1_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class);
    p1_job.setCompressMapOutput(base.getCompress());

    // -------------------------------------------
    // Phase #2
    // -------------------------------------------
    JobConf p2_job = base.getJobConf();
    p2_job.setJobName(p2_job.getJobName() + ".Phase2");
    p2_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    if (base.getSequenceFile())
        p2_job.setOutputFormat(SequenceFileOutputFormat.class);
    p2_job.setOutputKeyClass(Text.class);
    p2_job.setOutputValueClass(Text.class);
    p2_job.setMapperClass(IdentityMapper.class);
    p2_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class);
    p2_job.setCompressMapOutput(base.getCompress());
    p2_job.setNumMapTasks(60);

    // -------------------------------------------
    // Phase #3
    // -------------------------------------------
    JobConf p3_job = base.getJobConf();
    p3_job.setJobName(p3_job.getJobName() + ".Phase3");
    p3_job.setNumReduceTasks(1);
    p3_job.setInputFormat(
            base.getSequenceFile() ? SequenceFileInputFormat.class : KeyValueTextInputFormat.class);
    p3_job.setOutputKeyClass(Text.class);
    p3_job.setOutputValueClass(Text.class);
    //p3_job.setMapperClass(Phase3Map.class);
    p3_job.setMapperClass(IdentityMapper.class);
    p3_job.setReducerClass(base.getTupleData()
            ? edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class
            : edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class);

    // Execute #1
    base.runJob(p1_job);

    // Execute #2
    Path p2_output = new Path(base.getOutputPath().toString() + "/phase2");
    FileOutputFormat.setOutputPath(p2_job, p2_output);
    FileInputFormat.setInputPaths(p2_job, p1_output);
    base.runJob(p2_job);

    // Execute #3
    Path p3_output = new Path(base.getOutputPath().toString() + "/phase3");
    FileOutputFormat.setOutputPath(p3_job, p3_output);
    FileInputFormat.setInputPaths(p3_job, p2_output);
    base.runJob(p3_job);

    // There does need to be a combine
    if (base.getCombine())
        base.runCombine();

    return 0;
}
From source file:edu.iu.benchmark.JobLauncher.java
License:Apache License
private Job configureBenchmarkJob(String cmd, int bytesPerPartition, int numPartitions, int numMappers,
        int numIterations, Path inputDirPath, Path outputDirPath) throws IOException, URISyntaxException {
    Job job = Job.getInstance(getConf(), "benchmark_job");
    FileInputFormat.setInputPaths(job, inputDirPath);
    FileOutputFormat.setOutputPath(job, outputDirPath);
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(JobLauncher.class);
    job.setMapperClass(BenchmarkMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMappers);
    job.setNumReduceTasks(0);
    jobConf.set(Constants.BENCHMARK_CMD, cmd);
    jobConf.setInt(Constants.BYTES_PER_PARTITION, bytesPerPartition);
    jobConf.setInt(Constants.NUM_PARTITIONS, numPartitions);
    jobConf.setInt(Constants.NUM_MAPPERS, numMappers);
    jobConf.setInt(Constants.NUM_ITERATIONS, numIterations);
    return job;
}
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks,
        int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath,
        Configuration configuration, int jobID) throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices);
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);
    Job job = Job.getInstance(configuration, "ccd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(CCDLauncher.class);
    job.setMapperClass(CCDMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.daal_cov.COVDaalLauncher.java
License:Apache License
private Job configureCOVJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    // configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    // configuration.setInt(Constants.BATCH_SIZE, batchSize);
    Job job = Job.getInstance(configuration, "cov_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(COVDaalLauncher.class);
    job.setMapperClass(COVDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_linreg.LinRegDaalLauncher.java
License:Apache License
private Job configureLinRegJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
        int batchSize, int numMapTasks, int numThreadsPerWorker, Path modelDir, Path outputDir,
        Configuration configuration) throws IOException, URISyntaxException {
    configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setInt(Constants.BATCH_SIZE, batchSize);
    Job job = Job.getInstance(configuration, "linreg_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LinRegDaalLauncher.class);
    job.setMapperClass(LinRegDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_mom.MOMDaalLauncher.java
License:Apache License
private Job configureMOMJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    // configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    // configuration.setInt(Constants.BATCH_SIZE, batchSize);
    Job job = Job.getInstance(configuration, "mom_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(MOMDaalLauncher.class);
    job.setMapperClass(MOMDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_naive.NaiveDaalLauncher.java
License:Apache License
private Job configureNaiveJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
        int vecsize, int num_class, int num_test, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setInt(Constants.VECTOR_SIZE, vecsize);
    configuration.setInt(Constants.NUM_CLASS, num_class);
    configuration.setInt(Constants.NUM_TEST, num_test);
    Job job = Job.getInstance(configuration, "naive_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    // int xmx = (mem - 5000) > (mem * 0.5)
    //         ? (mem - 5000) : (int) Math.ceil(mem * 0.5);
    int xmx = (int) Math.ceil((mem - 5000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setInt("mapred.task.timeout", 1800000);
    jobConf.setNumMapTasks(numMapTasks);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(NaiveDaalLauncher.class);
    job.setMapperClass(NaiveDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}