Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

List of usage examples for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException 

Document

Set the number of reduce tasks for the job.
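
For orientation, here is a minimal, hypothetical sketch (not taken from the examples below): passing 0 turns the job into a map-only job, so mapper output is written directly to the output path without a shuffle or reduce phase. MyMapper and the paths are placeholders.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical map-only job configuration.
Job job = Job.getInstance(new Configuration(), "map_only_job");
job.setMapperClass(MyMapper.class);
FileInputFormat.setInputPaths(job, new Path("input"));
FileOutputFormat.setOutputPath(job, new Path("output"));
// 0 reduce tasks: the reduce phase is skipped entirely.
job.setNumReduceTasks(0);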

Usage

From source file:edu.iu.daal_naive.NaiveDaalLauncher.java

License:Apache License

private Job configureNaiveJob(Path inputDir, String testDirPath, String testGroundTruthDirPath, int mem,
        int vecsize, int num_class, int num_test, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {

    configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setInt(Constants.VECTOR_SIZE, vecsize);
    configuration.setInt(Constants.NUM_CLASS, num_class);
    configuration.setInt(Constants.NUM_TEST, num_test);

    Job job = Job.getInstance(configuration, "naive_job");
    JobConf jobConf = (JobConf) job.getConfiguration();

    jobConf.set("mapreduce.framework.name", "map-collective");

    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    // int xmx = (mem - 5000) > (mem * 0.5)
    //   ? (mem - 5000) : (int) Math.ceil(mem * 0.5);
    int xmx = (int) Math.ceil((mem - 5000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

    jobConf.setInt("mapred.task.timeout", 1800000);

    jobConf.setNumMapTasks(numMapTasks);

    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(NaiveDaalLauncher.class);
    job.setMapperClass(NaiveDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);

    System.out.println("Launcher launched");
    return job;
}

From source file:edu.iu.daal_pca.PCADaalLauncher.java

License:Apache License

private Job configurePCAJob(int numOfDataPoints, int vectorSize, int numPointFiles, int numMapTasks,
        int numThreads, int mem, Path dataDir, Path outDir, Configuration configuration)
        throws IOException, URISyntaxException {
    Job job = Job.getInstance(configuration, "PCA_job");

    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, outDir);

    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(PCADaalLauncher.class);
    job.setMapperClass(PCADaalCollectiveMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

    job.setNumReduceTasks(0);
    Configuration jobConfig = job.getConfiguration();
    jobConfig.setInt(Constants.POINTS_PER_FILE, numOfDataPoints / numPointFiles);
    jobConfig.setInt(Constants.VECTOR_SIZE, vectorSize);
    jobConfig.setInt(Constants.NUM_MAPPERS, numMapTasks);
    jobConfig.setInt(Constants.NUM_THREADS, numThreads);
    return job;
}

From source file:edu.iu.daal_subgraph.SCDaalLauncher.java

License:Apache License

private Job configureSCJob(String graphDir, String template, String outDir, int numMapTasks,
        boolean useLocalMultiThread, int numThreads, int numCores, String affinity, String omp_opt, int tpc,
        int mem, double memjavaratio, int send_array_limit, int nbr_split_len, boolean rotation_pipeline,
        int numIteration) throws IOException {

    Configuration configuration = getConf();

    Job job = Job.getInstance(configuration, "subgraph counting");
    Configuration jobConfig = job.getConfiguration();
    Path jobOutDir = new Path(outDir);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(jobOutDir)) {
        fs.delete(jobOutDir, true);
    }

    FileInputFormat.setInputPaths(job, graphDir);
    FileOutputFormat.setOutputPath(job, jobOutDir);

    //job.setInputFormatClass(KeyValueTextInputFormat.class);
    //use harp multifile input format to have a better control on num of map tasks
    job.setInputFormatClass(MultiFileInputFormat.class);

    job.setJarByClass(SCDaalLauncher.class);
    job.setMapperClass(SCDaalCollectiveMapper.class);
    JobConf jobConf = (JobConf) job.getConfiguration();

    jobConf.set("mapreduce.framework.name", "map-collective");

    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    // int xmx = (mem - 5000) > (mem * 0.9)
    //     ? (mem - 5000) : (int) Math.ceil(mem * 0.5);
    // int xmx = (int) Math.ceil((mem - 5000)*0.2);
    int xmx = (int) Math.ceil((mem - 5000) * memjavaratio);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);

    jobConf.setInt("mapreduce.task.timeout", 60000000);

    job.setNumReduceTasks(0);

    jobConfig.setInt(SCConstants.NUM_MAPPERS, numMapTasks);

    jobConfig.set(SCConstants.TEMPLATE_PATH, template);

    jobConfig.set(SCConstants.OUTPUT_PATH, outDir);

    jobConfig.setBoolean(SCConstants.USE_LOCAL_MULTITHREAD, useLocalMultiThread);

    jobConfig.setInt(SCConstants.NUM_THREADS_PER_NODE, numThreads);

    jobConfig.setInt(SCConstants.THREAD_NUM, numThreads);
    jobConfig.setInt(SCConstants.CORE_NUM, numCores);
    jobConfig.set(SCConstants.THD_AFFINITY, affinity);
    jobConfig.set(SCConstants.OMPSCHEDULE, omp_opt);
    jobConfig.setInt(SCConstants.TPC, tpc);
    jobConfig.setInt(SCConstants.SENDLIMIT, send_array_limit);
    jobConfig.setInt(SCConstants.NBRTASKLEN, nbr_split_len);

    jobConfig.setBoolean(SCConstants.ROTATION_PIPELINE, rotation_pipeline);
    jobConfig.setInt(SCConstants.NUM_ITERATION, numIteration);

    return job;
}

From source file:edu.iu.data_aux.Initialize.java

License:Apache License

public Job createJob(String job_name, java.lang.Class<?> launcherCls,
        java.lang.Class<? extends org.apache.hadoop.mapreduce.Mapper> mapperCls)
        throws IOException, URISyntaxException {
    Job thisjob = Job.getInstance(this.conf, job_name);
    JobConf thisjobConf = (JobConf) thisjob.getConfiguration();

    //override mapred.xml content
    thisjobConf.set("mapreduce.framework.name", "map-collective");
    thisjobConf.setInt("mapreduce.job.max.split.locations", 10000);
    thisjobConf.setInt("mapreduce.map.collective.memory.mb", this.mem);
    thisjobConf.setInt("mapreduce.task.timeout", 60000000);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    thisjobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");

    // set mapper number and reducer number
    thisjobConf.setNumMapTasks(this.num_mapper);
    thisjob.setNumReduceTasks(0);

    // set input and output Path
    FileInputFormat.setInputPaths(thisjob, this.inputPath);
    FileOutputFormat.setOutputPath(thisjob, this.outputPath);
    // set the input format
    thisjob.setInputFormatClass(MultiFileInputFormat.class);

    thisjob.setJarByClass(launcherCls);
    thisjob.setMapperClass(mapperCls);

    return thisjob;

}

From source file:edu.iu.examples.ExamplesMain.java

License:Apache License

private Job configureBenchmarkJob(String cmd, int bytesPerPartition, int numPartitions, int numMappers,
        int numIterations, boolean verify, Path inputDirPath, Path outputDirPath) throws IOException {
    Job job = Job.getInstance(getConf(), "example_job");
    FileInputFormat.setInputPaths(job, inputDirPath);
    FileOutputFormat.setOutputPath(job, outputDirPath);
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(ExamplesMain.class);
    if (cmd.equals("allreduce")) {
        job.setMapperClass(AllReduce.class);
    } else if (cmd.equals("allgather")) {
        job.setMapperClass(AllGather.class);
    } else if (cmd.equals("reduce")) {
        job.setMapperClass(Reduce.class);
    } else if (cmd.equals("bcast")) {
        job.setMapperClass(BCast.class);
    } else if (cmd.equals("rotate")) {
        job.setMapperClass(Rotate.class);
    }
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMappers);
    job.setNumReduceTasks(0);
    jobConf.set(Constants.ARGS_OPERATION, cmd);
    jobConf.setInt(Constants.ARGS_ELEMENTS, bytesPerPartition);
    jobConf.setInt(Constants.ARGS_PARTITIONS, numPartitions);
    jobConf.setInt(Constants.ARGS_MAPPERS, numMappers);
    jobConf.setInt(Constants.ARGS_ITERATIONS, numIterations);
    jobConf.setBoolean(Constants.ARGS_VERIFY, verify);
    return job;
}

From source file:edu.iu.frlayout.FRJobLauncher.java

License:Apache License

private Job prepareJob(String inputDir, String layoutFile, int totalVtx, int iterationCount, int jobCount,
        int numMapTasks, int partitionPerWorker, String outputDirPath, Configuration configuration)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(configuration, "fr-" + jobCount);
    Configuration jobConfig = job.getConfiguration();
    Path outputDir = new Path(outputDirPath);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    jobConfig.set(FRConstants.LAYOUT_FILE, layoutFile);
    jobConfig.setInt(FRConstants.ITERATION, iterationCount);
    jobConfig.setInt(FRConstants.TOTAL_VTX, totalVtx);
    jobConfig.setInt(FRConstants.NUM_MAPS, numMapTasks);
    jobConfig.setInt(FRConstants.PARTITION_PER_WORKER, partitionPerWorker);
    jobConfig.setInt("mapreduce.job.max.split.locations", 10000);
    // input class to file-based class
    // job.setInputFormatClass(DataFileInputFormat.class);
    job.setInputFormatClass(MultiFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(FRJobLauncher.class);
    job.setMapperClass(FRCollectiveMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.kmeans.KMeansMapCollective.java

License:Apache License

private Job configureKMeansJob(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles,
        int numMapTasks, Configuration configuration, Path workDirPath, Path dataDir, Path cDir, Path outDir,
        int jobID, int iterationCount) throws IOException, URISyntaxException {
    Job job = new Job(configuration, "kmeans_job_" + jobID);
    Configuration jobConfig = job.getConfiguration();
    Path jobOutDir = new Path(outDir, "kmeans_out_" + jobID);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(jobOutDir)) {
        fs.delete(jobOutDir, true);
    }
    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, jobOutDir);
    // The first centroid file with ID 0,
    // which should match with the centroid file name in data generation
    Path cFile = new Path(cDir, KMeansConstants.CENTROID_FILE_PREFIX + jobID);
    System.out.println("Centroid File Path: " + cFile.toString());
    jobConfig.set(KMeansConstants.CFILE, cFile.toString());
    jobConfig.setInt(KMeansConstants.JOB_ID, jobID);
    jobConfig.setInt(KMeansConstants.ITERATION_COUNT, iterationCount);
    // input class to file-based class
    // job.setInputFormatClass(DataFileInputFormat.class);
    job.setInputFormatClass(MultiFileInputFormat.class);
    // job.setOutputKeyClass(IntWritable.class);
    // job.setOutputValueClass(V2DDataWritable.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(KMeansMapCollective.class);
    job.setMapperClass(KMeansCollectiveMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    job.setNumReduceTasks(0);
    jobConfig.setInt(KMeansConstants.VECTOR_SIZE, vectorSize);
    jobConfig.setInt(KMeansConstants.NUM_CENTROIDS, numCentroids);
    jobConfig.setInt(KMeansConstants.POINTS_PER_FILE, numOfDataPoints / numPointFiles);
    jobConfig.set(KMeansConstants.WORK_DIR, workDirPath.toString());
    jobConfig.setInt(KMeansConstants.NUM_MAPPERS, numMapTasks);
    return job;
}

From source file:edu.iu.kmeans.regroupallgather.KMeansLauncher.java

License:Apache License

private Job configureKMeansJob(int numOfDataPoints, int numCentroids, int vectorSize, int numPointFiles,
        int numMapTasks, int numThreads, int numIterations, Path dataDir, Path cenDir, Path outDir,
        Configuration configuration) throws IOException, URISyntaxException {
    Job job = Job.getInstance(configuration, "kmeans_job");
    FileInputFormat.setInputPaths(job, dataDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(KMeansLauncher.class);
    job.setMapperClass(KMeansCollectiveMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    job.setNumReduceTasks(0);
    Configuration jobConfig = job.getConfiguration();
    jobConfig.setInt(Constants.POINTS_PER_FILE, numOfDataPoints / numPointFiles);
    jobConfig.setInt(Constants.NUM_CENTROIDS, numCentroids);
    jobConfig.setInt(Constants.VECTOR_SIZE, vectorSize);
    jobConfig.setInt(Constants.NUM_MAPPERS, numMapTasks);
    jobConfig.setInt(Constants.NUM_THREADS, numThreads);
    jobConfig.setInt(Constants.NUM_ITERATIONS, numIterations);
    jobConfig.set(Constants.CEN_DIR, cenDir.toString());
    return job;
}

From source file:edu.iu.lda.LDALauncher.java

License:Apache License

private Job configureLDAJob(Path docDir, int numTopics, double alpha, double beta, int numIterations,
        int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        boolean printModel, Path modelDir, Path outputDir, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.NUM_TOPICS, numTopics);
    configuration.setDouble(Constants.ALPHA, alpha);
    configuration.setDouble(Constants.BETA, beta);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.MIN_BOUND, minBound);
    configuration.setInt(Constants.MAX_BOUND, maxBound);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setBoolean(Constants.PRINT_MODEL, printModel);
    Job job = Job.getInstance(configuration, "lda_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();

    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, docDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LDALauncher.class);
    job.setMapperClass(LDAMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.mds.MDSMapReduce.java

License:Apache License

private Job prepareBCCalcJob(String inputDir, String xFile, String outputDirPath, int iterationCount,
        int jobCount, Configuration configuration, int numPoints, int xWidth, int numMapTasks,
        int partitionPerWorker)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(configuration, "map-collective-mds-bc" + jobCount);
    Configuration jobConfig = job.getConfiguration();
    Path outputDir = new Path(outputDirPath);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    jobConfig.setInt(MDSConstants.ITERATION, iterationCount);
    jobConfig.setInt(MDSConstants.NUMPOINTS, numPoints);
    jobConfig.setInt(MDSConstants.XWIDTH, xWidth);
    jobConfig.set(MDSConstants.XFILE, xFile);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    jobConfig.setInt(MDSConstants.PARTITION_PER_WORKER, partitionPerWorker);
    // input class to file-based class
    job.setInputFormatClass(MultiFileInputFormat.class);
    // job.setInputFormatClass(DataFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(MDSMapReduce.class);
    job.setMapperClass(MDSAllgatherMultiThreadMapper.class);
    // When use MultiFileInputFormat, remember to set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}