Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks.

Prototype

public void setNumReduceTasks(int n)

Source Link

Document

Set the requisite number of reduce tasks for this job.

Usage

From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override/*ww w. j  a v a2  s. com*/
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    int numOfTasksperMap = (int) Math.ceil(N / numOfMaps);
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + (M - re)); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}

From source file:me.tingri.graphs.cc.block.ConnectedComponentsBlock.java

License:Apache License

protected RunningJob join(short blockWidth, int recurDiagonalMult, Path edgePath, Path curVectorPath,
        Path tempVectorPath, int numOfReducers, String makeSymmetric, FileSystem fs) throws Exception {
    Utility.deleteIfExists(fs, tempVectorPath);

    JobConf conf = new JobConf(getConf(), ConnectedComponentsBlock.class);

    conf.set(BLOCK_WIDTH, "" + blockWidth);
    conf.set(RECURSIVE_DIAG_MULT, "" + recurDiagonalMult);
    conf.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
    conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);
    conf.set(SEPARATOR_WITHIN_VALUE, SPACE);
    conf.set(MAKE_SYMMETRIC, makeSymmetric);

    conf.setJobName("CCBlock_join");

    conf.setMapperClass(JoinMapper.class);
    conf.setReducerClass(JoinReducer.class);

    FileInputFormat.setInputPaths(conf, edgePath, curVectorPath);
    FileOutputFormat.setOutputPath(conf, tempVectorPath);

    conf.setNumReduceTasks(numOfReducers);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return JobClient.runJob(conf);
}

From source file:me.tingri.graphs.cc.block.ConnectedComponentsBlock.java

License:Apache License

protected RunningJob merge(short blockWidth, Path tempVectorPath, Path nextVectorPath, int numOfReducers,
        FileSystem fs) throws Exception {
    Utility.deleteIfExists(fs, nextVectorPath);

    JobConf conf = new JobConf(getConf(), ConnectedComponentsBlock.class);
    conf.set(BLOCK_WIDTH, "" + blockWidth);
    conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);

    conf.setJobName("CCBlock_reduce");

    conf.setMapperClass(MergeMapper.class);
    conf.setReducerClass(MergeReducer.class);

    FileInputFormat.setInputPaths(conf, tempVectorPath);
    FileOutputFormat.setOutputPath(conf, nextVectorPath);

    conf.setNumReduceTasks(numOfReducers);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return JobClient.runJob(conf);
}

From source file:me.tingri.graphs.cc.ConnectedComponents.java

License:Apache License

protected RunningJob join(FileSystem fs, Path edgePath, Path vecPath, Path tempVectorPath, String makeSymmetric,
        int numOfReducers) throws Exception {
    Utility.deleteIfExists(fs, tempVectorPath);

    JobConf conf = new JobConf(getConf(), ConnectedComponents.class);
    conf.set(FIELD_SEPARATOR, DEFAULT_FIELD_SEPARATOR);
    conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);
    conf.set(MAKE_SYMMETRIC, makeSymmetric);

    conf.setJobName("ConnectedComponents_Join");

    conf.setMapperClass(JoinMapper.class);
    conf.setReducerClass(JoinReducer.class);

    FileInputFormat.setInputPaths(conf, edgePath, vecPath);
    FileOutputFormat.setOutputPath(conf, tempVectorPath);

    conf.setNumReduceTasks(numOfReducers);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return JobClient.runJob(conf);
}

From source file:me.tingri.graphs.cc.ConnectedComponents.java

License:Apache License

protected RunningJob merge(FileSystem fs, Path tempVectorPath, Path nextVectorPath, int numOfReducers)
        throws Exception {
    Utility.deleteIfExists(fs, nextVectorPath);

    JobConf conf = new JobConf(getConf(), ConnectedComponents.class);
    conf.set(VECTOR_INDICATOR, DEFAULT_VECTOR_INDICATOR);

    conf.setJobName("ConnectedComponents_Merge");

    conf.setMapperClass(MergeMapper.class);
    conf.setReducerClass(MergeReducer.class);

    FileInputFormat.setInputPaths(conf, tempVectorPath);
    FileOutputFormat.setOutputPath(conf, nextVectorPath);

    conf.setNumReduceTasks(numOfReducers);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return JobClient.runJob(conf);
}

From source file:net.sf.katta.indexing.IndexerJob.java

License:Apache License

public void startIndexer(String path, String finalDestination, int numOfShards) throws IOException {
    // create job conf with class pointing into job jar.
    JobConf jobConf = new JobConf(IndexerJob.class);
    jobConf.setJobName("indexer");
    jobConf.setMapRunnerClass(Indexer.class);
    // alternative use a text file and a TextInputFormat
    jobConf.setInputFormat(SequenceFileInputFormat.class);

    Path input = new Path(path);
    FileInputFormat.setInputPaths(jobConf, input);
    // we just set the output path to make hadoop happy.
    FileOutputFormat.setOutputPath(jobConf, new Path(finalDestination));
    // setting the folder where lucene indexes will be copied when finished.
    jobConf.set("finalDestination", finalDestination);
    // important to switch spec exec off.
    // We dont want to have something duplicated.
    jobConf.setSpeculativeExecution(false);

    // The num of map tasks is equal to the num of input splits.
    // The num of input splits by default is equal to the num of hdf blocks
    // for the input file(s). To get the right num of shards we need to
    // calculate the best input split size.

    FileSystem fs = FileSystem.get(input.toUri(), jobConf);
    FileStatus[] status = fs.globStatus(input);
    long size = 0;
    for (FileStatus fileStatus : status) {
        size += fileStatus.getLen();// w  w  w.j  av a  2  s.  co m
    }
    long optimalSplisize = size / numOfShards;
    jobConf.set("mapred.min.split.size", "" + optimalSplisize);

    // give more mem to lucene tasks.
    jobConf.set("mapred.child.java.opts", "-Xmx2G");
    jobConf.setNumMapTasks(1);
    jobConf.setNumReduceTasks(0);
    JobClient.runJob(jobConf);
}

From source file:nl.tudelft.graphalytics.mapreducev2.MapReduceJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    FileSystem dfs = FileSystem.get(getConf());
    String inPath = inputPath;/*from w ww.j a  v a  2s .com*/

    while (!isFinished()) {
        iteration++;

        // Prepare job configuration
        JobConf jobConfiguration = new JobConf(this.getConf());
        jobConfiguration.setJarByClass(this.getClass());

        jobConfiguration.setMapOutputKeyClass(getMapOutputKeyClass());
        jobConfiguration.setMapOutputValueClass(getMapOutputValueClass());

        jobConfiguration.setMapperClass(getMapperClass());
        if (getCombinerClass() != null)
            jobConfiguration.setCombinerClass(getCombinerClass());
        jobConfiguration.setReducerClass(getReducerClass());

        jobConfiguration.setOutputKeyClass(getOutputKeyClass());
        jobConfiguration.setOutputValueClass(getOutputValueClass());

        jobConfiguration.setInputFormat(getInputFormatClass());
        jobConfiguration.setOutputFormat(getOutputFormatClass());

        if (getNumMappers() != -1)
            jobConfiguration.setNumMapTasks(getNumMappers());
        if (getNumReducers() != -1)
            jobConfiguration.setNumReduceTasks(getNumReducers());

        setConfigurationParameters(jobConfiguration);

        // Set the input and output paths
        String outPath = intermediatePath + "/iteration-" + iteration;
        FileInputFormat.addInputPath(jobConfiguration, new Path(inPath));
        FileOutputFormat.setOutputPath(jobConfiguration, new Path(outPath));

        // Execute the current iteration
        RunningJob jobExecution = JobClient.runJob(jobConfiguration);
        jobExecution.waitForCompletion();

        // Remove the output of the previous job (unless it is the input graph)
        if (iteration != 1) {
            dfs.delete(new Path(inPath), true);
        }
        inPath = outPath;

        processJobOutput(jobExecution);
    }

    // Rename the last job output to the specified output path
    try {
        dfs.mkdirs(new Path(outputPath).getParent());
        dfs.rename(new Path(inPath), new Path(outputPath));
    } catch (Exception e) {
        LOG.warn("Failed to rename MapReduce job output.", e);
    }

    return 0;
}

From source file:nlp.com.knowledgebooks.mapreduce.NameFinder.java

License:Open Source License

/**
 * The main driver for name finder map/reduce program.
 * <p/>//from w  w  w  . j  a  v a 2 s.c  om
 * NOTE: copied with modifications from Hadoppjava example programs
 * <p/>
 * Invoke this method to submit the map/reduce job.
 *
 * @throws IOException When there is communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), NameFinder.class);
    conf.setJobName("namefinder");
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(MapClass.class);
    //conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (Exception ex) {
            System.err.println("ERROR: " + ex);
        }
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    JobClient.runJob(conf);
    return 0;
}

From source file:nthu.scopelab.tsqr.ssvd.ABtDenseOutJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPath, Path inputBt, Path outputPath, int k, int p,
        int reduceTasks, int mis) throws Exception {

    JobConf job = new JobConf(conf, ABtDenseOutJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.setInt(QJob.PROP_K, k);//w w  w  .j  a  v  a 2 s .  c  o  m
    job.setInt(QJob.PROP_P, p);
    job.set(PROP_BT_PATH, inputBt.toString());

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setJobName("ABtDenseOutJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(LMatrixWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(LMatrixWritable.class);

    job.setMapperClass(ABtMapper.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    job.setNumReduceTasks(0);

    FileInputFormat.setInputPaths(job, inputPath);

    RunningJob rj = JobClient.runJob(job);
}

From source file:nthu.scopelab.tsqr.ssvd.BtJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPath, Path btPath, String qrfPath, int k, int p,
        int outerBlockHeight, int reduceTasks, boolean outputBBtProducts, String reduceSchedule, int mis)
        throws Exception {
    boolean outputQ = true;

    String stages[] = reduceSchedule.split(",");

    JobConf job = new JobConf(conf, BtJob.class);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setInt(SCHEDULE_NUM, stages.length);
    job.setInt(PROP_OUTER_PROD_BLOCK_HEIGHT, outerBlockHeight);
    job.setInt(QJob.PROP_K, k);// ww w  . j  ava 2  s.c o  m
    job.setInt(QJob.PROP_P, p);
    job.setBoolean(QmultiplyJob.OUTPUT_Q, outputQ);
    job.setBoolean(PROP_OUPTUT_BBT_PRODUCTS, outputBBtProducts);
    job.set(QmultiplyJob.QRF_DIR, qrfPath);
    FileSystem.get(job).delete(btPath, true);

    FileOutputFormat.setOutputPath(job, btPath);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setJobName("BtJob");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    //job.setOutputValueClass(SparseRowBlockWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(BtMapper.class);
    job.setCombinerClass(OuterProductCombiner.class);
    job.setReducerClass(OuterProductReducer.class);

    fileGather fgather = new fileGather(inputPath, "", FileSystem.get(job));
    mis = Checker.checkMis(mis, fgather.getInputSize(), FileSystem.get(job));
    job.setNumMapTasks(fgather.recNumMapTasks(mis));

    //job.setNumReduceTasks(0);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, inputPath);

    if (outputQ) {
        MultipleOutputs.addNamedOutput(job, QmultiplyJob.Q_MAT, SequenceFileOutputFormat.class,
                IntWritable.class, LMatrixWritable.class);
    }
    if (outputBBtProducts) {
        MultipleOutputs.addNamedOutput(job, OUTPUT_BBT, SequenceFileOutputFormat.class, IntWritable.class,
                VectorWritable.class);
    }
    RunningJob rj = JobClient.runJob(job);
    System.out.println("Btjob Job ID: " + rj.getJobID().toString());
}