Example usage for org.apache.hadoop.mapred JobConf setNumReduceTasks

List of usage examples for org.apache.hadoop.mapred.JobConf.setNumReduceTasks

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setNumReduceTasks.

Prototype

public void setNumReduceTasks(int n) 

Source Link

Document

Set the requisite number of reduce tasks for this job.

Usage

From source file:dinocode.SpeciesGraphBuilder.java

/**
 * Runs the species ranking pipeline as a chain of MapReduce jobs: one graph
 * builder job, a fixed number of iteration jobs that each consume the previous
 * job's output, and a final viewer job.
 *
 * <p>All paths are derived from the location {@code SpeciesDriver} was loaded
 * from, with any {@code /build/classes} segment removed.
 *
 * @param args not used; every path is derived from the code location
 * @throws Exception if any job in the chain fails; the chain stops at the first
 *                   failure because every later job depends on its output
 */
public static void main(String[] args) throws Exception {
    // Number of ranking iterations; also determines the name of the last iteration output.
    final int iterations = 500;
    final int iterationReducers = 5;

    final File f = new File(SpeciesDriver.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    final String projectRoot = f.getAbsolutePath().replace("/build/classes", "");
    final String resultPrefix = projectRoot + "/src/outputFiles/Result";

    // Graph builder
    String inFiles = projectRoot + "/src/InputFiles/species_medium.txt";
    String outFiles = resultPrefix;

    JobConf conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Page-rank Species Graph Builder");
    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));
    conf.setMapperClass(SpeciesGraphBuilderMapperd.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setReducerClass(SpeciesGraphBuilderReducerd.class);
    JobClient.runJob(conf);

    // Iterations. Each job reads the whole output directory of the previous
    // job rather than a single part file, so no partition is dropped when the
    // previous job ran with more than one reducer.
    inFiles = outFiles;
    for (int i = 0; i < iterations; i++) {
        conf = new JobConf(SpeciesDriver.class);
        conf.setJobName("Species Iter");

        int count = i + 1;
        outFiles = resultPrefix + count;
        conf.setNumReduceTasks(iterationReducers);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(inFiles));
        FileOutputFormat.setOutputPath(conf, new Path(outFiles));

        conf.setMapperClass(SpeciesIterMapper2d.class);
        conf.setReducerClass(SpeciesIterReducer2d.class);
        conf.setCombinerClass(SpeciesIterReducer2d.class);

        JobClient.runJob(conf);
        inFiles = outFiles;
    }

    // Viewer. The last iteration wrote one part file per reducer, so the
    // viewer must read the whole directory, not just part-00000.
    conf = new JobConf(SpeciesDriver.class);
    conf.setJobName("Species Viewer");

    conf.setOutputKeyClass(FloatWritable.class);
    conf.setOutputValueClass(Text.class);

    outFiles = projectRoot + "/src/outputFiles/ResultFinal";

    FileInputFormat.setInputPaths(conf, new Path(inFiles));
    FileOutputFormat.setOutputPath(conf, new Path(outFiles));

    conf.setMapperClass(SpeciesViewerMapperd.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    JobClient.runJob(conf);
}

From source file:edu.brown.cs.mapreduce.BenchmarkBase.java

License:Open Source License

/**
 * Builds a {@code JobConf} for this benchmark from the stored command-line
 * arguments.
 *
 * <p>Recognized options update the corresponding fields of this object. The
 * remaining positional arguments are treated as one or more input paths
 * followed by a single output path. Invalid or incomplete arguments print an
 * error and terminate the JVM via {@code System.exit}.
 *
 * @return the configured job configuration
 */
public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);
    //
    // Options
    //
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            if ("-property".equals(args[i])) {
                // Print property and exit. The index is advanced (as for the
                // other valued options) so that a missing value is reported
                // against this flag by the handler below.
                String prop = jobConf.get(args[++i]);
                System.out.println(prop);
                System.exit(0);
            } else if ("-m".equals(args[i])) {
                // # of Maps
                this.num_of_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                // # of Reduces
                this.num_of_reduces = Integer.parseInt(args[++i]);
            } else if ("-debug".equals(args[i])) {
                // Enable debug
                this.debug = true;
            } else if ("-combine".equals(args[i])) {
                // Enable single output file for results
                this.combine = true;
            } else if ("-compress".equals(args[i])) {
                // Tell jobs to compress their intermediate output files
                this.compress = true;
            } else if ("-tuple".equals(args[i])) {
                // We're using TupleWritable (which has to be in a SequenceFile)
                this.tuple_data = true;
                this.sequence_file = true;
            } else if ("-sequence".equals(args[i])) {
                // Use SequenceFiles for initial input
                this.sequence_file = true;
            } else if ("-recursive-dirs".equals(args[i])) {
                // Recursively load directories
                this.load_directories = true;
            } else if ("-basename".equals(args[i])) {
                // Job Basename
                this.job_name = args[++i];
            } else if (args[i].startsWith("-D")) {
                // Misc. Properties, given as -Dkey=value. startsWith is used
                // instead of substring(0, 2) so that arguments shorter than
                // two characters fall through to the positional list instead
                // of throwing.
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            // i already points at the value that failed to parse
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            // i was advanced past the end, so i - 1 is the flag missing its value
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR
      //
      // Make sure there are at least 2 parameters left
      // (one or more input paths plus the output path).
      //
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    //
    // Set these flags so the jobs know about them
    //
    if (this.getSequenceFile())
        this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData())
        this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug())
        this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    //
    // Input Paths: every positional argument except the last one
    //
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                // Add each direct child of the directory as its own input path
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println(
                "ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    //
    // Output Path: the last positional argument
    //
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    // Negative counts mean "not specified"; leave the configured values alone
    if (this.num_of_maps >= 0)
        jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0)
        jobConf.setNumReduceTasks(this.num_of_reduces);

    //
    // Set all properties
    //
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}

From source file:edu.brown.cs.mapreduce.BenchmarkBase.java

License:Open Source License

/**
 * Runs an identity map/reduce job with a single reducer over the output of the
 * last job, writing to a {@code combine} directory beneath that output, and
 * then puts the previous value of {@code last_job} back.
 *
 * @throws NullPointerException if no last job has been recorded
 * @throws Exception anything propagated by {@code runJob}
 */
public void runCombine() throws Exception {
    if (this.last_job == null) {
        throw new NullPointerException("ERROR: Last job is Null");
    }

    JobConf job = new JobConf(this.conf, this.benchmarkClass);
    String baseName = this.job_name;
    if (baseName == null) {
        baseName = this.benchmarkClass.getSimpleName();
    }
    job.setJobName(baseName + ".combine");

    // Identity pass-through; one reducer is needed to get a single output file
    job.setMapperClass(IdentityMapper.class);
    job.setNumMapTasks(0);
    job.setReducerClass(IdentityReducer.class);
    job.setNumReduceTasks(1);

    // Input: whatever the last job produced
    Path previousOutput = FileOutputFormat.getOutputPath(this.last_job);
    FileInputFormat.setInputPaths(job, previousOutput);
    job.setInputFormat(KeyValueTextInputFormat.class);

    // Output: a "combine" directory under the last job's output
    Path combinedOutput = new Path(previousOutput.toString() + "/combine");
    FileOutputFormat.setOutputPath(job, combinedOutput);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Remember the current last job and reinstate it once the combine job has run
    JobConf savedLastJob = this.last_job;
    this.runJob(job);
    this.last_job = savedLastJob;
}

From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark1.java

License:Open Source License

/**
 * Configures and runs Benchmark1 as a map-only job, optionally followed by the
 * combine step.
 *
 * @param args command-line arguments, handed to {@code BenchmarkBase}
 * @return 0 when the job completes; on failure the stack trace is printed and
 *         the JVM exits with status 1
 * @throws Exception if building the base configuration fails
 */
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);
    JobConf job = base.getJobConf();

    // Input format follows the -sequence / -tuple options
    if (base.getSequenceFile()) {
        job.setInputFormat(SequenceFileInputFormat.class);
    } else {
        job.setInputFormat(KeyValueTextInputFormat.class);
    }
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setMapperClass(base.getTupleData() ? Benchmark1.TupleMap.class : Benchmark1.TextMap.class);

    // Map-only job: no reduce phase
    job.setNumReduceTasks(0);

    try {
        base.runJob(job);
        if (base.getCombine()) {
            base.runCombine();
        }
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(1);
    }
    return 0;
}

From source file:edu.brown.cs.mapreduce.benchmarks.Benchmark3.java

License:Open Source License

/**
 * Configures the three phases of Benchmark3 and runs them in order, feeding
 * each phase's output directory to the next phase as input.
 *
 * @param args command-line arguments, handed to {@code BenchmarkBase}
 * @return 0 once all three phases have run
 * @throws Exception anything propagated while configuring or running the jobs
 */
public int run(String[] args) throws Exception {
    BenchmarkBase base = new BenchmarkBase(this.getConf(), this.getClass(), args);

    System.out.println("Job started: " + new Date());

    // -------------------------------------------
    // Phase #1
    // -------------------------------------------
    JobConf p1_job = base.getJobConf();
    p1_job.setJobName(p1_job.getJobName() + ".Phase1");
    Path p1_output = new Path(base.getOutputPath().toString() + "/phase1");
    FileOutputFormat.setOutputPath(p1_job, p1_output);

    // Both date properties must have been supplied as options
    String[] required = { BenchmarkBase.PROPERTY_START_DATE, BenchmarkBase.PROPERTY_STOP_DATE };
    for (int idx = 0; idx < required.length; idx++) {
        String req = required[idx];
        if (base.getOptions().containsKey(req)) {
            continue;
        }
        System.err.println("ERROR: The property '" + req + "' is not set");
        System.exit(1);
    }

    if (base.getSequenceFile()) {
        p1_job.setInputFormat(SequenceFileInputFormat.class);
        p1_job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        p1_job.setInputFormat(KeyValueTextInputFormat.class);
    }
    p1_job.setOutputKeyClass(Text.class);
    p1_job.setOutputValueClass(Text.class);
    if (base.getTupleData()) {
        p1_job.setMapperClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableMap.class);
        p1_job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TupleWritableReduce.class);
    } else {
        p1_job.setMapperClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextMap.class);
        p1_job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase1.TextReduce.class);
    }
    p1_job.setCompressMapOutput(base.getCompress());

    // -------------------------------------------
    // Phase #2
    // -------------------------------------------
    JobConf p2_job = base.getJobConf();
    p2_job.setJobName(p2_job.getJobName() + ".Phase2");
    if (base.getSequenceFile()) {
        p2_job.setInputFormat(SequenceFileInputFormat.class);
        p2_job.setOutputFormat(SequenceFileOutputFormat.class);
    } else {
        p2_job.setInputFormat(KeyValueTextInputFormat.class);
    }
    p2_job.setOutputKeyClass(Text.class);
    p2_job.setOutputValueClass(Text.class);
    p2_job.setMapperClass(IdentityMapper.class);
    if (base.getTupleData()) {
        p2_job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TupleWritableReduce.class);
    } else {
        p2_job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase2.TextReduce.class);
    }
    p2_job.setCompressMapOutput(base.getCompress());
    p2_job.setNumMapTasks(60);

    // -------------------------------------------
    // Phase #3 (single reducer; no output format override)
    // -------------------------------------------
    JobConf p3_job = base.getJobConf();
    p3_job.setJobName(p3_job.getJobName() + ".Phase3");
    p3_job.setNumReduceTasks(1);
    if (base.getSequenceFile()) {
        p3_job.setInputFormat(SequenceFileInputFormat.class);
    } else {
        p3_job.setInputFormat(KeyValueTextInputFormat.class);
    }
    p3_job.setOutputKeyClass(Text.class);
    p3_job.setOutputValueClass(Text.class);
    p3_job.setMapperClass(IdentityMapper.class);
    if (base.getTupleData()) {
        p3_job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TupleWritableReduce.class);
    } else {
        p3_job.setReducerClass(edu.brown.cs.mapreduce.benchmarks.benchmark3.phase3.TextReduce.class);
    }

    // Execute #1
    base.runJob(p1_job);

    // Execute #2: reads phase 1 output
    Path p2_output = new Path(base.getOutputPath().toString() + "/phase2");
    FileOutputFormat.setOutputPath(p2_job, p2_output);
    FileInputFormat.setInputPaths(p2_job, p1_output);
    base.runJob(p2_job);

    // Execute #3: reads phase 2 output
    Path p3_output = new Path(base.getOutputPath().toString() + "/phase3");
    FileOutputFormat.setOutputPath(p3_job, p3_output);
    FileInputFormat.setInputPaths(p3_job, p2_output);
    base.runJob(p3_job);

    // NOTE(review): no combine step (base.runCombine()) is run after phase 3;
    // confirm that this is intended.

    return 0;
}

From source file:edu.brown.cs.mapreduce.demo.OrderSum.java

License:Open Source License

/**
 * Configures and runs the OrderSum map/reduce job with fixed HDFS input and
 * output locations and a single reducer.
 *
 * @param args command-line arguments; not read by this method
 * @return 0 after the job has run
 * @throws Exception anything propagated by {@code JobClient.runJob}
 */
public int run(String[] args) throws Exception {
    final String jobName = OrderSum.class.getSimpleName();
    JobConf conf = new JobConf(this.getConf(), OrderSum.class);
    conf.setJobName(jobName);

    // Input/Output Paths (HDFS)
    FileInputFormat.setInputPaths(conf, "/demo/input/");
    FileOutputFormat.setOutputPath(conf, new Path("/demo/output/"));

    // Input records are read as key/value text lines
    conf.setInputFormat(KeyValueTextInputFormat.class);

    // Map/Reduce Classes
    conf.setMapperClass(OrderSum.OrderSumMapper.class);
    conf.setReducerClass(OrderSum.OrderSumReducer.class);

    // Output Key/Value Types
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    // One reducer, so the job creates a single output file
    conf.setNumReduceTasks(1);

    // Optional features, currently disabled:
    //   - map output compression: conf.setCompressMapOutput(true)
    //   - combiner: conf.setCombinerClass(OrderSum.OrderSumReducer.class)
    //   - search date from the command line:
    //       conf.set("edu.brown.cs.pavlo.search_date", args[0]) when args.length == 1

    JobClient.runJob(conf);
    return 0;
}

From source file:edu.iu.wordcount.CollectiveWordCount.java

License:Apache License

/**
 * Submits the collective word count job as a map-only job on the
 * {@code map-collective} framework and exits with the job's status.
 *
 * @param args generic Hadoop options followed by {@code <in> <out>}
 * @throws Exception if the job cannot be configured or submitted
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "collective word count");
    // Configure through the Job API rather than downcasting its Configuration
    // to JobConf, which relies on an implementation detail of Job.
    job.getConfiguration().set("mapreduce.framework.name", "map-collective");
    job.setNumReduceTasks(0);
    job.setJarByClass(CollectiveWordCount.class);
    job.setMapperClass(WordCountMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:edu.ldzm.analysis.AnalysisSummary.java

License:Apache License

/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job./*from w w w.j a  v  a2 s  .c o m*/
 * 
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AnalysisSummary.class);
    conf.setJobName("analysis_summery");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    boolean param = false;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param = true;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (!param) {
        System.out.println("-l namelist.txt");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:edu.ldzm.average.AverageResponseTime.java

License:Apache License

/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job./*  w  w  w .j  a  v a2 s  .  c om*/
 * 
 * @throws IOException
 *             When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), AverageResponseTime.class);
    conf.setJobName("average_response_time");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Combine.class);
    conf.setReducerClass(Reduce.class);

    int param = 0;
    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-l".equals(args[i])) {
                param++;
                String[] fields = args[++i].split(SEPARATOR);
                conf.setInt("NAME_LIST_LENGTH", fields.length);
                for (int j = 0; j < fields.length; j++) {
                    if ("timeStamp".equals(fields[j])) {
                        conf.setInt("REQUEST_TIME_INDEX", j);
                    } else if ("elapsed".equals(fields[j])) {
                        conf.setInt("REQUEST_ELAPSE_TIME_INDEX", j);
                    } else if ("label".equals(fields[j])) {
                        conf.setInt("REQUEST_LABEL_INDEX", j);
                    } else if ("success".equals(fields[j])) {
                        conf.setInt("REQUEST_SUCCESSFUL_INDEX", j);
                    } else if ("bytes".equals(fields[j])) {
                        conf.setInt("REQUEST_BYTE_INDEX", j);
                    }
                }
            } else if ("-i".equals(args[i])) {
                param++;
                conf.setInt("INTERVAL_TIME", Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    if (param != 2) {
        System.out.println("-l  -i?");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

/**
 * Configures a streaming job for the given request, submits it to Hadoop and
 * records the submission and its Hadoop job ID in the submission database.
 *
 * @param request      the job request (name, task counts, sort mode, user, sources)
 * @param submissionID identifier of the submission to update in the database
 * @param mapperFile   file used as the streaming mapper
 * @param reducerFile  file used as the streaming reducer
 * @param packageDir   directory packaged into the job JAR
 * @param inputPath    input path for the job
 * @throws ValidationException  declared by the interface; not thrown directly here
 * @throws NotFoundException    declared by the interface; not thrown directly here
 * @throws CompilationException declared by the interface; not thrown directly here
 * @throws InternalException    declared by the interface; failures in this method are
 *                              rethrown via {@code JobServiceHandler.wrapException}
 */
public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir,
        Path inputPath) throws ValidationException, NotFoundException, CompilationException, InternalException {
    // Generate job output path
    Path outputDir = new Path(_homeDir, "out");
    Path outputPath;
    try {
        FileSystem fs = outputDir.getFileSystem(new Configuration());
        outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not construct output path.", ex);
    }

    JobConf conf = new JobConf();
    conf.setJobName(request.getName());

    // Set mapper and number of tasks if specified
    StreamJob.setStreamMapper(conf, mapperFile.toString());
    if (request.isSetMapTasks())
        conf.setNumMapTasks(request.getMapTasks());

    // Set reducer and number of tasks if specified
    StreamJob.setStreamReducer(conf, reducerFile.toString());
    if (request.isSetReduceTasks())
        conf.setNumReduceTasks(request.getReduceTasks());

    // Create and set job JAR, including necessary files
    ArrayList<String> jarFiles = new ArrayList<String>();
    jarFiles.add(packageDir.toString());
    String jarPath;
    try {
        jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not create job jar.", ex);
    }
    if (jarPath != null)
        conf.setJar(jarPath);

    // TODO: This is a hack. Rewrite streaming to use DistributedCache.
    //conf.setPattern("mapreduce.job.jar.unpack.pattern",
    //              Pattern.compile(".*"));

    // Set I/O formats and paths
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Use numeric sort if appropriate
    conf.setBoolean(CONF_NUMERIC, request.isNumericSort());
    if (request.isNumericSort()) {
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldComparatorOptions("-n");
        conf.setKeyFieldPartitionerOptions("-n");
    }

    // Set other job information
    conf.set(CONF_USER, request.getUser());
    conf.set(CONF_LANGUAGE, request.getLanguage());
    conf.set(CONF_MAPPER, request.getMapper());
    conf.set(CONF_REDUCER, request.getReducer());

    // Attempt to submit the job. The client is closed once the job ID has
    // been captured so that each submission does not leak a JobClient.
    String hadoopID;
    try {
        JobClient client = new JobClient(new JobConf());
        try {
            RunningJob job = client.submitJob(conf);
            hadoopID = job.getID().toString();
        } finally {
            try {
                client.close();
            } catch (IOException ignored) {
                // Deliberately ignored: a failure to close the client must not
                // turn a successful submission into an error, nor mask the
                // original submission failure.
            }
        }
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.",
                ex);
    }

    try {
        SubmissionDatabase.setSubmitted(submissionID);
        SubmissionDatabase.setHadoopID(submissionID, hadoopID);
    } catch (SQLException ex) {
        throw JobServiceHandler.wrapException("Could not update submission in database.", ex);
    }
}