Example usage for org.apache.hadoop.mapred JobConf setNumMapTasks

List of usage examples for org.apache.hadoop.mapred JobConf setNumMapTasks

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.JobConf.setNumMapTasks(int).

Prototype

public void setNumMapTasks(int n) 

Document

Set the number of map tasks for this job.
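
Note that, per the Hadoop javadoc, this value is only a hint to the framework: the actual number of map tasks is determined by the input splits produced by the job's InputFormat, whereas setNumReduceTasks(int) is authoritative. As a quick orientation before the project examples below, here is a minimal self-contained sketch of a typical call site (it is not taken from any of the sources below; the class name, job name, and argument-supplied paths are placeholders):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetNumMapTasksExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetNumMapTasksExample.class);
        conf.setJobName("setNumMapTasks-example");

        // Ask the framework for 8 map tasks. This is only a hint; the real
        // number of maps is driven by the splits computed by the InputFormat.
        conf.setNumMapTasks(8);

        // TextInputFormat produces LongWritable offsets and Text lines; the
        // default IdentityMapper/IdentityReducer pass them through unchanged,
        // so the output key/value classes match the input record types.
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));  // placeholder input path
        FileOutputFormat.setOutputPath(conf, new Path(args[1])); // placeholder output path

        JobClient.runJob(conf);
    }
}

To actually influence the number of maps you normally tune the split size on the InputFormat side (for FileInputFormat-based jobs, for example via mapred.min.split.size or the HDFS block size), rather than relying on this hint alone.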

Usage

From source file:org.acacia.csr.java.NotInFinder.java

License:Apache License

public static void main(String[] args) throws Exception {
    String dir1 = "/user/miyuru/wcout";
    String dir2 = "/user/miyuru/notinverts";
    // We first delete the temporary directories if they exist on HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());

    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));
    Job job = new Job(conf, "NotInFinder");
    job.setJarByClass(WordCount.class);
    //   job.setMapperClass(TokenizerMapper.class);
    //   job.setCombinerClass(IntSumReducer.class);
    //   job.setReducerClass(IntSumReducer.class);
    //   job.setOutputKeyClass(LongWritable.class);
    //   job.setOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);

}

From source file:org.acacia.csr.java.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";
     //We first delete the temporary directories if they exist on the HDFS
      FileSystem fs1 = FileSystem.get(new JobConf());
              
     if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
     }
            
    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
            
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true); 
    */

    String dir3 = "/user/miyuru/wcout";
    String dir5 = "/user/miyuru/input";
    // We first delete the temporary directories if they exist on HDFS.
    FileSystem fs3 = FileSystem.get(new JobConf());

    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));
    Job job3 = new Job(conf3, "word count");
    job3.setJarByClass(WordCount.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);

    job3.setSortComparatorClass(SortComparator.class);

    job3.waitForCompletion(true);

    PrintWriter writer;
    try {
        writer = new PrintWriter("/tmp/wfile", "UTF-8");
        writer.println("");
        writer.flush();
        writer.close();
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (UnsupportedEncodingException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    System.out.println("------Done Word Count---------------");

}

From source file:org.acacia.csr.java.ZeroVertexSearcher.java

License:Apache License

public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";
     //We first delete the temporary directories if they exist on the HDFS
      FileSystem fs1 = FileSystem.get(new JobConf());
              
     if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
     }
            
    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
            
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true); 
    */

    String dir3 = "/user/miyuru/zout";
    String dir5 = "/user/miyuru/input";
    // We first delete the temporary directories if they exist on HDFS.
    FileSystem fs3 = FileSystem.get(new JobConf());

    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));
    conf3.set("mapred.map.max.attempts", "0"); // If the job fails we assume it is because we found zero; therefore we do not attempt again.
    Job job3 = new Job(conf3, "zero_vertex_search");
    job3.setJarByClass(ZeroVertexSearcher.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);
    job3.setNumReduceTasks(0);

    job3.setSortComparatorClass(SortComparator.class);
    try {
        job3.waitForCompletion(true);
    } catch (org.acacia.csr.java.ZeroFoundException ex) {
        System.out.println("Found Zero vertex");
        job3.killJob();
    }
    System.out.println("------Done Zero Vertex search---------------");

}

From source file:org.apache.hcatalog.hcatmix.load.HadoopLoadGenerator.java

License:Apache License

/**
 * Prepare the input directory/JobConf and launch the Hadoop job for load testing.
 *
 * @param confFileName The properties file for the task, should be available in the classpath
 * @param conf         the Hadoop configuration used to build the JobConf; a fresh Configuration is used when null
 * @return the reduce results read from the job's output directory
 * @throws IOException
 * @throws MetaException
 * @throws TException
 */
public SortedMap<Long, ReduceResult> runLoadTest(String confFileName, Configuration conf)
        throws Exception, MetaException, TException {
    JobConf jobConf;
    if (conf != null) {
        jobConf = new JobConf(conf);
    } else {
        jobConf = new JobConf(new Configuration());
    }
    InputStream confFileIS;
    try {
        confFileIS = HCatMixUtils.getInputStream(confFileName);
    } catch (Exception e) {
        LOG.error("Couldn't load configuration file " + confFileName);
        throw e;
    }
    Properties props = new Properties();
    try {
        props.load(confFileIS);
    } catch (IOException e) {
        LOG.error("Couldn't load properties file: " + confFileName, e);
        throw e;
    }

    LOG.info("Loading configuration file: " + confFileName);
    addToJobConf(jobConf, props, Conf.MAP_RUN_TIME_MINUTES);
    addToJobConf(jobConf, props, Conf.STAT_COLLECTION_INTERVAL_MINUTE);
    addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_COUNT);
    addToJobConf(jobConf, props, Conf.THREAD_INCREMENT_INTERVAL_MINUTES);
    addToJobConf(jobConf, props, Conf.THREAD_COMPLETION_BUFFER_MINUTES);

    int numMappers = Integer
            .parseInt(props.getProperty(Conf.NUM_MAPPERS.propName, "" + Conf.NUM_MAPPERS.defaultValue));
    Path inputDir = new Path(props.getProperty(Conf.INPUT_DIR.propName, Conf.INPUT_DIR.defaultValueStr));
    Path outputDir = new Path(props.getProperty(Conf.OUTPUT_DIR.propName, Conf.OUTPUT_DIR.defaultValueStr));

    jobConf.setJobName(JOB_NAME);
    jobConf.setNumMapTasks(numMappers);
    jobConf.setMapperClass(HCatMapper.class);
    jobConf.setJarByClass(HCatMapper.class);
    jobConf.setReducerClass(HCatReducer.class);
    jobConf.setMapOutputKeyClass(LongWritable.class);
    jobConf.setMapOutputValueClass(IntervalResult.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(ReduceResult.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(Conf.TASK_CLASS_NAMES.getJobConfKey(),
            props.getProperty(Conf.TASK_CLASS_NAMES.propName, Conf.TASK_CLASS_NAMES.defaultValueStr));

    fs = FileSystem.get(jobConf);
    Path jarRoot = new Path("/tmp/hcatmix_jar_" + new Random().nextInt());
    HadoopUtils.uploadClasspathAndAddToJobConf(jobConf, jarRoot);
    fs.deleteOnExit(jarRoot);

    FileInputFormat.setInputPaths(jobConf, createInputFiles(inputDir, numMappers));
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }
    FileOutputFormat.setOutputPath(jobConf, outputDir);

    // Set up delegation token required for hiveMetaStoreClient in map task
    HiveConf hiveConf = new HiveConf(HadoopLoadGenerator.class);
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveConf);
    String tokenStr = hiveClient.getDelegationToken(UserGroupInformation.getCurrentUser().getUserName(),
            "mapred");
    Token<? extends AbstractDelegationTokenIdentifier> token = new Token<DelegationTokenIdentifier>();
    token.decodeFromUrlString(tokenStr);
    token.setService(new Text(METASTORE_TOKEN_SIGNATURE));
    jobConf.getCredentials().addToken(new Text(METASTORE_TOKEN_KEY), token);

    // Submit the job, once the job is complete see output
    LOG.info("Submitted hadoop job");
    RunningJob j = JobClient.runJob(jobConf);
    LOG.info("JobID is: " + j.getJobName());
    if (!j.isSuccessful()) {
        throw new IOException("Job failed");
    }
    return readResult(outputDir, jobConf);
}

From source file:org.apache.mahout.classifier.bayes.BayesThetaNormalizerDriver.java

License:Apache License

/**
 * Run the job.
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(BayesThetaNormalizerDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-thetaNormalizer");
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setNumMapTasks(100);
    //conf.setNumReduceTasks(1);
    conf.setMapperClass(BayesThetaNormalizerMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesThetaNormalizerReducer.class);
    conf.setReducerClass(BayesThetaNormalizerReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of how Hadoop conf parameters can make or break a piece of code.

    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Path Sigma_kFiles = new Path(output + "/trainer-weights/Sigma_k/*");
    Map<String, Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, Sigma_kFiles, conf);
    DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf,
            GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);

    log.info("Sigma_k for Each Label");
    Map<String, Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);

    Path sigma_kSigma_jFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
    double sigma_jSigma_k = SequenceFileModelReader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigma_jSigma_kString = stringifier.toString(sigma_jSigma_k);

    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigma_jSigma_k = stringifier.fromString(sigma_jSigma_kString);
    log.info("{}", retSigma_jSigma_k);
    conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString);

    Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);

    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);

    client.setConf(conf);

    JobClient.runJob(conf);

}

From source file:org.apache.mahout.classifier.bayes.common.BayesFeatureDriver.java

License:Apache License

/**
 * Run the job.
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output, int gramSize) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(BayesFeatureDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.setInputPaths(conf, new Path(input));
    Path outPath = new Path(output);
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setNumMapTasks(100);
    //conf.setNumReduceTasks(1);
    conf.setMapperClass(BayesFeatureMapper.class);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setCombinerClass(BayesFeatureReducer.class);
    conf.setReducerClass(BayesFeatureReducer.class);
    conf.setOutputFormat(BayesFeatureOutputFormat.class);

    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of how Hadoop conf parameters can make or break a piece of code.

    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    DefaultStringifier<Integer> intStringifier = new DefaultStringifier<Integer>(conf, Integer.class);
    String gramSizeString = intStringifier.toString(gramSize);

    log.info("{}", intStringifier.fromString(gramSizeString));
    conf.set("bayes.gramSize", gramSizeString);

    client.setConf(conf);
    JobClient.runJob(conf);

}

From source file:org.apache.mahout.classifier.bayes.common.BayesTfIdfDriver.java

License:Apache License

/**
 * Run the job.
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(BayesTfIdfDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-termDocCount"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-wordFreq"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-featureCount"));
    Path outPath = new Path(output + "/trainer-tfIdf");
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setNumMapTasks(100);

    conf.setMapperClass(BayesTfIdfMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesTfIdfReducer.class);
    conf.setReducerClass(BayesTfIdfReducer.class);
    conf.setOutputFormat(BayesTfIdfOutputFormat.class);

    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of how Hadoop conf parameters can make or break a piece of code.
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Path interimFile = new Path(output + "/trainer-docCount/part-*");

    Map<String, Double> labelDocumentCounts = SequenceFileModelReader.readLabelDocumentCounts(dfs, interimFile,
            conf);

    DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf,
            GenericsUtil.getClass(labelDocumentCounts));

    String labelDocumentCountString = mapStringifier.toString(labelDocumentCounts);
    log.info("Counts of documents in Each Label");
    Map<String, Double> c = mapStringifier.fromString(labelDocumentCountString);
    log.info("{}", c);

    conf.set("cnaivebayes.labelDocumentCounts", labelDocumentCountString);

    client.setConf(conf);

    JobClient.runJob(conf);
}

From source file:org.apache.mahout.classifier.bayes.common.BayesWeightSummerDriver.java

License:Apache License

/**
 * Run the job.
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(BayesWeightSummerDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-weights");
    FileOutputFormat.setOutputPath(conf, outPath);
    //conf.setNumReduceTasks(1);
    conf.setNumMapTasks(100);
    conf.setMapperClass(BayesWeightSummerMapper.class);
    // See the javadoc for the file input format spec: the first token is the key, the rest is the value; the whole document is on one line.
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(BayesWeightSummerReducer.class);
    conf.setReducerClass(BayesWeightSummerReducer.class);
    conf.setOutputFormat(BayesWeightSummerOutputFormat.class);
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }
    client.setConf(conf);

    JobClient.runJob(conf);
}

From source file:org.apache.mahout.classifier.cbayes.CBayesNormalizedWeightDriver.java

License:Apache License

/**
 * Run the job.
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(CBayesNormalizedWeightDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-theta"));
    Path outPath = new Path(output + "/trainer-weight");
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setNumMapTasks(100);
    //conf.setNumReduceTasks(1);
    conf.setMapperClass(CBayesNormalizedWeightMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(CBayesNormalizedWeightReducer.class);
    conf.setReducerClass(CBayesNormalizedWeightReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of how Hadoop conf parameters can make or break a piece of code.
    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Path thetaNormalizationsFiles = new Path(output + "/trainer-thetaNormalizer/part*");
    Map<String, Double> thetaNormalizer = SequenceFileModelReader.readLabelSums(dfs, thetaNormalizationsFiles,
            conf);
    double perLabelWeightSumNormalisationFactor = Double.MAX_VALUE;
    for (Map.Entry<String, Double> stringDoubleEntry1 : thetaNormalizer.entrySet()) {

        double Sigma_W_ij = stringDoubleEntry1.getValue();
        if (perLabelWeightSumNormalisationFactor > Math.abs(Sigma_W_ij)) {
            perLabelWeightSumNormalisationFactor = Math.abs(Sigma_W_ij);
        }
    }

    for (Map.Entry<String, Double> stringDoubleEntry : thetaNormalizer.entrySet()) {
        double Sigma_W_ij = stringDoubleEntry.getValue();
        thetaNormalizer.put(stringDoubleEntry.getKey(), Sigma_W_ij / perLabelWeightSumNormalisationFactor);
    }

    DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf,
            GenericsUtil.getClass(thetaNormalizer));
    String thetaNormalizationsString = mapStringifier.toString(thetaNormalizer);

    Map<String, Double> c = mapStringifier.fromString(thetaNormalizationsString);
    log.info("{}", c);
    conf.set("cnaivebayes.thetaNormalizations", thetaNormalizationsString);

    client.setConf(conf);

    JobClient.runJob(conf);

}

From source file:org.apache.mahout.classifier.cbayes.CBayesThetaNormalizerDriver.java

License:Apache License

/**
 * Run the job.
 *
 * @param input  the input pathname String
 * @param output the output pathname String
 */
public static void runJob(String input, String output) throws IOException {
    JobClient client = new JobClient();
    JobConf conf = new JobConf(CBayesThetaNormalizerDriver.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-weights/Sigma_j"));
    FileInputFormat.addInputPath(conf, new Path(output + "/trainer-tfIdf/trainer-tfIdf"));
    Path outPath = new Path(output + "/trainer-thetaNormalizer");
    FileOutputFormat.setOutputPath(conf, outPath);
    conf.setNumMapTasks(100);
    //conf.setNumReduceTasks(1);
    conf.setMapperClass(CBayesThetaNormalizerMapper.class);
    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setCombinerClass(CBayesThetaNormalizerReducer.class);
    conf.setReducerClass(CBayesThetaNormalizerReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.set("io.serializations",
            "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
    // Don't ever forget this: keep track of how Hadoop conf parameters can make or break a piece of code.

    FileSystem dfs = FileSystem.get(outPath.toUri(), conf);
    if (dfs.exists(outPath)) {
        dfs.delete(outPath, true);
    }

    Path Sigma_kFiles = new Path(output + "/trainer-weights/Sigma_k/*");
    Map<String, Double> labelWeightSum = SequenceFileModelReader.readLabelSums(dfs, Sigma_kFiles, conf);
    DefaultStringifier<Map<String, Double>> mapStringifier = new DefaultStringifier<Map<String, Double>>(conf,
            GenericsUtil.getClass(labelWeightSum));
    String labelWeightSumString = mapStringifier.toString(labelWeightSum);

    log.info("Sigma_k for Each Label");
    Map<String, Double> c = mapStringifier.fromString(labelWeightSumString);
    log.info("{}", c);
    conf.set("cnaivebayes.sigma_k", labelWeightSumString);

    Path sigma_kSigma_jFile = new Path(output + "/trainer-weights/Sigma_kSigma_j/*");
    double sigma_jSigma_k = SequenceFileModelReader.readSigma_jSigma_k(dfs, sigma_kSigma_jFile, conf);
    DefaultStringifier<Double> stringifier = new DefaultStringifier<Double>(conf, Double.class);
    String sigma_jSigma_kString = stringifier.toString(sigma_jSigma_k);

    log.info("Sigma_kSigma_j for each Label and for each Features");
    double retSigma_jSigma_k = stringifier.fromString(sigma_jSigma_kString);
    log.info("{}", retSigma_jSigma_k);
    conf.set("cnaivebayes.sigma_jSigma_k", sigma_jSigma_kString);

    Path vocabCountFile = new Path(output + "/trainer-tfIdf/trainer-vocabCount/*");
    double vocabCount = SequenceFileModelReader.readVocabCount(dfs, vocabCountFile, conf);
    String vocabCountString = stringifier.toString(vocabCount);

    log.info("Vocabulary Count");
    conf.set("cnaivebayes.vocabCount", vocabCountString);
    double retvocabCount = stringifier.fromString(vocabCountString);
    log.info("{}", retvocabCount);

    client.setConf(conf);

    JobClient.runJob(conf);

}