Example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setNumReduceTasks.

Prototype

public void setNumReduceTasks(int tasks) throws IllegalStateException

Source Link

Document

Set the number of reduce tasks for the job.

Usage

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
    /**    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation./*w  ww.  j  a  v  a 2s  .  c  om*/
     * Befort Start, Check the Length of Sequence We Want to Predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract a number of neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the number of sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should think another way to implement the combiner class
        //          Current Implementation is not helpful to Job.
        //          job.setCombinerClass(Combiner.class);

        //Set 1 for number of reduce task for keeping 100 most neighbors in sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.lakala.hbase.IndexBuilder2.java

License:Apache License

/**
 * Job configuration./*w  w w .j  a  v a  2  s  . com*/
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    String columnFamily = args[1];
    System.out.println("****" + tableName);
    conf.set(TableInputFormat.SCAN, convertScanToString(new Scan()));
    conf.set(TableInputFormat.INPUT_TABLE, tableName);
    conf.set("index.tablename", tableName);
    conf.set("index.familyname", columnFamily);
    String[] fields = new String[args.length - 2];
    for (int i = 0; i < fields.length; i++) {
        fields[i] = args[i + 2];
    }
    conf.setStrings("index.fields", fields);
    conf.set("index.familyname", "attributes");
    Job job = new Job(conf, tableName);
    job.setJarByClass(IndexBuilder.class);
    job.setMapperClass(Map.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TableInputFormat.class);
    job.setOutputFormatClass(MultiTableOutputFormat.class);
    return job;
}

From source file:com.lightboxtechnologies.nsrl.HashLoader.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 6) {
        System.err//from w w  w  .j a v  a 2 s . co m
                .println("Usage: HashLoader <mfgfile> <osfile> <prodfile> <hashfile> <outpath> <num_reducers>");
        System.exit(2);
    }

    final String mfg_filename = otherArgs[0];
    final String os_filename = otherArgs[1];
    final String prod_filename = otherArgs[2];
    final String hash_filename = otherArgs[3];
    final String output_filename = otherArgs[4];

    conf.set("mfg_filename", mfg_filename);
    conf.set("os_filename", os_filename);
    conf.set("prod_filename", prod_filename);

    conf.setLong("timestamp", System.currentTimeMillis());
    SKJobFactory.addDependencies(conf);

    final Job job = new Job(conf, "HashLoader");
    job.setJarByClass(HashLoader.class);
    job.setMapperClass(HashLoaderMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[5]));

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    TextInputFormat.addInputPath(job, new Path(hash_filename));
    HFileOutputFormat.setOutputPath(job, new Path(output_filename));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.lightboxtechnologies.spectrum.BlockHasher.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: BlockHasher <imageID> <image> <output>");
        return 2;
    }/*from  w  w  w . j a va 2s. c om*/

    final String imageID = args[0];
    final String image = args[1];
    final String output = args[2];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, image, "BlockHasher", conf);
    job.setJarByClass(BlockHasher.class);
    job.setMapperClass(BlockHashMapper.class);
    // job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(0);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(MD5Hash.class);
    FileOutputFormat.setOutputPath(job, new Path(output));

    conf.setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.ExtentsExtractor.java

License:Apache License

public static int run(String imageID, String friendlyName, String outDir) throws Exception {

    Job job = SKJobFactory.createJob(imageID, friendlyName, "ExtentsExtractor");

    job.setJarByClass(ExtentsExtractor.class);
    job.setMapperClass(ExtentsExtractorMapper.class);

    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(JsonWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));

    FsEntryHBaseInputFormat.setupJob(job, imageID);

    System.out.println("Spinning off ExtentsExtractor Job...");
    job.waitForCompletion(true);/*from  w  ww  .  j  a  v  a2s  . com*/
    return 0;
}

From source file:com.lightboxtechnologies.spectrum.ExtractData.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }//from  w  w  w  . j ava  2  s. c o  m

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());
    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);

    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        //      result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.FolderCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);/*from www.  jav  a 2  s  .  c o  m*/
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.lightboxtechnologies.spectrum.JsonImport.java

License:Apache License

public static int run(String jsonPath, String imageHash, String friendlyName, Configuration conf)
        throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }//ww w  .  jav  a2s. c  o  m
    conf.set(HBaseTables.ENTRIES_TBL_VAR, HBaseTables.ENTRIES_TBL);

    final Job job = SKJobFactory.createJobFromConf(imageHash, friendlyName, "JsonImport", conf);
    job.setJarByClass(JsonImport.class);
    job.setMapperClass(FsEntryMapLoader.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(FsEntryHBaseOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(jsonPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java

License:Apache License

public static int run(String imageID, String outpath, String[] command, Configuration conf)
        throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
    conf.setStrings("command", command);
    conf.setLong("timestamp", System.currentTimeMillis());

    final Job job = new Job(conf, "MRCoffeeJob");
    job.setJarByClass(MRCoffeeJob.class);

    job.setMapperClass(MRCoffeeMapper.class);

    //    job.setReducerClass(KeyValueSortReducer.class);
    //    job.setNumReduceTasks(1);
    job.setNumReduceTasks(0);

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    job.setOutputKeyClass(ImmutableHexWritable.class);
    //    job.setOutputValueClass(KeyValue.class);
    job.setOutputValueClass(JsonWritable.class);
    //    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //    HFileOutputFormat.setOutputPath(job, new Path(outpath));
    TextOutputFormat.setOutputPath(job, new Path(outpath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.PythonJob.java

License:Apache License

public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }//from  w  w w .ja v a  2 s .  c  o  m
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);

    job.setMapperClass(PythonMapper.class);
    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    // String input = otherArgs[0];
    // if (input.endsWith(".json") == true) {
    //   job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //   FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    // }
    // else {

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}