Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf JobConf.

Prototype

public JobConf(Configuration conf, Class exampleClass) 

Source Link

Document

Construct a map/reduce job configuration.

Usage

From source file:BMTColumnLoader.java

License:Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), BMTColumnLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setJobName("BMTColumnLoader");
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);//from   w ww. j a  v  a 2s  .  co  m
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file:LinkReverser.java

License:Apache License

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the 
 *                     job tracker./*from ww  w  .  j  a v a2 s.c  o  m*/
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), LinkReverser.class);
    conf.setJobName("indexreverser");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:SleepJob.java

License:Apache License

public JobConf setupJobConf(int numMapper, int numReducer, long mapSleepTime, int mapSleepCount,
        long reduceSleepTime, int reduceSleepCount) {
    JobConf job = new JobConf(getConf(), SleepJob.class);
    job.setNumMapTasks(numMapper);//from w  ww .  ja va 2  s .  c  o m
    job.setNumReduceTasks(numReducer);
    job.setMapperClass(SleepJob.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setReducerClass(SleepJob.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setInputFormat(SleepInputFormat.class);
    job.setPartitionerClass(SleepJob.class);
    job.setSpeculativeExecution(false);
    FileInputFormat.addInputPath(job, new Path("ignored"));
    job.setLong("sleep.job.map.sleep.time", mapSleepTime);
    job.setLong("sleep.job.reduce.sleep.time", reduceSleepTime);
    job.setInt("sleep.job.map.sleep.count", mapSleepCount);
    job.setInt("sleep.job.reduce.sleep.count", reduceSleepCount);
    return job;
}

From source file:BMTTableLoader.java

License:Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), BMTTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setJobName("BMTTableLoader");
    conf.setMapperClass(Map.class);
    conf.setNumReduceTasks(0);/*from  ww w .j av  a2  s  .  c o m*/
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file:BMTKeyValueLoader.java

License:Apache License

public int run(String[] args) {
    JobConf conf = new JobConf(getConf(), CassandraTableLoader.class);
    GenericOptionsParser parser = new GenericOptionsParser(conf, args);

    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setJobName("BMTKeyValueLoader");
    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        other_args.add(args[i]);/*from www .  j av a 2s.  c o m*/
    }

    FileInputFormat.setInputPaths(conf, new Path(other_args.get(0)));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));
    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return 0;
}

From source file:HoopRemoteTask.java

License:Open Source License

/**
*
*///from   w w w.ja  v a2s  . co m
public static void main(String args[]) throws Exception {
    // run the HoopLink constructor; We need this to have a global settings registry       
    @SuppressWarnings("unused")
    HoopLink link = new HoopLink();

    dbg("main ()");

    showTimeStamp();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution 
     * statistics. See Hoop.java for more information
     */

    HoopPerformanceMeasure metrics = new HoopPerformanceMeasure();
    metrics.setMarker("main");
    HoopLink.metrics.getDataSet().add(metrics);

    if (parseArgs(args) == false) {
        usage();
        return;
    }

    if (HoopLink.postonly == true) {
        postOnly();
        return;
    }

    if (HoopLink.task.equals("none") == true) {
        dbg("No task defined, please use the commandline option -task <task>");
        return;
    }

    dbg("Starting system ...");

    HoopRemoteTask driver = new HoopRemoteTask();

    if (HoopLink.useHadoop == false) {
        dbg("Starting built-in mapper ...");

        driver.indexDocuments();
    } else {
        dbg("Starting hadoop job ...");

        Configuration conf = new Configuration();

        // TRANSFER SETTHoopGS FROM HoopLink to Configuration!!!

        transferConf(conf);

        // Now we're feeling much better

        HoopRemoteTask.hdfs = FileSystem.get(conf);

        if (HoopLink.dbglocal == true) {
            dbg("Enabling local debugging ...");
            conf.set("mapred.job.tracker", "local");
        } else
            dbg("Disabling local debugging");

        JobConf job = new JobConf(conf, HoopRemoteTask.class);

        job.setJobName(driver.getClassName());

        driver.setJob(job);

        @SuppressWarnings("unused")
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

        job.setJarByClass(HoopRemoteTask.class);

        if (HoopLink.task.equals("invert") == true) {
            dbg("Configuring job for invert task ...");

            job.setReducerClass(HoopInvertedListReducer.class);
            job.setMapperClass(HoopInvertedListMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
        }

        if (HoopLink.task.equals("wordcount") == true) {
            dbg("Configuring job for wordcount task ...");

            job.setReducerClass(HoopWordCountReducer.class);
            job.setMapperClass(HoopWordCountMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
        }

        dbg("Using input path: " + HoopLink.datapath);
        dbg("Using output path: " + HoopLink.outputpath);

        FileInputFormat.addInputPath(job, new Path(HoopLink.datapath));
        FileOutputFormat.setOutputPath(job, new Path(HoopLink.outputpath));

        job.setInputFormat(HoopWholeFileInputFormat.class);

        if ((HoopLink.shardcreate.equals("mos") == true) && (HoopLink.nrshards > 1)) {
            dbg("Setting output to sharded output streams class ...");

            job.setOutputFormat(HoopShardedOutputFormat.class);
        } else
            job.setOutputFormat(TextOutputFormat.class);

        /**
         * Temporarily commented out for testing purposes
         */

        //job.setPartitionerClass (HoopPartitioner.class);                      

        driver.register("Main");

        JobClient.runJob(job);

        postProcess(conf);
    }

    showTimeStamp();

    metrics.closeMarker();
    long timeTaken = metrics.getYValue();
    //long timeTaken=metrics.getMarkerRaw ();
    metrics.printMetrics(timeTaken);

    driver.unregister();

    /**
     * I've taken out the statistics portion since it relies on code that isn't distributed
     * The next version will have this solved. I might try the solution in:
     * http://stackoverflow.com/questions/7443074/initialize-public-static-variable-in-hadoop-through-arguments
     * Although chances are I will switch to using Hoop to collect much better performance and distribution 
     * statistics. See Hoop.java for more information
     */
    //stats.calcStatistics();
    //dbg (stats.printStatistics());
}

From source file:IndexWords.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        return -1;
    }//from w  w w .j av a 2 s.c  o  m

    checkWords = new String[args.length - 2];

    int numIter = 5;

    Path input = new Path(args[0]);

    for (int i = 0; i < numIter; i++) {
        JobConf conf = new JobConf(getConf(), IndexWords.class);
        conf.setJobName("indexwords");

        conf.setInputFormat(KeyValueTextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(MapClass.class);
        conf.setReducerClass(Reduce.class);

        FileInputFormat.setInputPaths(conf, input);
        FileOutputFormat.setOutputPath(conf, new Path(args[1] + Integer.toString(i)));

        RunningJob rj = JobClient.runJob(conf);
        input = new Path(args[1] + Integer.toString(i));
        double resVal = rj.getCounters().getCounter(RecordCounters.RESIDUAL_COUNTER) * 1.0 / 10000;
        System.out.println(N + " " + (resVal / (1.0 * N)));
        if (resVal / (1.0 * N) < 0.001)
            break;
    }

    return 0;
}

From source file:BP.java

License:Apache License

protected JobConf configInitMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Init_Belief");

    conf.setMapperClass(MapInitializeBelief.class);

    fs.delete(message_cur_path, true);//from  w  ww  . ja  va 2s  .  c o m

    FileInputFormat.setInputPaths(conf, edge_path);
    FileOutputFormat.setOutputPath(conf, message_cur_path);

    conf.setNumReduceTasks(0);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    return conf;
}

From source file:BP.java

License:Apache License

protected JobConf configUpdateMessage() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.set("compat_matrix_str", "" + edge_potential_str);
    conf.setJobName("BP_Update_message");

    conf.setMapperClass(MapUpdateMessage.class);
    conf.setReducerClass(RedUpdateMessage.class);

    fs.delete(message_next_path, true);//from   www  .  j  av a  2  s. co m

    FileInputFormat.setInputPaths(conf, message_cur_path, prior_path);
    FileOutputFormat.setOutputPath(conf, message_next_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:BP.java

License:Apache License

protected JobConf configCheckErr() throws Exception {
    final JobConf conf = new JobConf(getConf(), BP.class);
    conf.set("nstate", "" + nstate);
    conf.setJobName("BP_Check Err");

    fs.delete(check_error_path, true);//from w  w  w  . j  a  va 2s.com

    conf.setMapperClass(MapCheckErr.class);
    conf.setReducerClass(RedCheckErr.class);

    FileInputFormat.setInputPaths(conf, message_cur_path, message_next_path);
    FileOutputFormat.setOutputPath(conf, check_error_path);

    conf.setNumReduceTasks(nreducer);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}