Example usage for org.apache.hadoop.mapred JobConf set

Introduction

On this page you can find usage examples for the set method of org.apache.hadoop.mapred.JobConf.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
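
A minimal sketch of the round trip (the property name and value here are illustrative, not from the examples below): a value stored with set is carried in the job's configuration and can be read back with get.

    JobConf conf = new JobConf();
    // store a string property in the job configuration
    conf.set("my.custom.property", "some-value");
    // read it back; returns "some-value", or null for names that were never set
    String value = conf.get("my.custom.property");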

Usage

From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.SimCounts1WithFeatures.java

License:Apache License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    JobConf conf = HadoopUtil.generateJobConf(args);

    /* set the new defined type to be used */
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    if (args.length > 3) {
        conf.setInt("threshold", Integer.parseInt(args[3]));
    }
    /* number of milliseconds before killing an unresponsive task */
    conf.set("mapred.task.timeout", "600000");

    /* set the HDFS block size to 128 MB */
    conf.set("dfs.block.size", "134217728");

    /* set the maximum number of tasks per node */
    int maptasks = 200;

    /*
     * Number of map tasks to deploy on each machine, typically 0.5 to 2
     * times the number of cores per node.
     */
    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);
    /*
     * The default number of map tasks per job. Typically set to a prime
     * several times greater than the number of available hosts.
     */
    conf.set("mapred.map.tasks", "" + maptasks);

    int reducetasks = 20;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);

    /*
     * how much virtual memory the entire process tree of each map/reduce
     * task will use
     */
    conf.set("mapred.job.map.memory.mb", "4000");
    conf.set("mapred.job.reduce.memory.mb", "4000");

    conf.set("dfs.replication", "1");

    /*
     * give child JVMs more heap and larger sort buffers to reduce I/O load
     */
    conf.set("mapred.child.java.opts", "-Xmx1400M");

    conf.set("io.sort.mb", "300");
    conf.set("io.sort.factor", "30");

    JobClient.runJob(conf);

}
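
The mapred.* and io.sort.* keys above are the deprecated Hadoop 1.x property names. On Hadoop 2.x and later these calls still work because old names are translated automatically, but the current equivalents (per Hadoop's deprecated-properties table) look like the sketch below; note also that the mapred.tasktracker.* keys are daemon-side settings, so setting them on a per-job JobConf has no effect on a running cluster.

    conf.set("mapreduce.task.timeout", "600000");        // was mapred.task.timeout
    conf.set("dfs.blocksize", "134217728");              // was dfs.block.size
    conf.set("mapreduce.job.maps", "200");               // was mapred.map.tasks
    conf.set("mapreduce.job.reduces", "20");             // was mapred.reduce.tasks
    conf.set("mapreduce.map.memory.mb", "4000");         // was mapred.job.map.memory.mb
    conf.set("mapreduce.reduce.memory.mb", "4000");      // was mapred.job.reduce.memory.mb
    conf.set("mapreduce.map.java.opts", "-Xmx1400M");    // per-task-type form of mapred.child.java.opts
    conf.set("mapreduce.reduce.java.opts", "-Xmx1400M");
    conf.set("mapreduce.task.io.sort.mb", "300");        // was io.sort.mb
    conf.set("mapreduce.task.io.sort.factor", "30");     // was io.sort.factor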

From source file:jobimtext.thesaurus.distributional.hadoop.mapreduce.SimCountsLog.java

License:Apache License

/**
 * The reducer step will sum all float values, i.e. the
 * weight for any (word1,word2) pair sharing a feature.
 */

public static void main(String[] args) throws Exception {

    JobConf conf = HadoopUtil.generateJobConf(args);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(FloatWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(DoubleSumReducer.class);
    conf.setReducerClass(DoubleSumReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    /* number of milliseconds before killing an unresponsive task */
    conf.set("mapred.task.timeout", "600000");

    /* set the HDFS block size to 128 MB */
    conf.set("dfs.block.size", "134217728");

    /* set the maximum number of tasks per node */
    int maptasks = 100;

    /* Number of map tasks to deploy on each machine, typically 0.5 to 2 times (cores/node). */
    conf.set("mapred.tasktracker.map.tasks.maximum", "" + maptasks);
    conf.set("mapred.tasktracker.map", "" + maptasks);
    /* The default number of map tasks per job. Typically set to a prime several
       times greater than the number of available hosts. */
    conf.set("mapred.map.tasks", "" + maptasks);

    int reducetasks = 100;

    conf.set("mapred.tasktracker.reduce.tasks.maximum", "" + reducetasks);
    conf.set("mapred.tasktracker.reduce", "" + reducetasks);
    conf.set("mapred.reduce.tasks", "" + reducetasks);

    JobClient.runJob(conf);

}
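
The DoubleSumReducer referenced above is not shown in this example. A minimal sketch of a float-summing reducer in the old mapred API, written here as an assumption based on the comment above (the class name FloatSumReducer is illustrative):

// assumes imports from java.io, java.util, org.apache.hadoop.io and org.apache.hadoop.mapred
public static class FloatSumReducer extends MapReduceBase
        implements Reducer<Text, FloatWritable, Text, FloatWritable> {
    public void reduce(Text key, Iterator<FloatWritable> values,
            OutputCollector<Text, FloatWritable> output, Reporter reporter) throws IOException {
        float sum = 0f;
        // sum the weights of all values sharing this (word1,word2) key
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new FloatWritable(sum));
    }
}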

From source file:junto.algorithm.parallel.AdsorptionHadoop.java

License:Apache License

public static void main(String[] args) throws Exception {
    Hashtable config = ConfigReader.read_config(args);

    String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern");
    String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base");
    int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters"));

    String currInputFilePat = baseInputFilePat;
    String currOutputFilePat = "";
    for (int iter = 1; iter <= numIterations; ++iter) {
        JobConf conf = new JobConf(AdsorptionHadoop.class);
        conf.setJobName("adsorption_hadoop");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(Map.class);
        // conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // hyperparameters
        conf.set("mu1", Defaults.GetValueOrDie(config, "mu1"));
        conf.set("mu2", Defaults.GetValueOrDie(config, "mu2"));
        conf.set("mu3", Defaults.GetValueOrDie(config, "mu3"));
        conf.set("keepTopKLabels", Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"),
                Integer.toString(Integer.MAX_VALUE)));

        if (iter > 1) {
            // output from last iteration is the input for current iteration
            currInputFilePat = currOutputFilePat + "/*";
        }
        FileInputFormat.setInputPaths(conf, new Path(currInputFilePat));

        currOutputFilePat = baseOutputFilePat + "_" + iter;
        FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat));

        JobClient.runJob(conf);
    }
}
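
Hyperparameters stored with conf.set in the driver travel with each job and are read back on the task side. A minimal sketch of how the Map class above might recover them (hypothetical; the actual junto Map class is not shown here):

public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, Text> {
    private double mu1;
    private int keepTopKLabels;

    public void configure(JobConf job) {
        // values set on the driver's JobConf are available here in every task
        mu1 = Double.parseDouble(job.get("mu1"));
        keepTopKLabels = job.getInt("keepTopKLabels", Integer.MAX_VALUE);
    }

    public void map(LongWritable key, Text value, OutputCollector<Text, Text> output,
            Reporter reporter) throws IOException {
        // ... per-vertex label update using mu1 and keepTopKLabels ...
    }
}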

From source file:junto.algorithm.parallel.LP_ZGL_Hadoop.java

License:Apache License

public static void main(String[] args) throws Exception {
    Hashtable config = ConfigReader.read_config(args);

    String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern");
    String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base");
    int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters"));

    String currInputFilePat = baseInputFilePat;
    String currOutputFilePat = "";
    for (int iter = 1; iter <= numIterations; ++iter) {
        JobConf conf = new JobConf(LP_ZGL_Hadoop.class);
        conf.setJobName("lp_zgl_hadoop");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(LP_ZGL_Map.class);
        // conf.setCombinerClass(LP_ZGL_Reduce.class);
        conf.setReducerClass(LP_ZGL_Reduce.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // hyperparameters
        conf.set("mu1", Defaults.GetValueOrDie(config, "mu1"));
        conf.set("mu2", Defaults.GetValueOrDie(config, "mu2"));
        conf.set("keepTopKLabels", Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"),
                Integer.toString(Integer.MAX_VALUE)));

        if (iter > 1) {
            // output from last iteration is the input for current iteration
            currInputFilePat = currOutputFilePat + "/*";
        }
        FileInputFormat.setInputPaths(conf, new Path(currInputFilePat));

        currOutputFilePat = baseOutputFilePat + "_" + iter;
        FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat));

        JobClient.runJob(conf);
    }
}

From source file:junto.algorithm.parallel.MADHadoop.java

License:Apache License

public static void main(String[] args) throws Exception {
    Hashtable config = ConfigReader.read_config(args);

    String baseInputFilePat = Defaults.GetValueOrDie(config, "hdfs_input_pattern");
    String baseOutputFilePat = Defaults.GetValueOrDie(config, "hdfs_output_base");
    int numIterations = Integer.parseInt(Defaults.GetValueOrDie(config, "iters"));
    int numReducers = Defaults.GetValueOrDefault((String) config.get("num_reducers"), 10);

    String currInputFilePat = baseInputFilePat;
    String currOutputFilePat = "";
    for (int iter = 1; iter <= numIterations; ++iter) {
        JobConf conf = new JobConf(MADHadoop.class);
        conf.setJobName("mad_hadoop");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        conf.setMapperClass(MADHadoopMap.class);
        // conf.setCombinerClass(MADHadoopReduce.class);
        conf.setReducerClass(MADHadoopReduce.class);
        conf.setNumReduceTasks(numReducers);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // hyperparameters
        conf.set("mu1", Defaults.GetValueOrDie(config, "mu1"));
        conf.set("mu2", Defaults.GetValueOrDie(config, "mu2"));
        conf.set("mu3", Defaults.GetValueOrDie(config, "mu3"));
        conf.set("keepTopKLabels", Defaults.GetValueOrDefault((String) config.get("keep_top_k_labels"),
                Integer.toString(Integer.MAX_VALUE)));

        if (iter > 1) {
            // output from last iteration is the input for current iteration
            currInputFilePat = currOutputFilePat + "/*";
        }
        FileInputFormat.setInputPaths(conf, new Path(currInputFilePat));

        currOutputFilePat = baseOutputFilePat + "_iter_" + iter;
        FileOutputFormat.setOutputPath(conf, new Path(currOutputFilePat));

        JobClient.runJob(conf);
    }
}

From source file:kafka.etl.impl.DataGenerator.java

License:Apache License

protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);
    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);

    KafkaETLRequest request = new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);

    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());
    byte[] bytes = request.toString().getBytes("UTF-8");
    KafkaETLKey dummyKey = new KafkaETLKey();
    SequenceFile.setCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, KafkaETLKey.class,
            BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}

From source file:kafka.etl.tweet.producer.TweetProducer.java

License:Apache License

protected void generateOffsets() throws Exception {
    JobConf conf = new JobConf();
    java.util.Date date = new java.util.Date();
    conf.set("hadoop.job.ugi", _props.getProperty("hadoop.job.ugi"));
    conf.setCompressMapOutput(false);
    Calendar cal = Calendar.getInstance();
    Path outPath = new Path(_offsetsDir + Path.SEPARATOR + "1.dat");
    FileSystem fs = outPath.getFileSystem(conf);
    if (fs.exists(outPath))
        fs.delete(outPath);

    KafkaETLRequest request = new KafkaETLRequest(_topic, "tcp://" + _uri.getHost() + ":" + _uri.getPort(), 0);

    System.out.println("Dump " + request.toString() + " to " + outPath.toUri().toString());

    byte[] bytes = request.toString().getBytes("UTF-8");
    KafkaETLKey dummyKey = new KafkaETLKey();
    SequenceFile.setDefaultCompressionType(conf, SequenceFile.CompressionType.NONE);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, outPath, KafkaETLKey.class,
            BytesWritable.class);
    writer.append(dummyKey, new BytesWritable(bytes));
    writer.close();
}
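
To verify the dump, the SequenceFile written above can be read back with the same conf, fs, and outPath; a minimal sketch, assuming it runs in a context where those are in scope:

    KafkaETLKey key = new KafkaETLKey();
    BytesWritable value = new BytesWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, outPath, conf);
    while (reader.next(key, value)) {
        // the serialized KafkaETLRequest written by generateOffsets()
        System.out.println(new String(value.getBytes(), 0, value.getLength(), "UTF-8"));
    }
    reader.close();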

From source file:mapreduce.DosAttack.java

License:Apache License

private void issue() throws IOException {
    LOG.info("Starting DOS on url[{}] with clients[{}]", wsURL, numMappers);
    DosMapper.init(wsURL);
    JobConf job = new JobConf(DosAttack.class);
    job.setJarByClass(DosAttack.class);
    job.setJobName("DOS Attack");
    job.setNumReduceTasks(0);
    job.setInputFormat(NullInputFormat.class);
    job.setOutputFormat(NullOutputFormat.class);
    job.setMapperClass(DosMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumMapTasks(numMappers);
    job.setInt(NUM_MAPPERS_KEY, numMappers);
    job.setInt(NUM_REQUESTS_KEY, numRequests);
    job.set(TARGET_URL_KEY, wsURL);
    JobClient.runJob(job);
}