Example usage for org.apache.hadoop.mapreduce Job setMapperClass

List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException 

Document

Set the Mapper for the job.
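Before the real-world examples below, here is a minimal, self-contained sketch of the call in context. MyDriver and MyMapper are illustrative names, not taken from any of the listed sources; note that setMapperClass throws IllegalStateException if the job has already been submitted, so it must be called while the job is still being configured.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {

    // Illustrative mapper: emits (token, 1) for each whitespace-separated token.
    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String token : value.toString().split("\\s+")) {
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "my job");
        job.setJarByClass(MyDriver.class);
        job.setMapperClass(MyMapper.class); // must happen before the job is submitted
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}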

Usage

From source file:com.hortonworks.mapreduce.URLCount.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
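    // KeyValueTextInputFormat splits each input line into key and value at the
    // first occurrence of this separator (a single space here).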
    conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", " ");
    Job job = Job.getInstance(conf, "URLCount");
    job.setJarByClass(getClass());
    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(URLCountM.class);
    job.setReducerClass(URLCountR.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.hortonworks.pso.data.generator.mapreduce.DataGenTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    Job job = Job.getInstance(getConf()); // new Job(conf, this.getClass().getCanonicalName());

    //        Configuration conf = getConf();

    int mappers = 2;
    String output = null;
    String config = null;
    long count = 100;

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-mappers".equals(args[i])) {
                mappers = Integer.parseInt(args[++i]);
                otherArgs.add("-Dmapreduce.job.maps=" + Integer.toString(mappers));
            } else if ("-output".equals(args[i])) {
                output = args[++i];
            } else if ("-json.cfg".equals(args[i])) {
                config = args[++i];
            } else if ("-count".equals(args[i])) {
                count = Long.parseLong(args[++i]);
            } else {
                otherArgs.add(args[i]);
            }

        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    if (output == null || config == null) {
        // -output and -json.cfg are required; bail out with usage otherwise
        return printUsage();
    }

    job.getConfiguration().set("json.cfg", config);

    String[] altArgs = new String[otherArgs.size()];
    otherArgs.toArray(altArgs);

    GenericOptionsParser gop = new GenericOptionsParser(job.getConfiguration(), altArgs);
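    // Constructing the parser is used for its side effect: it applies the generic
    // options in altArgs (such as the -D settings added above) to the job configuration.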

    DataGenInputFormat.setNumberOfRows(job, count);
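    // (DataGenInputFormat is this project's custom input format; presumably the
    // row count controls how many synthetic records the mappers generate.)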

    job.setJarByClass(DataGenTool.class);

    Path output_path = new Path(output);

    if (output_path.getFileSystem(getConf()).exists(output_path)) {
        throw new IOException("Output directory " + output_path + " already exists.");
    }

    FileOutputFormat.setOutputPath(job, output_path);

    job.setMapperClass(DataGenMapper.class);
    // Map Only Job
    job.setNumReduceTasks(0);
    //        job.setReducerClass(RerateReducer.class);

    job.setInputFormatClass(DataGenInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;

}

From source file:com.howbuy.hadoop.mr.online.SecondarySort.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysrot <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
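    // With the partitioner and grouping comparator keyed on the first int only,
    // all pairs sharing that int reach the same reduce() call, where the natural
    // IntPair ordering has already sorted them by the second int.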

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setNumReduceTasks(3);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.huihui.mr.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {

    /*
     * Hadoop's configuration mechanism grew out of java.util.Properties and
     * Apache Jakarta Commons Configuration; the current API is
     * org.apache.hadoop.conf.Configuration.
     */
    Configuration conf = new Configuration();
    /*
     * Hadoop's GenericOptionsParser consumes the generic command-line options
     * (-fs, -jt, -libjars, -files, -archives, -D, -tokenCacheFile), e.g.
     * -D mapreduce.job.queuename. getRemainingArgs() returns whatever is left
     * over, such as the paths "xrli/STJoin_in" and "xrli/STJoin_out", as otherArgs.
     */
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    conf.set("fs.defaultFS", "hdfs://localhost:9000");
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);

    // set the mapper, combiner and reducer classes
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // a Path is constructed from a URI string; these point at the HDFS input and output
    String input = "hdfs://localhost:9000/input/";
    String output = "hdfs://localhost:9000/user/hdfs/log_kpi/browser1";
    FileInputFormat.addInputPath(job, new Path(input));
    FileOutputFormat.setOutputPath(job, new Path(output));
    // submit the job and wait for it to finish
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.hzy.test.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    //        String input = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/";
    //        String output = "hdfs://192.168.1.118:9000/user/hdfs/log_kpi/wc/";
    String input = "/tmp/data.txt";
    // String input = args[0];
    String output = "/tmp/t1";

    // String output = args[1];

    Configuration conf = HdfsDAO.config();

    //        conf.set("mapreduce.framework.name", "yarn");
    ////        conf.set("hbase.zookeeper.quorum", "hadoop01:2181");
    //        conf.set("fs.default.name", "hdfs://hadoop01:9000");
    //        conf.set("yarn.resourcemanager.resource-tracker.address", "hadoop01:8031");
    //        conf.set("yarn.resourcemanager.address", "hadoop01:8032");
    //        conf.set("yarn.resourcemanager.scheduler.address", "hadoop01:8030");
    //        conf.set("yarn.resourcemanager.admin.address", "hadoop01:8033");
    //        conf.set("mapreduce.jobhistory.address", "hadoop01:10020");
    //        conf.set("mapreduce.jobhistory.webapp.address", "hadoop01:19888");

    //        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    //        if (otherArgs.length < 2) {
    //            System.err.println("Usage: wordcount <in> [<in>...] <out>");
    //            System.exit(2);
    //        }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    //        for (int i = 0; i < otherArgs.length - 1; ++i) {
    FileInputFormat.addInputPath(job, new Path(input));
    //        }
    FileOutputFormat.setOutputPath(job, new Path(output));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.ifeng.hadoop.thinker.LogDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileUtil.fullyDelete(new File(args[1]));
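    // (FileUtil.fullyDelete on a java.io.File clears the output directory on the
    // local filesystem, not on HDFS.)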

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ifeng.hadoop.thinker.LogLocalDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(LogMapper.class);
    job.setReducerClass(LogReducer.class);
    job.setCombinerClass(LogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ifeng.ipserver.IPServerLogParseDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(super.getConf());
    job.setJarByClass(getClass());

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(IPServerLogParseMapper.class);
    job.setReducerClass(IPServerLogParseReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ifeng.logparser.NginxLogDriver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = Job.getInstance(super.getConf());
    FileInputFormat.setInputDirRecursive(job, true);
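    // recurse into subdirectories of the input paths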

    //FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    FileInputFormat.addInputPaths(job, args[0]);

    job.setMapperClass(NginxLogMapper.class);
    job.setReducerClass(NginxLogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.ifeng.logparser.NginxLogDriver2.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    log.info("Input: {} , Outpu: {}", args[0], args[1]);

    Job job = Job.getInstance(super.getConf());
    //FileInputFormat.setInputDirRecursive(job, true);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    //FileInputFormat.addInputPaths(job, args[0]);

    job.setMapperClass(NginxLogMapper.class);
    job.setReducerClass(NginxLogReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    return job.waitForCompletion(true) ? 0 : 1;
}