Example usage for org.apache.hadoop.mapreduce Job setJarByClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setJarByClass.

Prototype

public void setJarByClass(Class<?> cls) 

Document

Set the Jar by finding where a given class came from.
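
For orientation, here is a minimal, self-contained driver sketch (the PassThroughJob class name is illustrative and not taken from the examples below; it relies on Hadoop's identity Mapper and Reducer so no user classes are required). The class passed to setJarByClass is only used to locate the jar that contains it, which Hadoop then submits with the job; in practice the driver class itself is usually passed, as in every example on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PassThroughJob {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "pass through");
        // Locate the jar containing PassThroughJob and submit it with the job.
        job.setJarByClass(PassThroughJob.class);
        job.setMapperClass(Mapper.class);   // identity mapper: emits (offset, line) pairs unchanged
        job.setReducerClass(Reducer.class); // identity reducer
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}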

Usage

From source file:com.aliyun.emr.example.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
    String[] remainingArgs = optionParser.getRemainingArgs();
    if (!(remainingArgs.length == 2 || remainingArgs.length == 4)) {
        System.err.println("Usage: wordcount <in> <out> [-skip skipPatternFile]");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < remainingArgs.length; ++i) {
        if ("-skip".equals(remainingArgs[i])) {
            job.addCacheFile(new Path(EMapReduceOSSUtil.buildOSSCompleteUri(remainingArgs[++i], conf)).toUri());
            job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
        } else {
            otherArgs.add(remainingArgs[i]);
        }
    }
    FileInputFormat.addInputPath(job, new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(0), conf)));
    FileOutputFormat.setOutputPath(job,
            new Path(EMapReduceOSSUtil.buildOSSCompleteUri(otherArgs.get(1), conf)));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.aliyun.openservices.tablestore.hadoop.RowCounter.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!parseArgs(args)) {
        printUsage();
        System.exit(1);
    }
    if (endpoint == null || accessKeyId == null || accessKeySecret == null || table == null
            || outputPath == null) {
        printUsage();
        System.exit(1);
    }

    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "row count");
    job.setJarByClass(RowCounter.class);
    job.setMapperClass(RowCounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(TableStoreInputFormat.class);

    TableStore.setCredential(job, accessKeyId, accessKeySecret, securityToken);
    TableStore.setEndpoint(job, endpoint, instance);
    TableStoreInputFormat.addCriteria(job, fetchCriteria());
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.CalcFeatureWeights.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 3 && otherArgs.length != 4) {
        System.err.println("CalcFeatureWeights <inDir> <tmpDir> <outDir> [startStep]");
        System.exit(-1);
    }
    int startStep = 1;
    if (otherArgs.length == 4) {
        startStep = Integer.valueOf(otherArgs[otherArgs.length - 1]);
    }
    FileSystem fs = FileSystem.get(conf);
    if (startStep <= 1) {
        System.out.println("calc");
        fs.delete(new Path(otherArgs[1]), true);
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());
        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(CalcFeatureMapper.class);
        job.setReducerClass(CalcFeatureReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(MyKey.class);

        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step1 failed");
            return;
        }
    }

    if (startStep <= 2) { // sort
        fs.delete(new Path(otherArgs[2]), true);
        System.out.println("sort");
        Job job = new Job(conf, CalcFeatureWeights.class.getSimpleName());

        job.setNumReduceTasks(1);
        job.setJarByClass(CalcFeatureWeights.class);
        job.setMapperClass(IdentityMapper.class);
        job.setReducerClass(IdentityReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setInputFormatClass(SequenceFileInputFormat.class);

        job.setMapOutputKeyClass(MyKey.class);
        job.setMapOutputValueClass(MyValue.class);
        job.setOutputKeyClass(MyKey.class);
        job.setOutputValueClass(MyValue.class);

        FileInputFormat.setInputPaths(job, new Path(otherArgs[1]));

        FileOutputFormat.setOutputPath(job, new Path(otherArgs[2]));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("step2 failed");
            return;
        }
    }

}

From source file:com.antbrains.crf.hadoop.FeatureCounter.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: wordcount <in> <out> <templatefile>");
        System.exit(2);
    }

    String[] templates = SgdCrf.readTemplates(otherArgs[2]).toArray(new String[0]);
    conf.set("templates", strArr2Str(templates));

    Job job = new Job(conf, FeatureCounter.class.getSimpleName());

    job.setJarByClass(FeatureCounter.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(SumReducer.class);
    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.FeatureFilter.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("Usage: wordcount <in> <out> filterRuleFile statOnly");
        System.exit(-1);
    }

    boolean statOnly = true;
    if (otherArgs[3].equalsIgnoreCase("false")) {
        statOnly = false;
    }
    conf.set("statOnly", statOnly + "");

    String rules = FileTools.readFile(otherArgs[2], "UTF8");
    conf.set("rules", rules);
    conf.set("mapred.reduce.tasks", "0");
    Job job = new Job(conf, FeatureFilter.class.getSimpleName());

    job.setJarByClass(FeatureFilter.class);
    job.setMapperClass(CounterMapper.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.FeatureStat.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out> ");
        System.exit(2);
    }

    Job job = new Job(conf, FeatureStat.class.getSimpleName());

    job.setJarByClass(FeatureStat.class);
    job.setMapperClass(CounterMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.InstanceGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("InstanceGenerator <in> <out> <featuredict> <template>");
        System.exit(-1);
    }
    Template template = new Template(otherArgs[3], "UTF8");
    conf.set("template", object2String(template));
    // conf.set("tc", object2String(tc));

    DistributedCache.addCacheFile(new URI(otherArgs[2]), conf);
    conf.set("dict", otherArgs[2]);
    conf.set("mapred.reduce.tasks", "0");
    Job job = new Job(conf, InstanceGenerator.class.getSimpleName());

    job.setJarByClass(InstanceGenerator.class);
    job.setMapperClass(CounterMapper.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.ParallelTraining.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 4) {
        System.err.println("ParallelTraining <instanceDir> <outDir> <featurecount> <training-params>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));

    conf.set("pt.iterate", "1");
    conf.set("pt.featureCount", featureCount + "");

    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));
    conf.set("pt.params", object2String(params));

    Job job = new Job(conf, ParallelTraining.class.getSimpleName());

    job.setJarByClass(ParallelTraining.class);
    job.setMapperClass(TrainingMapper.class);
    job.setReducerClass(TrainingReducer.class);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(TrainingWeights.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.antbrains.crf.hadoop.ParallelTraining2.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileSystem fs = FileSystem.get(conf);
    TrainingParams params = SgdCrf.loadParams(otherArgs[3]);
    System.out.println(new Gson().toJson(params));

    if (otherArgs.length != 5) {
        System.err.println("ParallelTraining2 <instanceDir> <outDir> <featurecount> <training-params> <out-iter>");
        System.exit(-1);
    }
    int featureCount = Integer.valueOf(otherArgs[2]);
    // conf.set("tc", object2String(tc));
    int outIter = Integer.valueOf(otherArgs[4]);

    String prevOutDir = "";
    for (int i = 1; i <= outIter; i++) {
        System.out.println("iterator: " + i);
        conf.set("pt.iterate", i + "");
        conf.set("pt.featureCount", featureCount + "");

        conf.set("pt.params", object2String(params));
        String outDir = otherArgs[1] + "/result" + i;

        if (i > 1) {
            conf.set("paramDir", prevOutDir);
        }
        prevOutDir = outDir;
        fs.delete(new Path(outDir), true);

        Job job = new Job(conf, ParallelTraining2.class.getSimpleName());

        job.setJarByClass(ParallelTraining2.class);
        job.setMapperClass(TrainingMapper.class);
        job.setReducerClass(TrainingReducer.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        System.out.println("outDir: " + outDir);
        FileOutputFormat.setOutputPath(job, new Path(outDir));

        boolean res = job.waitForCompletion(true);
        if (!res) {
            System.err.println("iter " + i + " failed");
            break;
        }
    }
}

From source file:com.antbrains.crf.hadoop.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}