Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce Job setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
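
The combiner is an optional, reducer-like class that pre-aggregates map output locally before it is shuffled to the reducers, reducing the data transferred between map and reduce. Because it both consumes and produces map output, its input and output key/value types must match the map output key/value types, and it must be set before the job is submitted (afterwards the call throws IllegalStateException). Below is a minimal sketch of wiring a combiner into a word-count style job; it assumes a TokenizerMapper and IntSumReducer equivalent to the ones in Hadoop's stock WordCount example are on the classpath (they are not defined on this page).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CombinerSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count with combiner");
        job.setJarByClass(CombinerSketch.class);
        job.setMapperClass(TokenizerMapper.class); // emits (Text, IntWritable)
        // The combiner runs on map output; its input and output types must
        // both match the map output types (Text/IntWritable here).
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}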

Usage

From source file: com.marklogic.mapreduce.examples.LinkCount.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCount configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count");
    job.setJarByClass(LinkCount.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
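
LinkCount above, and several of the jobs below, pass the same class to setCombinerClass and setReducerClass. That is safe because integer summation is associative and commutative, so pre-summing partial counts on the map side and summing again in the reduce phase gives the same totals. The examples do not show the class body; the following sketch assumes it matches Hadoop's stock IntSumReducer.

import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Sketch of the IntSumReducer these examples plug into setCombinerClass.
// Summation can be applied to partial map output (as a combiner) and again
// in the reduce phase without changing the final counts.
public class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    private final IntWritable result = new IntWritable();

    @Override
    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable val : values) {
            sum += val.get();
        }
        result.set(sum);
        context.write(key, result);
    }
}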

From source file: com.marklogic.mapreduce.examples.LinkCountCooccurrences.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountCooccurrences configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count cooccurrences");
    job.setJarByClass(LinkCountCooccurrences.class);
    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_KEY_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, HrefTitleMap.class,
            ElemAttrValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.marklogic.mapreduce.examples.LinkCountValue.java

License: Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountValue configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count value");
    job.setJarByClass(LinkCountValue.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: com.mb.saas.bi.job.WordCountJob.java

License: Apache License

public static boolean runHadoopMapReduceJob() throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hadoop");

    File jarFile = UploadResource.createTempJar("bin");
    ClassLoader classLoader = UploadResource.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);

    Configuration conf = new Configuration();

    conf.set("fs.defaultFS", "hdfs://mbcluster/");
    conf.set("dfs.nameservices", "mbcluster");
    conf.set("dfs.ha.namenodes.mbcluster", "ns1,ns2");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns1", "master:4001");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns2", "backup:4001");
    conf.set("dfs.client.failover.proxy.provider.mbcluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    boolean isMapReduceJarSetted = false;
    String hadoopMapReduceJar = "F:/henry_projects/mbHiveAnalyzer/t.jar";
    File file = new File(hadoopMapReduceJar);
    if (file.exists()) {
        ((JobConf) job.getConfiguration()).setJar(hadoopMapReduceJar);
        isMapReduceJarSetted = true;
    }

    if (!isMapReduceJarSetted && jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path("/input/wordcount.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/output/001"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return true;
}

From source file: com.mb.saas.bi.job.WordCountJob.java

License: Apache License

public static void main(String[] args) throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hadoop");

    File jarFile = UploadResource.createTempJar("bin");
    System.setProperty("hadoop.home.dir", "F:/hadoop");
    ClassLoader classLoader = UploadResource.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);

    Configuration conf = new Configuration();
    //   conf.set("fs.defaultFS", "hdfs://slave1:4001");
    //   conf.set("mapreduce.framework.name", "yarn");
    //   conf.set("yarn.resourcemanager.address", "master:8032");
    //   conf.set("yarn.resourcemanager.scheduler.address", "master:8030");

    conf.set("fs.defaultFS", "hdfs://mbcluster/");
    conf.set("dfs.nameservices", "mbcluster");
    conf.set("dfs.ha.namenodes.mbcluster", "ns1,ns2");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns1", "master:4001");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns2", "backup:4001");
    conf.set("dfs.client.failover.proxy.provider.mbcluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    conf.set("mapred.remote.os", "Linux");
    System.out.println(conf.get("mapred.remote.os"));

    //   conf.set("mapreduce.job.reduces", "2");
    //   conf.set("mapreduce.tasktracker.map.tasks.maximum", "8");
    //   conf.set("mapreduce.input.fileinputformat.split.maxsize","123");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    //    job.setMaxMapAttempts(2);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path("/input/wordcount2.txt"));
    //    FileInputFormat.addInputPath(job, new Path("/input/wordcount2.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/output/001002"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
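
Both WordCountJob variants register a TokenizerMapper whose body is not shown on this page. The sketch below assumes it behaves like the mapper in Hadoop's standard WordCount example, emitting (word, 1) pairs so that IntSumReducer can act as both combiner and reducer.

import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch of a TokenizerMapper compatible with the word-count jobs above:
// for each token it emits (word, 1), which the combiner pre-sums on the map side.
public class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    @Override
    public void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        StringTokenizer itr = new StringTokenizer(value.toString());
        while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, ONE);
        }
    }
}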

From source file: com.metamx.druid.indexer.DeterminePartitionsJob.java

License: Open Source License

public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
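
The group-by stage above reuses DeterminePartitionsGroupByReducer as its own combiner. That pattern works whenever the reducer's output types match its input types and the operation can be applied repeatedly without changing the result; here the job only needs the distinct (timestamp, dimensions) keys. The sketch below shows that generic shape; it is not Druid's actual implementation.

import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;

// Generic sketch of a reducer that can double as its own combiner: it collapses
// duplicate keys, so running it on partial map output (as a combiner) and again
// in the reduce phase yields the same set of distinct keys.
public class GroupByDedupReducer
        extends Reducer<BytesWritable, NullWritable, BytesWritable, NullWritable> {

    @Override
    public void reduce(BytesWritable key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        context.write(key, NullWritable.get());
    }
}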

From source file: com.michaeljones.hellohadoopworldmaven.HelloMapReduce.java

public static Job RunJobAsync(Path inputPath, Path outputPath, Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(HelloMapReduce.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}

From source file: com.michaeljones.hellohadoopworldmaven.HelloMapReduce.java

public static Job RunJobAnalysisAsync(Path inputPath, Path outputPath, Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(HelloMapReduce.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumCombinerAnalyser.class);
    job.setReducerClass(IntSumReducerAnalyser.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}
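
Unlike the other examples, these two helpers only configure the job, combiner included, and hand it back without submitting it. A caller might drive them as in the sketch below; HelloMapReduceDriver and the path arguments are illustrative and not part of the original source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class HelloMapReduceDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // RunJobAsync configures mapper, combiner, reducer and paths but does not submit.
        Job job = HelloMapReduce.RunJobAsync(new Path(args[0]), new Path(args[1]), conf);
        job.submit(); // non-blocking; map tasks apply the combiner to their output
        // ... other work could happen here while the job runs ...
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}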

From source file: com.mongodb.hadoop.examples.snmp.SnmpStatisticWithCombiner.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final com.mongodb.MongoURI outputUri = MongoConfigUtil.getOutputURI(conf);
    if (outputUri == null)
        throw new IllegalStateException("output uri is not set");
    if (MongoConfigUtil.getInputURI(conf) == null)
        throw new IllegalStateException("input uri is not set");
    final String outputCollectionName = outputUri.getCollection();
    final Job job = new Job(conf, "snmp analysis " + outputCollectionName);
    job.setJarByClass(SnmpStatisticWithCombiner.class);
    job.setMapperClass(MapHostUploadOnEachAPPerDay.class);
    job.setCombinerClass(CombineHostUploadOnEachAPPerDay.class);
    job.setReducerClass(ReduceHostUploadOnEachAPPerDay.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(MongoInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = job.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
    return 0;
}

From source file: com.mongodb.hadoop.examples.snmp.SnmpStatisticWithCombiner.java

License: Apache License

public static void main(String[] args) throws Exception {
    boolean use_shards = true;
    boolean use_chunks = false;
    final Configuration Conf = new Configuration();
    MongoConfigUtil.setInputURI(Conf, "mongodb://localhost:30000/test.snmp");
    Conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, use_shards);
    Conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, use_chunks);
    String output_table = null;
    if (use_chunks) {
        if (use_shards)
            output_table = "snmp_with_shards_and_chunks";
        else
            output_table = "snmp_with_chunks";
    } else {
        if (use_shards)
            output_table = "snmpWithShards";
        else
            output_table = "snmp_no_splits";
    }
    MongoConfigUtil.setOutputURI(Conf, "mongodb://localhost:30000/test." + output_table);
    final Job snmpJob = new Job(Conf, "snmp analysis " + output_table);
    snmpJob.setJarByClass(SnmpStatisticWithCombiner.class);
    snmpJob.setMapperClass(MapHostUploadOnEachAPPerDay.class);
    snmpJob.setCombinerClass(CombineHostUploadOnEachAPPerDay.class);
    snmpJob.setReducerClass(ReduceHostUploadOnEachAPPerDay.class);
    snmpJob.setOutputKeyClass(Text.class);
    snmpJob.setOutputValueClass(LongWritable.class);
    snmpJob.setInputFormatClass(MongoInputFormat.class);
    snmpJob.setOutputFormatClass(MongoOutputFormat.class);
    try {
        boolean result = snmpJob.waitForCompletion(true);
        System.out.println("job.waitForCompletion( true ) returned " + result);
    } catch (Exception e) {
        System.out.println("job.waitForCompletion( true ) threw Exception");
        e.printStackTrace();
    }
}
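
CombineHostUploadOnEachAPPerDay is not listed on this page. Given the Text/LongWritable output types registered above, a combiner of the following shape would fit; this is a hypothetical sketch that assumes it pre-sums per-key upload totals the same way the reducer does.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

// Hypothetical sketch of a combiner shaped like CombineHostUploadOnEachAPPerDay:
// it pre-aggregates per-key long totals on the map side so less data is shuffled
// to ReduceHostUploadOnEachAPPerDay.
public class LongSumCombiner extends Reducer<Text, LongWritable, Text, LongWritable> {
    private final LongWritable total = new LongWritable();

    @Override
    public void reduce(Text key, Iterable<LongWritable> values, Context context)
            throws IOException, InterruptedException {
        long sum = 0;
        for (LongWritable v : values) {
            sum += v.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}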