Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException

Source Link

Document

Set the key class for the job output data.

Usage

From source file:com.marklogic.mapreduce.examples.LinkCountInDoc.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 1) {
        System.err.println("Usage: LinkCountInDoc configFile");
        System.exit(2);/*www  .  j  av a  2s . c om*/
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count in doc");
    job.setJarByClass(LinkCountInDoc.class);
    job.setInputFormatClass(NodeInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(NodeOutputFormat.class);
    job.setOutputKeyClass(NodePath.class);
    job.setOutputValueClass(MarkLogicNode.class);

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.marklogic.mapreduce.examples.LinkCountInProperty.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 1) {
        System.err.println("Usage: LinkCountInProperty configFile");
        System.exit(2);/*from   w ww . j  av  a2 s  .  c o  m*/
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count in property");
    job.setJarByClass(LinkCountInProperty.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(PropertyOutputFormat.class);
    job.setOutputKeyClass(DocumentURI.class);
    job.setOutputValueClass(MarkLogicNode.class);

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.marklogic.mapreduce.examples.LinkCountValue.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountValue configFile outputDir");
        System.exit(2);/*from   ww w  .j  av a  2  s. co m*/
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count value");
    job.setJarByClass(LinkCountValue.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.marklogic.mapreduce.examples.RevisionGrouper.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 1) {
        System.err.println("Usage: RevisionGrouper configFile");
        System.exit(2);/*from   w w w.j ava  2s  .  c o  m*/
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "revision grouper");
    job.setJarByClass(RevisionGrouper.class);
    job.setInputFormatClass(NodeInputFormat.class);
    job.setMapperClass(RevisionMapper.class);

    job.setOutputFormatClass(KeyValueOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.marklogic.mapreduce.test.CustomQuery.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 1) {
        System.err.println("Usage: CustomQuery configFile");
        System.exit(2);// w w w  . j a va  2  s.  c o  m
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "custom query");
    job.setJarByClass(CustomQuery.class);

    job.setInputFormatClass(NodeInputFormat.class);
    job.setMapperClass(QueryMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    job.setReducerClass(QueryReducer.class);
    job.setOutputFormatClass(KeyValueOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.mb.saas.bi.job.WordCountJob.java

License:Apache License

public static boolean runHadoopMapReduceJob() throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hadoop");

    File jarFile = UploadResource.createTempJar("bin");
    ClassLoader classLoader = UploadResource.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);

    Configuration conf = new Configuration();

    conf.set("fs.defaultFS", "hdfs://mbcluster/");
    conf.set("dfs.nameservices", "mbcluster");
    conf.set("dfs.ha.namenodes.mbcluster", "ns1,ns2");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns1", "master:4001");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns2", "backup:4001");
    conf.set("dfs.client.failover.proxy.provider.mbcluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    boolean isMapReduceJarSetted = false;
    String hadoopMapReduceJar = "F:/henry_projects/mbHiveAnalyzer/t.jar";
    File file = new File(hadoopMapReduceJar);
    if (file.exists()) {
        ((JobConf) job.getConfiguration()).setJar(hadoopMapReduceJar);
        isMapReduceJarSetted = true;/*from   w ww  .j  a v a2s  .  c om*/
    }

    if (!isMapReduceJarSetted && jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path("/input/wordcount.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/output/001"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return true;
}

From source file:com.mb.saas.bi.job.WordCountJob.java

License:Apache License

public static void main(String[] args) throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hadoop");

    File jarFile = UploadResource.createTempJar("bin");
    System.setProperty("hadoop.home.dir", "F:/hadoop");
    ClassLoader classLoader = UploadResource.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);

    Configuration conf = new Configuration();
    //   conf.set("fs.defaultFS", "hdfs://slave1:4001");
    //   conf.set("mapreduce.framework.name", "yarn");
    //   conf.set("yarn.resourcemanager.address", "master:8032");
    //   conf.set("yarn.resourcemanager.scheduler.address", "master:8030");

    conf.set("fs.defaultFS", "hdfs://mbcluster/");
    conf.set("dfs.nameservices", "mbcluster");
    conf.set("dfs.ha.namenodes.mbcluster", "ns1,ns2");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns1", "master:4001");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns2", "backup:4001");
    conf.set("dfs.client.failover.proxy.provider.mbcluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    conf.set("mapred.remote.os", "Linux");
    System.out.println(conf.get("mapred.remote.os"));

    //   conf.set("mapreduce.job.reduces", "2");
    //   conf.set("mapreduce.tasktracker.map.tasks.maximum", "8");
    //   conf.set("mapreduce.input.fileinputformat.split.maxsize","123");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    //    job.setMaxMapAttempts(2);
    job.setNumReduceTasks(1);//from   ww w . j  a v  a 2 s.c o  m
    FileInputFormat.addInputPath(job, new Path("/input/wordcount2.txt"));
    //    FileInputFormat.addInputPath(job, new Path("/input/wordcount2.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/output/001002"));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java

License:Open Source License

public boolean run() {
    try {/*from  ww w.j a v a 2s .c  o m*/
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */

        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();

            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}

From source file:com.metamx.druid.indexer.IndexGeneratorJob.java

License:Open Source License

public boolean run() {
    try {/*from w ww.ja va  2  s . c  om*/
        Job job = new Job(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        for (String propName : System.getProperties().stringPropertyNames()) {
            Configuration conf = job.getConfiguration();
            if (propName.startsWith("hadoop.")) {
                conf.set(propName.substring("hadoop.".length()), System.getProperty(propName));
            }
        }

        job.setInputFormatClass(TextInputFormat.class);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(Text.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        job.setNumReduceTasks(Iterables.size(config.getAllBuckets()));
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        job.setReducerClass(IndexGeneratorReducer.class);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);
        config.intoConfiguration(job);

        job.setJarByClass(IndexGeneratorJob.class);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:com.michaeljones.hellohadoopworldmaven.HelloMapReduce.java

public static Job RunJobAsync(Path inputPath, Path outputPath, Configuration conf) throws Exception {
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(HelloMapReduce.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);

    return job;// ww  w  . j a  v  a 2  s.c o m
}