Example usage for org.apache.hadoop.mapreduce Job setOutputKeyClass

Introduction

This page presents example usages of org.apache.hadoop.mapreduce.Job.setOutputKeyClass, collected from open-source projects.

Prototype

public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException 

Document

Set the key class for the job output data; throws IllegalStateException if the job has already been submitted.
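
Before the project examples, here is a minimal, self-contained driver sketch showing where setOutputKeyClass sits in a typical job setup. It uses Hadoop's bundled TokenCounterMapper and IntSumReducer; the class name WordCountDriver and the command-line argument layout are illustrative assumptions.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public class WordCountDriver { // hypothetical driver class for illustration
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "word count");
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(TokenCounterMapper.class); // emits (Text, IntWritable)
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        // Declare the final (reducer) output types. Unless overridden with
        // setMapOutputKeyClass/setMapOutputValueClass, these also serve as
        // the defaults for the intermediate map output types. Calling this
        // after the job has been submitted throws IllegalStateException.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));   // args[0]: input dir
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // args[1]: output dir
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}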

Usage

From source file:com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.java

License:Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(TopKPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(TopKPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Combiner
    job.setCombinerClass(TopKPhaseCombiner.class);

    // Reduce config
    job.setReducerClass(TopKPhaseReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);

    return job;
}
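
Note how this job declares BytesWritable for the intermediate map output but NullWritable for the final output: when the two pairs of types differ, the map output classes must be set explicitly via setMapOutputKeyClass/setMapOutputValueClass, since they otherwise default to the classes passed to setOutputKeyClass/setOutputValueClass.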

From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {
        // Optional third argument: override the box-log format regex
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        // Input path and mapper configuration
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Output path and reducer configuration
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        // Input path and mapper configuration
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        // Output path and reducer configuration
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize MapReduce common variables

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Optional: override the mapper input-format regex
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    try {
        CommonVariables.initMapReduce(this.getConf(), args); // initialize MapReduce common variables
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        // Read the job parameters
        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Optional: override the mapper input-format regex
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        // Submit the job and wait for it to finish
        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce task execution failed.");
        }

        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.marklogic.mapreduce.examples.BinaryReader.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    if (args.length < 2) {
        System.err.println("Usage: BinaryReader configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = Job.getInstance(conf, "binary reader");
    job.setJarByClass(BinaryReader.class);
    job.setInputFormatClass(DocumentInputFormat.class);
    job.setMapperClass(DocMapper.class);
    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(BinaryOutputFormat.class);
    job.setOutputKeyClass(DocumentURI.class);
    job.setOutputValueClass(BytesWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
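
Since this job never calls setReducerClass, the default identity Reducer forwards the mapper's (DocumentURI, BytesWritable) pairs unchanged, which is why the job output classes mirror the map output classes here.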

From source file:com.marklogic.mapreduce.examples.HelloWorld.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();

    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = Job.getInstance(conf, "hello world");
    job.setJarByClass(HelloWorld.class);

    // Map related configuration
    job.setInputFormatClass(DocumentInputFormat.class);
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);

    // Reduce related configuration
    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(ContentOutputFormat.class);
    job.setOutputKeyClass(DocumentURI.class);
    job.setOutputValueClass(Text.class);

    conf = job.getConfiguration();
    conf.addResource("marklogic-hello-world.xml");

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.marklogic.mapreduce.examples.LinkCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCount configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count");
    job.setJarByClass(LinkCount.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
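
Here the same IntSumReducer class doubles as the combiner, which is safe because its input and output types coincide (Text, IntWritable) and integer addition is associative and commutative.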

From source file:com.marklogic.mapreduce.examples.LinkCountCooccurrences.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountCooccurrences configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count cooccurrences");
    job.setJarByClass(LinkCountCooccurrences.class);
    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_KEY_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, HrefTitleMap.class,
            ElemAttrValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.marklogic.mapreduce.examples.LinkCountHDFS.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountHDFS inputDir outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count hdfs");
    job.setJarByClass(LinkCountHDFS.class);
    job.setInputFormatClass(HDFSInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}