Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the Reducer for the job.

Usage

From source file:com.lightboxtechnologies.spectrum.ExtractData.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }/*  w ww  .j  a  v a2s . co m*/

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());
    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);

    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        //      result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.FolderCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);//from w w  w  . jav  a  2  s  .  c  o  m
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.lightboxtechnologies.spectrum.PythonJob.java

License:Apache License

public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }/*  w ww .jav a2 s .  co m*/
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);

    job.setMapperClass(PythonMapper.class);
    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    // String input = otherArgs[0];
    // if (input.endsWith(".json") == true) {
    //   job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //   FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    // }
    // else {

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.linkedin.hadoop.example.WordCountCounters.java

License:Apache License

/**
 * Azkaban will look for a method named `run` to start your job. Use this method to setup all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 *//*from  w ww.j av a2s.  c  o m*/
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old the output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}

From source file:com.linkedin.oneclick.wordcount.WordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    Job job = new Job(conf, "Word Count");
    job.setJarByClass(WordCount.class);

    String workDirectory = args.length >= 1 ? args[0] : "wordcount";
    Path input = new Path(workDirectory, "input.txt");
    FileSystem fs = input.getFileSystem(conf);
    fs.mkdirs(input.getParent());/* w ww  .j av a  2  s .  c o m*/
    copy(resourceInputStream(getClass().getResource("/onegin.txt")), createOutputStream(conf, input), conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    FileInputFormat.addInputPath(job, input);

    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    Path output = clean(conf, new Path(workDirectory, "wordcount"));
    FileOutputFormat.setOutputPath(job, output);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java

License:Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);//from   w  w w . j  a  va2 s.  c o m
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }
    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}

From source file:com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.java

License:Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);//from   w w w  .  j  av  a  2s .c  om
    job.setJarByClass(TopKPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(TopKPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Combiner
    job.setCombinerClass(TopKPhaseCombiner.class);

    // Reduce config
    job.setReducerClass(TopKPhaseReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);

    return job;
}

From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {/*from   w  w w .j a v a2s .c  o m*/
        // ???
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        /***/
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        /***/
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {/*from   ww w .jav  a 2 s  .  c o m*/
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        /***/
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        /***/
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    try {/* www .j av  a  2s .c o  m*/
        CommonVariables.initMapReduce(this.getConf(), args);// ? MAP REDUCE

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // ???
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}