Example usage for org.apache.hadoop.mapreduce Job setReducerClass

List of usage examples for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Document

Set the Reducer for the job.

Usage

From source file:com.lightboxtechnologies.spectrum.ExtractData.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }/*  w ww  .j  a  v a2s . co m*/

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());
    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);

    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        //      result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.FolderCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);//from w w  w  . jav  a  2  s  .  c  o  m
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.lightboxtechnologies.spectrum.PythonJob.java

License:Apache License

public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }/*  w ww .jav a2 s .  co m*/
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);

    job.setMapperClass(PythonMapper.class);
    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    // String input = otherArgs[0];
    // if (input.endsWith(".json") == true) {
    //   job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //   FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    // }
    // else {

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.linkedin.hadoop.example.WordCountCounters.java

License:Apache License

/**
 * Azkaban will look for a method named `run` to start your job. Use this method to setup all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 *//*from  w ww.j av a2s.  c  o m*/
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old the output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}

From source file:com.linkedin.oneclick.wordcount.WordCount.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();

    Job job = new Job(conf, "Word Count");
    job.setJarByClass(WordCount.class);

    String workDirectory = args.length >= 1 ? args[0] : "wordcount";
    Path input = new Path(workDirectory, "input.txt");
    FileSystem fs = input.getFileSystem(conf);
    fs.mkdirs(input.getParent());/* w ww  .j av a  2  s .  c o m*/
    copy(resourceInputStream(getClass().getResource("/onegin.txt")), createOutputStream(conf, input), conf);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    FileInputFormat.addInputPath(job, input);

    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    Path output = clean(conf, new Path(workDirectory, "wordcount"));
    FileOutputFormat.setOutputPath(job, output);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java

License:Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);//from   w  w w . j  a  va2 s.  c o m
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);
    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }
    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}

From source file:com.linkedin.thirdeye.hadoop.topk.TopKPhaseJob.java

License:Apache License

public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);//from   w w w  .  j  av  a  2s .c  om
    job.setJarByClass(TopKPhaseJob.class);

    Configuration configuration = job.getConfiguration();
    FileSystem fs = FileSystem.get(configuration);

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, TOPK_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, TOPK_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(TOPK_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(TopKPhaseMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Combiner
    job.setCombinerClass(TopKPhaseCombiner.class);

    // Reduce config
    job.setReducerClass(TopKPhaseReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(1);

    job.waitForCompletion(true);

    return job;
}

From source file:com.littlehotspot.hadoop.mr.box.BoxLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {/*from   w  w w .j a v a2s .c  o m*/
        // ???
        if (arg.length > 2) {
            BOX_LOG_FORMAT_REGEX = Pattern.compile(arg[2]);
        }

        Job job = Job.getInstance(this.getConf(), BoxLog.class.getSimpleName());
        job.setJarByClass(BoxLog.class);

        /***/
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(BoxMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        /***/
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(BoxReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.mobile.MobileLog.java

License:Open Source License

@Override
public int run(String[] arg) throws Exception {
    try {/*from   ww w .jav  a 2 s  .  c o m*/
        Job job = Job.getInstance(this.getConf(), MobileLog.class.getSimpleName());
        job.setJarByClass(MobileLog.class);

        /***/
        Path inputPath = new Path(arg[0]);
        FileInputFormat.addInputPath(job, inputPath);
        job.setMapperClass(MobileMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        /***/
        Path outputPath = new Path(arg[1]);
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), new Configuration());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }
        FileOutputFormat.setOutputPath(job, outputPath);
        job.setReducerClass(MobileReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:com.littlehotspot.hadoop.mr.nginx.module.cdf.CDFScheduler.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    try {/* www .j av  a  2s .c o  m*/
        CommonVariables.initMapReduce(this.getConf(), args);// ? MAP REDUCE

        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // ???
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setMapperClass(CDFMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(GeneralReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean status = job.waitForCompletion(true);
        if (!status) {
            throw new Exception("MapReduce task execute failed.........");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}