Example usage for org.apache.hadoop.mapreduce Job setReducerClass

List of usage examples for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Source Link

Document

Set the Reducer for the job.

Usage

From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceAgeCount.java

License:Apache License

/**
 * Configures and runs the "Age Count" MapReduce job.
 *
 * <p>Exactly two arguments are required. {@code args[0]} is passed to
 * {@code RecordServiceConfig.setInputTable} as the table name and {@code args[1]}
 * becomes the job's output path.
 *
 * <p>NOTE(review): the usage text calls the first argument an "input path" although
 * it is used as a table name here -- confirm the intended wording.
 *
 * @param args the two command-line arguments described above
 * @return -1 if the argument count is wrong, 0 if the job succeeds, 1 if it fails
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    final boolean hasExpectedArgs = (args.length == 2);
    if (!hasExpectedArgs) {
        System.err.println("Usage: MapReduceAgeCount <input path> <output path>");
        return -1;
    }

    final String inputTable = args[0];
    final String outputDir = args[1];

    Job ageCountJob = Job.getInstance(getConf());
    ageCountJob.setJarByClass(MapReduceAgeCount.class);
    ageCountJob.setJobName("Age Count");

    // RECORDSERVICE:
    // The input is a table rather than a path, so the usual
    // FileInputFormat.setInputPaths(job, new Path(args[0])) call is replaced by:
    RecordServiceConfig.setInputTable(ageCountJob.getConfiguration(), null, inputTable);

    // RECORDSERVICE:
    // Read through the RecordService flavour of AvroKeyValueInputFormat.
    ageCountJob.setInputFormatClass(
            com.cloudera.recordservice.avro.mapreduce.AvroKeyValueInputFormat.class);
    FileOutputFormat.setOutputPath(ageCountJob, new Path(outputDir));

    // Map side: Avro key/value records in, (Text, IntWritable) pairs out.
    ageCountJob.setMapperClass(AgeCountMapper.class);
    AvroJob.setInputKeySchema(ageCountJob, UserKey.getClassSchema());
    AvroJob.setInputValueSchema(ageCountJob, UserValue.getClassSchema());

    ageCountJob.setMapOutputKeyClass(Text.class);
    ageCountJob.setMapOutputValueClass(IntWritable.class);

    // Reduce side: Avro key/value output with a STRING key and an INT value.
    ageCountJob.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    ageCountJob.setReducerClass(AgeCountReducer.class);
    AvroJob.setOutputKeySchema(ageCountJob, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(ageCountJob, Schema.create(Schema.Type.INT));

    // Block until the job finishes and translate its outcome into an exit code.
    if (ageCountJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.cloudera.recordservice.examples.mapreduce.MapReduceColorCount.java

License:Apache License

/**
 * Configures and runs the "Color Count" MapReduce job.
 *
 * <p>Exactly two arguments are required; {@code args[1]} becomes the job's output
 * path. The input is read from the table {@code users} in database {@code rs}.
 *
 * <p>NOTE(review): {@code args[0]} is validated but never read because the input
 * table is hard-coded -- confirm this is intentional.
 *
 * @param args command-line arguments; only the second one is used
 * @return -1 if the argument count is wrong, 0 if the job succeeds, 1 if it fails
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    org.apache.log4j.BasicConfigurator.configure();

    final boolean hasExpectedArgs = (args.length == 2);
    if (!hasExpectedArgs) {
        System.err.println("Usage: MapReduceColorCount <input path> <output path>");
        return -1;
    }

    final String outputDir = args[1];

    Job colorCountJob = Job.getInstance(getConf());
    colorCountJob.setJarByClass(MapReduceColorCount.class);
    colorCountJob.setJobName("Color Count");

    // RECORDSERVICE:
    // The input is a table rather than a path, so the usual
    // FileInputFormat.setInputPaths(job, new Path(args[0])) call is replaced by:
    RecordServiceConfig.setInputTable(colorCountJob.getConfiguration(), "rs", "users");

    // RECORDSERVICE:
    // Read through the RecordService flavour of AvroKeyInputFormat rather than
    // the stock AvroKeyInputFormat.
    colorCountJob.setInputFormatClass(
            com.cloudera.recordservice.avro.mapreduce.AvroKeyInputFormat.class);

    FileOutputFormat.setOutputPath(colorCountJob, new Path(outputDir));

    // Map side: Avro User records in, (Text, IntWritable) pairs out.
    colorCountJob.setMapperClass(ColorCountMapper.class);
    AvroJob.setInputKeySchema(colorCountJob, User.getClassSchema());
    colorCountJob.setMapOutputKeyClass(Text.class);
    colorCountJob.setMapOutputValueClass(IntWritable.class);

    // Reduce side: Avro key/value output with a STRING key and an INT value.
    colorCountJob.setOutputFormatClass(AvroKeyValueOutputFormat.class);
    colorCountJob.setReducerClass(ColorCountReducer.class);
    AvroJob.setOutputKeySchema(colorCountJob, Schema.create(Schema.Type.STRING));
    AvroJob.setOutputValueSchema(colorCountJob, Schema.create(Schema.Type.INT));

    // Block until the job finishes and translate its outcome into an exit code.
    if (colorCountJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java

License:Apache License

/**
 * Configures and runs the "recordcount" job over the result of a RecordService query.
 *
 * <p>{@code args[0]} is the input query handed to
 * {@code RecordServiceConfig.setInputQuery}; {@code args[1]} is the output path.
 * An existing output path is deleted recursively before the job is submitted.
 *
 * @param args the input query followed by the output path
 * @return 1 if the argument count is wrong or the job fails, 0 if the job succeeds
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: RecordCount <input_query> <output_path>");
        // Report the failure through the return value instead of System.exit(), so
        // the caller (e.g. ToolRunner or a test harness) decides what happens next.
        return 1;
    }
    String inputQuery = args[0];
    String output = args[1];

    Job job = Job.getInstance(getConf());
    job.setJobName("recordcount");
    job.setJarByClass(RecordCount.class);
    job.setMapperClass(Map.class);
    // The same class is used as both combiner and reducer.
    job.setCombinerClass(Reduce.class);
    job.setReducerClass(Reduce.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(LongWritable.class);

    RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery);
    job.setInputFormatClass(RecordServiceInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Clear out any previous output so the job does not trip over an existing directory.
    FileSystem fs = FileSystem.get(job.getConfiguration());
    Path outputPath = new Path(output);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraChecksum.java

License:Apache License

/**
 * Configures and runs the "TeraSum" checksum job.
 *
 * <p>Takes two or three arguments: {@code args[0]} is the input (a table name when
 * RecordService is used, otherwise a path), {@code args[1]} is the output path, and
 * the optional {@code args[2]} is parsed as a boolean that selects RecordService.
 *
 * @param args input, output path and optional RecordService flag
 * @return 2 if the argument count is wrong, 0 if the job succeeds, 1 if it fails
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Job checksumJob = Job.getInstance(getConf());

    final int argCount = args.length;
    if (argCount < 2 || argCount > 3) {
        usage();
        return 2;
    }
    // Without a third argument the plain TeraInputFormat path is taken.
    final boolean useRecordService = (argCount == 3) && Boolean.parseBoolean(args[2]);

    FileOutputFormat.setOutputPath(checksumJob, new Path(args[1]));
    checksumJob.setJobName("TeraSum");
    checksumJob.setJarByClass(TeraChecksum.class);
    checksumJob.setMapperClass(ChecksumMapper.class);
    checksumJob.setReducerClass(ChecksumReducer.class);
    checksumJob.setOutputKeyClass(NullWritable.class);
    checksumJob.setOutputValueClass(Unsigned16.class);
    // A single reducer is forced.
    checksumJob.setNumReduceTasks(1);

    if (!useRecordService) {
        TeraInputFormat.setInputPaths(checksumJob, new Path(args[0]));
        checksumJob.setInputFormatClass(TeraInputFormat.class);
    } else {
        RecordServiceConfig.setInputTable(checksumJob.getConfiguration(), null, args[0]);
        checksumJob.setInputFormatClass(RecordServiceTeraInputFormat.class);
    }

    final boolean succeeded = checksumJob.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java

License:Apache License

/**
 * Configures and runs the "TeraValidate" job.
 *
 * <p>Takes two or three arguments: {@code args[0]} is the input (a table name when
 * RecordService is used, otherwise a path), {@code args[1]} is the output path, and
 * the optional {@code args[2]} is parsed as a boolean that selects RecordService.
 *
 * @param args input, output path and optional RecordService flag
 * @return 1 if the argument count is wrong or the job fails, 0 if the job succeeds
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    final int argCount = args.length;
    if (argCount < 2 || argCount > 3) {
        usage();
        return 1;
    }
    // Without a third argument the plain TeraInputFormat path is taken.
    final boolean useRecordService = (argCount == 3) && Boolean.parseBoolean(args[2]);

    Job validateJob = Job.getInstance(getConf());
    FileOutputFormat.setOutputPath(validateJob, new Path(args[1]));
    validateJob.setJobName("TeraValidate");
    validateJob.setJarByClass(TeraValidate.class);
    validateJob.setMapperClass(ValidateMapper.class);
    validateJob.setReducerClass(ValidateReducer.class);
    validateJob.setOutputKeyClass(Text.class);
    validateJob.setOutputValueClass(Text.class);
    // A single reducer is forced.
    validateJob.setNumReduceTasks(1);
    // A minimum split size of Long.MAX_VALUE forces a single split.
    FileInputFormat.setMinInputSplitSize(validateJob, Long.MAX_VALUE);

    if (!useRecordService) {
        TeraInputFormat.setInputPaths(validateJob, new Path(args[0]));
        validateJob.setInputFormatClass(TeraInputFormat.class);
    } else {
        RecordServiceConfig.setInputTable(validateJob.getConfiguration(), null, args[0]);
        validateJob.setInputFormatClass(RecordServiceTeraInputFormat.class);
    }

    final boolean succeeded = validateJob.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java

/**
 * Runs the HBase bulk-load job and, if it succeeds, loads the generated files into
 * the target table.
 *
 * <p>After generic Hadoop options are stripped, at least six arguments are expected:
 * {@code <inputpath> <outputpath> <hbaseTable> <hbaseColumnFamily>
 * "<hbaseColumns (delimiter separated)>" <column delimiter>}. The output path is
 * deleted recursively before the job is submitted.
 *
 * <p>The HBase connection and table handle are released on every exit path,
 * including job failure and exceptions.
 *
 * @param args command-line arguments, possibly including generic Hadoop options
 * @return 2 if too few arguments are supplied; otherwise
 *         {@code HBASEBulkLoadConstants.SUCCESS} or {@code HBASEBulkLoadConstants.FAILURE}
 *         depending on the job outcome
 * @throws Exception if configuring the job, running it, or the bulk load fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();

    if (args.length < 6) {
        // Only the generic option help is printed; the tool-specific argument
        // list is described in the method documentation above.
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];

    // The table name and column list are stored trimmed and lower-cased in the
    // configuration; the table itself is opened below with the name as given.
    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);

    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);

    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);

    // try-with-resources closes the table and then the connection on every exit
    // path; previously the connection was closed only after a successful load
    // and the table was never closed.
    try (Connection connection = ConnectionFactory.createConnection(config);
            Table hTab = connection.getTable(TableName.valueOf(hbaseTab))) {

        FileSystem.get(getConf()).delete(new Path(args[1]), true);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
        TableMapReduceUtil.addDependencyJars(job);
        HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);

        int exitCode = job.waitForCompletion(true) ? HBASEBulkLoadConstants.SUCCESS
                : HBASEBulkLoadConstants.FAILURE;
        if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
            // Load the files the job wrote under the output path into the table.
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
            loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        }
        return exitCode;
    }
}

From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java

/**
 * Configures and runs the word-count job.
 *
 * <p>After generic Hadoop options are stripped, at least two arguments are expected:
 * {@code args[0]} is added as an input path and {@code args[1]} is the output path.
 *
 * @param args command-line arguments, possibly including generic Hadoop options
 * @return 2 if too few arguments are supplied, 0 if the job succeeds, 1 if it fails
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    String[] remaining = new GenericOptionsParser(config, args).getRemainingArgs();

    if (remaining.length < 2) {
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    final String jobName = this.getClass().getName() + "-wordcount";
    Job wordCount = Job.getInstance(config, jobName);
    wordCount.setJarByClass(SecureWordCountDriver.class);
    wordCount.setInputFormatClass(TextInputFormat.class);

    // The same summing class is used as both combiner and reducer.
    wordCount.setMapperClass(TokenizerMapper.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    // Intermediate and final records share the (Text, IntWritable) shape.
    wordCount.setMapOutputKeyClass(Text.class);
    wordCount.setMapOutputValueClass(IntWritable.class);
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(wordCount, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(wordCount, new Path(remaining[1]));

    if (wordCount.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java

License:Apache License

/**
 * Imports a four-row TIMESTAMP table through {@code DataDrivenDBInputFormat}, using
 * the timestamp column as the split column, and checks that the job reports four
 * reduce output records.
 *
 * @throws Exception if the database setup or the MapReduce job fails
 */
public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    final String[] sampleDates = { "2010-04-01", "2010-04-02", "2010-05-01", "2011-04-01" };
    try {
        try {
            // Start from a clean slate if a previous run left the table behind.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored on purpose: the table may simply not have existed, and the
            // test proceeds either way.
        }

        // Create the table and populate it with one row per sample date.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        for (String date : sampleDates) {
            s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('" + date + "')");
        }

        // Commit this transaction.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem localFs = FileSystem.getLocal(conf);
        localFs.delete(new Path(OUT_DIR), true);

        // Now run the data-driven import.
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean jobSucceeded = job.waitForCompletion(true);
        assertTrue("job failed", jobSucceeded);

        // Every inserted row should have come out of the reducer.
        long reduceOutputRecords = job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
        assertEquals("Did not get all the records", 4, reduceOutputRecords);
    } finally {
        s.close();
    }
}

From source file:com.cloudera.sqoop.mapreduce.MergeJob.java

License:Apache License

/**
 * Configures and runs the MapReduce job that merges the "old" and "new" datasets
 * named in {@code options} into the target directory.
 *
 * <p>The job jar is either the user-specified existing jar or the jar containing
 * the user's record class. Both input paths are qualified against the job's file
 * system and added as inputs. The mapper and I/O formats are chosen according to
 * {@code ExportJobBase.isSequenceFiles} for the new path.
 *
 * @return the result of {@code runJob} for the configured job
 * @throws IOException if no record class name is configured, the record class
 *         cannot be found, or the job is interrupted or fails with a
 *         {@code ClassNotFoundException}; the original exception is kept as the cause
 */
public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with " + "--class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the
        // classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        // Qualify both paths against the job's file system before using them.
        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        // Record the merge parameters in the job configuration.
        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        // The output key class is set by name because only the class name is
        // available here.
        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);

        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);
        return this.runJob(job);
    } catch (InterruptedException ie) {
        // Restore the interrupt status so callers further up the stack can still
        // observe the interruption after it has been wrapped in an IOException.
        Thread.currentThread().interrupt();
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}

From source file:com.cloudera.test.UseHCat.java

License:Apache License

/**
 * Configures and runs the "UseHCat" job, which reads from one HCatalog table and
 * writes to another. Both tables are looked up with a {@code null} database name.
 *
 * <p>After generic Hadoop options are stripped, at least two arguments are expected:
 * {@code args[0]} is the input table name and {@code args[1]} is the output table name.
 *
 * @param args command-line arguments, possibly including generic Hadoop options
 * @return 2 if too few arguments are supplied, 0 if the job succeeds, 1 if it fails
 * @throws Exception if configuring or running the job fails
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException when
    // the table names are missing.
    if (args.length < 2) {
        System.err.println("Usage: UseHCat <input table> <output table>");
        return 2;
    }

    // Get the input and output table names as arguments
    String inputTableName = args[0];
    String outputTableName = args[1];
    // Assume the default database
    String dbName = null;

    Job job = new Job(conf, "UseHCat");
    HCatInputFormat.setInput(job, dbName, inputTableName);
    job.setJarByClass(UseHCat.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // An HCatalog record as input
    job.setInputFormatClass(HCatInputFormat.class);

    // Mapper emits a string as key and an integer as value
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Ignore the key for the reducer output; emitting an HCatalog record as value
    job.setOutputKeyClass(WritableComparable.class);
    job.setOutputValueClass(DefaultHCatRecord.class);
    job.setOutputFormatClass(HCatOutputFormat.class);

    // Write with the output table's own schema, set explicitly on the job.
    HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null));
    HCatSchema s = HCatOutputFormat.getTableSchema(job);
    System.err.println("INFO: output schema explicitly set for writing:" + s);
    HCatOutputFormat.setSchema(job, s);
    return (job.waitForCompletion(true) ? 0 : 1);
}