Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the Reducer for the job.
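
Before the collected examples below, here is a minimal, self-contained sketch of a word-count style job that wires in its reducer through setReducerClass. The class name SetReducerExample and the mapper/reducer implementations are placeholders written for illustration, not taken from the examples. Note that setReducerClass must be called before the job is submitted; once the job is running it throws IllegalStateException.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetReducerExample {

    // Emits (word, 1) for every token in the input line.
    public static class TokenizerMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    // Sums the counts for each word.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "set reducer example");
        job.setJarByClass(SetReducerExample.class);

        job.setMapperClass(TokenizerMapper.class);
        // Reusing the reducer as a combiner is safe here because summing is associative.
        job.setCombinerClass(IntSumReducer.class);
        // Must be set before submission; afterwards it throws IllegalStateException.
        job.setReducerClass(IntSumReducer.class);

        // Output types must match what the reducer emits.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}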

Usage

From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java

License:Apache License

/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final FijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were,
        // since this is a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }
    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your FijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                        + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your FijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                        + " but the output value class was not AvroValue.");
    }
}

From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java

License:Apache License

public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass,
        Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter)
        throws Exception {
    final Job job = new Job(createConfiguration());
    final Configuration conf = job.getConfiguration();

    // Get settings for test.
    final FijiDataRequest request = FijiDataRequest.builder()
            .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build();

    job.setJarByClass(IntegrationTestFijiTableInputFormat.class);

    // Setup the InputFormat.
    FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter);
    job.setInputFormatClass(HBaseFijiTableInputFormat.class);

    // Duplicate functionality from MapReduceJobBuilder, since we are not using it here:
    final List<Path> jarFiles = Lists.newArrayList();
    final FileSystem fs = FileSystem.getLocal(conf);
    for (String cpEntry : System.getProperty("java.class.path").split(":")) {
        if (cpEntry.endsWith(".jar")) {
            jarFiles.add(fs.makeQualified(new Path(cpEntry)));
        }
    }
    DistributedCacheJars.addJarsToDistributedCache(job, jarFiles);

    // Create a test job.
    job.setJobName(jobName);

    // Setup the OutputFormat.
    TextOutputFormat.setOutputPath(job, outputFile.getParent());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Set the mapper class.
    if (null != mapperClass) {
        job.setMapperClass(mapperClass);
    }
    // Set the reducer class.
    if (null != reducerClass) {
        job.setReducerClass(reducerClass);
    }

    return job;
}

From source file:com.mozilla.main.ReadHBaseWriteHdfs.java

License:LGPL

@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.queue.name", "prod");
    Job job = new Job(conf, "ReadHBaseWriteHDFS");
    job.setJarByClass(ReadHBaseWriteHdfs.class);
    Scan scan = new Scan();
    scan.addFamily("data".getBytes());

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ReadHBaseWriteHdfsMapper.class, Text.class,
            Text.class, job);

    job.setReducerClass(ReadHBaseWriteHdfsReducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1000);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[0]));

    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        System.out.println("DONE");
    }

    return 0;
}

From source file:com.mozilla.socorro.hadoop.CrashCountToHbase.java

License:LGPL

/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 */
public Job initJob(String[] args) throws IOException {
    Job job = new Job(getConf());
    job.setJobName(NAME);
    job.setJarByClass(CrashCountToHbase.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));

    job.setMapperClass(CrashCountToHBaseMapper.class);
    job.setReducerClass(CrashCountToHBaseReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job;
}

From source file:com.mozilla.socorro.hadoop.CrashReportJob.java

License:LGPL

/**
 * @param args
 * @return
 * @throws IOException
 * @throws ParseException
 */
public static Job initJob(String jobName, Configuration conf, Class<?> mainClass,
        Class<? extends TableMapper> mapperClass, Class<? extends Reducer> combinerClass,
        Class<? extends Reducer> reducerClass, Map<byte[], byte[]> columns,
        Class<? extends WritableComparable> keyOut, Class<? extends Writable> valueOut, Path outputPath)
        throws IOException, ParseException {
    // Set both start/end time and start/stop row
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();

    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");

    String startDateStr = conf.get(START_DATE);
    String endDateStr = conf.get(END_DATE);
    if (!StringUtils.isBlank(startDateStr)) {
        startCal.setTime(sdf.parse(startDateStr));
    }
    if (!StringUtils.isBlank(endDateStr)) {
        endCal.setTime(sdf.parse(endDateStr));
    }

    conf.setLong(START_TIME, startCal.getTimeInMillis());
    conf.setLong(END_TIME, DateUtil.getEndTimeAtResolution(endCal.getTimeInMillis(), Calendar.DATE));

    Job job = new Job(conf);
    job.setJobName(jobName);
    job.setJarByClass(mainClass);

    // input table configuration
    Scan[] scans = MultiScanTableMapReduceUtil.generateScans(startCal, endCal, columns, 100, false);
    MultiScanTableMapReduceUtil.initMultiScanTableMapperJob(TABLE_NAME_CRASH_REPORTS, scans, mapperClass,
            keyOut, valueOut, job);

    if (combinerClass != null) {
        job.setCombinerClass(combinerClass);
    }

    if (reducerClass != null) {
        job.setReducerClass(reducerClass);
    } else {
        job.setNumReduceTasks(0);
    }

    FileOutputFormat.setOutputPath(job, outputPath);

    return job;
}

From source file:com.mvdb.platform.action.VersionMerge.java

License:Apache License

public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");
    ActionUtils.setUpInitFileProperty();
    //        LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory();
    //        StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // Also add lastMergedTimeStamp, mergeUptoTimestamp, and the passive db name, which would be mv1 or mv2.
    if (otherArgs.length != 3) {
        System.err.println("Usage: versionmerge <customer-directory>");
        System.exit(2);
    }
    //Example: file:/home/umesh/.mvdb/etl/data/alpha
    //Example: hdfs://localhost:9000/data/alpha
    String customerDirectory = otherArgs[0];
    String lastMergedDirName = otherArgs[1];
    String lastCopiedDirName = otherArgs[2];

    org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration();
    //conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf1);

    Path topPath = new Path(customerDirectory);

    //Clean scratch db
    Path passiveDbPath = new Path(topPath, "db/mv1");
    Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000));
    if (hdfsFileSystem.exists(tempDbPath)) {
        boolean success = hdfsFileSystem.delete(tempDbPath, true);
        if (success == false) {
            System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString()));
            System.exit(1);
        }
    }
    // The last three parameters are hardcoded; the nulls must be replaced later after changing the input parameters.
    Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null);
    Set<String> tableNameSet = new HashSet<String>();
    for (Path path : inputPaths) {
        tableNameSet.add(path.getName());
    }

    Job job = new Job(conf, "versionmerge");
    job.setJarByClass(VersionMerge.class);
    job.setMapperClass(VersionMergeMapper.class);
    job.setReducerClass(VersionMergeReducer.class);
    job.setMapOutputKeyClass(MergeKey.class);
    job.setMapOutputValueClass(BytesWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    String lastDirName = null;
    if (inputPaths != null && inputPaths.length > 1) {
        lastDirName = inputPaths[(inputPaths.length) - 2].getParent().getName();
    }
    for (Path inputPath : inputPaths) {
        FileInputFormat.addInputPath(job, inputPath);
    }
    FileOutputFormat.setOutputPath(job, tempDbPath);

    for (String table : tableNameSet) {
        if (table.endsWith(".dat") == false) {
            continue;
        }
        table = table.replaceAll("-", "");
        table = table.replaceAll(".dat", "");
        MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class,
                BytesWritable.class);
    }
    boolean success = job.waitForCompletion(true);
    System.out.println("Success:" + success);
    System.out.println(ManagementFactory.getRuntimeMXBean().getName());
    if (success && lastDirName != null) {
        ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(),
                ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName);
    }
    //hdfsFileSystem.delete(passiveDbPath, true);
    //hdfsFileSystem.rename(tempDbPath, passiveDbPath);
    System.exit(success ? 0 : 1);
}

From source file:com.mvdb.platform.scratch.action.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    logger.error("error1");
    logger.warn("warning1");
    logger.info("info1");
    logger.debug("debug1");
    logger.trace("trace1");

    ILoggerFactory lc = LoggerFactory.getILoggerFactory();
    System.err.println("lc:" + lc);
    // print logback's internal status
    //StatusPrinter.print(lc);

    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs != null) {
        for (String arg : otherArgs) {
            System.out.println(arg);
        }
    }
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.mycompany.hadooptrain.WordCount.java

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    Path inputPath = new Path(args[0]);
    Path outputDir = new Path(args[1]);

    // Create configuration
    Configuration conf = new Configuration(true);

    // Create job
    Job job = new Job(conf, "WordCount");
    job.setJarByClass(WordCountMapper.class);

    // Setup MapReduce
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(TextInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    FileSystem hdfs = FileSystem.get(conf);
    if (hdfs.exists(outputDir))
        hdfs.delete(outputDir, true);

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    System.exit(code);

}

From source file:com.mycompany.keywordsearch.KeywordSearch.java

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set(FileInputFormat.INPUT_DIR_RECURSIVE, String.valueOf(true));
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    System.out.print("Keyword:\t");
    conf.set(KEYWORD, in.readLine());
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(KeywordSearch.class);
    job.setInputFormatClass(TextInputFormatV2.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    clearOutput(conf, output);
    FileInputFormat.addInputPath(job, input);
    FileOutputFormat.setOutputPath(job, output);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.mycompany.maprpractice.runnerClass.WordCount.java

public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName());
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }

    Job job = new org.apache.hadoop.mapreduce.Job();
    job.setJarByClass(WordCount.class);
    job.setJobName("WordCounter");

    String inputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData\\wordCount.txt";
    String outputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData";

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    //      FileInputFormat.addInputPath(job, new Path(args[0]));
    //      FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //   
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);

    int returnValue = job.waitForCompletion(true) ? 0 : 1;
    System.out.println("job.isSuccessful " + job.isSuccessful());
    return returnValue;
}