Example usage for org.apache.hadoop.mapreduce Job setMapperClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException 

Document

Set the Mapper for the job.
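
Before the project-specific examples below, here is a minimal driver sketch showing where setMapperClass fits in an ordinary Hadoop job setup. It mirrors the WordCount example at the end of this page; TokenizerMapper and IntSumReducer are assumed to be user-defined Mapper and Reducer implementations available on the job classpath.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCountDriver.class);

        // The mapper must be set before the job is submitted; setMapperClass
        // throws IllegalStateException once the job is already running.
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}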

Usage

From source file: co.cask.cdap.examples.loganalysis.HitCounterProgram.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(Emitter.class);
    job.setReducerClass(Counter.class);

    context.addInput(Input.ofStream(LogAnalysisApp.LOG_STREAM));
    context.addOutput(Output.ofDataset(LogAnalysisApp.HIT_COUNT_STORE));
}

From source file: co.cask.cdap.examples.sportresults.ScoreCounter.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(ResultsMapper.class);
    job.setReducerClass(TeamCounter.class);
    job.setNumReduceTasks(1);

    String league = context.getRuntimeArguments().get("league");
    Preconditions.checkNotNull(league);

    // Configure the input to read all seasons for the league
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs,
            PartitionFilter.builder().addValueCondition("league", league).build());
    context.addInput(Input.ofDataset("results", inputArgs));

    // Each run writes its output to a partition for the league
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionKey outputKey = PartitionKey.builder().addStringField("league", league).build();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
    context.addOutput(Output.ofDataset("totals", outputArgs));

    // used only for logging:
    PartitionedFileSet input = context.getDataset("results", inputArgs);
    PartitionedFileSet outputFileSet = context.getDataset("totals", outputArgs);
    String outputPath = FileSetArguments
            .getOutputPath(outputFileSet.getEmbeddedFileSet().getRuntimeArguments());
    LOG.info("input: {}, output: {}", input.getEmbeddedFileSet().getInputLocations(), outputPath);
}

From source file: co.cask.cdap.examples.streamconversion.StreamConversionMapReduce.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(StreamConversionMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, SCHEMA);

    // read 5 minutes of events from the stream, ending at the logical start time of this run
    long logicalTime = context.getLogicalStartTime();
    context.addInput(Input.ofStream("events", logicalTime - TimeUnit.MINUTES.toMillis(5), logicalTime));

    // each run writes its output to a partition with the logical start time.
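    // dsArguments is assumed to be a Map<String, String> field of the enclosing class, initialized elsewhere (not shown in this excerpt)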
    TimePartitionedFileSetArguments.setOutputPartitionTime(dsArguments, logicalTime);
    context.addOutput(Output.ofDataset("converted", dsArguments));

    TimePartitionedFileSet partitionedFileSet = context.getDataset("converted", dsArguments);
    LOG.info("Output location for new partition is: {}",
            partitionedFileSet.getEmbeddedFileSet().getOutputLocation());
}

From source file: co.cask.cdap.examples.wikipedia.StreamToDataset.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setNumReduceTasks(0);
    WorkflowToken workflowToken = context.getWorkflowToken();
    Class<? extends Mapper> mapper = PageTitleToDatasetMapper.class;
    String inputStream = WikipediaPipelineApp.PAGE_TITLES_STREAM;
    String outputDataset = WikipediaPipelineApp.PAGE_TITLES_DATASET;
    if (workflowToken != null) {
        Value likesToDatasetResult = workflowToken.get("result", WikipediaPipelineApp.LIKES_TO_DATASET_MR_NAME);
        if (likesToDatasetResult != null && likesToDatasetResult.getAsBoolean()) {
            // The "likes" stream to the dataset has already run and has been successful in this run so far.
            // Now run raw wikipedia stream to dataset.
            mapper = RawWikiDataToDatasetMapper.class;
            inputStream = WikipediaPipelineApp.RAW_WIKIPEDIA_STREAM;
            outputDataset = WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET;
        }
    }
    LOG.info("Using '{}' as the input stream and '{}' as the output dataset.", inputStream, outputDataset);
    job.setMapperClass(mapper);
    StreamBatchReadable.useStreamInput(context, inputStream);
    context.addOutput(outputDataset);
}

From source file: co.cask.cdap.examples.wikipedia.TopNMapReduce.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Map<String, String> runtimeArguments = context.getRuntimeArguments();
    Job job = context.getHadoopJob();
    WorkflowToken workflowToken = context.getWorkflowToken();
    int topNRank = 10;
    if (runtimeArguments.containsKey("topn.rank")) {
        topNRank = Integer.parseInt(runtimeArguments.get("topn.rank"));
    }
    if (workflowToken != null) {
        workflowToken.put("topn.rank", Value.of(topNRank));
    }
    int numReduceTasks = 1;
    if (runtimeArguments.containsKey("num.reduce.tasks")) {
        numReduceTasks = Integer.parseInt(runtimeArguments.get("num.reduce.tasks"));
    }
    job.setNumReduceTasks(numReduceTasks);
    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(TopNReducer.class);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.MAPREDUCE_TOPN_OUTPUT));
}

From source file: co.cask.cdap.examples.wikipedia.WikiContentValidatorAndNormalizer.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(FilterNormalizerMapper.class);
    job.setNumReduceTasks(0);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET));
}

From source file: co.cask.cdap.examples.wikipedia.WikipediaDataDownloader.java

License: Apache License

@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(WikipediaDataDownloaderMapper.class);
    job.setNumReduceTasks(0);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.PAGE_TITLES_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET));
}

From source file: co.cask.cdap.internal.app.runtime.batch.AggregateMetricsByTag.java

License: Apache License

static void configureJob(Job job) throws IOException {
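    // Map and Reduce here are assumed to be the Mapper/Reducer implementations nested in AggregateMetricsByTag, not java.util types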
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(Reduce.class);
}

From source file: co.cask.cdap.internal.app.runtime.batch.MapperWrapper.java

License: Apache License

/**
 * Wraps the mapper defined in the job with this {@link MapperWrapper} if it is defined.
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getMapperClass() because we don't need to load the user's class here
    Configuration conf = job.getConfiguration();
    String mapClass = conf.get(MRJobConfig.MAP_CLASS_ATTR, Mapper.class.getName());
    conf.set(MapperWrapper.ATTR_MAPPER_CLASS, mapClass);
    job.setMapperClass(MapperWrapper.class);
}

From source file: co.cask.cdap.internal.app.runtime.batch.WordCount.java

License: Apache License

public static void configureJob(Job job, String inputPath, String outputPath) throws IOException {
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}