List of usage examples for org.apache.hadoop.mapreduce.Job.setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
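For orientation, here is a minimal sketch of a standalone Hadoop driver that calls setMapperClass directly. The class names SetMapperClassExample, WordMapper, and WordReducer are placeholders for illustration and do not appear in the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetMapperClassExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "set-mapper-class example");
        job.setJarByClass(SetMapperClassExample.class);
        // setMapperClass must be called before the job is submitted;
        // it throws IllegalStateException once the job state is no longer DEFINE.
        job.setMapperClass(WordMapper.class);    // hypothetical Mapper subclass
        job.setReducerClass(WordReducer.class);  // hypothetical Reducer subclass
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}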
From source file:co.cask.cdap.examples.loganalysis.HitCounterProgram.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(Emitter.class);
    job.setReducerClass(Counter.class);
    context.addInput(Input.ofStream(LogAnalysisApp.LOG_STREAM));
    context.addOutput(Output.ofDataset(LogAnalysisApp.HIT_COUNT_STORE));
}
From source file:co.cask.cdap.examples.sportresults.ScoreCounter.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(ResultsMapper.class);
    job.setReducerClass(TeamCounter.class);
    job.setNumReduceTasks(1);

    String league = context.getRuntimeArguments().get("league");
    Preconditions.checkNotNull(league);

    // Configure the input to read all seasons for the league
    Map<String, String> inputArgs = Maps.newHashMap();
    PartitionedFileSetArguments.setInputPartitionFilter(inputArgs,
            PartitionFilter.builder().addValueCondition("league", league).build());
    context.addInput(Input.ofDataset("results", inputArgs));

    // Each run writes its output to a partition for the league
    Map<String, String> outputArgs = Maps.newHashMap();
    PartitionKey outputKey = PartitionKey.builder().addStringField("league", league).build();
    PartitionedFileSetArguments.setOutputPartitionKey(outputArgs, outputKey);
    context.addOutput(Output.ofDataset("totals", outputArgs));

    // used only for logging:
    PartitionedFileSet input = context.getDataset("results", inputArgs);
    PartitionedFileSet outputFileSet = context.getDataset("totals", outputArgs);
    String outputPath = FileSetArguments
            .getOutputPath(outputFileSet.getEmbeddedFileSet().getRuntimeArguments());
    LOG.info("input: {}, output: {}", input.getEmbeddedFileSet().getInputLocations(), outputPath);
}
From source file:co.cask.cdap.examples.streamconversion.StreamConversionMapReduce.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(StreamConversionMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, SCHEMA);

    // read 5 minutes of events from the stream, ending at the logical start time of this run
    long logicalTime = context.getLogicalStartTime();
    context.addInput(Input.ofStream("events", logicalTime - TimeUnit.MINUTES.toMillis(5), logicalTime));

    // each run writes its output to a partition with the logical start time.
    TimePartitionedFileSetArguments.setOutputPartitionTime(dsArguments, logicalTime);
    context.addOutput(Output.ofDataset("converted", dsArguments));

    TimePartitionedFileSet partitionedFileSet = context.getDataset("converted", dsArguments);
    LOG.info("Output location for new partition is: {}",
            partitionedFileSet.getEmbeddedFileSet().getOutputLocation());
}
From source file:co.cask.cdap.examples.wikipedia.StreamToDataset.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setNumReduceTasks(0);

    WorkflowToken workflowToken = context.getWorkflowToken();
    Class<? extends Mapper> mapper = PageTitleToDatasetMapper.class;
    String inputStream = WikipediaPipelineApp.PAGE_TITLES_STREAM;
    String outputDataset = WikipediaPipelineApp.PAGE_TITLES_DATASET;
    if (workflowToken != null) {
        Value likesToDatasetResult = workflowToken.get("result",
                WikipediaPipelineApp.LIKES_TO_DATASET_MR_NAME);
        if (likesToDatasetResult != null && likesToDatasetResult.getAsBoolean()) {
            // The "likes" stream to the dataset has already run and has been successful in this run so far.
            // Now run raw wikipedia stream to dataset.
            mapper = RawWikiDataToDatasetMapper.class;
            inputStream = WikipediaPipelineApp.RAW_WIKIPEDIA_STREAM;
            outputDataset = WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET;
        }
    }
    LOG.info("Using '{}' as the input stream and '{}' as the output dataset.", inputStream, outputDataset);
    job.setMapperClass(mapper);
    StreamBatchReadable.useStreamInput(context, inputStream);
    context.addOutput(outputDataset);
}
From source file:co.cask.cdap.examples.wikipedia.TopNMapReduce.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Map<String, String> runtimeArguments = context.getRuntimeArguments();
    Job job = context.getHadoopJob();
    WorkflowToken workflowToken = context.getWorkflowToken();

    int topNRank = 10;
    if (runtimeArguments.containsKey("topn.rank")) {
        topNRank = Integer.parseInt(runtimeArguments.get("topn.rank"));
    }
    if (workflowToken != null) {
        workflowToken.put("topn.rank", Value.of(topNRank));
    }

    int numReduceTasks = 1;
    if (runtimeArguments.containsKey("num.reduce.tasks")) {
        numReduceTasks = Integer.parseInt(runtimeArguments.get("num.reduce.tasks"));
    }
    job.setNumReduceTasks(numReduceTasks);

    job.setMapperClass(TokenizerMapper.class);
    job.setReducerClass(TopNReducer.class);

    context.addInput(Input.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.MAPREDUCE_TOPN_OUTPUT));
}
From source file:co.cask.cdap.examples.wikipedia.WikiContentValidatorAndNormalizer.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(FilterNormalizerMapper.class);
    job.setNumReduceTasks(0);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.NORMALIZED_WIKIPEDIA_DATASET));
}
From source file:co.cask.cdap.examples.wikipedia.WikipediaDataDownloader.java
License:Apache License
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
    Job job = context.getHadoopJob();
    job.setMapperClass(WikipediaDataDownloaderMapper.class);
    job.setNumReduceTasks(0);
    context.addInput(Input.ofDataset(WikipediaPipelineApp.PAGE_TITLES_DATASET));
    context.addOutput(Output.ofDataset(WikipediaPipelineApp.RAW_WIKIPEDIA_DATASET));
}
From source file:co.cask.cdap.internal.app.runtime.batch.AggregateMetricsByTag.java
License:Apache License
static void configureJob(Job job) throws IOException {
    job.setMapperClass(Map.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setReducerClass(Reduce.class);
}
From source file:co.cask.cdap.internal.app.runtime.batch.MapperWrapper.java
License:Apache License
/**
 * Wraps the mapper defined in the job with this {@link MapperWrapper} if it is defined.
 * @param job The MapReduce job
 */
public static void wrap(Job job) {
    // NOTE: we don't use job.getMapperClass() as we don't need to load user class here
    Configuration conf = job.getConfiguration();
    String mapClass = conf.get(MRJobConfig.MAP_CLASS_ATTR, Mapper.class.getName());
    conf.set(MapperWrapper.ATTR_MAPPER_CLASS, mapClass);
    job.setMapperClass(MapperWrapper.class);
}
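The snippet above only records the original mapper class under the ATTR_MAPPER_CLASS key and installs the wrapper via setMapperClass. The delegation side is not shown; the following is a minimal sketch (an assumption, not CDAP's actual MapperWrapper code) of how a wrapper could recover and run the original mapper, reusing the same configuration key.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;

// Hypothetical delegating wrapper, sketched for illustration only.
public class DelegatingMapperSketch extends Mapper {
    @Override
    public void run(Context context) throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Recover the user's mapper class that wrap(Job) stored in the configuration
        // (assumes the ATTR_MAPPER_CLASS key is accessible here).
        Class<? extends Mapper> delegateClass =
                conf.getClass(MapperWrapper.ATTR_MAPPER_CLASS, Mapper.class, Mapper.class);
        Mapper delegate = ReflectionUtils.newInstance(delegateClass, conf);
        delegate.run(context);  // hand the context to the wrapped mapper
    }
}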
From source file:co.cask.cdap.internal.app.runtime.batch.WordCount.java
License:Apache License
public static void configureJob(Job job, String inputPath, String outputPath) throws IOException {
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
}