co.cask.cdap.internal.app.runtime.batch.AppWithMapReduceUsingFileSet.java Source code


Introduction

Here is the source code for co.cask.cdap.internal.app.runtime.batch.AppWithMapReduceUsingFileSet.java. The application creates one or two FileSet datasets and a MapReduce program that sums the numbers in the input file set, one per line, and writes a single line of the form x:<sum> to the output file set.

Source

/*
 * Copyright © 2014-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.internal.app.runtime.batch;

import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.lib.FileSetProperties;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import java.io.IOException;

/**
 * Application used to test that MapReduce programs can read from and write to file set datasets.
 */
public class AppWithMapReduceUsingFileSet extends AbstractApplication {

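    // dataset names are supplied externally (e.g., by a test harness) through system properties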
    public static String inputDataset = System.getProperty("INPUT_DATASET_NAME");
    public static String outputDataset = System.getProperty("OUTPUT_DATASET_NAME");

    @Override
    public void configure() {
        setName("AppWithMapReduceUsingFile");
        setDescription("Application with MapReduce job using file as dataset");
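        // note: SEPERATOR is the actual (misspelled) name of Hadoop's TextOutputFormat separator constant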
        createDataset(inputDataset, "fileSet",
                FileSetProperties.builder().setInputFormat(TextInputFormat.class)
                        .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                        .build());
        if (!outputDataset.equals(inputDataset)) {
            createDataset(outputDataset, "fileSet",
                    FileSetProperties.builder().setBasePath("foo/my-file-output")
                            .setInputFormat(TextInputFormat.class).setOutputFormat(TextOutputFormat.class)
                            .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build());
        }
        addMapReduce(new ComputeSum());
    }

    /**
     * A MapReduce program that computes the sum of the numbers in the input file set
     * and writes the total to the output file set.
     */
    public static final class ComputeSum extends AbstractMapReduce {

        @Override
        public void beforeSubmit(MapReduceContext context) throws Exception {
            Job job = context.getHadoopJob();
            job.setReducerClass(FileReducer.class);

            // the mapper class can be set through the CDAP API instead of directly on the Hadoop job
            context.addInput(Input.ofDataset(inputDataset), FileMapper.class);
            context.addOutput(Output.ofDataset(outputDataset));
        }
    }

    public static class FileMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
        public static final String ONLY_KEY = "x";

        @Override
        public void map(LongWritable key, Text data, Context context) throws IOException, InterruptedException {
            // each input line is a single number; emit it under one constant key so the reducer computes a global sum
            context.write(new Text(ONLY_KEY), new LongWritable(Long.parseLong(data.toString())));
        }
    }

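    // TextOutputFormat writes non-Text keys and values using their toString() representations,
    // which is why the reducer can emit String keys and Long values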
    public static class FileReducer extends Reducer<Text, LongWritable, String, Long> {
        @Override
        public void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0L;
            for (LongWritable value : values) {
                sum += value.get();
            }
            context.write(key.toString(), sum);
        }
    }

}
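
Example

The application reads its dataset names from system properties, so both INPUT_DATASET_NAME and OUTPUT_DATASET_NAME must be set before the application class is instantiated. Below is a minimal sketch of a unit test that drives the program end to end, assuming the CDAP test framework of the same era (TestBase, ApplicationManager, MapReduceManager, DataSetManager); the dataset names "mrInput" and "mrOutput", the file name "part1", and the test class name are illustrative, and waitForFinish follows the CDAP 3.x test API, which may differ in other releases.

import java.io.PrintStream;
import java.util.concurrent.TimeUnit;

import co.cask.cdap.api.dataset.lib.FileSet;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.DataSetManager;
import co.cask.cdap.test.MapReduceManager;
import co.cask.cdap.test.TestBase;
import org.junit.Test;

public class AppWithMapReduceUsingFileSetTest extends TestBase {

    @Test
    public void testComputeSum() throws Exception {
        // the app reads its dataset names from system properties, so set them
        // before the application class is instantiated (names are arbitrary)
        System.setProperty("INPUT_DATASET_NAME", "mrInput");
        System.setProperty("OUTPUT_DATASET_NAME", "mrOutput");

        ApplicationManager appManager = deployApplication(AppWithMapReduceUsingFileSet.class);

        // write a few numbers, one per line, into a file of the input FileSet
        DataSetManager<FileSet> input = getDataset("mrInput");
        try (PrintStream out = new PrintStream(
                input.get().getLocation("part1").getOutputStream(), true, "UTF-8")) {
            out.println("17");
            out.println("25");
        }

        // run the MapReduce program and wait for it to complete
        MapReduceManager mrManager = appManager.getMapReduceManager("ComputeSum");
        mrManager.start();
        mrManager.waitForFinish(5, TimeUnit.MINUTES);

        // with the ":" separator configured above, the output FileSet
        // should now contain the single line "x:42"
    }
}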