binningbycategories.BinningbyCategories.java Source code

Java tutorial

Introduction

Here is the source code for binningbycategories.BinningbyCategories.java

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package binningbycategories;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

/**
 * Map-only Hadoop job that "bins" comma-separated text records into separate
 * output files according to the category found in the fourth field of each
 * record.
 *
 * <p>Every input line is written unchanged to the named output
 * {@code "bins"}, using the matched category name as the base output path, so
 * that each category ends up in its own set of files under the job output
 * directory.
 *
 * @author Rajat
 */
public class BinningbyCategories {

    /** Name of the {@link MultipleOutputs} named output shared by the mapper and the driver. */
    private static final String NAMED_OUTPUT = "bins";

    /**
     * Mapper that inspects the category field of each record and forwards the
     * whole record to the bin belonging to the first matching category.
     */
    public static class YouTubeBinMapper extends Mapper<Object, Text, Text, NullWritable> {

        /** Zero-based index of the comma-separated field that holds the category. */
        private static final int CATEGORY_FIELD = 3;

        /**
         * Base output path used when no known category matches.
         * The spelling is kept as-is because it determines the output file name.
         */
        private static final String FALLBACK_BIN = "UnCatogrized";

        /**
         * Ordered lookup table of {@code {substring to look for, bin name}} pairs.
         * The first entry whose substring occurs in the category field wins, so
         * the order of the entries is significant.
         */
        private static final String[][] BINS = {
            // NOTE(review): the trailing space in "UNA " looks deliberate but
            // could be a typo - confirm against the input data.
            {"UNA ", "UNA"},
            {"Autos & Vehicles", "Autos & Vehicles"},
            {"Comedy", "Comedy"},
            {"Entertainment", "Entertainment"},
            {"Film & Animation", "Film & Animation"},
            {"Gadgets & Games", "Gadgets & Games"},
            {"Howto & DIY", "Howto & DIY"},
            {"Music", "Music"},
            {"News & Politics", "News & Politics"},
            {"People & Blogs", "People & Blogs"},
            {"Pets & Animals", "Pets & Animals"},
            {"Sports", "Sports"},
            {"Travel & Places", "Travel & Places"},
        };

        private MultipleOutputs<Text, NullWritable> mos = null;

        /**
         * Creates the {@link MultipleOutputs} helper used to write to the bins.
         *
         * @param context the task context supplied by the framework
         * @throws IOException declared by the overridden method
         * @throws InterruptedException declared by the overridden method
         */
        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            mos = new MultipleOutputs<Text, NullWritable>(context);
        }

        /**
         * Writes the input record, unchanged, to the bin selected by its
         * category field.
         *
         * <p>Records that have fewer than four comma-separated fields (note that
         * {@link String#split(String)} drops trailing empty fields) have no
         * category and are written to the fallback bin instead of failing the
         * task with an {@link ArrayIndexOutOfBoundsException}.
         *
         * @param key the input key; not used
         * @param value one line of comma-separated input
         * @param context the task context; output goes through
         *                {@link MultipleOutputs} rather than directly through it
         * @throws IOException if writing the record fails
         * @throws InterruptedException if writing the record is interrupted
         */
        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            String[] fields = value.toString().split(",");
            String category = fields.length > CATEGORY_FIELD ? fields[CATEGORY_FIELD] : "";
            mos.write(NAMED_OUTPUT, value, NullWritable.get(), binFor(category));
        }

        /**
         * Closes the {@link MultipleOutputs} helper.
         *
         * @param context the task context supplied by the framework
         * @throws IOException if closing the outputs fails
         * @throws InterruptedException if closing the outputs is interrupted
         */
        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            mos.close();
        }

        /**
         * Returns the base output path for the given category text: the bin of
         * the first table entry whose substring is contained in
         * {@code category}, or the fallback bin when none matches.
         */
        private static String binFor(String category) {
            for (String[] bin : BINS) {
                if (category.contains(bin[0])) {
                    return bin[1];
                }
            }
            return FALLBACK_BIN;
        }
    }

    /**
     * Configures and runs the binning job, then exits the JVM with status
     * {@code 0} if the job succeeded and {@code 2} otherwise.
     *
     * @param args the command line arguments: {@code args[0]} is the input
     *             path and {@code args[1]} is the output path
     * @throws java.lang.Exception if the job cannot be configured or run
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: BinningbyCategories <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
        Job job = Job.getInstance(conf, "Binning");
        job.setJarByClass(BinningbyCategories.class);
        job.setMapperClass(YouTubeBinMapper.class);
        // Map-only job: the mapper writes straight to the bins.
        job.setNumReduceTasks(0);

        TextInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Configure the MultipleOutputs by adding an output called "bins"
        // with the proper output format and mapper key/value types.
        MultipleOutputs.addNamedOutput(job, NAMED_OUTPUT, TextOutputFormat.class, Text.class, NullWritable.class);

        // Enable the counters for the job.
        // If there is a significant number of different named outputs, this
        // should be disabled.
        MultipleOutputs.setCountersEnabled(job, true);

        System.exit(job.waitForCompletion(true) ? 0 : 2);
    }

}