Source code for reverseIndexer/reverseIndexer.java

Java tutorial

Introduction

Below is the full source code for reverseIndexer/reverseIndexer.java.

Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package reverseIndexer;

import countTheGivenWords.searchAndCountMapper;
import countTheGivenWords.searchAndCountReducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.ShuffleConsumerPlugin.Context;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.WordCount;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 *
 * @author madhatter
 */
public class reverseIndexer {

    /**
     * Configures and submits the reverse-index MapReduce job using the
     * classic {@code org.apache.hadoop.mapred} API.
     *
     * <p>Calling convention (unchanged from the original): {@code args[1]}
     * is the input path and {@code args[2]} is the output directory;
     * {@code args[0]} is not read by this method.
     *
     * @param args command-line arguments as described above
     */
    public static void startJob(String[] args) {

        try {
            // Guard the args[1]/args[2] accesses below; the original threw
            // ArrayIndexOutOfBoundsException (caught and printed) instead.
            if (args.length < 3) {
                System.err.println("Usage: reverseIndexer <unused> <input path> <output dir>");
                return;
            }

            // Anchor the job jar to this class rather than the unrelated
            // WordCount example class.
            JobConf conf = new JobConf(reverseIndexer.class);
            conf.setJobName("wordcount");

            conf.setOutputKeyClass(Text.class);
            conf.setOutputValueClass(IntWritable.class);

            // Use class literals directly instead of instantiating the
            // mapper/reducer and casting getClass() — the casts were
            // unchecked and the instances were immediately discarded.
            conf.setMapperClass(reverseIndexMapper.class);
            conf.setCombinerClass(reverseIndexReducer.class);
            conf.setReducerClass(reverseIndexReducer.class);

            conf.setInputFormat(TextInputFormat.class);
            conf.setOutputFormat(TextOutputFormat.class);

            FileInputFormat.setInputPaths(conf, new Path(args[1]));

            Path outputDir = new Path(args[2]);

            // Delete any stale output directory ONCE (the original deleted it
            // twice, via both Path.getFileSystem and FileSystem.get); Hadoop
            // fails the job if the output path already exists.
            outputDir.getFileSystem(conf).delete(outputDir, true);

            FileOutputFormat.setOutputPath(conf, outputDir);

            JobClient.runJob(conf);

        } catch (Exception exp) {
            // Best-effort reporting, matching the original behavior; a real
            // driver would propagate or log via a logging framework.
            exp.printStackTrace();
        }
    }
}