Example usage for org.apache.hadoop.mapreduce Job setCombinerClass

List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job#setCombinerClass.

Prototype

public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException 

Document

Set the combiner class for the job.
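
Before the collected examples, here is a minimal, self-contained sketch modeled on the standard Hadoop word-count program; the class names are illustrative and are not taken from the usage examples below. It shows the common pattern of reusing the Reducer as the combiner: the combiner runs on map output before the shuffle and may be applied zero or more times, so this reuse is only safe when the reduce operation is commutative and associative and its input/output key/value types match the map output types. Also note that setCombinerClass throws IllegalStateException if called after the job has been submitted.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {

    // Emits <word, 1> for every token in the input line.
    public static class TokenizerMapper extends Mapper<Object, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE); // map output: <Text, IntWritable>
            }
        }
    }

    // Sums the counts for a word; summation is commutative and associative,
    // so the same class can serve as both combiner and reducer.
    public static class IntSumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        private final IntWritable result = new IntWritable();

        @Override
        public void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            result.set(sum);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Word Count");
        job.setJarByClass(WordCountDriver.class);

        job.setMapperClass(TokenizerMapper.class);
        // Combiner input/output types must match the map output types.
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}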

Usage

From source file: Assignment2_P5_IPAddressCount.IPAddressDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address Count");
    job.setJarByClass(IPAddressDriver.class);
    job.setMapperClass(IPAddress_Mapper.class);
    job.setCombinerClass(IPAddress_Reducer.class);
    job.setReducerClass(IPAddress_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: Assignment3_P2_MergeStockAverageCount.StockPriceMergeDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();

    // local file system handle
    FileSystem local = FileSystem.getLocal(conf);

    // hdfs file system handle
    FileSystem hdfs = FileSystem.get(conf);

    // local input directory
    Path inputDir = new Path(args[0]);

    // HDFS destination path (local files are merged into this file, which becomes the job input)
    Path inputDir1 = new Path(args[1]);

    // local input files in local dir
    FileStatus[] inputFiles = local.listStatus(inputDir);

    // o/p stream
    FSDataOutputStream out = hdfs.create(inputDir1);

    // open each file and extract contents of file
    for (int i = 0; i < inputFiles.length; i++) {
        System.out.println("File name ----------------------------------------------------------------> "
                + inputFiles[i].getPath().getName());
        FSDataInputStream in = local.open(inputFiles[i].getPath());
        byte buffer[] = new byte[256];
        int bytesRead = 0;

        // extract all contents of file
        while ((bytesRead = in.read(buffer)) > 0) {
            out.write(buffer, 0, bytesRead);
        }

        // close input stream
        in.close();
    }

    Job job = Job.getInstance(conf, "Average Stock Price");
    job.setJarByClass(StockPriceMergeDriver.class);
    job.setMapperClass(StockPriceMerge_Mapper.class);
    job.setCombinerClass(StockPriceMerge_Reducer.class);
    job.setReducerClass(StockPriceMerge_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FloatWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[1])); // the merged file created above is the mapper's input
    FileOutputFormat.setOutputPath(job, new Path(args[2]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: Assignment3_P5_Top25Movies.Top25MovieRatingDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job1 = Job.getInstance(conf, "Movie Rating Count");
    job1.setJarByClass(Top25MovieRatingDriver.class);

    // standard setup: mapper, combiner, reducer
    job1.setMapperClass(Top25MovieRating_Mapper.class);
    job1.setCombinerClass(Top25MovieRating_Reducer.class);
    job1.setReducerClass(Top25MovieRating_Reducer.class);

    // output: movieId -> average rating
    job1.setOutputKeyClass(IntWritable.class);
    job1.setOutputValueClass(FloatWritable.class);

    FileInputFormat.addInputPath(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));
    boolean complete = job1.waitForCompletion(true);

    // here's where we sort
    Configuration conf2 = new Configuration();
    Job job2 = Job.getInstance(conf2, "Movie Rating Count");
    if (complete) {
        job2.setJarByClass(Top25MovieRatingDriver.class);

        // essentially a pass-through mapper
        job2.setMapperClass(Top25MovieRating_Mapper1.class);
        job2.setMapOutputKeyClass(FloatWritable.class);
        job2.setMapOutputValueClass(IntWritable.class);

        // this is where we sort in descending order
        job2.setSortComparatorClass(Top25MovieRating_SortComparator.class);

        // output the top 25
        job2.setNumReduceTasks(1);
        job2.setReducerClass(Top25MovieRating_Reducer1.class);
        job2.setOutputKeyClass(FloatWritable.class);
        job2.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job2, new Path(args[1]));
        FileOutputFormat.setOutputPath(job2, new Path(args[2]));
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}

From source file: Assignment4_P2_StockAverageWithCombiner.StockAverageDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Average Stock Price");
    job.setJarByClass(StockAverageDriver.class);

    job.setMapperClass(StockAverage_Mapper.class);
    job.setCombinerClass(StockAverage_Combiner.class);
    job.setReducerClass(StockAverage_Reducer.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(StockAverage_CompositeValueWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: Assignment4_P4_MemoryConscious.MovieRatingMemConsciousDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Movie Rating Mem Conscious Standard Deviation");
    job.setJarByClass(MovieRatingMemConsciousDriver.class);

    job.setMapperClass(MovieRatingMemConscious_Mapper.class);
    job.setCombinerClass(MovingRatingMemConscious_Combiner.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(SortedMapWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(MovieRatingMemConscious_Reducer.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: Assignment5_P2_DistinctIPAddress.DistinctIPAddressDriver.java

/**
 * @param args the command line arguments
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "IP Address Count");
    job.setJarByClass(DistinctIPAddressDriver.class);
    job.setMapperClass(DistinctIPAddress_Mapper.class);
    job.setCombinerClass(DistinctIPAddress_Reducer.class);
    job.setReducerClass(DistinctIPAddress_Reducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file: averagerating_youtube.AverageRating_Youtube.java

/**
 * @param args the command line arguments
 */
@Override
public int run(String[] args) throws Exception {
    Job job = new Job(getConf());
    job.setJobName("AverageRating_Youtube");

    job.setJarByClass(AverageRating_Youtube.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(AvgRating_CommCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageRating_CommentCountTuple.class);
    job.setCombinerClass(AvgRating_CommCountCombiner.class);
    job.setReducerClass(AvgRating_CommCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(AverageRating_CommentCountTuple.class);
    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}

From source file: Business.MapReduceOne.java

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, "FirstJob");
    job.setJarByClass(MapReduceOne.class);

    final File f = new File(MapReduceOne.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/inFiles/";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outFiles/OutputOne";
    //use the arguments instead if provided.
    if (args.length > 1) {
        inFiles = args[1];
        outFiles = args[2];
    }
    Path in = new Path(inFiles);
    Path out = new Path(outFiles);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(Mapper1.class);
    job.setCombinerClass(Reducer1.class);
    job.setReducerClass(Reducer1.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}

From source file: byte_import.HexastoreBulkImport.java

License: Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);

    } catch (IOException e2) {
        e2.printStackTrace();
    }

    return job;
}

From source file: ca.uwaterloo.cs.bigdata2017w.assignment0.PerfectX.java

License: Apache License

/**
 * Runs this tool.
 */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + PerfectX.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(PerfectX.class.getSimpleName());
    job.setJarByClass(PerfectX.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}