Usage examples for org.apache.hadoop.mapred.JobConf.setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
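Before the project examples below, here is a minimal, self-contained sketch of where setReducerClass fits in an old-API (org.apache.hadoop.mapred) job. It is an illustrative word count under assumed class names (WordCountJob, WordCountMap, WordCountReduce are hypothetical, not taken from any source file below):

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class WordCountJob {

    // Illustrative mapper: emits (word, 1) for each token in a line.
    public static class WordCountMap extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
                Reporter reporter) throws IOException {
            for (String token : value.toString().split("\\s+")) {
                if (token.isEmpty())
                    continue;
                word.set(token);
                output.collect(word, ONE);
            }
        }
    }

    // Illustrative reducer: sums the counts emitted for each word.
    public static class WordCountReduce extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            output.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountJob.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(WordCountMap.class);
        // setReducerClass registers the Reducer implementation for this job;
        // if it is never called, the framework runs the identity reducer.
        conf.setReducerClass(WordCountReduce.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}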
From source file:edu.umn.cs.spatialHadoop.temporal.RepartitionTemporal.java
License:Apache License
public static void repartitionMapReduce(Path[] inputPaths, Path outputPath, Shape stockShape, long blockSize,
        CellInfo[] cellInfos, String sindex, boolean overwrite) throws IOException {
    JobConf job = new JobConf(Repartition.class);
    job.setJobName("RepartitionTemporal");

    FileSystem outFs = outputPath.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(outputPath)) {
        if (overwrite)
            outFs.delete(outputPath, true);
        else
            throw new RuntimeException(
                    "Output file '" + outputPath + "' already exists and overwrite flag is not set");
    }

    // Decide which map function to use depending on the type of global index
    if (sindex.equals("rtree") || sindex.equals("str")) {
        // Repartition without replication
        job.setMapperClass(RepartitionMapNoReplication.class);
    } else {
        // Repartition with replication (grid and r+tree)
        job.setMapperClass(RepartitionMap.class);
    }
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(stockShape.getClass());

    CombinedSpatialInputFormat.setInputPaths(job, inputPaths);
    job.setInputFormat(CombinedSpatialInputFormat.class);

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    FileOutputFormat.setOutputPath(job, outputPath);
    if (sindex.equals("grid") || sindex.equals("str") || sindex.equals("str+")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else if (sindex.equals("rtree") || sindex.equals("r+tree")) {
        // For now, the two types of local index are the same
        job.setOutputFormat(RTreeGridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cellInfos);
    job.setBoolean(SpatialSite.OVERWRITE, overwrite);

    // Set reduce function
    job.setReducerClass(RepartitionReduce.class);
    job.setNumReduceTasks(
            Math.max(1, Math.min(cellInfos.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));

    // Set output committer that combines output files together
    job.setOutputCommitter(RepartitionOutputCommitter.class);

    if (blockSize != 0) {
        job.setLong("dfs.block.size", blockSize);
        job.setLong("fs.local.block.size", blockSize);
    }

    JobClient.runJob(job);
}
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_db_large"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*from w w w .j av a2s .co m*/ // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT sourceIP, SUM(adRevenue) AS sumAdRevenue " + "FROM UserVisits GROUP BY sourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskLargeHDFS.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws IOException {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("aggregation_hdfs_large");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(AggTaskLargeHDFS.Map.class);
    conf.setCombinerClass(AggTaskLargeHDFS.Reduce.class);
    conf.setReducerClass(AggTaskLargeHDFS.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 2) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    return conf;
}
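A design note on the example above: it registers the same Reduce class as both combiner and reducer (setCombinerClass and setReducerClass). That reuse is safe here because summing revenue per key is associative and commutative, and the reducer's input and output key/value types match, which the combiner contract requires.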
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws Exception { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_db_small"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(Map.class); conf.setReducerClass(Reduce.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }// w ww. jav a2 s . co m // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "UserVisits"); conf.set(DBConst.DB_RECORD_READER, AggUserVisitsRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT SUBSTRING(sourceIP, 1, 7) AS subSourceIP, SUM(adRevenue) AS sumAdRevenue FROM UserVisits GROUP BY subSourceIP;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.AggTaskSmallHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("aggregation_hdfs_small"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(DoubleWritable.class); conf.setMapperClass(AggTaskSmallHDFS.Map.class); conf.setCombinerClass(AggTaskSmallHDFS.Reduce.class); conf.setReducerClass(AggTaskSmallHDFS.Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/*from ww w . j av a 2s. co m*/ FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskDB.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(JoinTaskDB.class);
    conf.setJobName("join_db");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);
    conf.setNumReduceTasks(1); // Because we look for 1 TOP value

    // join arguments
    conf.setOutputFormat(TextOutputFormat.class);
    for (int i = 0; i < args.length; ++i) {
        if ("-date_l".equals(args[i]))
            conf.set("date_l", args[++i]);
        else if ("-date_u".equals(args[i]))
            conf.set("date_u", args[++i]);
        else if ("-output".equals(args[i]))
            conf.set("output", args[++i]);
    }

    // OUTPUT properties
    Path outputPath = new Path(conf.get("output"));
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.set(DBConst.DB_RELATION_ID, "UserVisits");
    conf.set(DBConst.DB_RECORD_READER, JoinRecord.class.getName());

    String TABLE_R = "Rankings";
    String TABLE_UV = "UserVisits";
    conf.set(DBConst.DB_SQL_QUERY,
            "SELECT sourceIP, SUM(pageRank) as sumPageRank, COUNT(pageRank) as countPageRank, SUM(adRevenue) as totalRevenue "
                    + "FROM " + TABLE_R + " AS R, " + TABLE_UV + " AS UV "
                    + "WHERE R.pageURL = UV.destURL "
                    + "AND UV.visitDate BETWEEN '" + conf.get("date_l") + "' AND '" + conf.get("date_u") + "' "
                    + "GROUP BY UV.sourceIP;");

    return conf;
}
From source file:edu.yale.cs.hadoopdb.benchmark.JoinTaskHDFS.java
License:Apache License
public int run(String[] args) throws Exception {
    long startTime = System.currentTimeMillis();

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    String dateFrom = args[0];
    String dateTo = args[1];
    String rankingsInputDir = args[2];
    String userVisitsInputDir = args[3];
    String outputDir = args[4];

    // output path (delete)
    Path outputPath = new Path(outputDir);
    HDFSUtil.deletePath(outputPath);

    // phase 1
    JobConf conf1 = new JobConf(this.getClass());
    conf1.setJobName("join_hdfs_phase1");

    Path p1Output = new Path(outputDir + "/phase1");
    FileOutputFormat.setOutputPath(conf1, p1Output);

    conf1.setInputFormat(TextInputFormat.class);
    conf1.setOutputFormat(TextOutputFormat.class);
    conf1.setOutputKeyClass(Text.class);
    conf1.setOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(conf1, new Path(rankingsInputDir), new Path(userVisitsInputDir));
    conf1.set(DATE_FROM_PARAM, dateFrom);
    conf1.set(DATE_TO_PARAM, dateTo);

    conf1.setMapperClass(Phase1Map.class);
    conf1.setReducerClass(Phase1Reduce.class);
    // conf1.setPartitionerClass(theClass)

    RunningJob job1 = JobClient.runJob(conf1);

    if (job1.isSuccessful()) {
        // phase 2
        JobConf conf2 = new JobConf(this.getClass());
        conf2.setJobName("join_hdfs_phase2");

        conf2.setInputFormat(KeyValueTextInputFormat.class);
        conf2.setOutputFormat(TextOutputFormat.class);
        conf2.setOutputKeyClass(Text.class);
        conf2.setOutputValueClass(Text.class);

        conf2.setMapperClass(IdentityMapper.class);
        conf2.setReducerClass(Phase2Reduce.class);

        Path p2Output = new Path(outputDir + "/phase2");
        FileOutputFormat.setOutputPath(conf2, p2Output);
        FileInputFormat.setInputPaths(conf2, p1Output);

        RunningJob job2 = JobClient.runJob(conf2);

        if (job2.isSuccessful()) {
            // phase 3
            JobConf conf3 = new JobConf(this.getClass());
            conf3.setJobName("join_hdfs_phase3");
            conf3.setNumReduceTasks(1);

            conf3.setInputFormat(KeyValueTextInputFormat.class);
            conf3.setOutputKeyClass(Text.class);
            conf3.setOutputValueClass(Text.class);

            conf3.setMapperClass(IdentityMapper.class);
            conf3.setReducerClass(Phase3Reduce.class);

            Path p3Output = new Path(outputDir + "/phase3");
            FileOutputFormat.setOutputPath(conf3, p3Output);
            FileInputFormat.setInputPaths(conf3, p2Output);

            RunningJob job3 = JobClient.runJob(conf3);
            if (!job3.isSuccessful()) {
                System.out.println("PHASE 3 FAILED!!!");
            }
        } else {
            System.out.println("PHASE 2 FAILED!!!");
        }
    } else {
        System.out.println("PHASE 1 FAILED!!!");
    }

    long endTime = System.currentTimeMillis();
    System.out.println("\nJOB TIME : " + (endTime - startTime) + " ms.\n");
    return 0;
}
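A note on the job chaining above: each phase writes plain text via TextOutputFormat (key, tab, value per line), and the next phase reads that output back with KeyValueTextInputFormat, which splits each line at the first tab into a Text key and a Text value. That is why IdentityMapper can pass phase-1 and phase-2 records straight through to the next reducer, where a new setReducerClass call installs the next stage of the join.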
From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskDB.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("udf_agg_db"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(LongWritable.class); conf.setMapperClass(Query4Map.class); conf.setCombinerClass(LongSumReducer.class); conf.setReducerClass(LongSumReducer.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 1) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }/* w w w.j a v a 2 s . c om*/ // OUTPUT properties Path outputPath = new Path(args[0]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); conf.set(DBConst.DB_RELATION_ID, "Documents"); conf.set(DBConst.DB_RECORD_READER, DocumentRecord.class.getName()); conf.set(DBConst.DB_SQL_QUERY, "SELECT url, contents FROM Documents;"); return conf; }
From source file:edu.yale.cs.hadoopdb.benchmark.UDFAggTaskHDFS.java
License:Apache License
@Override protected JobConf configureJob(String... args) throws IOException { JobConf conf = new JobConf(this.getClass()); conf.setJobName("udf_agg_hdfs"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(LongWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(LongSumReducer.class); conf.setReducerClass(LongSumReducer.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); if (args.length < 2) { throw new RuntimeException("Incorrect arguments provided for " + this.getClass()); }// ww w .j a va 2 s .co m FileInputFormat.setInputPaths(conf, new Path(args[0])); // OUTPUT properties Path outputPath = new Path(args[1]); HDFSUtil.deletePath(outputPath); FileOutputFormat.setOutputPath(conf, outputPath); return conf; }
From source file:edu.yale.cs.hadoopdb.dataloader.GlobalHasher.java
License:Apache License
@Override
protected JobConf configureJob(String... args) throws Exception {
    JobConf conf = new JobConf(getConf(), this.getClass());
    conf.setJobName("GlobalHasher");

    conf.setMapOutputKeyClass(UnsortableInt.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(NullWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(GlobalHasher.Map.class);
    conf.setReducerClass(GlobalHasher.Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    if (args.length < 5) {
        throw new RuntimeException("Incorrect arguments provided for " + this.getClass());
    }

    FileInputFormat.setInputPaths(conf, new Path(args[0]));

    // OUTPUT properties
    Path outputPath = new Path(args[1]);
    HDFSUtil.deletePath(outputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    int partNo = Integer.parseInt(args[2]);
    conf.setNumReduceTasks(partNo);

    conf.set(DELIMITER_PARAM, args[3]);

    int hashFieldPos = Integer.parseInt(args[4]);
    conf.setInt(HASH_FIELD_POS_PARAM, hashFieldPos);

    return conf;
}