Example usage for org.apache.hadoop.mapred JobConf setMapperClass

List of usage examples for org.apache.hadoop.mapred JobConf setMapperClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapred JobConf setMapperClass.

Prototype

public void setMapperClass(Class<? extends Mapper> theClass) 

Source Link

Document

Set the Mapper class for the job.

Usage

From source file:edu.umd.cloud9.demo.DemoWordCountTuple2.java

License:Apache License

/**
 * Runs this tool.// w  ww  . j  a  v  a2s  . c  om
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int numMapTasks = Integer.parseInt(args[2]);
    int numReduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: DemoWordCountTuple2");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + numMapTasks);
    sLogger.info(" - number of reducers: " + numReduceTasks);

    JobConf conf = new JobConf(DemoWordCountTuple2.class);
    conf.setJobName("DemoWordCountTuple2");

    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(Tuple.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(ReduceClass.class);
    conf.setReducerClass(ReduceClass.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.example.simple.DemoMapredNullInput.java

License:Apache License

/**
 * Runs the demo./*  w  w w  .  j ava 2s  .  com*/
 */
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(DemoMapredNullInput.class);
    conf.setJobName("DemoMapredNullInput");

    conf.setNumMapTasks(10);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    JobClient.runJob(conf);
}

From source file:edu.umd.cloud9.example.simple.DemoNullInput.java

License:Apache License

/**
 * Runs the demo.//w  w w  . j a va  2  s.com
 */
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(DemoNullInput.class);
    conf.setJobName("DemoNullInput");

    conf.setNumMapTasks(10);
    conf.setNumReduceTasks(0);

    conf.setInputFormat(NullInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    JobClient.runJob(conf);
}

From source file:edu.umd.cloud9.examples.BigramCount.java

License:Apache License

/**
 * Runs this tool./*www .j  a v  a  2s.c om*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyReducer.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.BigramRelativeFrequency.java

License:Apache License

/**
 * Runs this tool.//  w ww  . j  a v a  2 s .  c om
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramRelativeFrequency");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramRelativeFrequency.class);
    conf.setJobName("BigramRelativeFrequency");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(PairOfStrings.class);
    conf.setOutputValueClass(FloatWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyCombiner.class);
    conf.setReducerClass(MyReducer.class);
    conf.setPartitionerClass(MyPartitioner.class);

    //Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.BuildInvertedIndex.java

License:Apache License

/**
 * Runs this tool.//from  w w  w .  j ava 2s .  co  m
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BuildInvertedIndex");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BuildInvertedIndex.class);
    conf.setJobName("BuildInvertedIndex");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(PairOfInts.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.DemoWordCount.java

License:Apache License

/**
 * Runs this tool./* w  w w.  jav  a 2 s.  com*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: DemoWordCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(DemoWordCount.class);
    conf.setJobName("DemoWordCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper.class);
    conf.setCombinerClass(MyReducer.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.cloud9.examples.JingBigramRelativeFrequency.java

License:Apache License

/**
  * Runs this tool./*from   w  ww  .  j a  v  a 2 s . co m*/
  */
 public int run(String[] args) throws Exception {
     if (args.length != 4) {
         printUsage();
         return -1;
     }

     String inputPath = args[0];
     String outputPath = args[1];

     int mapTasks = Integer.parseInt(args[2]);
     int reduceTasks = Integer.parseInt(args[3]);

     sLogger.info("Tool: DemoWordCount");
     sLogger.info(" - input path: " + inputPath);
     sLogger.info(" - output path: " + outputPath);
     sLogger.info(" - number of mappers: " + mapTasks);
     sLogger.info(" - number of reducers: " + reduceTasks);

     JobConf conf = new JobConf(BigramRelativeFrequency.class);
     conf.setJobName("DemoWordCount");

     conf.setNumMapTasks(mapTasks);
     conf.setNumReduceTasks(reduceTasks);

     FileInputFormat.setInputPaths(conf, new Path(inputPath));
     FileOutputFormat.setOutputPath(conf, new Path(outputPath));
     FileOutputFormat.setCompressOutput(conf, false);

     /**
      *  Note that these must match the Class arguments given in the mapper 
      */
     conf.setOutputKeyClass(Text.class);
     conf.setOutputValueClass(IntWritable.class);

     conf.setMapperClass(MyMapper.class);
     conf.setCombinerClass(MyReducer.class);
     conf.setReducerClass(MyReducer.class);

     // Delete the output directory if it exists already
     Path outputDir = new Path(outputPath);
     FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

     long startTime = System.currentTimeMillis();
     JobClient.runJob(conf);
     sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
     return 0;
 }

From source file:edu.umd.cloud9.io.benchmark.HadoopSortRandomPairsOfInts.java

License:Apache License

/**
 * Runs this benchmark.//  ww  w.jav a  2 s.c o m
 */
public static void main(String[] args) throws IOException {
    String inputPath = "random-pairs.seq";
    String outputPath = "random-pairs.sorted";
    int numMapTasks = 1;
    int numReduceTasks = 1;

    JobConf conf = new JobConf(HadoopSortRandomPairsOfInts.class);
    conf.setJobName("SortRandomPairsOfInts");

    conf.setNumMapTasks(numMapTasks);
    conf.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputKeyClass(PairOfInts.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(IdentityMapper.class);
    conf.setCombinerClass(IdentityReducer.class);
    conf.setReducerClass(IdentityReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime;
    double duration;

    startTime = System.currentTimeMillis();

    JobClient.runJob(conf);

    duration = (System.currentTimeMillis() - startTime) / 1000.0;
    System.out.println("Job took " + duration + " seconds");
}

From source file:edu.umd.cloud9.pagerank.BuildPageRankRecords.java

License:Apache License

/**
 * Runs this tool.//from   www.  j  av a  2s .  c  o m
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int n = Integer.parseInt(args[2]);

    sLogger.info("Tool name: BuildPageRankRecords");
    sLogger.info(" - inputDir: " + inputPath);
    sLogger.info(" - outputDir: " + outputPath);
    sLogger.info(" - numNodes: " + n);

    JobConf conf = new JobConf(BuildPageRankRecords.class);
    conf.setJobName("PackageLinkGraph");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(0);

    conf.setInt("NodeCnt", n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    TextInputFormat.addInputPath(conf, new Path(inputPath));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(PageRankNode.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(PageRankNode.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(IdentityReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    return 0;
}