Example usage for org.apache.hadoop.mapreduce Job setJobName

List of usage examples for org.apache.hadoop.mapreduce Job setJobName

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setJobName.

Prototype

public void setJobName(String name) throws IllegalStateException 

Source Link

Document

Set the user-specified job name.

Usage

From source file:edu.umd.JBizz.BuildInvertedIndexCompressed.java

License:Apache License

/**
 * Runs this tool.// w  w w.j a v  a  2 s  .c om
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + BuildInvertedIndexCompressed.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexCompressed.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexCompressed.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    //job.setMapOutputKeyClass(Text.class);
    //job.setMapOutputValueClass(PairOfInts.class);
    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(VIntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setPartitionerClass(MyPartitioner.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.JBizz.BuildPersonalizedPageRankRecords.java

License:Apache License

/**
 * Runs this tool.//from   w  w w.  j  a  v a2s.  co  m
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(OptionBuilder.withArgName("sources").hasArg().withDescription("Personalization Nodes")
            .create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sources = cmdline.getOptionValue(SOURCES);

    String[] sourceList = sources.trim().split(",");
    int numSources = sourceList.length;

    int source = Integer.parseInt(sourceList[0]);

    int[] sourceNodes = new int[numSources];
    for (int i = 0; i < numSources; i++) {
        sourceNodes[i] = Integer.parseInt(sourceList[i]);
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info("source list: " + sourceList);
    LOG.info("first source: " + source);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt(SOURCE, source);
    conf.setInt(NUM_SOURCES, numSources);
    conf.set(SOURCE_NODES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:edu.umd.JBizz.ExtractTopPersonalizedPageRankNodes.java

License:Apache License

/**
 * Runs this tool.// w w  w.  j  a  va2s .co  m
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(TOP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(TOP));

    LOG.info("Tool name: " + ExtractTopPersonalizedPageRankNodes.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);
    LOG.info(" - top: " + n);

    Configuration conf = getConf();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setInt("n", n);

    Job job = Job.getInstance(conf);
    job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getName() + ":" + inputPath);
    job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class);

    job.setNumReduceTasks(1);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(FloatWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(FloatWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:edu.umd.JBizz.RunPersonalizedPageRankBasic.java

License:Apache License

private float phase1(int i, int j, String basePath, int numNodes) throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase1");
    job.setJarByClass(RunPersonalizedPageRankBasic.class);

    String in = basePath + "/iter" + formatter.format(i);
    String out = basePath + "/iter" + formatter.format(j) + "t";
    String outm = out + "-mass";

    // We need to actually count the number of part files to get the number of partitions (because
    // the directory might contain _log).
    int numPartitions = 0;
    for (FileStatus s : FileSystem.get(getConf()).listStatus(new Path(in))) {
        if (s.getPath().getName().contains("part-"))
            numPartitions++;/*from w w  w  .  j  a  v  a  2s  . c  o  m*/
    }

    LOG.info("PageRank: iteration " + j + ": Phase1");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);
    LOG.info(" - nodeCnt: " + numNodes);
    //LOG.info(" - useCombiner: " + useCombiner);
    //LOG.info(" - useInmapCombiner: " + useInMapperCombiner);
    LOG.info("computed number of partitions: " + numPartitions);

    int numReduceTasks = numPartitions;

    job.getConfiguration().setInt("NodeCount", numNodes);
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    //job.getConfiguration().set("mapred.child.java.opts", "-Xmx2048m");
    job.getConfiguration().set("PageRankMassPath", outm);

    job.setNumReduceTasks(numReduceTasks);

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapWithInMapperCombiningClass.class);

    job.setReducerClass(ReduceClass.class);

    FileSystem.get(getConf()).delete(new Path(out), true);
    FileSystem.get(getConf()).delete(new Path(outm), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    float mass = 0;
    FileSystem fs = FileSystem.get(getConf());
    for (FileStatus f : fs.listStatus(new Path(outm))) {
        FSDataInputStream fin = fs.open(f.getPath());
        mass = mass + fin.readFloat();
        fin.close();
    }

    return mass;
}

From source file:edu.umd.JBizz.RunPersonalizedPageRankBasic.java

License:Apache License

private void phase2(int i, int j, float missing, String basePath, int numNodes, String sources)
        throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName("PageRank:Basic:iteration" + j + ":Phase2");
    job.setJarByClass(RunPersonalizedPageRankBasic.class);

    LOG.info("missing PageRank mass: " + missing);
    LOG.info("number of nodes: " + numNodes);

    String in = basePath + "/iter" + formatter.format(j) + "t";
    String out = basePath + "/iter" + formatter.format(j);

    LOG.info("PageRank: iteration " + j + ": Phase2");
    LOG.info(" - input: " + in);
    LOG.info(" - output: " + out);

    String[] sourceList = sources.trim().split(",");
    int numSources = sourceList.length;

    int source = Integer.parseInt(sourceList[0]);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setFloat("MissingMass", (float) missing);
    job.getConfiguration().setInt("NodeCount", numNodes);
    job.getConfiguration().setInt("sourcesList", source);

    job.setNumReduceTasks(0);// w ww.jav a  2 s. c  o  m

    FileInputFormat.setInputPaths(job, new Path(in));
    FileOutputFormat.setOutputPath(job, new Path(out));

    job.setInputFormatClass(NonSplitableSequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MapPageRankMassDistributionClass.class);

    FileSystem.get(getConf()).delete(new Path(out), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
}

From source file:edu.umd.shrawanraina.BuildInvertedIndexCompressed.java

License:Apache License

/**
 * Runs this tool.//from   w  w w .j  a va  2  s  .  c om
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    LOG.info("Tool name: " + BuildInvertedIndexCompressed.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - num reducers: " + reduceTasks);

    Job job = Job.getInstance(getConf());
    job.setJobName(BuildInvertedIndexCompressed.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexCompressed.class);

    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(VIntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(MapFileOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(getConf()).delete(outputDir, true);
    /*
    // Delete side data directory if it exists already.
    Path sdDir = new Path("sidedata");
    FileSystem.get(getConf()).delete(sdDir, true);
    */
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:edu.umd.shrawanraina.BuildInvertedIndexHBase.java

License:Apache License

/**
 * Runs this tool./*from  w w w  .  j  a v  a2s  . c om*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // If the table doesn't already exist, create it.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }

    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + HBaseWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBaseCompressed.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBaseCompressed.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(PairOfInts.class);

    job.setMapperClass(MyMapper.class);
    //job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);

    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(PairOfWritables.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputTable);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}

From source file:edu.umd.shrawanraina.BuildInvertedIndexHBaseCompressed.java

License:Apache License

/**
 * Runs this tool./* ww  w .  j a va  2 s . c  o  m*/
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));
    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputTable = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS))
            : 1;

    // If the table doesn't already exist, create it.
    Configuration conf = getConf();
    conf.addResource(new Path("/etc/hbase/conf/hbase-site.xml"));

    Configuration hbaseConfig = HBaseConfiguration.create(conf);
    HBaseAdmin admin = new HBaseAdmin(hbaseConfig);

    if (admin.tableExists(outputTable)) {
        LOG.info(String.format("Table '%s' exists: dropping table and recreating.", outputTable));
        LOG.info(String.format("Disabling table '%s'", outputTable));
        admin.disableTable(outputTable);
        LOG.info(String.format("Droppping table '%s'", outputTable));
        admin.deleteTable(outputTable);
    }

    HTableDescriptor tableDesc = new HTableDescriptor(TableName.valueOf(outputTable));
    for (int i = 0; i < FAMILIES.length; i++) {
        HColumnDescriptor hColumnDesc = new HColumnDescriptor(FAMILIES[i]);
        tableDesc.addFamily(hColumnDesc);
    }

    admin.createTable(tableDesc);
    LOG.info(String.format("Successfully created table '%s'", outputTable));

    admin.close();

    // Now we're ready to start running MapReduce.
    LOG.info("Tool: " + HBaseWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output table: " + outputTable);
    LOG.info(" - number of reducers: " + reduceTasks);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildInvertedIndexHBaseCompressed.class.getSimpleName());
    job.setJarByClass(BuildInvertedIndexHBaseCompressed.class);

    job.setMapOutputKeyClass(PairOfStringInt.class);
    job.setMapOutputValueClass(VIntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setNumReduceTasks(reduceTasks);

    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(PairOfWritables.class);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    TableMapReduceUtil.initTableReducerJob(outputTable, MyTableReducer.class, job);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputTable);
    FileSystem.get(getConf()).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");
    return 0;
}

From source file:edu.umd.shrawanraina.BuildPersonalizedPageRankRecords.java

License:Apache License

/**
 * Runs this tool.// w ww.ja va  2  s.  c o m
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(
            OptionBuilder.withArgName("node").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)
            || !cmdline.hasOption(SOURCES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sources = cmdline.getOptionValue(SOURCES);

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - sources: " + sources);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setStrings("sources", sources);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNodeUpd.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNodeUpd.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}

From source file:edu.umd.shrawanraina.ExtractTopPersonalizedPageRankNodes.java

License:Apache License

/**
 * Runs this tool.//from   w w  w  .  ja  va 2 s  .  co m
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("top").hasArg().withDescription("top num").create(TOP));
    options.addOption(
            OptionBuilder.withArgName("node").hasArg().withDescription("source node").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(SOURCES) || !cmdline.hasOption(TOP)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(TOP));
    String source = cmdline.getOptionValue(SOURCES);
    String outputPath = cmdline.hasOption(OUTPUT) ? cmdline.getOptionValue(OUTPUT) : "final";

    LOG.info("Tool name: " + ExtractTopPersonalizedPageRankNodes.class.getSimpleName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - source: " + source);

    Configuration conf = new Configuration();
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);

    Job job = Job.getInstance(conf);
    job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getSimpleName());
    job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);
    extractTop(inputPath, outputPath, source, n);
    return 0;
}