Example usage for org.apache.hadoop.mapreduce Job setReducerClass

Introduction

In this page you can find the example usage for org.apache.hadoop.mapreduce Job setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> cls) throws IllegalStateException

Source Link

Document

Set the Reducer for the job.

Usage

From source file:br.com.lassal.nqueens.grid.job.NQueenCounter.java

/**
 * Forma de chamada/*from ww  w .j  ava 2s.co m*/
 * <> {numero de rainhas} {diretorio raiz} -F
 *
 * @param strings
 * @return
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();

    // Create a JobConf using the processed conf
    Job job = new Job(conf, "nqueens-counter");
    job.setJarByClass(NQueenCounter.class);

    int queensNumber = Integer.parseInt(args[0]);
    String workingFolder = args.length >= 2 ? args[1] : null;
    boolean isFinal = args.length >= 3 && "-F".equals(args[2]) ? true : false;

    Path sourcePath = this.setWorkingFolder(queensNumber, workingFolder, isFinal, job);
    job.setOutputKeyClass(org.apache.hadoop.io.Text.class);
    job.setOutputValueClass(org.apache.hadoop.io.Text.class);

    if (isFinal) {
        job.setMapperClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultMapper.class);
        job.setReducerClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterResultReducer.class);
    } else {
        job.setMapperClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterMapper.class);
        job.setReducerClass(br.com.lassal.nqueens.grid.mapreduce.NQueenIncrementalCounterReducer.class);
    }

    // Submit the job, then poll for progress until the job is complete
    boolean result = job.waitForCompletion(true);

    if (sourcePath != null) {
        FileSystem fs = FileSystem.get(conf);
        fs.delete(sourcePath, true);
    }

    return result ? 0 : 1;

}

From source file:br.ufpr.inf.hpath.HPath.java

License:Apache License

/**
 * Execute the XPath query as a Hadoop job
 * @param xpath_query XPath query submitted by the user via cli.
 * @param inputFile XML file which has all data.
 * @param outputFile Query's result is stored in this file. 
 * @throws Exception// www.  ja va 2s  .c o  m
 */
public static void main(String[] args) throws Exception {

    if (args.length < 1) {
        System.out.println("USAGE: hpath [xpath_query] [input_file] [<output_dir>]");
        System.exit(-1);
    }

    System.out.println("***************");
    System.out.println(" Query  -> " + args[2]);
    System.out.println(" Input  -> " + args[0]);
    System.out.println(" Output -> " + args[1]);
    System.out.println("***************");

    String xpath_query = args[2];
    String inputFile = args[0];
    String outputFile = args[1];
    String tag = "";

    // tag = getFisrtQueryTag(xpath_query);
    tag = getLastQueryTag(xpath_query);
    Configuration conf = new Configuration();
    conf.set("xmlinput.start", "<" + tag);
    conf.set("xmlinput.end", "</" + tag + ">");
    conf.set("xpath.query", xpath_query);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "HPath");
    FileSystem fs = FileSystem.get(conf);
    Path inFile = new Path(inputFile);
    Path outFile = new Path(outputFile);

    if (!fs.exists(inFile)) {
        System.out.println("error: Input file not found.");
        System.exit(-1);
    }
    if (!fs.isFile(inFile)) {
        System.out.println("error: Input should be a file.");
        System.exit(-1);
    }
    if (fs.exists(outFile)) {
        System.out.println("error: Output already exists.");
        System.exit(-1);
    }

    job.setJarByClass(HPath.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(XmlItemInputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.addInputPath(job, inFile);
    FileOutputFormat.setOutputPath(job, outFile);
    job.waitForCompletion(true);
}

From source file:bulkload.ImportTsv.java

License:Apache License

/**
 * Sets up the actual job.//  ww w.ja v a2s. c  om
 * 
 * @param conf
 *            The current configuration.
 * @param args
 *            The command line parameters.
 * @return The newly created job.
 * @throws IOException
 *             When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {

    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }
            TableName tableName = TableName.valueOf(args[0]);
            if (!admin.tableExists(tableName)) {
                String errorMsg = format("Table '%s' does not exist.", tableName);
                LOG.error(errorMsg);
                throw new TableNotFoundException(errorMsg);
            }
            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(TsvImporter.class);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(TsvImporter.class);

            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            if (hfileOutPath != null) {
                try (HTable table = (HTable) connection.getTable(tableName)) {
                    Path outputDir = new Path(hfileOutPath);
                    FileSystem fs = FileSystem.get(conf);
                    if (fs.exists(outputDir)) {
                        if (!fs.delete(outputDir, true)) {
                            throw new IllegalStateException("delete path:" + outputDir + " failed");
                        }
                    }
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    job.setMapOutputValueClass(Put.class);
                    job.setReducerClass(PutSortReducer.class);
                    HFileOutputFormat2.configureIncrementalLoad(job, table, table);
                }
            } else {
                // No reducers. Just write straight to table. Call
                // initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);

                //               TableMapReduceUtil.addDependencyJars(job);
                //               TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
                //                     com.google.common.base.Function.class /* Guava used by TsvParser */);
            }

            // Workaround to remove unnecessary hadoop dependencies
            String[] jars = job.getConfiguration().get("tmpjars").split(",", -1);
            StringBuilder filteredJars = new StringBuilder();
            for (String j : jars) {
                String[] parts = j.split("/", -1);
                String fileName = parts[parts.length - 1];
                if (fileName.indexOf("hadoop-") != 0) {
                    filteredJars.append(j);
                    filteredJars.append(",");
                }
            }
            job.getConfiguration().set("tmpjars", filteredJars.toString());
        }
    }

    return job;
}

From source file:Business.MapReduceOne.java

@Override
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    Job job = new Job(conf, "FirstJob");
    job.setJarByClass(MapReduceOne.class);

    final File f = new File(MapReduceOne.class.getProtectionDomain().getCodeSource().getLocation().getPath());
    String inFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/inFiles/";
    String outFiles = f.getAbsolutePath().replace("/build/classes", "") + "/src/outFiles/OutputOne";
    //use the arguments instead if provided.
    if (args.length > 1) {
        inFiles = args[1];/*from   w  w  w .  ja v  a  2s  .c  o  m*/
        outFiles = args[2];
    }
    Path in = new Path(inFiles);
    Path out = new Path(outFiles);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);

    job.setMapperClass(Mapper1.class);
    job.setCombinerClass(Reducer1.class);
    job.setReducerClass(Reducer1.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
    return 0;
}

From source file:byte_import.HexastoreBulkImport.java

License:Open Source License

public Job createSubmittableJob(String[] args) {
    TABLE_NAME = args[1];//  w w  w  .  j  a v a  2s .com
    Job job = null;
    try {
        job = new Job(new Configuration(), NAME);
        job.setJarByClass(HexastoreBulkImport.class);
        job.setMapperClass(sampler.TotalOrderPrep.Map.class);
        job.setReducerClass(Reduce.class);
        job.setCombinerClass(Combiner.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setPartitionerClass(TotalOrderPartitioner.class);
        //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("/user/npapa/"+regions+"partitions/part-r-00000"));
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("partitions/part-r-00000"));
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        Path out = new Path("out");
        FileOutputFormat.setOutputPath(job, out);
        Configuration conf = new Configuration();
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        HBaseAdmin hadmin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor(TABLE_NAME + "_stats");
        HColumnDescriptor family = new HColumnDescriptor("size");
        desc.addFamily(family);
        conf.setInt("zookeeper.session.timeout", 600000);
        if (hadmin.tableExists(TABLE_NAME + "_stats")) {
            //hadmin.disableTable(TABLE_NAME+"_stats");
            //hadmin.deleteTable(TABLE_NAME+"_stats");
        } else {
            hadmin.createTable(desc);
        }

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        //job.getConfiguration().setInt("mapred.map.tasks", 18);
        job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
        job.getConfiguration().setInt("mapred.reduce.tasks", (int) TotalOrderPrep.regions);
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
        job.getConfiguration().setInt("mapred.tasktracker.map.tasks.maximum", 5);
        job.getConfiguration().setInt("mapred.tasktracker.reduce.tasks.maximum", 5);
        //job.getConfiguration().setInt("io.sort.mb", 100);

    } catch (IOException e2) {
        e2.printStackTrace();
    }

    return job;
}

From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.PerfectX.java

License:Apache License

/**
 * Runs this tool.//  ww w . j a v a  2 s . c  om
 */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + PerfectX.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(PerfectX.class.getSimpleName());
    job.setJarByClass(PerfectX.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:ca.uwaterloo.cs.bigdata2017w.assignment0.WordCount.java

License:Apache License

/**
 * Runs this tool./*from ww w  .  j a v  a  2s  . com*/
 */
@Override
public int run(String[] argv) throws Exception {
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

    try {
        parser.parseArgument(argv);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    LOG.info("Tool: " + WordCount.class.getSimpleName());
    LOG.info(" - input path: " + args.input);
    LOG.info(" - output path: " + args.output);
    LOG.info(" - number of reducers: " + args.numReducers);
    LOG.info(" - use in-mapper combining: " + args.imc);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(WordCount.class.getSimpleName());
    job.setJarByClass(WordCount.class);

    job.setNumReduceTasks(args.numReducers);

    FileInputFormat.setInputPaths(job, new Path(args.input));
    FileOutputFormat.setOutputPath(job, new Path(args.output));

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(args.imc ? MyMapperIMC.class : MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(args.output);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java

License:Open Source License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ConsineMain <input> <output>");
        System.exit(2);/*w w w  .  j  a  v  a 2s  .  c  om*/
    }
    Job job1 = new Job(conf, "ConsineMain");
    job1.setJarByClass(CosineMain.class);

    job1.setMapperClass(AggregateReadingsMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(DoubleWritable.class);

    job1.setReducerClass(AggregateReadingsReducer.class);
    job1.setOutputKeyClass(LongWritable.class);
    job1.setOutputValueClass(Text.class);
    FileInputFormat.setInputDirRecursive(job1, true);
    FileInputFormat.setInputPaths(job1, new Path(otherArgs[0]));
    int lastIdx = otherArgs[0].lastIndexOf("/");
    String tempOutput = otherArgs[0].substring(0, lastIdx) + "/temp";
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));

    System.out.println("\nStarting Job-1 ...");
    final long startTime = System.currentTimeMillis();
    try {
        final long startTimeJob1 = System.currentTimeMillis();
        if (!job1.waitForCompletion(true)) {
            System.out.println("Job-1 failed.");
        } else {
            System.out.println("Duration of Job1 " + ((System.currentTimeMillis() - startTimeJob1) / 1000.0)
                    + " seconds.");
            final Job job2 = new Job(conf, "ConsineMain Aggregate");
            job2.setJarByClass(CosineMain.class);
            job2.setInputFormatClass(CartesianInputFormat.class);
            CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, tempOutput);
            CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, tempOutput);
            FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));

            job2.setMapperClass(CartesianProductMapper.class);
            job2.setMapOutputKeyClass(DoubleWritable.class);
            job2.setMapOutputValueClass(Text.class);

            job2.setSortComparatorClass(DescendingKeyComparator.class);

            job2.setReducerClass(CartesianProductReducer.class);
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(DoubleWritable.class);

            job2.setNumReduceTasks(10);
            final long startTimeJob2 = System.currentTimeMillis();
            System.out.println("\nStarting Job-2 ...");
            if (!job2.waitForCompletion(true)) {
                System.out.println("Job-2 failed.");
            } else {
                System.out.println("Duration of Job2: "
                        + ((System.currentTimeMillis() - startTimeJob2) / 1000.0) + " seconds.");
            }

        }
        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(tempOutput), true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Total Duration: " + duration + " seconds.");
    }
    return 0;
}

From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java

License:Open Source License

public int run1(String[] args) throws IOException {
    if (args.length != 3) {
        System.err.println("Usage: java " + getClass().getName() + " <inputDir> <outDir> <ntasks>");
        ToolRunner.printGenericCommandUsage(System.err);
        return -1;
    }//from   w w  w . ja va 2 s. c o  m
    Configuration conf = getConf();
    final Job job2 = new Job(conf, "ConsineMain cartesian product");
    job2.setJarByClass(CosineMain.class);

    job2.setInputFormatClass(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, args[0]);
    CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, args[0]);
    FileOutputFormat.setOutputPath(job2, new Path(args[1]));

    job2.setMapperClass(CartesianProductMapper.class);
    job2.setMapOutputKeyClass(DoubleWritable.class);
    job2.setMapOutputValueClass(Text.class);

    job2.setSortComparatorClass(DescendingKeyComparator.class);

    job2.setReducerClass(CartesianProductReducer.class);
    job2.setOutputKeyClass(Text.class);
    job2.setOutputValueClass(DoubleWritable.class);
    job2.setNumReduceTasks(Integer.parseInt(args[2]));

    System.out.println("\nStarting Job-2 ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job2.waitForCompletion(true)) {
            System.out.println("Job-2 failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}

From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.HistogramMain.java

License:Open Source License

public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.HistogramMain <input> <output>");
        System.exit(2);/*from  ww w. j  av a 2s.c om*/
    }
    Job job = new Job(conf, "HistogramMain");
    job.setJarByClass(HistogramMain.class);

    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);

    job.setCombinerClass(MyCombiner.class);

    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.out.println("\nStarting Job ...");
    final long startTime = System.currentTimeMillis();
    try {
        if (!job.waitForCompletion(true)) {
            System.out.println("Job failed.");
            System.exit(1);
        }
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Duration is " + duration + " seconds.");
    }
    return 0;
}