List of usage examples for org.apache.hadoop.mapred JobConf setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
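For orientation, here is a minimal, self-contained sketch of where setReducerClass sits in a classic org.apache.hadoop.mapred word-count job. The TokenMapper and SumReducer classes are illustrative names defined in the sketch itself, not taken from any of the source files that follow.

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.*;

public class WordCountDriver {

    // Tokenizes each input line and emits (word, 1).
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();
        public void map(LongWritable key, Text value,
                        OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                out.collect(word, ONE);
            }
        }
    }

    // Sums the counts emitted for each word.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> values,
                           OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            int sum = 0;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            out.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(WordCountDriver.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        conf.setCombinerClass(SumReducer.class); // optional local aggregation
        conf.setReducerClass(SumReducer.class);  // Class<? extends Reducer>

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf); // submits the job and waits for completion
    }
}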
From source file: org.acacia.partitioner.java.EdgeDistributor.java
License: Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    String dir1 = "/user/miyuru/input";
    String dir2 = "/user/miyuru/edgedistributed-out";

    // We first delete the temporary directories if they exist on the HDFS.
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    // First job scans through the edge list and splits the edges into
    // separate files based on the partitioned vertex files.
    JobConf conf = new JobConf(EdgeDistributor.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    conf.set("org.acacia.partitioner.hbase.table", args[1]);
    conf.set("org.acacia.partitioner.index.contacthost", args[2]);
    conf.set("vert-count", args[3]);
    conf.set("initpartition-id", args[4]);
    conf.set("zero-flag", args[5]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(FileMapper.class);
    conf.setReducerClass(FileReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Need to specify the number of reduce tasks explicitly;
    // otherwise only one reduce task is created.
    conf.setNumReduceTasks(96);

    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));
    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    // The old-API JobConf is wrapped in a new-API Job before submission.
    Job job = new Job(conf, "EdgeDistributor");
    job.waitForCompletion(true);
    System.out.println("Done job EdgeDistribution");
}
From source file: org.acacia.partitioner.java.EdgelistPartitioner.java
License: Apache License

@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);
    if (conf == null) {
        return;
    }

    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    // We first delete the temporary directories if they exist on the HDFS;
    // only delete dir2 because dir1 is uploaded externally.
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    conf.setOutputFormat(NullOutputFormat.class); // overrides the TextOutputFormat set above

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    JobClient.runJob(conf);
}
From source file: org.acacia.partitioner.java.NoptSplitter.java
License: Apache License

/**
 * @param args
 */
public static void main(String[] args) {
    if (!validArgs(args)) {
        printUsage();
        return;
    }

    // These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/edgedistributed-out/nopt";
    String dir2 = "/user/miyuru/nopt-distributed";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1;
    try {
        fs1 = FileSystem.get(new JobConf());
        System.out.println("Deleting the dir : " + dir2);
        if (fs1.exists(new Path(dir2))) {
            fs1.delete(new Path(dir2), true);
        }

        // Path notinPath = new Path(dir2);
        //
        // if (!fs1.exists(notinPath)) {
        //     fs1.create(notinPath);
        // }

        JobConf conf = new JobConf(NoptSplitter.class);
        // conf.setOutputKeyClass(Text.class);
        // conf.setOutputValueClass(Text.class);
        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);
        // conf.setInputFormat(TextInputFormat.class);
        // conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(dir1));
        FileOutputFormat.setOutputPath(conf, new Path(dir2));

        Job job1 = new Job(conf, "nopt_splitter");
        // The most important point in this job: the reduce-task count is taken from the command line.
        job1.setNumReduceTasks(Integer.parseInt(args[0]));
        job1.waitForCompletion(true);
    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}
From source file: org.ahanna.DoubleConversionMapper.java
License: Apache License

public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record,
    // and splits the line by the tab character to separate it into key and value
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        // do nothing
    }
}
From source file: org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java
License: Apache License

public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();
    CommandLine commandLine = getCommandLine();

    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    // tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        // delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    //jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());
    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}
From source file: org.apache.avro.mapred.AvroJob.java
License: Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    // If the job has not set its own reducer, install the HadoopReducer
    // adapter, which drives the configured AvroReducer.
    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}
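For context, a hedged sketch of how this branch is typically reached: when a job is configured through the AvroJob helpers, the user's reducer is registered as an AvroReducer property rather than through JobConf.setReducerClass, so getReducerClass() still reports IdentityReducer and the adapter is installed. WordCountMapper and WordCountReducer below are hypothetical AvroMapper/AvroReducer implementations, and the schemas are illustrative.

JobConf job = new JobConf();
AvroJob.setInputSchema(job, Schema.create(Schema.Type.STRING));
AvroJob.setMapperClass(job, WordCountMapper.class);   // hypothetical AvroMapper subclass
AvroJob.setReducerClass(job, WordCountReducer.class); // hypothetical AvroReducer subclass
// Setting the output schema routes through configureAvroOutput; since no reducer
// was set directly on the JobConf, HadoopReducer is installed as the adapter.
AvroJob.setOutputSchema(job, Pair.getPairSchema(
        Schema.create(Schema.Type.STRING), Schema.create(Schema.Type.INT)));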
From source file: org.apache.avro.mapred.TestSequenceFileReader.java
License: Apache License

@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");
    output.getFileSystem(job).delete(output);

    // configure input for Avro from sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // mapper is default, identity
    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output for non-Avro SequenceFile
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // output key/value classes are default, LongWritable/Text
    JobClient.runJob(job);

    checkFile(new SequenceFileReader<Long, CharSequence>(new File(output.toString() + "/part-00000")));
}
From source file: org.apache.avro.mapred.TestWordCountGeneric.java
License: Apache License

@Test
@SuppressWarnings("deprecation")
public void testJob() throws Exception {
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");
    JobConf job = new JobConf();
    try {
        WordCountUtil.writeLinesFile();

        job.setJobName("wordcount");

        AvroJob.setInputGeneric(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputGeneric(job, WordCount.SCHEMA$);

        job.setMapperClass(MapImpl.class);
        job.setCombinerClass(ReduceImpl.class);
        job.setReducerClass(ReduceImpl.class);

        FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);

        JobClient.runJob(job);

        WordCountUtil.validateCountsFile();
    } finally {
        outputPath.getFileSystem(job).delete(outputPath);
    }
}
From source file: org.apache.avro.mapred.TestWordCountSpecific.java
License: Apache License

@Test
@SuppressWarnings("deprecation")
public void testJob() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");
    try {
        WordCountUtil.writeLinesFile();

        job.setJobName("wordcount");

        AvroJob.setInputSpecific(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputSpecific(job, WordCount.SCHEMA$);

        job.setMapperClass(MapImpl.class);
        job.setCombinerClass(ReduceImpl.class);
        job.setReducerClass(ReduceImpl.class);

        FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);

        JobClient.runJob(job);

        WordCountUtil.validateCountsFile();
    } finally {
        outputPath.getFileSystem(job).delete(outputPath);
    }
}
From source file: org.apache.avro.mapred.tether.TetherJob.java
License: Apache License

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}
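setupTetherJob is private, so callers reach it through TetherJob's public entry points. A hedged usage sketch, assuming TetherJob.setExecutable and TetherJob.runJob as that public surface; the paths are placeholders.

JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/user/example/in"));    // placeholder input path
FileOutputFormat.setOutputPath(job, new Path("/user/example/out"));  // placeholder output path
TetherJob.setExecutable(job, new File("/path/to/tethered-program")); // program speaking the tether protocol
RunningJob running = TetherJob.runJob(job); // applies setupTetherJob before submitting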