Example usage for org.apache.hadoop.mapred JobConf setReducerClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setReducerClass.

Prototype

public void setReducerClass(Class<? extends Reducer> theClass) 

Document

Set the Reducer class for the job.
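In practice this means passing a class that implements org.apache.hadoop.mapred.Reducer, as every example below does. The following minimal, self-contained word-count sketch shows where the call fits in an old-API job; the WordCountOldApi, TokenMapper, and SumReducer names and the args-supplied paths are illustrative, not taken from the sources below.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class WordCountOldApi {

    // Emits (token, 1) for every whitespace-separated token in a line.
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        public void map(LongWritable offset, Text line, OutputCollector<Text, IntWritable> out,
                Reporter reporter) throws IOException {
            for (String token : line.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    word.set(token);
                    out.collect(word, ONE);
                }
            }
        }
    }

    // Sums the counts for each token.
    public static class SumReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterator<IntWritable> counts,
                OutputCollector<Text, IntWritable> out, Reporter reporter) throws IOException {
            int sum = 0;
            while (counts.hasNext()) {
                sum += counts.next().get();
            }
            out.collect(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(WordCountOldApi.class);
        conf.setJobName("wordcount-old-api");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        conf.setMapperClass(TokenMapper.class);
        // The call documented on this page: register the Reducer implementation for the job.
        conf.setReducerClass(SumReducer.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}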

Usage

From source file:org.acacia.partitioner.java.EdgeDistributor.java

License:Apache License

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    String dir1 = "/user/miyuru/input";
    String dir2 = "/user/miyuru/edgedistributed-out";

    // We first delete the temporary directories if they exist on HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    // First job scans through the edge list and splits the edges into separate files based on the partitioned vertex files.

    JobConf conf = new JobConf(EdgeDistributor.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[0]);
    conf.set("org.acacia.partitioner.hbase.table", args[1]);
    conf.set("org.acacia.partitioner.index.contacthost", args[2]);
    conf.set("vert-count", args[3]);
    conf.set("initpartition-id", args[4]);
    conf.set("zero-flag", args[5]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(FileMapper.class);
    conf.setReducerClass(FileReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setNumReduceTasks(96); //Need to specify the number of reduce tasks explicitly. Otherwise it creates only one reduce task.

    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    Job job = new Job(conf, "EdgeDistributor");
    job.waitForCompletion(true);

    System.out.println("Done job EdgeDistribution");
}

From source file:org.acacia.partitioner.java.EdgelistPartitioner.java

License:Apache License

@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);

    if (conf == null) {
        return;
    }
    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    //only delete dir2 because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    conf.setOutputFormat(NullOutputFormat.class); // overrides the TextOutputFormat set above; output goes through MultipleOutputs

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    JobClient.runJob(conf);
}

From source file:org.acacia.partitioner.java.NoptSplitter.java

License:Apache License

/**
 * @param args args[0] supplies the number of reduce tasks for the job
 */
public static void main(String[] args) {
    if (!validArgs(args)) {
        printUsage();
        return;
    }
    //These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/edgedistributed-out/nopt";
    String dir2 = "/user/miyuru/nopt-distributed";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1;
    try {
        fs1 = FileSystem.get(new JobConf());

        System.out.println("Deleting the dir : " + dir2);

        if (fs1.exists(new Path(dir2))) {
            fs1.delete(new Path(dir2), true);
        }

        //         Path notinPath = new Path(dir2);
        //         
        //         if(!fs1.exists(notinPath)){
        //            fs1.create(notinPath);
        //         }

        JobConf conf = new JobConf(NoptSplitter.class);
        //          conf.setOutputKeyClass(Text.class);
        //          conf.setOutputValueClass(Text.class);
        conf.setMapperClass(Map.class);
        conf.setCombinerClass(Reduce.class);
        conf.setReducerClass(Reduce.class);

        //         conf.setInputFormat(TextInputFormat.class);
        //         conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(dir1));
        FileOutputFormat.setOutputPath(conf, new Path(dir2));

        Job job1 = new Job(conf, "nopt_splitter");
        job1.setNumReduceTasks(Integer.parseInt(args[0])); //The most important point in this job
        job1.waitForCompletion(true);

    } catch (IOException e) {
        e.printStackTrace();
    } catch (InterruptedException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
}

From source file:org.ahanna.DoubleConversionMapper.java

License:Apache License

public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record, 
    // and splits the line by the tab character to separate it into key and value 
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        // do nothing
    }
}

From source file:org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java

License:Apache License

public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();

    CommandLine commandLine = getCommandLine();
    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    //tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        //delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters     
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    //jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());
    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}

From source file:org.apache.avro.mapred.TestSequenceFileReader.java

License:Apache License

@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");

    output.getFileSystem(job).delete(output);

    // configure input for Avro from sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // mapper is default, identity

    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output for non-Avro SequenceFile
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // output key/value classes are default, LongWritable/Text

    JobClient.runJob(job);

    checkFile(new SequenceFileReader<Long, CharSequence>(new File(output.toString() + "/part-00000")));
}

From source file:org.apache.avro.mapred.TestWordCountGeneric.java

License:Apache License

@Test
@SuppressWarnings("deprecation")
public void testJob() throws Exception {
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");
    JobConf job = new JobConf();
    try {
        WordCountUtil.writeLinesFile();

        job.setJobName("wordcount");

        AvroJob.setInputGeneric(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputGeneric(job, WordCount.SCHEMA$);

        job.setMapperClass(MapImpl.class);
        job.setCombinerClass(ReduceImpl.class);
        job.setReducerClass(ReduceImpl.class);

        FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);

        JobClient.runJob(job);

        WordCountUtil.validateCountsFile();
    } finally {
        outputPath.getFileSystem(job).delete(outputPath);
    }
}

From source file:org.apache.avro.mapred.TestWordCountSpecific.java

License:Apache License

@Test
@SuppressWarnings("deprecation")
public void testJob() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");

    try {
        WordCountUtil.writeLinesFile();

        job.setJobName("wordcount");

        AvroJob.setInputSpecific(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputSpecific(job, WordCount.SCHEMA$);

        job.setMapperClass(MapImpl.class);
        job.setCombinerClass(ReduceImpl.class);
        job.setReducerClass(ReduceImpl.class);

        FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
        FileOutputFormat.setOutputPath(job, outputPath);
        FileOutputFormat.setCompressOutput(job, true);

        JobClient.runJob(job);

        WordCountUtil.validateCountsFile();
    } finally {
        outputPath.getFileSystem(job).delete(outputPath);
    }

}

From source file:org.apache.avro.mapred.tether.TetherJob.java

License:Apache License

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}