List of usage examples for org.apache.hadoop.mapred.JobConf.setReducerClass
public void setReducerClass(Class<? extends Reducer> theClass)
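Before the project examples below, here is a minimal sketch of where setReducerClass typically sits in an old-API (org.apache.hadoop.mapred) driver. The WordCountDriver, WordCountMapper, and WordCountReducer names and the argument paths are placeholders for illustration only; they do not come from any of the source files listed below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;

public class WordCountDriver {
    public static void main(String[] args) throws Exception {
        // Configure a job using the old mapred API.
        JobConf job = new JobConf(WordCountDriver.class);
        job.setJobName("word-count");

        job.setMapperClass(WordCountMapper.class);   // hypothetical Mapper implementation
        job.setReducerClass(WordCountReducer.class); // the method this page documents
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input and output locations are taken from the command line.
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        JobClient.runJob(job);
    }
}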
From source file:edu.uci.ics.pregelix.example.utils.VertexSorter.java
License:Apache License
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf(VertexSorter.class);
    job.setJobName(VertexSorter.class.getSimpleName());
    job.setMapperClass(MapRecordOnly.class);
    job.setReducerClass(ReduceRecordOnly.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setNumReduceTasks(Integer.parseInt(args[2]));
    JobClient.runJob(job);
}
From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    JobConf job = new JobConf();
    job.setJarByClass(MinHashLshDriver.class);
    job.setJobName(MinHashLshDriver.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    String inputDir = args[0];
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java
License:Apache License
public static void main(JobConf job) throws IOException {
    int nBits /* D */, nFeatures /* K */, nReducers;
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);
    nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY, ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);
    if (fs.exists(inputPath))
        fs.delete(inputPath, true);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    for (int i = 0; i < nReducers; i++)
        writer.append(new IntWritable(i), new IntWritable(i));
    writer.close();

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}
From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java
License:Apache License
/**
 * Sets MapReduce input configurations for the core cosine partitioning job.
 */
public static JobConf setMapReduce(JobConf job, Class mapper, Class reducer) {
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(IntIntWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setNumReduceTasks(job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE));
    job.setReducerClass(reducer);
    job.setOutputKeyClass(IntIntWritable.class);
    job.setOutputValueClass(IdFeatureWeightArrayWritable.class);
    return job;
}
From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java
License:Apache License
public static void main(String args[]) throws ParseException, IOException {
    JobConf job = new JobConf();
    job.setJarByClass(LshPartitionMain.class);
    job.setJobName(LshPartitionMain.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    String inputDir = args[0];
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    run(job);
}
From source file:edu.ucsb.cs.sort.length.LengthSortMain.java
License:Apache License
/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on vector lengths.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(LengthSortMain.class.getSimpleName());
    job.setJarByClass(LengthSortMain.class);
    job.setMapperClass(LengthSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(LengthRangePartitioner.class);

    job.setReducerClass(LengthSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Vector Lengths", -1);
}
From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java
License:Apache License
/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    // ToolRunner.printGenericCommandUsage(System.out);
    job.setJobName(MaxwSortMain.class.getSimpleName());
    job.setJarByClass(MaxwSortMain.class);
    job.setMapperClass(MaxwSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(MaxwRangePartitioner.class);

    job.setReducerClass(MaxwSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By infinity-Norm", -1);
}
From source file:edu.ucsb.cs.sort.norm.NormSortMain.java
License:Apache License
/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting. Some of the produced partitions might be
 * merged later to reflect the number of partitions chosen by the user.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName("NormSort");
    job.setJarByClass(NormSortMain.class);
    job.setMapperClass(NormSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setPartitionerClass(NormRangePartitioner.class);

    job.setReducerClass(NormSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By p-norm", -1);
}
From source file:edu.ucsb.cs.sort.signature.SigSortMain.java
License:Apache License
/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on signatures.
 */
public static void main(String[] args) throws IOException {
    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(SigSortMain.class.getSimpleName());
    job.setJarByClass(SigSortMain.class);
    job.setMapperClass(SigSortMapper.class);
    job.setMapOutputKeyClass(BitSignature.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setPartitionerClass(SigRangePartitioner.class);

    job.setReducerClass(SigSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Signature Bytes", -1);
}
From source file:edu.umd.cloud9.collection.aquaint2.NumberAquaint2Documents.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        printUsage();
        return -1;
    }
    String inputPath = args[0];
    String outputPath = args[1];
    String outputFile = args[2];
    int mapTasks = 10;

    LOG.info("Tool: " + NumberAquaint2Documents.class.getCanonicalName());
    LOG.info(" - Input path: " + inputPath);
    LOG.info(" - Output path: " + outputPath);
    LOG.info(" - Output file: " + outputFile);

    JobConf conf = new JobConf(NumberAquaint2Documents.class);
    conf.setJobName(NumberAquaint2Documents.class.getSimpleName());
    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    conf.setInputFormat(Aquaint2DocumentInputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);

    Aquaint2DocnoMapping.writeDocnoData(new Path(outputPath + "/part-00000"), new Path(outputFile),
            FileSystem.get(conf));

    return 0;
}