Example usage for org.apache.hadoop.mapred JobConf setMapperClass

Introduction

On this page you can find example usages of the setMapperClass method of org.apache.hadoop.mapred.JobConf.

Prototype

public void setMapperClass(Class<? extends Mapper> theClass)

Source Link

Document

Set the Mapper class for the job.

Usage

From source file:edu.ucsb.cs.hybrid.HybridDriver.java

License:Apache License

/**
 * Picks the map runner and the mapper implementation for a job from its
 * configuration, and names the job after the selected variant.
 *
 * @param job configuration updated in place with the runner class, the
 *            mapper class and the job name.
 * @throws RuntimeException if a single split is configured together with
 *                          multi-threading.
 */
public static void setMapperAndRunner(JobConf job) {
    final int splitCount = job.getInt(Config.NUMBER_SPLITS_PROPERTY, Config.NUMBER_SPLITS_VALUE);
    final int blockChoice = job.getInt(Config.BLOCK_CHOICE_PROPERTY, Config.BLOCK_CHOICE_VALUE);
    final boolean threaded = job.getBoolean(Config.MULTI_THREADS_PROPERTY, Config.MULTI_THREADS_VALUE);

    // Appended to the base job name "PSS" once the variant is known.
    String suffix = "";

    if (splitCount <= 1) {
        job.setMapRunnerClass(SingleS_Runner.class);
        if (threaded) {
            throw new RuntimeException(
                    "ERROR: Single S with multithreads! Set hybrid.threads.property to false.");
        }
        switch (blockChoice) {
        case 1:
            if (job.getBoolean(Config.BAYADRO_SKIP_PROPERTY, Config.BAYADRO_SKIP_VALUE)) {
                suffix = "/Bayardo_Dynamic_filter";
                job.setMapperClass(PSS_Bayardo_Mapper.class);
            } else {
                job.setMapperClass(PSS_Mapper.class);
            }
            break;
        case 2:
            suffix = "2/SingleS";
            job.setMapperClass(PSS2_SingleS_Mapper.class);
            break;
        default:
            // Every other choice falls through to the third single-split mapper.
            job.setMapperClass(PSS3_SingleS_Mapper.class);
            break;
        }
    } else {
        job.setMapRunnerClass(MultipleS_Runner.class);
        if (threaded) {
            // The threaded mapper takes precedence over the block choice
            // and leaves the job name at its base value.
            job.setMapperClass(PSS1_Threaded_Mapper.class);
        } else {
            switch (blockChoice) {
            case 1:
                suffix = "1";
                job.setMapperClass(PSS1_Mapper.class);
                break;
            case 2:
                suffix = "2";
                job.setMapperClass(PSS2_Mapper.class);
                break;
            default:
                // No mapper is selected for other choices; the job keeps
                // whatever mapper class it already had.
                break;
            }
        }
    }
    job.setJobName("PSS" + suffix);
}

From source file:edu.ucsb.cs.hybrid.HybridDriver.java

License:Apache License

/**
 * Configures and submits a map-only job that reads the sequence files under
 * OUTPUT_DIR through IDMapper and writes text output to "SimilarityScores".
 * Any existing output directory is deleted first.
 *
 * @param args command-line arguments handed to GenericOptionsParser.
 * @throws IOException declared by the file-system and submission calls.
 */
public static void IDMappingJob(String[] args) throws IOException {
    final JobConf conf = new JobConf();
    new GenericOptionsParser(conf, args);

    // Job identity.
    conf.setJobName("Converting binary similarity scores to text");
    conf.setJarByClass(HybridDriver.class);

    // Map-only: zero reducers, Text keys and values throughout.
    conf.setMapperClass(IDMapper.class);
    conf.setNumReduceTasks(0);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Input: sequence files under OUTPUT_DIR.
    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(conf, new Path(OUTPUT_DIR));

    // Output: plain text, replacing any previous result.
    final Path scoresDir = new Path("SimilarityScores");
    conf.setOutputFormat(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(conf, scoresDir);
    FileSystem.get(conf).delete(scoresDir, true);

    HashPagesDriver.prepareDistribCache(conf, HashPagesDriver.IDS_FILE2);

    final float threshold = conf.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE);
    JobSubmitter.run(conf, "BINARY TO TEXT", threshold);
}

From source file:edu.ucsb.cs.lsh.minhash.MinHashLshDriver.java

License:Apache License

/**
 * Configures and submits the LSH job: LshMapper emits IntArrayWritable
 * signatures with LongWritable document IDs, LshReducer emits
 * IntWritable/Text pairs into "lsh-jaccard-buckets".
 *
 * @param args generic Hadoop options followed by the input directory, which
 *             must be the first remaining argument.
 * @throws ParseException declared by the original signature.
 * @throws IOException declared by the file-system and submission calls.
 * @throws UnsupportedOperationException if no input directory is supplied.
 */
public static void main(String args[]) throws ParseException, IOException {

    JobConf job = new JobConf();
    job.setJarByClass(MinHashLshDriver.class);
    job.setJobName(MinHashLshDriver.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    // Check the argument count before indexing: reading args[0] from an empty
    // array would raise ArrayIndexOutOfBoundsException instead of the
    // intended error below.
    if (args == null || args.length == 0 || args[0] == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    String inputDir = args[0];

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    // Remove output left over from a previous run.
    FileSystem.get(job).delete(outputPath, true);

    // The table is built from the configured K, L, feature count and
    // threshold; the meaning of the literal 1024 is not visible here.
    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    JobSubmitter.run(job, "LSH", job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));
}

From source file:edu.ucsb.cs.lsh.projection.ProjectionsGenerator.java

License:Apache License

/**
 * Writes a seed sequence file holding one (i, i) record per reducer, then
 * configures and submits a job that feeds those records through
 * IdentityMapper to ProjectionReducer, which emits IntWritable keys and
 * RandomVector values. Existing input and output directories are deleted
 * first.
 *
 * @param job configuration to complete and submit.
 * @throws IOException declared by the file-system, writer and submission calls.
 */
public static void main(JobConf job) throws IOException {
    job.setJobName(ProjectionsGenerator.class.getSimpleName());
    FileSystem fs = FileSystem.get(job);

    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    int nFeatures = readCollectionFeatureCount(fs, job);
    setParameters(nBits, nFeatures);
    int nReducers = job.getInt(ProjectionLshDriver.LSH_NREDUCER_PROPERTY,
            ProjectionLshDriver.LSH_NREDUCER_VALUE);

    Path inputPath = new Path(INPUT_DIR);
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(inputPath)) {
        fs.delete(inputPath, true);
    }

    // One record per reducer. The writer is closed in a finally block so the
    // underlying stream is released even if an append fails.
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, new Path(inputPath.toString() + "/file"),
            IntWritable.class, IntWritable.class);
    try {
        for (int i = 0; i < nReducers; i++) {
            writer.append(new IntWritable(i), new IntWritable(i));
        }
    } finally {
        writer.close();
    }

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    FileOutputFormat.setCompressOutput(job, false);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(nReducers);

    job.setMapperClass(IdentityMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setReducerClass(ProjectionReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(RandomVector.class);

    JobSubmitter.run(job, "LSH", job.getFloat(Config.THRESHOLD_PROPERTY, Config.THRESHOLD_VALUE));
}

From source file:edu.ucsb.cs.lsh.projection.SignaturesGenerator.java

License:Apache License

/**
 * Configures and submits a map-only job in which SigMapper turns the
 * sequence files under INPUT_DIR into LongWritable/BitSignature pairs
 * written to OUTPUT_DIR. Any existing output directory is deleted first.
 *
 * @param args command-line arguments handed to GenericOptionsParser.
 * @throws Exception declared by the original signature.
 */
public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SignaturesGenerator.class);
    new GenericOptionsParser(job, args);
    job.setJobName(SignaturesGenerator.class.getSimpleName());
    setParameters();
    FileSystem fs = FileSystem.get(job);
    prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR));

    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath)) {
        // Explicit recursive delete rather than the deprecated
        // single-argument overload.
        fs.delete(outputPath, true);
    }

    FileInputFormat.setInputPaths(job, INPUT_DIR);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setInt("mapred.task.timeout", 6000000);

    // Map-only job: the mapper output is the job output.
    job.setMapperClass(SigMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BitSignature.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    JobSubmitter.run(job, "LSH", -1);
}

From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java

License:Apache License

/**
 * Applies the mapper, the reducer, the key/value classes and the reducer
 * count for the core cosine partitioning job.
 *
 * @param job     configuration to update in place.
 * @param mapper  mapper class to install on the job.
 * @param reducer reducer class to install on the job.
 * @return the same {@code job} instance that was passed in.
 */
public static JobConf setMapReduce(JobConf job, Class mapper, Class reducer) {
    // Map side.
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(IntIntWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    // Reduce side: one reduce task per configured partition.
    final int partitionCount = job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE);
    job.setNumReduceTasks(partitionCount);
    job.setReducerClass(reducer);

    // Final output uses the same key/value types as the map output.
    job.setOutputKeyClass(IntIntWritable.class);
    job.setOutputValueClass(IdFeatureWeightArrayWritable.class);

    return job;
}

From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java

License:Apache License

/**
 * Configures the LSH partitioning job and hands it to {@code run}:
 * LshMapper emits IntArrayWritable signatures with LongWritable document
 * IDs, LshReducer emits IntWritable/Text pairs into "lsh-jaccard-buckets".
 *
 * @param args generic Hadoop options followed by the input directory, which
 *             must be the first remaining argument.
 * @throws ParseException declared by the original signature.
 * @throws IOException declared by the file-system and job calls.
 * @throws UnsupportedOperationException if no input directory is supplied.
 */
public static void main(String args[]) throws ParseException, IOException {

    JobConf job = new JobConf();
    job.setJarByClass(LshPartitionMain.class);
    job.setJobName(LshPartitionMain.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    // Check the argument count before indexing: reading args[0] from an empty
    // array would raise ArrayIndexOutOfBoundsException instead of the
    // intended error below.
    if (args == null || args.length == 0 || args[0] == null) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    String inputDir = args[0];

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    // Remove output left over from a previous run.
    FileSystem.get(job).delete(outputPath, true);

    // The table is built from the configured K, L, feature count and
    // threshold; the meaning of the literal 1024 is not visible here.
    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    run(job);

}

From source file:edu.ucsb.cs.preprocessing.sequence.SeqWriter.java

License:Apache License

/**
 * Submits a map-only job that reads text records from INPUT_DIR through
 * SeqMapper and writes LongWritable/FeatureWeightArrayWritable pairs as
 * sequence files to OUTPUT_DIR. The input is expected to be text lines of
 * the form [id feature weight ..]. The IDs file and any existing output
 * directory are deleted first.
 *
 * @throws IOException declared by the file-system and submission calls.
 */
public static void writeSequence() throws IOException {
    final JobConf conf = new JobConf();
    conf.setJarByClass(SeqWriter.class);
    conf.setJobName("Convert text vectors to hadoop seqeunce ");

    // Map-only conversion: mapper output types equal the job output types.
    conf.setNumReduceTasks(0);
    conf.setMapperClass(SeqMapper.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(FeatureWeightArrayWritable.class);

    // Text in.
    conf.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(conf, new Path(INPUT_DIR));

    // Clear results of earlier runs before configuring the output.
    final Path idsFile = new Path(HashPagesDriver.IDS_FILE2);
    final Path sequenceDir = new Path(OUTPUT_DIR);
    FileSystem.get(conf).delete(idsFile, true);
    FileSystem.get(conf).delete(sequenceDir, true);

    // Sequence files out.
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(conf, sequenceDir);

    JobSubmitter.run(conf, "PREPROCESS", -1);
}

From source file:edu.ucsb.cs.sort.length.LengthSortMain.java

License:Apache License

/**
 * Configures and submits the job that sorts vectors by length: mapper,
 * range partitioner, reducer, key/value classes, and sequence-file input
 * and output locations taken from SortDriver.
 *
 * @param args command-line arguments handed to GenericOptionsParser.
 * @throws IOException declared by the file-system and submission calls.
 * @throws UnsupportedOperationException if SortDriver.INPUT_DIR is null.
 */
public static void main(String[] args) throws IOException {
    final JobConf conf = new JobConf();
    new GenericOptionsParser(conf, args);
    conf.setJarByClass(LengthSortMain.class);
    conf.setJobName(LengthSortMain.class.getSimpleName());

    // Map phase: FloatWritable keys with the full vector as the value.
    conf.setMapperClass(LengthSortMapper.class);
    conf.setMapOutputKeyClass(FloatWritable.class);
    conf.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    conf.setPartitionerClass(LengthRangePartitioner.class);

    // Reduce phase.
    final int reducerCount = conf.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE);
    conf.setReducerClass(LengthSortReducer.class);
    conf.setNumReduceTasks(reducerCount);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(FeatureWeightArrayWritable.class);

    // Input location.
    final String sourceDir = SortDriver.INPUT_DIR;
    if (sourceDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    conf.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(conf, new Path(sourceDir));

    // Output location, cleared of any previous result.
    final Path sortedDir = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(conf).delete(sortedDir, true);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(conf, sortedDir);

    JobSubmitter.run(conf, "Sort By Vector Lenghts", -1);
}

From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java

License:Apache License

/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting./*  w  w w.  j a  v  a 2  s.co  m*/
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    // ToolRunner.printGenericCommandUsage(System.out);
    job.setJobName(MaxwSortMain.class.getSimpleName());
    job.setJarByClass(MaxwSortMain.class);
    job.setMapperClass(MaxwSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(MaxwRangePartitioner.class);

    job.setReducerClass(MaxwSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By infinity-Norm", -1);
}