Example usage for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

Introduction

On this page you can find example usages for org.apache.hadoop.mapred JobConf setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the map output data.
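If the map output key type differs from the job's final output key type (set via setOutputKeyClass), it must be declared with this method; otherwise the framework falls back to the final output classes. A minimal sketch of the pattern, using hypothetical TokenMapper and SumReducer classes (placeholders, not taken from the examples below):

public static void configure(JobConf job) {
    // The mapper emits IntWritable keys with Text values...
    job.setMapperClass(TokenMapper.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(Text.class);
    // ...while the reducer writes Text keys with IntWritable values, so the
    // map output classes cannot be inferred from the final output classes.
    job.setReducerClass(SumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
}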

Usage

From source file:edu.ucsb.cs.lsh.projection.SignaturesGenerator.java

License:Apache License

public static void main(String[] args) throws Exception {
    JobConf job = new JobConf(SignaturesGenerator.class);
    new GenericOptionsParser(job, args);
    job.setJobName(SignaturesGenerator.class.getSimpleName());
    int nBits = job.getInt(ProjectionLshDriver.LSH_NBITS_PROPERTY, ProjectionLshDriver.LSH_NBITS_VALUE);
    setParameters();
    FileSystem fs = FileSystem.get(job);
    prepareDistributedCache(job, fs, new Path(ProjectionsGenerator.OUTPUT_DIR));
    Path outputPath = new Path(OUTPUT_DIR);
    if (fs.exists(outputPath))
        fs.delete(outputPath, true);

    FileInputFormat.setInputPaths(job, INPUT_DIR);
    FileOutputFormat.setOutputPath(job, outputPath);
    // FileOutputFormat.setCompressOutput(job, false);
    job.setInputFormat(SequenceFileInputFormat.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);

    job.set("mapred.child.java.opts", "-Xmx2048m");
    job.setInt("mapred.map.max.attempts", 10);
    job.setInt("mapred.reduce.max.attempts", 10);
    job.setInt("mapred.task.timeout", 6000000);

    job.setMapperClass(SigMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BitSignature.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);

    JobSubmitter.run(job, "LSH", -1);
}

From source file:edu.ucsb.cs.partitioning.cosine.CosinePartitioning.java

License:Apache License

/**
 * Sets MapReduce input configurations for the core cosine partitioning job.
 */
public static JobConf setMapReduce(JobConf job, Class mapper, Class reducer) {
    job.setMapperClass(mapper);
    job.setMapOutputKeyClass(IntIntWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);
    job.setNumReduceTasks(job.getInt(Config.NUM_PARTITIONS_PROPERTY, Config.NUM_PARTITIONS_VALUE));
    job.setReducerClass(reducer);
    job.setOutputKeyClass(IntIntWritable.class);
    job.setOutputValueClass(IdFeatureWeightArrayWritable.class);
    return job;
}
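
A hypothetical invocation of this helper might look as follows (CosineMapper and CosineReducer are illustrative placeholders, not class names from the source):

JobConf job = new JobConf(CosinePartitioning.class);
// The helper fills in the map/reduce output classes and reducer count.
job = setMapReduce(job, CosineMapper.class, CosineReducer.class);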

From source file:edu.ucsb.cs.partitioning.lsh.LshPartitionMain.java

License:Apache License

public static void main(String args[]) throws ParseException, IOException {

    JobConf job = new JobConf();
    job.setJarByClass(LshPartitionMain.class);
    job.setJobName(LshPartitionMain.class.getSimpleName());
    GenericOptionsParser gop = new GenericOptionsParser(job, args);
    args = gop.getRemainingArgs();

    job.setMapperClass(LshMapper.class);
    job.setMapOutputKeyClass(IntArrayWritable.class); // signatures
    job.setMapOutputValueClass(LongWritable.class); // doc IDs
    job.setNumReduceTasks(job.getInt(NUM_REDUCERS_PROPERTY, NUM_REDUCERS_VALUE));
    job.setReducerClass(LshReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(Text.class);

    if (args.length < 1) {
        throw new UnsupportedOperationException("ERROR: input directory not set.");
    }
    String inputDir = args[0];
    FileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path("lsh-jaccard-buckets");
    FileOutputFormat.setOutputPath(job, outputPath);
    FileSystem.get(job).delete(outputPath, true);

    LshTable lshTable = new LshTable(job.getInt(K_PROPERTY, K_VALUE), job.getInt(L_PROPERTY, L_VALUE), 1024,
            job.getLong(NUM_FEATURES_PROPERTY, NUM_FEATURES_VALUE),
            job.getFloat(THRESHOLD_PROPERTY, THRESHOLD_VALUE));

    writeLsh(job, outputPath.getFileSystem(job), lshTable);

    run(job);

}

From source file:edu.ucsb.cs.preprocessing.sequence.SeqWriter.java

License:Apache License

/**
 * Runs a map-only MR job to convert an input directory of numeric-valued
 * records to Hadoop sequence-file format. It assumes text input where each
 * record has the format [id feature weight ..].
 */
public static void writeSequence() throws IOException {

    JobConf job = new JobConf();
    job.setJobName("Convert text vectors to hadoop seqeunce ");
    job.setJarByClass(SeqWriter.class);

    job.setMapperClass(SeqMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(FeatureWeightArrayWritable.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);

    job.setInputFormat(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(INPUT_DIR));
    FileSystem.get(job).delete(new Path(HashPagesDriver.IDS_FILE2), true);
    Path outputPath = new Path(OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);

    JobSubmitter.run(job, "PREPROCESS", -1);
}

From source file:edu.ucsb.cs.sort.length.LengthSortMain.java

License:Apache License

/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on vector lengths.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(LengthSortMain.class.getSimpleName());
    job.setJarByClass(LengthSortMain.class);
    job.setMapperClass(LengthSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(LengthRangePartitioner.class);

    job.setReducerClass(LengthSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Vector Lenghts", -1);
}

From source file:edu.ucsb.cs.sort.maxw.MaxwSortMain.java

License:Apache License

/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    // ToolRunner.printGenericCommandUsage(System.out);
    job.setJobName(MaxwSortMain.class.getSimpleName());
    job.setJarByClass(MaxwSortMain.class);
    job.setMapperClass(MaxwSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(MaxwRangePartitioner.class);

    job.setReducerClass(MaxwSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By infinity-Norm", -1);
}

From source file:edu.ucsb.cs.sort.norm.NormSortMain.java

License:Apache License

/**
 * Main method sets the job configurations including the mapper and reducer
 * classes to do the sorting. Some of the produced partitions might be
 * merged later to reflect the number of partitions chosen by the user.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName("NormSort");
    job.setJarByClass(NormSortMain.class);
    job.setMapperClass(NormSortMapper.class);
    job.setMapOutputKeyClass(FloatWritable.class);
    job.setMapOutputValueClass(IdFeatureWeightArrayWritable.class);

    job.setPartitionerClass(NormRangePartitioner.class);

    job.setReducerClass(NormSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(FeatureWeightArrayWritable.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(SortDriver.OUTPUT_DIR);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);
    //
    // run
    //
    JobSubmitter.run(job, "Sort By p-norm", -1);
}

From source file:edu.ucsb.cs.sort.signature.SigSortMain.java

License:Apache License

/**
 * Sets the job configurations including the mapper and reducer classes to
 * do the sorting based on signatures.
 */
public static void main(String[] args) throws IOException {

    JobConf job = new JobConf();
    new GenericOptionsParser(job, args);
    job.setJobName(SigSortMain.class.getSimpleName());
    job.setJarByClass(SigSortMain.class);
    job.setMapperClass(SigSortMapper.class);
    job.setMapOutputKeyClass(BitSignature.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setPartitionerClass(SigRangePartitioner.class);

    job.setReducerClass(SigSortReducer.class);
    job.setNumReduceTasks(job.getInt(SortDriver.NUM_REDUCE_PROPERTY, SortDriver.NUM_REDUCE_VALUE));
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BitSignature.class);
    //
    // set input & output
    //
    String inputDir = SortDriver.INPUT_DIR;
    if (inputDir == null) {
        throw new UnsupportedOperationException("ERROR: input path not set");
    }
    job.setInputFormat(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(inputDir));
    Path outputPath = new Path(OUTPUT_PATH);
    FileSystem.get(job).delete(outputPath, true);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, outputPath);

    //
    // run
    //
    JobSubmitter.run(job, "Sort By Signature Bytes", -1);
}

From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaLinkGraph.java

License:Apache License

private void task1(String inputPath, String outputPath, int partitions) throws IOException {
    LOG.info("Exracting edges...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    JobConf conf = new JobConf(getConf(), BuildWikipediaLinkGraph.class);
    conf.setJobName(String.format("BuildWikipediaLinkGraph:Edges[input: %s, output: %s, num_partitions: %d]",
            inputPath, outputPath, partitions));

    conf.setNumReduceTasks(partitions);

    SequenceFileInputFormat.addInputPath(conf, new Path(inputPath));
    TextOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(PairOfStringInt.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MyMapper1.class);
    conf.setReducerClass(MyReducer1.class);
    conf.setPartitionerClass(MyPartitioner1.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);
}

From source file:edu.umd.cloud9.collection.wikipedia.BuildWikipediaLinkGraph.java

License:Apache License

private void task2(String inputPath, String outputPath, int partitions) throws IOException {
    LOG.info("Building adjacency lists...");
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output: " + outputPath);

    JobConf conf = new JobConf(getConf(), BuildWikipediaLinkGraph.class);
    conf.setJobName(
            String.format("BuildWikipediaLinkGraph:AdjacencyList[input: %s, output: %s, num_partitions: %d]",
                    inputPath, outputPath, partitions));

    conf.setNumReduceTasks(partitions);

    TextInputFormat.addInputPath(conf, new Path(inputPath));
    TextOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(IntWritable.class);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper2.class);
    conf.setReducerClass(MyReducer2.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    JobClient.runJob(conf);
}