Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job.setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.
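
Before the usage examples below, here is a minimal, self-contained sketch of the call. DescendingLongComparator is a hypothetical class (not part of Hadoop); it shows the common pattern of extending WritableComparator, which implements RawComparator, to override the natural key ordering:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical comparator: orders LongWritable map output keys descending.
public class DescendingLongComparator extends WritableComparator {
    protected DescendingLongComparator() {
        super(LongWritable.class, true); // true: deserialize keys before comparing
    }

    @Override
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public int compare(WritableComparable a, WritableComparable b) {
        return b.compareTo(a); // invert the natural ordering
    }
}

The comparator must be registered before the job is submitted; afterwards setSortComparatorClass throws IllegalStateException:

    Job job = Job.getInstance(new Configuration(), "descending sort");
    job.setSortComparatorClass(DescendingLongComparator.class);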

Usage

From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java

License:Open Source License

/**
 * Job to join the data with the metadata from the distributed cache and
 * calculate the revenue by quarter and the most popular product category per user
 *
 * @param dBPath the path of the import MapFile
 * @param inputPath the path of the logs directory
 * @param outputPath the path of the output directory
 * @return true if the job completed successfully
 * @throws IOException
 * @throws URISyntaxException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean generateReport(String dBPath, String inputPath, String outputPath)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();

    job.setJobName("Repor Generator");
    DistributedCache.addCacheFile(new URI(dBPath), conf);
    job.setJarByClass(ReportGeneratorDriver.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setPartitionerClass(KeyDataPartitioner.class);
    job.setGroupingComparatorClass(KeyDataGroupingComparator.class);
    job.setSortComparatorClass(KeyDataComparator.class);
    job.setMapperClass(ReportGeneratorMapper.class);
    job.setMapOutputKeyClass(KeyData.class);
    job.setMapOutputValueClass(ValueData.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReportGeneratorReducer.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true);
}
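
This first example shows the classic secondary-sort wiring, in which the three shuffle hooks work together: setPartitionerClass routes records that share a natural key to the same reducer, setGroupingComparatorClass merges them into a single reduce() call, and setSortComparatorClass orders the composite KeyData keys within each partition so values arrive at the reducer already sorted.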

From source file:org.acacia.csr.java.CSRConverter.java

License:Apache License

public static void main(String[] args) throws Exception {
    if (!validArgs(args)) {
        printUsage();
        return;
    }
    //These are the temp paths that are created on HDFS
    String dir1 = "/user/miyuru/csrconverter-output";
    String dir2 = "/user/miyuru/csrconverter-output-sorted";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    System.out.println("Deleting the dir : " + dir1);

    if (fs1.exists(new Path(dir1))) {
        fs1.delete(new Path(dir1), true);
    }

    System.out.println("Done deleting the dir : " + dir1);
    System.out.println("Deleting the dir : " + dir2);
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    Path notinPath = new Path("/user/miyuru/notinverts/notinverts");

    if (!fs1.exists(notinPath)) {
        fs1.create(notinPath).close(); // close the stream so the empty file is materialized
    }

    System.out.println("Done deleting the dir : " + dir2);

    //Note on Aug 23 2014: Sometimes after this the MapReduce job hangs; need to see why.

    VertexCounterClient.setDefaultGraphID(args[3], args[2]);

    //First job creates the inverted index

    JobConf conf = new JobConf(CSRConverter.class);
    conf.set("org.acacia.partitioner.hbase.zookeeper.quorum", args[1]);
    conf.set("org.acacia.partitioner.hbase.table", args[2]);
    conf.set("org.acacia.partitioner.hbase.contacthost", args[3]);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Text.class);
    //conf.setMapperClass(InvertedMapper.class);
    conf.setReducerClass(InvertedReducer.class);
    //conf.setInputFormat(TextInputFormat.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path(args[0]));
    MultipleInputs.addInputPath(conf, new Path(args[0]), NLinesInputFormat.class, InvertedMapper.class);
    MultipleInputs.addInputPath(conf, new Path("/user/miyuru/notinverts/notinverts"), TextInputFormat.class,
            InvertedMapper.class);
    FileOutputFormat.setOutputPath(conf, new Path(dir1));

    //Also for the moment we turn-off the speculative execution
    conf.setBoolean("mapred.map.tasks.speculative.execution", false);
    conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
    conf.setNumMapTasks(96);
    conf.setNumReduceTasks(96);
    conf.setPartitionerClass(VertexPartitioner.class);
    conf.set("vertex-count", args[4]);
    conf.set("zero-flag", args[5]);
    Job job = new Job(conf, "csr_inverter");
    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);
}
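
Note that this example (like NotInFinder below) mixes the legacy org.apache.hadoop.mapred API (JobConf, setInputFormat, setNumMapTasks) with the newer org.apache.hadoop.mapreduce API: most of the configuration goes onto the JobConf, while the Job wrapper constructed from it is what exposes setSortComparatorClass.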

From source file:org.acacia.csr.java.NotInFinder.java

License:Apache License

public static void main(String[] args) throws Exception {
    String dir1 = "/user/miyuru/wcout";
    String dir2 = "/user/miyuru/notinverts";
    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());

    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(LongWritable.class);
    conf.setMapperClass(TokenizerMapper.class);
    conf.setReducerClass(IntSumReducer.class);
    conf.setCombinerClass(IntSumReducer.class);
    conf.setInputFormat(NLinesInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    FileInputFormat.setInputPaths(conf, new Path(dir1));
    FileOutputFormat.setOutputPath(conf, new Path(dir2));
    Job job = new Job(conf, "NotInFinder");
    job.setJarByClass(WordCount.class);
    //   job.setMapperClass(TokenizerMapper.class);
    //   job.setCombinerClass(IntSumReducer.class);
    //   job.setReducerClass(IntSumReducer.class);
    //   job.setOutputKeyClass(LongWritable.class);
    //   job.setOutputValueClass(LongWritable.class);

    job.setSortComparatorClass(SortComparator.class);
    job.waitForCompletion(true);

}

From source file:org.acacia.csr.java.WordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";
     //We first delete the temporary directories if they exist on the HDFS
      FileSystem fs1 = FileSystem.get(new JobConf());
              
     if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
     }
            
    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
            
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true); 
    */

    String dir3 = "/user/miyuru/wcout";
    String dir5 = "/user/miyuru/input";
    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs3 = FileSystem.get(new JobConf());

    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));
    Job job3 = new Job(conf3, "word count");
    job3.setJarByClass(WordCount.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);

    job3.setSortComparatorClass(SortComparator.class);

    job3.waitForCompletion(true);

    PrintWriter writer;
    try {
        writer = new PrintWriter("/tmp/wfile", "UTF-8");
        writer.println("");
        writer.flush();
        writer.close();
    } catch (FileNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (UnsupportedEncodingException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

    System.out.println("------Done Word Count---------------");

}

From source file:org.acacia.csr.java.ZeroVertexSearcher.java

License:Apache License

public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";
     //We first delete the temporary directories if they exist on the HDFS
      FileSystem fs1 = FileSystem.get(new JobConf());
              
     if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
     }
            
    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
            
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true); 
    */

    String dir3 = "/user/miyuru/zout";
    String dir5 = "/user/miyuru/input";
    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs3 = FileSystem.get(new JobConf());

    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));
    conf3.set("mapred.map.max.attempts", "0");//If the job fails we assume that it happens because we found zero. Therfore we do not attempt again.
    Job job3 = new Job(conf3, "zero_vertex_search");
    job3.setJarByClass(ZeroVertexSearcher.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);
    job3.setNumReduceTasks(0);

    job3.setSortComparatorClass(SortComparator.class);
    try {
        job3.waitForCompletion(true);
    } catch (org.acacia.csr.java.ZeroFoundException ex) {
        System.out.println("Found Zero vertex");
        job3.killJob();
    }
    System.out.println("------Done Zero Vertex search---------------");

}

From source file:org.apache.avro.mapreduce.AvroJob.java

License:Apache License

/**
 * Sets the map output key schema.
 *
 * @param job The job to configure.
 * @param schema The map output key schema.
 */
public static void setMapOutputKeySchema(Job job, Schema schema) {
    job.setMapOutputKeyClass(AvroKey.class);
    job.setGroupingComparatorClass(AvroKeyComparator.class);
    job.setSortComparatorClass(AvroKeyComparator.class);
    AvroSerialization.setKeyWriterSchema(job.getConfiguration(), schema);
    AvroSerialization.setKeyReaderSchema(job.getConfiguration(), schema);
    AvroSerialization.addToConfiguration(job.getConfiguration());
}
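
Because AvroKeyComparator is registered as both the sort comparator and the grouping comparator, shuffle ordering and reduce-side grouping are both driven by the Avro schema's ordering rules. Calling the helper is then a one-liner; the STRING key schema below is just an illustrative choice:

    Job job = Job.getInstance(new Configuration(), "avro shuffle");
    AvroJob.setMapOutputKeySchema(job, Schema.create(Schema.Type.STRING));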

From source file:org.apache.crunch.GroupingOptions.java

License:Apache License

public void configure(Job job) {
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
    }
    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        job.getConfiguration().set(e.getKey(), e.getValue());
    }
}
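
A hedged usage sketch, assuming GroupingOptions is assembled through its Builder and that MyRawComparator is a hypothetical RawComparator implementation:

    GroupingOptions options = GroupingOptions.builder()
            .numReducers(4)
            .sortComparatorClass(MyRawComparator.class)
            .build();
    options.configure(job); // only the non-null options touch the Job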

From source file:org.apache.crunch.types.avro.AvroGroupedTableType.java

License:Apache License

@Override
public void configureShuffle(Job job, GroupingOptions options) {
    AvroTableType<K, V> att = (AvroTableType<K, V>) tableType;
    String schemaJson = att.getSchema().toString();
    Configuration conf = job.getConfiguration();

    if (att.hasReflect()) {
        if (att.hasSpecific()) {
            Avros.checkCombiningSpecificAndReflectionSchemas();
        }
        conf.setBoolean(AvroJob.MAP_OUTPUT_IS_REFLECT, true);
    }
    conf.set(AvroJob.MAP_OUTPUT_SCHEMA, schemaJson);
    job.setSortComparatorClass(AvroKeyComparator.class);
    job.setMapOutputKeyClass(AvroKey.class);
    job.setMapOutputValueClass(AvroValue.class);
    if (options != null) {
        options.configure(job);
    }

    Avros.configureReflectDataFactory(conf);

    Collection<String> serializations = job.getConfiguration().getStringCollection("io.serializations");
    if (!serializations.contains(SafeAvroSerialization.class.getName())) {
        serializations.add(SafeAvroSerialization.class.getName());
        job.getConfiguration().setStrings("io.serializations", serializations.toArray(new String[0]));
    }
}

From source file:org.apache.druid.indexer.SortableBytes.java

License:Apache License

public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setPartitionerClass(partitionerClass);
}
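
This helper keeps the SortableBytes wiring in one place: the grouping and sort comparators are fixed to their SortableBytes counterparts while the partitioner remains caller-supplied, so a job keyed on SortableBytes cannot end up with mismatched grouping and sorting.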

From source file:org.apache.gobblin.compaction.mapreduce.CompactionOrcJobConfigurator.java

License:Apache License

protected void configureMapper(Job job) {
    job.setInputFormatClass(OrcValueCombineFileInputFormat.class);
    job.setMapperClass(OrcValueMapper.class);
    job.setMapOutputKeyClass(OrcKey.class);
    job.setMapOutputValueClass(OrcValue.class);
    job.setGroupingComparatorClass(OrcKeyComparator.class);
    job.setSortComparatorClass(OrcKeyComparator.class);
}