Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

List of usage examples for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Source Link

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.

Usage

From source file:ph.fingra.hadoop.mapred.parts.distribution.CountryStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/country" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/country job");

    job.setJarByClass(CountryStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(CountryMapper.class);
    job.setReducerClass(CountryReducer.class);

    job.setMapOutputKeyClass(CountryKey.class);
    job.setMapOutputValueClass(CountryEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(CountryPartitioner.class);
    job.setSortComparatorClass(CountrySortComparator.class);
    job.setGroupingComparatorClass(CountryGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.DeviceStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/device" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/device job");

    job.setJarByClass(DeviceStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(DeviceMapper.class);
    job.setReducerClass(DeviceReducer.class);

    job.setMapOutputKeyClass(DeviceKey.class);
    job.setMapOutputValueClass(DeviceEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(DevicePartitioner.class);
    job.setSortComparatorClass(DeviceSortComparator.class);
    job.setGroupingComparatorClass(DeviceGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.LanguageStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/language" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/language job");

    job.setJarByClass(LanguageStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(LanguageMapper.class);
    job.setReducerClass(LanguageReducer.class);

    job.setMapOutputKeyClass(LanguageKey.class);
    job.setMapOutputValueClass(LanguageEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(LanguagePartitioner.class);
    job.setSortComparatorClass(LanguageSortComparator.class);
    job.setGroupingComparatorClass(LanguageGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.OsversionStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/osversion" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/osversion job");

    job.setJarByClass(OsversionStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(OsversionMapper.class);
    job.setReducerClass(OsversionReducer.class);

    job.setMapOutputKeyClass(OsversionKey.class);
    job.setMapOutputValueClass(OsversionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(OsversionPartitioner.class);
    job.setSortComparatorClass(OsversionSortComparator.class);
    job.setGroupingComparatorClass(OsversionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.ResolutionStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/resolution" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/resolution job");

    job.setJarByClass(ResolutionStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ResolutionMapper.class);
    job.setReducerClass(ResolutionReducer.class);

    job.setMapOutputKeyClass(ResolutionKey.class);
    job.setMapOutputValueClass(ResolutionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(ResolutionPartitioner.class);
    job.setSortComparatorClass(ResolutionSortComparator.class);
    job.setGroupingComparatorClass(ResolutionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.FrequencyStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate "perform/tokenfreq" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/tokenfreq job");

    job.setJarByClass(FrequencyStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(TokenfreqMapper.class);
    job.setReducerClass(TokenfreqReducer.class);

    job.setMapOutputKeyClass(TokenfreqKey.class);
    job.setMapOutputValueClass(TokenfreqEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(TokenfreqPartitioner.class);
    job.setSortComparatorClass(TokenfreqSortComparator.class);
    job.setGroupingComparatorClass(TokenfreqGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.FrequencyStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate hourly "perform/tokenfreq" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @param targetdate target date supplying the hour the tasks should process
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createHourJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig, TargetDate targetdate) throws IOException {

    // Propagate debug flags and the target hour to the tasks through the configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());
    conf.set("hour", targetdate.getHour());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/tokenfreq hour job");

    job.setJarByClass(FrequencyStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(TokenfreqHourMapper.class);
    job.setReducerClass(TokenfreqHourReducer.class);

    job.setMapOutputKeyClass(TokenfreqHourKey.class);
    job.setMapOutputValueClass(TokenfreqHourEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(TokenfreqHourPartitioner.class);
    job.setSortComparatorClass(TokenfreqHourSortComparator.class);
    job.setGroupingComparatorClass(TokenfreqHourGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.HourSessionStatistic.java

License:Apache License

/**
 * Creates and configures the "perform/hoursession" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/hoursession job");

    job.setJarByClass(HourSessionStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(HourSessionMapper.class);
    job.setReducerClass(HourSessionReducer.class);

    job.setMapOutputKeyClass(HourSessionKey.class);
    job.setMapOutputValueClass(HourSessionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(HourSessionPartitioner.class);
    job.setSortComparatorClass(HourSessionSortComparator.class);
    job.setGroupingComparatorClass(HourSessionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.SessionLengthStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate "perform/sesstime" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/sesstime job");

    job.setJarByClass(SessionLengthStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(SesstimeMapper.class);
    job.setReducerClass(SesstimeReducer.class);

    job.setMapOutputKeyClass(SesstimeKey.class);
    job.setMapOutputValueClass(SesstimeEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(SesstimePartitioner.class);
    job.setSortComparatorClass(SesstimeSortComparator.class);
    job.setGroupingComparatorClass(SesstimeGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.SessionLengthStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate hourly "perform/sesstime" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @param targetdate target date supplying the hour the tasks should process
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createHourJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig, TargetDate targetdate) throws IOException {

    // Propagate debug flags and the target hour to the tasks through the configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());
    conf.set("hour", targetdate.getHour());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/sesstime hour job");

    job.setJarByClass(SessionLengthStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(SesstimeHourMapper.class);
    job.setReducerClass(SesstimeHourReducer.class);

    job.setMapOutputKeyClass(SesstimeHourKey.class);
    job.setMapOutputValueClass(SesstimeHourEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(SesstimeHourPartitioner.class);
    job.setSortComparatorClass(SesstimeHourSortComparator.class);
    job.setGroupingComparatorClass(SesstimeHourGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}