Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

List of usage examples for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce Job setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Source Link

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.

Usage

From source file:ph.fingra.hadoop.mapred.parts.distribution.CountryStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/country" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/country job");

    job.setJarByClass(CountryStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(CountryMapper.class);
    job.setReducerClass(CountryReducer.class);

    job.setMapOutputKeyClass(CountryKey.class);
    job.setMapOutputValueClass(CountryEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(CountryPartitioner.class);
    job.setSortComparatorClass(CountrySortComparator.class);
    job.setGroupingComparatorClass(CountryGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.DeviceStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/device" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/device job");

    job.setJarByClass(DeviceStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(DeviceMapper.class);
    job.setReducerClass(DeviceReducer.class);

    job.setMapOutputKeyClass(DeviceKey.class);
    job.setMapOutputValueClass(DeviceEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(DevicePartitioner.class);
    job.setSortComparatorClass(DeviceSortComparator.class);
    job.setGroupingComparatorClass(DeviceGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.LanguageStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/language" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/language job");

    job.setJarByClass(LanguageStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(LanguageMapper.class);
    job.setReducerClass(LanguageReducer.class);

    job.setMapOutputKeyClass(LanguageKey.class);
    job.setMapOutputValueClass(LanguageEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(LanguagePartitioner.class);
    job.setSortComparatorClass(LanguageSortComparator.class);
    job.setGroupingComparatorClass(LanguageGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.OsversionStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/osversion" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/osversion job");

    job.setJarByClass(OsversionStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(OsversionMapper.class);
    job.setReducerClass(OsversionReducer.class);

    job.setMapOutputKeyClass(OsversionKey.class);
    job.setMapOutputValueClass(OsversionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(OsversionPartitioner.class);
    job.setSortComparatorClass(OsversionSortComparator.class);
    job.setGroupingComparatorClass(OsversionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.distribution.ResolutionStatistic.java

License:Apache License

/**
 * Creates and configures the "distribute/resolution" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("distribute/resolution job");

    job.setJarByClass(ResolutionStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ResolutionMapper.class);
    job.setReducerClass(ResolutionReducer.class);

    job.setMapOutputKeyClass(ResolutionKey.class);
    job.setMapOutputValueClass(ResolutionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(ResolutionPartitioner.class);
    job.setSortComparatorClass(ResolutionSortComparator.class);
    job.setGroupingComparatorClass(ResolutionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.FrequencyStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate "perform/tokenfreq" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/tokenfreq job");

    job.setJarByClass(FrequencyStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(TokenfreqMapper.class);
    job.setReducerClass(TokenfreqReducer.class);

    job.setMapOutputKeyClass(TokenfreqKey.class);
    job.setMapOutputValueClass(TokenfreqEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(TokenfreqPartitioner.class);
    job.setSortComparatorClass(TokenfreqSortComparator.class);
    job.setGroupingComparatorClass(TokenfreqGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.FrequencyStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate hourly "perform/tokenfreq" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @param targetdate target date supplying the hour the tasks should process
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createHourJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig, TargetDate targetdate) throws IOException {

    // Propagate debug flags and the target hour to the tasks through the configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());
    conf.set("hour", targetdate.getHour());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/tokenfreq hour job");

    job.setJarByClass(FrequencyStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(TokenfreqHourMapper.class);
    job.setReducerClass(TokenfreqHourReducer.class);

    job.setMapOutputKeyClass(TokenfreqHourKey.class);
    job.setMapOutputValueClass(TokenfreqHourEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(TokenfreqHourPartitioner.class);
    job.setSortComparatorClass(TokenfreqHourSortComparator.class);
    job.setGroupingComparatorClass(TokenfreqHourGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.HourSessionStatistic.java

License:Apache License

/**
 * Creates and configures the "perform/hoursession" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the job output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/hoursession job");

    job.setJarByClass(HourSessionStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(HourSessionMapper.class);
    job.setReducerClass(HourSessionReducer.class);

    job.setMapOutputKeyClass(HourSessionKey.class);
    job.setMapOutputValueClass(HourSessionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(HourSessionPartitioner.class);
    job.setSortComparatorClass(HourSessionSortComparator.class);
    job.setGroupingComparatorClass(HourSessionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.SessionLengthStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate "perform/sesstime" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    // Propagate debug flags to the tasks through the job configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/sesstime job");

    job.setJarByClass(SessionLengthStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(SesstimeMapper.class);
    job.setReducerClass(SesstimeReducer.class);

    job.setMapOutputKeyClass(SesstimeKey.class);
    job.setMapOutputValueClass(SesstimeEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(SesstimePartitioner.class);
    job.setSortComparatorClass(SesstimeSortComparator.class);
    job.setGroupingComparatorClass(SesstimeGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.performance.SessionLengthStatistic.java

License:Apache License

/**
 * Creates and configures the intermediate hourly "perform/sesstime" MapReduce job.
 *
 * @param conf       Hadoop configuration the job is built from
 * @param inputpaths input paths containing the log data to process
 * @param outputpath destination path for the intermediate output
 * @param numreduce  number of reduce tasks to run
 * @param finconfig  Fingraph configuration supplying the debug flags
 * @param targetdate target date supplying the hour the tasks should process
 * @return the fully configured, not-yet-submitted Job
 * @throws IOException if the job cannot be created
 */
public Job createHourJobIntermediate(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig, TargetDate targetdate) throws IOException {

    // Propagate debug flags and the target hour to the tasks through the configuration.
    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());
    conf.set("hour", targetdate.getHour());

    // Job.getInstance(Configuration) replaces the deprecated new Job(Configuration).
    Job job = Job.getInstance(conf);
    job.setJobName("perform/sesstime hour job");

    job.setJarByClass(SessionLengthStatistic.class);

    for (Path inputpath : inputpaths) {
        FileInputFormat.addInputPath(job, inputpath);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(SesstimeHourMapper.class);
    job.setReducerClass(SesstimeHourReducer.class);

    job.setMapOutputKeyClass(SesstimeHourKey.class);
    job.setMapOutputValueClass(SesstimeHourEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    // Secondary-sort wiring: partition, sort, and group on the composite key.
    job.setPartitionerClass(SesstimeHourPartitioner.class);
    job.setSortComparatorClass(SesstimeHourSortComparator.class);
    job.setGroupingComparatorClass(SesstimeHourGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}