Example usage for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

List of usage examples for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setGroupingComparatorClass.

Prototype

public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Source Link

Document

Defines the comparator that controls which keys are grouped together for a single call to Reducer#reduce(Object,Iterable,org.apache.hadoop.mapreduce.Reducer.Context).

Usage

From source file:org.sifarish.common.UtilityPredictor.java

License:Apache License

/**
 * Builds the "Rating predictor  MR" job from the tool configuration, runs it,
 * and waits for it to finish.
 *
 * @param args {@code args[0]} is handed to {@code FileInputFormat.addInputPaths};
 *             {@code args[1]} is used as the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("Rating predictor  MR");
    mrJob.setJarByClass(UtilityPredictor.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPaths(mrJob, args[0]);
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(UtilityPredictor.PredictionMapper.class);
    mrJob.setMapOutputKeyClass(TextInt.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(UtilityPredictor.PredictorReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    // NOTE(review): presumably both key on the item id only - confirm in their sources.
    mrJob.setPartitionerClass(ItemIdPartitioner.class);
    mrJob.setGroupingComparatorClass(ItemIdGroupComprator.class);

    Utility.setConfiguration(mrJob.getConfiguration());

    // The job-specific reducer count wins; otherwise fall back to "num.reducer" (default 1).
    int reducerCount = mrJob.getConfiguration().getInt("utp.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    }
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.sifarish.feature.DiffTypeSimilarity.java

License:Apache License

/**
 * Builds the "Dirfferent type entity similarity MR" job from the tool
 * configuration, runs it, and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("Dirfferent type entity similarity MR");
    mrJob.setJarByClass(DiffTypeSimilarity.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(DiffTypeSimilarity.SimilarityMapper.class);
    mrJob.setMapOutputKeyClass(LongWritable.class);
    mrJob.setMapOutputValueClass(Text.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(DiffTypeSimilarity.SimilarityReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    mrJob.setPartitionerClass(IdPairPartitioner.class);
    mrJob.setGroupingComparatorClass(IdPairGroupComprator.class);

    Utility.setConfiguration(mrJob.getConfiguration());

    // The job-specific reducer count wins; otherwise fall back to "num.reducer" (default 1).
    int reducerCount = mrJob.getConfiguration().getInt("dts.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    }
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.sifarish.feature.SameTypeSimilarity.java

License:Apache License

/**
 * Builds the "Same type entity similarity MR" job from the tool configuration,
 * runs it, and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("Same type entity similarity MR");
    mrJob.setJarByClass(SameTypeSimilarity.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(SameTypeSimilarity.SimilarityMapper.class);
    mrJob.setMapOutputKeyClass(TextIntInt.class);
    mrJob.setMapOutputValueClass(Text.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(SameTypeSimilarity.SimilarityReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    mrJob.setPartitionerClass(IdPairPartitioner.class);
    mrJob.setGroupingComparatorClass(IdPairGroupComprator.class);

    Utility.setConfiguration(mrJob.getConfiguration());

    // The job-specific reducer count wins; otherwise fall back to "num.reducer" (default 1).
    int reducerCount = mrJob.getConfiguration().getInt("sts.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    }
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.sifarish.feature.TopMatches.java

License:Apache License

/**
 * Builds the "Top n matches MR" job (mapper, combiner and reducer) from the
 * tool configuration, runs it, and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("Top n matches MR");
    mrJob.setJarByClass(TopMatches.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    // Map side: mapper, combiner and the intermediate key/value types.
    mrJob.setMapperClass(TopMatches.TopMatchesMapper.class);
    mrJob.setCombinerClass(TopMatches.TopMatchesCombiner.class);
    mrJob.setMapOutputKeyClass(Tuple.class);
    mrJob.setMapOutputValueClass(Text.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(TopMatches.TopMatchesReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    mrJob.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);
    mrJob.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    Utility.setConfiguration(mrJob.getConfiguration());

    // The job-specific reducer count wins; otherwise fall back to "num.reducer" (default 1).
    int reducerCount = mrJob.getConfiguration().getInt("tm.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    }
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.sifarish.social.PearsonCorrelator.java

License:Apache License

/**
 * Builds the "PearsonCorrelator  MR" job from the tool configuration, runs it,
 * and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("PearsonCorrelator  MR");
    mrJob.setJarByClass(PearsonCorrelator.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(PearsonCorrelator.PearsonMapper.class);
    mrJob.setMapOutputKeyClass(Tuple.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(PearsonCorrelator.PrearsonReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    mrJob.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);
    mrJob.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);

    Utility.setConfiguration(mrJob.getConfiguration());

    // The job-specific reducer count wins; otherwise fall back to "num.reducer" (default 1).
    int reducerCount = mrJob.getConfiguration().getInt("pec.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    }
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.sifarish.social.RatingPredictor.java

License:Apache License

/**
 * Builds the "Rating predictor  MR" job from the tool configuration, runs it,
 * and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("Rating predictor  MR");
    mrJob.setJarByClass(RatingPredictor.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(RatingPredictor.PredictionMapper.class);
    mrJob.setMapOutputKeyClass(TextInt.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(RatingPredictor.PredictorReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    // NOTE(review): presumably both key on the item id only - confirm in their sources.
    mrJob.setPartitionerClass(ItemIdPartitioner.class);
    mrJob.setGroupingComparatorClass(ItemIdGroupComprator.class);

    Utility.setConfiguration(mrJob.getConfiguration());

    // The job-specific reducer count wins; otherwise fall back to "num.reducer" (default 1).
    int reducerCount = mrJob.getConfiguration().getInt("rap.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    }
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.EngagementEventGenerator.java

License:Apache License

/**
 * Builds the "web log user engaement event generator  MR" job from the tool
 * configuration, runs it, and waits for it to finish. The mapper is reused
 * from {@code SessionExtractor}; the reducer is this class's own.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("web log user engaement event generator  MR");
    mrJob.setJarByClass(EngagementEventGenerator.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    Utility.setConfiguration(mrJob.getConfiguration(), "visitante");

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(SessionExtractor.SessionMapper.class);
    mrJob.setMapOutputKeyClass(TextLong.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(EngagementEventGenerator.SessionReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    // NOTE(review): presumably both key on the session id only - confirm in their sources.
    mrJob.setPartitionerClass(SessionIdPartitioner.class);
    mrJob.setGroupingComparatorClass(SessionIdGroupComprator.class);

    // Reducer count is read from "ee.num.reducer", defaulting to 1.
    final int reducerCount = mrJob.getConfiguration().getInt("ee.num.reducer", 1);
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.SessionExtractor.java

License:Apache License

/**
 * Builds the "web log session extraction  MR" job from the tool configuration,
 * runs it, and waits for it to finish.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("web log session extraction  MR");
    mrJob.setJarByClass(SessionExtractor.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    Utility.setConfiguration(mrJob.getConfiguration(), "visitante");

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(SessionExtractor.SessionMapper.class);
    mrJob.setMapOutputKeyClass(TextLong.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(SessionExtractor.SessionReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    // NOTE(review): presumably both key on the session id only - confirm in their sources.
    mrJob.setPartitionerClass(SessionIdPartitioner.class);
    mrJob.setGroupingComparatorClass(SessionIdGroupComprator.class);

    // Reducer count is read from "num.reducer", defaulting to 1.
    final int reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.SessionSummarizer.java

License:Apache License

/**
 * Builds the "web log session summarizer  MR" job from the tool configuration,
 * runs it, and waits for it to finish. The mapper is reused from
 * {@code SessionExtractor}; the reducer is this class's own.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("web log session summarizer  MR");
    mrJob.setJarByClass(SessionSummarizer.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    Utility.setConfiguration(mrJob.getConfiguration(), "visitante");

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(SessionExtractor.SessionMapper.class);
    mrJob.setMapOutputKeyClass(TextLong.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(SessionSummarizer.SessionReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    // NOTE(review): presumably both key on the session id only - confirm in their sources.
    mrJob.setPartitionerClass(SessionIdPartitioner.class);
    mrJob.setGroupingComparatorClass(SessionIdGroupComprator.class);

    // Reducer count is read from "num.reducer", defaulting to 1.
    final int reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.visitante.basic.UserSessionSummary.java

License:Apache License

/**
 * Builds the "web log user session summary  MR" job from the tool
 * configuration, runs it, and waits for it to finish. The partitioner and
 * grouping comparator are borrowed from {@code SessionExtractor}.
 *
 * @param args {@code args[0]} is the input path; {@code args[1]} is the output path
 * @return 0 when {@code waitForCompletion} returns true, 1 otherwise
 * @throws Exception propagated from job setup or execution
 */
@Override
public int run(String[] args) throws Exception {
    final Job mrJob = new Job(getConf());
    mrJob.setJobName("web log user session summary  MR");
    mrJob.setJarByClass(UserSessionSummary.class);

    // Input and output locations come straight from the command line.
    FileInputFormat.addInputPath(mrJob, new Path(args[0]));
    FileOutputFormat.setOutputPath(mrJob, new Path(args[1]));

    Utility.setConfiguration(mrJob.getConfiguration(), "visitante");

    // Map side: mapper and its intermediate key/value types.
    mrJob.setMapperClass(UserSessionSummary.SessionMapper.class);
    mrJob.setMapOutputKeyClass(TextLong.class);
    mrJob.setMapOutputValueClass(Tuple.class);

    // Reduce side: reducer and the final output key/value types.
    mrJob.setReducerClass(UserSessionSummary.SessionReducer.class);
    mrJob.setOutputKeyClass(NullWritable.class);
    mrJob.setOutputValueClass(Text.class);

    // Partitioner and grouping comparator are configured as a pair.
    // NOTE(review): presumably both key on the session id only - confirm in their sources.
    mrJob.setPartitionerClass(SessionExtractor.SessionIdPartitioner.class);
    mrJob.setGroupingComparatorClass(SessionExtractor.SessionIdGroupComprator.class);

    // Reducer count is read from "num.reducer", defaulting to 1.
    final int reducerCount = mrJob.getConfiguration().getInt("num.reducer", 1);
    mrJob.setNumReduceTasks(reducerCount);

    if (mrJob.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}