Example usage for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

List of usage examples for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setGroupingComparatorClass.

Prototype

public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Source Link

Document

Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce(Object,Iterable,org.apache.hadoop.mapreduce.Reducer.Context).

Usage

From source file:org.apache.rya.accumulo.mr.merge.CopyTool.java

License:Apache License

/**
 * Runs the copy as a single MapReduce job driven by a query-based ruleset.
 *
 * <p>Depending on {@code useCopyFileOutput}, the job either writes grouped rows to
 * per-table files (which are then copied from HDFS to the local file system) or
 * writes mutations directly to the child tables, which are created up front.
 *
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if setup, job configuration, or job execution fails
 */
private int runQueryCopy() throws Exception {
    log.info("Setting up Copy Tool with a query-based ruleset...");
    setup();
    if (!useCopyFileOutput) {
        createChildInstance(conf);
    }

    // Set up the configuration
    final AccumuloRdfConfiguration aconf = new AccumuloRdfConfiguration(conf);
    aconf.setBoolean(ConfigUtils.USE_MOCK_INSTANCE, mock);
    aconf.setTablePrefix(tablePrefix);
    aconf.setFlush(false);
    ConfigUtils.setIndexers(aconf);

    // Since we're copying at the statement-level, ignore any given list of tables and determine
    // which tables we might need to create based on which indexers are desired.
    final TablePrefixLayoutStrategy prefixStrategy = new TablePrefixLayoutStrategy(tablePrefix);
    tables.clear();
    // Always include core tables
    tables.add(prefixStrategy.getSpo());
    tables.add(prefixStrategy.getOsp());
    tables.add(prefixStrategy.getPo());
    // Copy namespaces if they exist
    tables.add(prefixStrategy.getNs());
    // Add tables associated with any configured indexers
    /* TODO: SEE RYA-160
    if (aconf.getBoolean(ConfigUtils.USE_FREETEXT, false)) {
    tables.add(ConfigUtils.getFreeTextDocTablename(conf));
    tables.add(ConfigUtils.getFreeTextTermTablename(conf));
    }
    if (aconf.getBoolean(ConfigUtils.USE_GEO, false)) {
    tables.add(ConfigUtils.getGeoTablename(conf));
    }
    if (aconf.getBoolean(ConfigUtils.USE_TEMPORAL, false)) {
    tables.add(ConfigUtils.getTemporalTableName(conf));
    }
    if (aconf.getBoolean(ConfigUtils.USE_ENTITY, false)) {
    tables.add(ConfigUtils.getEntityTableName(conf));
    }
    */
    // Ignore anything else, e.g. statistics -- must be recalculated for the child if desired

    // Extract the ruleset, and copy the namespace table directly
    final AccumuloQueryRuleset ruleset = new AccumuloQueryRuleset(aconf);
    ruleset.addTable(prefixStrategy.getNs());
    for (final String line : ruleset.toString().split("\n")) {
        log.info(line);
    }

    // Create a Job and configure its input and output
    final Job job = Job.getInstance(aconf);
    job.setJarByClass(this.getClass());
    setupMultiTableInputFormat(job, ruleset);
    setupAccumuloOutput(job, "");

    if (useCopyFileOutput) {
        // Configure job for file output
        job.setJobName("Ruleset-based export to file: " + tablePrefix + " -> " + localBaseOutputDir);
        // Map (row) to (table+key, key+value)
        job.setMapperClass(RowRuleMapper.class);
        job.setMapOutputKeyClass(GroupedRow.class);
        job.setMapOutputValueClass(GroupedRow.class);
        // Group according to table and sort according to key
        job.setGroupingComparatorClass(GroupedRow.GroupComparator.class);
        job.setSortComparatorClass(GroupedRow.SortComparator.class);
        // Reduce ([table+row], rows): output each row to the file for that table, in sorted order
        job.setReducerClass(MultipleFileReducer.class);
        job.setOutputKeyClass(Key.class);
        job.setOutputValueClass(Value.class);
    } else {
        // Configure job for table output
        job.setJobName("Ruleset-based copy: " + tablePrefix + " -> " + childTablePrefix);
        // Map (row): convert to statement, insert to child (for namespace table, output row directly)
        job.setMapperClass(AccumuloRyaRuleMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Mutation.class);
        job.setNumReduceTasks(0);
        // String.replaceFirst interprets its arguments as a regex and a replacement template,
        // so quote both prefixes to make sure they are substituted literally.
        final String literalParentPrefix = java.util.regex.Pattern.quote(tablePrefix);
        final String literalChildPrefix = java.util.regex.Matcher.quoteReplacement(childTablePrefix);
        // Create the child tables, so mappers don't try to do this in parallel
        for (final String parentTable : tables) {
            final String childTable = parentTable.replaceFirst(literalParentPrefix, literalChildPrefix);
            createTableIfNeeded(childTable);
        }
    }

    // Run the job and copy files to local filesystem if needed
    final Date beginTime = new Date();
    log.info("Job started: " + beginTime);
    final boolean success = job.waitForCompletion(true);
    if (success) {
        if (useCopyFileOutput) {
            log.info("Moving data from HDFS to the local file system");
            final Path baseOutputPath = new Path(baseOutputDir);
            for (final FileStatus status : FileSystem.get(conf).listStatus(baseOutputPath)) {
                // Only directories directly under the base output path are copied,
                // using the directory name as the table name.
                if (status.isDirectory()) {
                    final String tableName = status.getPath().getName();
                    final Path hdfsPath = getPath(baseOutputDir, tableName);
                    final Path localPath = getPath(localBaseOutputDir, tableName);
                    log.info("HDFS directory: " + hdfsPath.toString());
                    log.info("Local directory: " + localPath.toString());
                    copyHdfsToLocal(hdfsPath, localPath);
                }
            }
        }
        final Date endTime = new Date();
        log.info("Job finished: " + endTime);
        log.info("The job took " + (endTime.getTime() - beginTime.getTime()) / 1000 + " seconds.");
        return 0;
    } else {
        log.error("Job failed!!!");
        return 1;
    }
}

From source file:org.apache.tez.mapreduce.examples.SecondarySort.java

License:Apache License

/**
 * Configures and runs the "secondary sort" MapReduce job.
 *
 * @param args command-line arguments; after generic options are stripped, exactly two
 *             must remain: the input path and the output path
 * @return 2 if the remaining argument count is wrong, 0 if the job succeeds, 1 if it fails
 * @throws Exception if argument parsing, job setup, or job execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    // Use the static factory; the Job(Configuration, String) constructor is deprecated.
    Job job = Job.getInstance(conf, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.association.AssociationRuleMiner.java

License:Apache License

/**
 * Configures and runs the association rule mining MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Association rule mining from frequent item sets");
    job.setJarByClass(AssociationRuleMiner.class);

    // Input and output locations are taken directly from the arguments.
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // Processing classes
    job.setMapperClass(AssociationRuleMiner.RuleMinerMapper.class);
    job.setReducerClass(AssociationRuleMiner.RuleMinerReducer.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    // Map output types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // The job-specific reducer count wins; fall back to the generic key, then to 1.
    int reducerCount = job.getConfiguration().getInt("arm.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.avenir.knn.FeatureCondProbJoiner.java

License:Apache License

/**
 * Configures and runs the feature conditional probability joiner MapReduce job.
 *
 * @param args {@code args[0]} is passed to {@code FileInputFormat.addInputPaths},
 *             {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Training vector feature cond probability joiner  MR");
    job.setJarByClass(FeatureCondProbJoiner.class);

    // Input and output locations
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Processing classes
    job.setMapperClass(FeatureCondProbJoiner.JoinerMapper.class);
    job.setReducerClass(FeatureCondProbJoiner.JoinerReducer.class);

    // Map output types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // Reducer count is read after the configuration call above; defaults to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.avenir.knn.NearestNeighbor.java

License:Apache License

/**
 * Configures and runs the nearest neighbor MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("K nerest neighbor(KNN)  MR");
    job.setJarByClass(NearestNeighbor.class);

    // Input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Processing classes
    job.setMapperClass(NearestNeighbor.TopMatchesMapper.class);
    job.setReducerClass(NearestNeighbor.TopMatchesReducer.class);

    // Map output types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // Reducer count is read after the configuration call above; defaults to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.avenir.reinforce.RandomFirstGreedyBandit.java

License:Apache License

/**
 * Configures and runs the random-first greedy bandit MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Random first greedy  bandit problem");
    job.setJarByClass(RandomFirstGreedyBandit.class);

    // Input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // Processing classes
    job.setMapperClass(RandomFirstGreedyBandit.BanditMapper.class);
    job.setReducerClass(RandomFirstGreedyBandit.BanditReducer.class);

    // Map output types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TupleTextPartitioner.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Reducer count defaults to 1 when "num.reducer" is not configured.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.avenir.sequence.CandidateGenerationWithSelfJoin.java

License:Apache License

/**
 * Configures and runs the candidate sequence generation MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Generates k candidate sequence");
    job.setJarByClass(CandidateGenerationWithSelfJoin.class);

    // Input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration(), "avenir");

    // Processing classes
    job.setMapperClass(CandidateGenerationWithSelfJoin.CandidateGenerationMapper.class);
    job.setReducerClass(CandidateGenerationWithSelfJoin.CandidateGenerationReducer.class);

    // Map output types
    job.setMapOutputKeyClass(Tuple.class);
    job.setMapOutputValueClass(Tuple.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TuplePairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TuplePairPartitioner.class);

    // The job-specific reducer count wins; fall back to the generic key, then to 1.
    int reducerCount = job.getConfiguration().getInt("cgs.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:org.beymani.proximity.AverageDistance.java

License:Apache License

/**
 * Configures and runs the nearest neighbour statistics MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Nearest neighbour stat calculation  MR");
    job.setJarByClass(AverageDistance.class);

    // Input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Processing classes
    job.setMapperClass(AverageDistance.TopMatchesMapper.class);
    job.setReducerClass(AverageDistance.TopMatchesReducer.class);

    // Map output types
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Text.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(IdRankGroupComprator.class);
    job.setPartitionerClass(IdRankPartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // Reducer count is read after the configuration call above; defaults to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    final boolean succeeded = job.waitForCompletion(true);
    return succeeded ? 0 : 1;
}

From source file:org.beymani.proximity.NeighborDensity.java

License:Apache License

/**
 * Configures and runs the nearest neighbour density MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Nearest neighbour density");
    job.setJarByClass(NeighborDensity.class);

    // Input and output locations
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    // Processing classes
    job.setMapperClass(NeighborDensity.GroupingMapper.class);
    job.setReducerClass(NeighborDensity.GroupingReducer.class);

    // Map output types
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TextIntIdPairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TextIntIdPairTuplePartitioner.class);

    Utility.setConfiguration(job.getConfiguration());

    // Reducer count is read after the configuration call above; defaults to 1.
    final int reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    job.setNumReduceTasks(reducerCount);

    if (job.waitForCompletion(true)) {
        return 0;
    }
    return 1;
}

From source file:org.chombo.mr.Joiner.java

License:Apache License

/**
 * Configures and runs the joiner MapReduce job.
 *
 * @param args {@code args[0]} is passed to {@code FileInputFormat.addInputPaths},
 *             {@code args[1]} is the output path
 * @return 0 if the job completes successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    final Job job = new Job(getConf());
    job.setJobName("Joiner  MR");
    job.setJarByClass(Joiner.class);

    // Input and output locations
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    Utility.setConfiguration(job.getConfiguration());

    // Processing classes
    job.setMapperClass(Joiner.JoinerMapper.class);
    job.setReducerClass(Joiner.JoinerReducer.class);

    // Map output types
    job.setMapOutputKeyClass(TextInt.class);
    job.setMapOutputValueClass(Tuple.class);

    // Final output types
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);

    // Grouping and partitioning of the map output keys
    job.setGroupingComparatorClass(SecondarySort.TextIntIdPairGroupComprator.class);
    job.setPartitionerClass(SecondarySort.TextIntIdPairTuplePartitioner.class);

    // The job-specific reducer count wins; fall back to the generic key, then to 1.
    int reducerCount = job.getConfiguration().getInt("joi.num.reducer", -1);
    if (reducerCount == -1) {
        reducerCount = job.getConfiguration().getInt("num.reducer", 1);
    }
    job.setNumReduceTasks(reducerCount);

    return job.waitForCompletion(true) ? 0 : 1;
}