Example usage for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce Job setGroupingComparatorClass.

Prototype

public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException

Source Link

Document

Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce(Object,Iterable,org.apache.hadoop.mapreduce.Reducer.Context)

Usage

From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java

License:Open Source License

/**
 * Job to join the data and the metadata from distributed cache and
 * calculate the revenue by quarter and most popular product category for user
 *
 * @param dBPath the path of the import MapFile
 * @param inputPath the path of the logs directory
 * @param outputPath the path of the output directory
 * @return returns the exitCode of the job
 * @throws IOException/*from  w  ww .  j a v  a 2  s .  c o m*/
 * @throws URISyntaxException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean generateReport(String dBPath, String inputPath, String outputPath)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();

    job.setJobName("Repor Generator");
    DistributedCache.addCacheFile(new URI(dBPath), conf);
    job.setJarByClass(ReportGeneratorDriver.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setPartitionerClass(KeyDataPartitioner.class);
    job.setGroupingComparatorClass(KeyDataGroupingComparator.class);
    job.setSortComparatorClass(KeyDataComparator.class);
    job.setMapperClass(ReportGeneratorMapper.class);
    job.setMapOutputKeyClass(KeyData.class);
    job.setMapOutputValueClass(ValueData.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReportGeneratorReducer.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true);
}

From source file:org.apache.avro.mapreduce.AvroJob.java

License:Apache License

/**
 * Sets the map output key schema./*  w ww .j  av  a 2 s . c  o  m*/
 *
 * @param job The job to configure.
 * @param schema The map output key schema.
 */
public static void setMapOutputKeySchema(Job job, Schema schema) {
    job.setMapOutputKeyClass(AvroKey.class);
    job.setGroupingComparatorClass(AvroKeyComparator.class);
    job.setSortComparatorClass(AvroKeyComparator.class);
    AvroSerialization.setKeyWriterSchema(job.getConfiguration(), schema);
    AvroSerialization.setKeyReaderSchema(job.getConfiguration(), schema);
    AvroSerialization.addToConfiguration(job.getConfiguration());
}

From source file:org.apache.blur.mapreduce.lib.update.Driver.java

License:Apache License

/**
 * Configures and runs the "Blur Row Updater" MapReduce job for one table.
 *
 * <p>Expected arguments, in order: table, MR incremental working path, output path,
 * ZooKeeper connection string, reducer multiplier, followed by any number of extra
 * configuration files that are added as resources to the configuration.
 *
 * @param args the command-line arguments described above
 * @return {@code 0} when there is no new data to process or the job succeeded;
 *         {@code 1} when too few arguments were given or the job did not succeed
 * @throws Exception if setting up or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    int c = 0;
    if (args.length < 5) {
        System.err.println(
                "Usage Driver <table> <mr inc working path> <output path> <zk connection> <reducer multipler> <extra config files...>");
        // Stop here: reading the positional arguments below would otherwise fail
        // with an ArrayIndexOutOfBoundsException.
        return 1;
    }
    String table = args[c++];
    String mrIncWorkingPathStr = args[c++];
    String outputPathStr = args[c++];
    String blurZkConnection = args[c++];
    int reducerMultipler = Integer.parseInt(args[c++]);
    // Every remaining argument is an extra configuration file.
    for (; c < args.length; c++) {
        String externalConfigFileToAdd = args[c];
        getConf().addResource(new Path(externalConfigFileToAdd));
    }

    Path outputPath = new Path(outputPathStr);
    Path mrIncWorkingPath = new Path(mrIncWorkingPathStr);
    FileSystem fileSystem = mrIncWorkingPath.getFileSystem(getConf());

    // Working sub-directories under the incremental working path.
    Path newData = new Path(mrIncWorkingPath, NEW);
    Path inprogressData = new Path(mrIncWorkingPath, INPROGRESS);
    Path completeData = new Path(mrIncWorkingPath, COMPLETE);
    Path fileCache = new Path(mrIncWorkingPath, CACHE);

    fileSystem.mkdirs(newData);
    fileSystem.mkdirs(inprogressData);
    fileSystem.mkdirs(completeData);
    fileSystem.mkdirs(fileCache);

    List<Path> srcPathList = new ArrayList<Path>();
    for (FileStatus fileStatus : fileSystem.listStatus(newData)) {
        srcPathList.add(fileStatus.getPath());
    }
    if (srcPathList.isEmpty()) {
        // Nothing under the new-data directory: nothing to do.
        return 0;
    }

    List<Path> inprogressPathList = new ArrayList<Path>();
    boolean success = false;
    Iface client = null;
    try {
        inprogressPathList = movePathList(fileSystem, inprogressData, srcPathList);

        Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
        client = BlurClient.getClientFromZooKeeperConnectionStr(blurZkConnection);
        waitForOtherSnapshotsToBeRemoved(client, table, MRUPDATE_SNAPSHOT);
        client.createSnapshot(table, MRUPDATE_SNAPSHOT);
        TableDescriptor descriptor = client.describe(table);
        Path tablePath = new Path(descriptor.getTableUri());

        // Existing table data is read through the snapshot; new data comes from the
        // in-progress sequence files.
        BlurInputFormat.setLocalCachePath(job, fileCache);
        BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
        MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, MapperForExistingData.class);
        for (Path p : inprogressPathList) {
            FileInputFormat.addInputPath(job, p);
            MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, MapperForNewData.class);
        }

        BlurOutputFormat.setOutputPath(job, outputPath);
        BlurOutputFormat.setupJob(job, descriptor);

        job.setReducerClass(UpdateReducer.class);
        job.setMapOutputKeyClass(IndexKey.class);
        job.setMapOutputValueClass(IndexValue.class);
        job.setPartitionerClass(IndexKeyPartitioner.class);
        job.setGroupingComparatorClass(IndexKeyWritableComparator.class);

        BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);

        success = job.waitForCompletion(true);
        Counters counters = job.getCounters();
        LOG.info("Counters [" + counters + "]");

    } finally {
        try {
            if (success) {
                LOG.info("Indexing job succeeded!");
                movePathList(fileSystem, completeData, inprogressPathList);
            } else {
                LOG.error("Indexing job failed!");
                movePathList(fileSystem, newData, inprogressPathList);
            }
        } finally {
            // Nested so the snapshot removal is still attempted when the
            // movePathList call above throws.
            if (client != null) {
                client.removeSnapshot(table, MRUPDATE_SNAPSHOT);
            }
        }
    }

    return success ? 0 : 1;
}

From source file:org.apache.crunch.GroupingOptions.java

License:Apache License

/**
 * Applies these grouping options to the given job.
 *
 * <p>The partitioner, grouping comparator and sort comparator are set only when the
 * corresponding field is non-null, and the number of reduce tasks only when
 * {@code numReducers} is positive. Every entry of {@code extraConf} is then written
 * into the job's configuration.
 *
 * @param job the job to configure
 */
public void configure(Job job) {
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
    }
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }

    // Written last, after the setters above.
    for (Map.Entry<String, String> setting : extraConf.entrySet()) {
        final String key = setting.getKey();
        final String value = setting.getValue();
        job.getConfiguration().set(key, value);
    }
}

From source file:org.apache.druid.indexer.SortableBytes.java

License:Apache License

/**
 * Configures a job whose map output key is a {@code BytesWritable}.
 *
 * <p>Sets {@code SortableBytesSortingComparator} as the sort comparator,
 * {@code SortableBytesGroupingComparator} as the grouping comparator, and the
 * supplied class as the partitioner.
 *
 * @param job the job to configure
 * @param partitionerClass the partitioner class to set on the job
 */
public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    // Comparators first, then the key type they apply to.
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setMapOutputKeyClass(BytesWritable.class);

    // Caller-supplied partitioner is set last, as in the original ordering.
    job.setPartitionerClass(partitionerClass);
}

From source file:org.apache.gobblin.compaction.mapreduce.CompactionOrcJobConfigurator.java

License:Apache License

/**
 * Configures the map side of the job: input format, mapper, map output key and
 * value classes, and the comparator used for both sorting and grouping.
 *
 * @param job the job to configure
 */
protected void configureMapper(Job job) {
    // Map output types.
    job.setMapOutputValueClass(OrcValue.class);
    job.setMapOutputKeyClass(OrcKey.class);

    // The same comparator is used for sorting and for grouping.
    job.setSortComparatorClass(OrcKeyComparator.class);
    job.setGroupingComparatorClass(OrcKeyComparator.class);

    // Input format and mapper.
    job.setMapperClass(OrcValueMapper.class);
    job.setInputFormatClass(OrcValueCombineFileInputFormat.class);
}

From source file:org.apache.hadoop.examples.SecondarySort.java

License:Apache License

/**
 * Command-line entry point for the secondary sort example.
 *
 * <p>After generic options are parsed, exactly two arguments must remain: the input
 * path and the output path. Otherwise a usage message is printed and the process
 * exits with status 2. The process exits with 0 or 1 depending on the result of
 * {@code job.waitForCompletion(true)}.
 *
 * @param args command-line arguments
 * @throws Exception if configuring or running the job fails
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] paths = parser.getRemainingArgs();
    if (paths.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }

    final Job job = Job.getInstance(configuration, "secondary sort");
    job.setJarByClass(SecondarySort.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // Partitioning and grouping both look only at the first int of the pair.
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);

    // Map output: IntPair -> IntWritable.
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // Reduce output: Text -> IntWritable.
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    final Path input = new Path(paths[0]);
    FileInputFormat.addInputPath(job, input);
    final Path output = new Path(paths[1]);
    FileOutputFormat.setOutputPath(job, output);

    final boolean succeeded = job.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopGroupingTest.java

License:Apache License

/**
 * Submits a job that groups keys with {@code YearComparator}, either in a combiner
 * or in a reducer, and asserts that {@code vals} is empty once the job has finished.
 *
 * @param combiner With combiner.
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setMapperClass(Mapper.class);

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    if (!combiner) {
        // Grouping happens on the reduce side.
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    } else {
        // Grouping happens in the combiner; no reduce tasks are configured.
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    }

    // NOTE(review): 30000 is presumably a timeout in milliseconds - confirm.
    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner./*from  w  w w  .j  ava 2  s .  c om*/
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner./*from w w  w. j  a va  2s . c o  m*/
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    HadoopGroupingTestState.values().clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(HadoopGroupingTestState.values().isEmpty());
}