Example usage for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

List of usage examples for org.apache.hadoop.mapreduce Job setGroupingComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job setGroupingComparatorClass.

Prototype

public void setGroupingComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Source Link

Document

Define the comparator that controls which keys are grouped together for a single call to Reducer#reduce(Object,Iterable,org.apache.hadoop.mapreduce.Reducer.Context)

Usage

From source file:nl.sanoma.hdt.report.generator.ReportGeneratorDriver.java

License:Open Source License

/**
 * Job to join the data and the metadata from distributed cache and
 * calculate the revenue by quarter and most popular product category for user
 *
 * @param dBPath the path of the import MapFile
 * @param inputPath the path of the logs directory
 * @param outputPath the path of the output directory
 * @return returns the exitCode of the job
 * @throws IOException/*from  w  ww .  j a v  a 2  s .  c o m*/
 * @throws URISyntaxException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public Boolean generateReport(String dBPath, String inputPath, String outputPath)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(getConf());
    Configuration conf = job.getConfiguration();

    job.setJobName("Repor Generator");
    DistributedCache.addCacheFile(new URI(dBPath), conf);
    job.setJarByClass(ReportGeneratorDriver.class);
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setPartitionerClass(KeyDataPartitioner.class);
    job.setGroupingComparatorClass(KeyDataGroupingComparator.class);
    job.setSortComparatorClass(KeyDataComparator.class);
    job.setMapperClass(ReportGeneratorMapper.class);
    job.setMapOutputKeyClass(KeyData.class);
    job.setMapOutputValueClass(ValueData.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReportGeneratorReducer.class);
    job.setNumReduceTasks(1);

    return job.waitForCompletion(true);
}

From source file:org.apache.avro.mapreduce.AvroJob.java

License:Apache License

/**
 * Sets the map output key schema./*  w ww .j  av  a 2 s . c  o  m*/
 *
 * @param job The job to configure.
 * @param schema The map output key schema.
 */
public static void setMapOutputKeySchema(Job job, Schema schema) {
    job.setMapOutputKeyClass(AvroKey.class);
    job.setGroupingComparatorClass(AvroKeyComparator.class);
    job.setSortComparatorClass(AvroKeyComparator.class);
    AvroSerialization.setKeyWriterSchema(job.getConfiguration(), schema);
    AvroSerialization.setKeyReaderSchema(job.getConfiguration(), schema);
    AvroSerialization.addToConfiguration(job.getConfiguration());
}

From source file:org.apache.blur.mapreduce.lib.update.Driver.java

License:Apache License

/**
 * Runs the row-update job for a Blur table.
 *
 * <p>Files found under the {@code NEW} directory of the working path are moved
 * to {@code INPROGRESS}, a snapshot of the table is created, and a job is run
 * over the snapshot plus the in-progress files. Afterwards the in-progress files
 * are moved to {@code COMPLETE} on success or back to {@code NEW} on failure,
 * and the snapshot is removed.
 *
 * @param args {@code <table> <mr inc working path> <output path> <zk connection>
 *        <reducer multipler>} followed by any number of extra configuration files
 *        that are added as resources to the configuration
 * @return {@code 0} if there was nothing to process or the job succeeded;
 *         {@code 1} if too few arguments were given or the job failed
 * @throws Exception propagated from file system, Blur client or job operations
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        System.err.println(
                "Usage Driver <table> <mr inc working path> <output path> <zk connection> <reducer multipler> <extra config files...>");
        // Stop here: continuing would read past the end of args and fail with
        // an ArrayIndexOutOfBoundsException instead of a clean usage error.
        return 1;
    }
    int c = 0;
    String table = args[c++];
    String mrIncWorkingPathStr = args[c++];
    String outputPathStr = args[c++];
    String blurZkConnection = args[c++];
    int reducerMultipler = Integer.parseInt(args[c++]);
    // Every remaining argument is an extra configuration file.
    for (; c < args.length; c++) {
        String externalConfigFileToAdd = args[c];
        getConf().addResource(new Path(externalConfigFileToAdd));
    }

    Path outputPath = new Path(outputPathStr);
    Path mrIncWorkingPath = new Path(mrIncWorkingPathStr);
    FileSystem fileSystem = mrIncWorkingPath.getFileSystem(getConf());

    Path newData = new Path(mrIncWorkingPath, NEW);
    Path inprogressData = new Path(mrIncWorkingPath, INPROGRESS);
    Path completeData = new Path(mrIncWorkingPath, COMPLETE);
    Path fileCache = new Path(mrIncWorkingPath, CACHE);

    fileSystem.mkdirs(newData);
    fileSystem.mkdirs(inprogressData);
    fileSystem.mkdirs(completeData);
    fileSystem.mkdirs(fileCache);

    List<Path> srcPathList = new ArrayList<Path>();
    for (FileStatus fileStatus : fileSystem.listStatus(newData)) {
        srcPathList.add(fileStatus.getPath());
    }
    // Nothing new to index.
    if (srcPathList.isEmpty()) {
        return 0;
    }

    List<Path> inprogressPathList = new ArrayList<Path>();
    boolean success = false;
    Iface client = null;
    try {
        inprogressPathList = movePathList(fileSystem, inprogressData, srcPathList);

        Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
        client = BlurClient.getClientFromZooKeeperConnectionStr(blurZkConnection);
        waitForOtherSnapshotsToBeRemoved(client, table, MRUPDATE_SNAPSHOT);
        client.createSnapshot(table, MRUPDATE_SNAPSHOT);
        TableDescriptor descriptor = client.describe(table);
        Path tablePath = new Path(descriptor.getTableUri());

        // Two inputs: the existing table data (via the snapshot) and the new files.
        BlurInputFormat.setLocalCachePath(job, fileCache);
        BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
        MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, MapperForExistingData.class);
        for (Path p : inprogressPathList) {
            FileInputFormat.addInputPath(job, p);
            MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, MapperForNewData.class);
        }

        BlurOutputFormat.setOutputPath(job, outputPath);
        BlurOutputFormat.setupJob(job, descriptor);

        job.setReducerClass(UpdateReducer.class);
        job.setMapOutputKeyClass(IndexKey.class);
        job.setMapOutputValueClass(IndexValue.class);
        job.setPartitionerClass(IndexKeyPartitioner.class);
        job.setGroupingComparatorClass(IndexKeyWritableComparator.class);

        BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);

        success = job.waitForCompletion(true);
        Counters counters = job.getCounters();
        LOG.info("Counters [" + counters + "]");

    } finally {
        // Move the in-progress files on according to the outcome; an exception
        // anywhere above leaves success == false and returns them to NEW.
        if (success) {
            LOG.info("Indexing job succeeded!");
            movePathList(fileSystem, completeData, inprogressPathList);
        } else {
            LOG.error("Indexing job failed!");
            movePathList(fileSystem, newData, inprogressPathList);
        }
        if (client != null) {
            client.removeSnapshot(table, MRUPDATE_SNAPSHOT);
        }
    }

    return success ? 0 : 1;
}

From source file:org.apache.crunch.GroupingOptions.java

License:Apache License

/**
 * Applies these grouping options to the given job.
 *
 * <p>The sort comparator, grouping comparator and partitioner are set only when
 * the corresponding field is non-null, and the reducer count only when
 * {@code numReducers} is positive. Every entry of {@code extraConf} is then
 * copied into the job configuration.
 *
 * @param job the job to configure
 */
public void configure(Job job) {
    if (sortComparatorClass != null) {
        job.setSortComparatorClass(sortComparatorClass);
    }
    if (groupingComparatorClass != null) {
        job.setGroupingComparatorClass(groupingComparatorClass);
    }
    if (partitionerClass != null) {
        job.setPartitionerClass(partitionerClass);
    }
    if (numReducers > 0) {
        job.setNumReduceTasks(numReducers);
    }
    // Copied last, after the explicit settings above.
    for (Map.Entry<String, String> setting : extraConf.entrySet()) {
        final String key = setting.getKey();
        final String value = setting.getValue();
        job.getConfiguration().set(key, value);
    }
}

From source file:org.apache.druid.indexer.SortableBytes.java

License:Apache License

/**
 * Configures the job to use {@code BytesWritable} map output keys, with the
 * SortableBytes sorting and grouping comparators and the given partitioner.
 *
 * @param job the job to configure
 * @param partitionerClass the partitioner class to install on the job
 */
public static void useSortableBytesAsMapOutputKey(Job job, Class<? extends Partitioner> partitionerClass) {
    job.setPartitionerClass(partitionerClass);
    job.setSortComparatorClass(SortableBytesSortingComparator.class);
    job.setGroupingComparatorClass(SortableBytesGroupingComparator.class);
    job.setMapOutputKeyClass(BytesWritable.class);
}

From source file:org.apache.gobblin.compaction.mapreduce.CompactionOrcJobConfigurator.java

License:Apache License

/**
 * Sets up the map side of the job: input format, mapper, the ORC map output
 * key/value classes, and {@code OrcKeyComparator} for both sorting and grouping.
 *
 * @param job the job to configure
 */
protected void configureMapper(Job job) {
    // Key ordering and grouping share the same comparator.
    job.setSortComparatorClass(OrcKeyComparator.class);
    job.setGroupingComparatorClass(OrcKeyComparator.class);

    // Intermediate record types.
    job.setMapOutputValueClass(OrcValue.class);
    job.setMapOutputKeyClass(OrcKey.class);

    // Input handling.
    job.setMapperClass(OrcValueMapper.class);
    job.setInputFormatClass(OrcValueCombineFileInputFormat.class);
}

From source file:org.apache.hadoop.examples.SecondarySort.java

License:Apache License

/**
 * Command-line entry point for the secondary sort example.
 *
 * <p>Expects exactly two arguments after generic option parsing, the input and
 * output paths; otherwise prints a usage line and exits with status 2. Exits
 * with status 0 when the job completes successfully and 1 otherwise.
 *
 * @param args command-line arguments
 * @throws Exception propagated from argument parsing, job setup or execution
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    final GenericOptionsParser parser = new GenericOptionsParser(configuration, args);
    final String[] paths = parser.getRemainingArgs();
    if (paths.length != 2) {
        System.err.println("Usage: secondarysort <in> <out>");
        System.exit(2);
    }

    final Job sortJob = Job.getInstance(configuration, "secondary sort");
    sortJob.setJarByClass(SecondarySort.class);
    sortJob.setMapperClass(MapClass.class);
    sortJob.setReducerClass(Reduce.class);

    // Map output records are (IntPair, IntWritable).
    sortJob.setMapOutputKeyClass(IntPair.class);
    sortJob.setMapOutputValueClass(IntWritable.class);

    // Partitioning and grouping both look only at the first int of the pair.
    sortJob.setPartitionerClass(FirstPartitioner.class);
    sortJob.setGroupingComparatorClass(FirstGroupingComparator.class);

    // Reduce output records are (Text, IntWritable).
    sortJob.setOutputKeyClass(Text.class);
    sortJob.setOutputValueClass(IntWritable.class);

    final Path input = new Path(paths[0]);
    final Path output = new Path(paths[1]);
    FileInputFormat.addInputPath(sortJob, input);
    FileOutputFormat.setOutputPath(sortJob, output);

    final boolean succeeded = sortJob.waitForCompletion(true);
    System.exit(succeeded ? 0 : 1);
}

From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopGroupingTest.java

License:Apache License

/**
 * Builds a job that groups keys with {@code YearComparator}, submits it through
 * the first grid node, waits for it, and asserts that {@code vals} is empty afterwards.
 *
 * @param combiner If {@code true}, grouping is configured on the combiner (with
 *      zero reduce tasks); otherwise on the reducer (with four reduce tasks).
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    final Job groupingJob = Job.getInstance();

    groupingJob.setMapperClass(Mapper.class);

    groupingJob.setInputFormatClass(InFormat.class);
    groupingJob.setOutputFormatClass(OutFormat.class);

    groupingJob.setOutputKeyClass(YearTemperature.class);
    groupingJob.setOutputValueClass(Text.class);

    if (!combiner) {
        // Grouping happens on the reduce side.
        groupingJob.setNumReduceTasks(4);
        groupingJob.setReducerClass(MyReducer.class);
        groupingJob.setGroupingComparatorClass(YearComparator.class);
    } else {
        // Grouping happens in the combiner only; no reduce tasks.
        groupingJob.setNumReduceTasks(0);
        groupingJob.setCombinerClass(MyReducer.class);
        groupingJob.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    }

    final GridHadoopJobId jobId = new GridHadoopJobId(UUID.randomUUID(), 2);
    grid(0).hadoop().submit(jobId, createJobInfo(groupingJob.getConfiguration())).get(30000);

    assertTrue(vals.isEmpty());
}

From source file:org.apache.ignite.internal.processors.hadoop.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner./*from  w  w w  .j  ava 2  s .  c om*/
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    vals.clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(vals.isEmpty());
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.HadoopGroupingTest.java

License:Apache License

/**
 * @param combiner With combiner./*from w w  w. j  a va  2s . c o  m*/
 * @throws Exception If failed.
 */
public void doTestGrouping(boolean combiner) throws Exception {
    HadoopGroupingTestState.values().clear();

    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputFormatClass(OutFormat.class);

    job.setOutputKeyClass(YearTemperature.class);
    job.setOutputValueClass(Text.class);

    job.setMapperClass(Mapper.class);

    if (combiner) {
        job.setCombinerClass(MyReducer.class);
        job.setNumReduceTasks(0);
        job.setCombinerKeyGroupingComparatorClass(YearComparator.class);
    } else {
        job.setReducerClass(MyReducer.class);
        job.setNumReduceTasks(4);
        job.setGroupingComparatorClass(YearComparator.class);
    }

    grid(0).hadoop().submit(new HadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(30000);

    assertTrue(HadoopGroupingTestState.values().isEmpty());
}