Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.
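
For orientation before the examples below, here is a minimal sketch of how a sort comparator is typically defined and registered. The TextLengthComparator class and the choice of Text keys are assumptions made for illustration; they do not come from any of the projects listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical comparator: sorts Text keys by length instead of the default lexicographic order.
public class TextLengthComparator extends WritableComparator {

    public TextLengthComparator() {
        // 'true' asks WritableComparator to create key instances, so the
        // object-based compare() below is used during the sort.
        super(Text.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        return Integer.compare(((Text) a).getLength(), ((Text) b).getLength());
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "sort-comparator-sketch");
        job.setMapOutputKeyClass(Text.class);
        // Must be called before the job is submitted; on a running job it throws IllegalStateException.
        job.setSortComparatorClass(TextLengthComparator.class);
    }
}

The examples that follow use the same call for three recurring purposes: a plain custom key ordering, a secondary sort (paired with setGroupingComparatorClass), and totally ordered output (paired with TotalOrderPartitioner).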

Usage

From source file:org.freeeed.mr.FreeEedMR.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // inventory dir holds all package (zip) files resulting from stage
    String projectFileName = args[0];
    String outputPath = args[1];
    LOGGER.info("Running Hadoop job");
    LOGGER.info("Input project file = " + projectFileName);
    LOGGER.info("Output path = " + outputPath);
    Stats.getInstance().setNumberMappers(projectFileName);
    ESIndex.getInstance().init();

    // Hadoop configuration class
    Configuration configuration = getConf();
    // No speculative execution! Do not process the same file twice
    configuration.set("mapred.reduce.tasks.speculative.execution", "false");
    // TODO even in local mode, the first argument should not be the inventory
    // but write a complete project file instead
    Project project = Project.getCurrentProject();
    if (project == null || project.isEmpty()) {
        // configure Hadoop input files
        System.out.println("Reading project file " + projectFileName);
        project = Project.loadFromFile(new File(projectFileName));
    }
    project.setProperty(ParameterProcessing.OUTPUT_DIR_HADOOP, outputPath);
    // send complete project information to all mappers and reducers
    configuration.set(ParameterProcessing.PROJECT, project.toString());

    Settings.load();
    configuration.set(ParameterProcessing.SETTINGS_STR, Settings.getSettings().toString());
    configuration.set(EmailProperties.PROPERTIES_FILE,
            Files.toString(new File(EmailProperties.PROPERTIES_FILE), Charset.defaultCharset()));
    Job job = new Job(configuration);
    job.setJarByClass(FreeEedMR.class);
    job.setJobName("FreeEedMR");

    // Hadoop processes key-value pairs
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(MapWritable.class);

    // set map and reduce classes
    job.setMapperClass(FreeEedMapper.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setNumReduceTasks(0);
    // secondary sort for compound keys - this sorts the attachments
    job.setSortComparatorClass(KeyComparator.class);
    job.setGroupingComparatorClass(GroupComparator.class);

    // Hadoop TextInputFormat class
    //        job.setInputFormatClass(TextInputFormat.class);
    //        job.setOutputFormatClass(TextOutputFormat.class);

    LOGGER.debug("project.isEnvHadoop() = {} ", project.isEnvHadoop());
    String inputPath = projectFileName;
    if (project.isEnvHadoop() || Settings.getSettings().isHadoopDebug()) {
        inputPath = formInputPath(project);
    }

    LOGGER.debug("Ready to run, inputPath = {}, outputPath = {}", inputPath, outputPath);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (Settings.getSettings().isHadoopDebug()) {
        if (new File(outputPath).exists()) {
            Util.deleteDirectory(new File(outputPath));
        }
    }

    LOGGER.trace("Project");
    LOGGER.trace(project.toString());

    boolean success = job.waitForCompletion(true);

    ESIndex.getInstance().destroy();

    if (project.isEnvHadoop() && project.isFsS3()) {
        transferResultsToS3(outputPath);
    }

    return success ? 0 : 1;
}
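
The FreeEed job above pairs setSortComparatorClass with setGroupingComparatorClass as a secondary sort over compound keys: the sort comparator defines the full key ordering, while the grouping comparator decides which keys end up in the same reduce group. FreeEed's KeyComparator and GroupComparator are not reproduced on this page; the following is only a generic, hypothetical sketch of the pattern, assuming Text keys of the form "primary\tsecondary".

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Sketch of the secondary-sort comparator pair; not FreeEed's actual classes.
public class SecondarySortSketch {

    private static String primary(Text t) {
        return t.toString().split("\t", 2)[0];
    }

    private static String secondary(Text t) {
        String[] parts = t.toString().split("\t", 2);
        return parts.length > 1 ? parts[1] : "";
    }

    // Sort comparator: orders by the primary field, then by the secondary field.
    public static class SortComparator extends WritableComparator {
        public SortComparator() {
            super(Text.class, true);
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            Text x = (Text) a, y = (Text) b;
            int cmp = primary(x).compareTo(primary(y));
            return cmp != 0 ? cmp : secondary(x).compareTo(secondary(y));
        }
    }

    // Grouping comparator: compares only the primary field, so all records sharing it
    // arrive in a single reduce() call, already ordered by the secondary field.
    public static class GroupComparator extends WritableComparator {
        public GroupComparator() {
            super(Text.class, true);
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            return primary((Text) a).compareTo(primary((Text) b));
        }
    }
}

Wired onto a job, the pair mirrors the two calls above (the partitioner must also partition on the primary field for the grouping to be meaningful):

    job.setSortComparatorClass(SecondarySortSketch.SortComparator.class);
    job.setGroupingComparatorClass(SecondarySortSketch.GroupComparator.class);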

From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopSortingTest.java

License:Open Source License

/**
 * @throws Exception If failed.
 */
public void testSortSimple() throws Exception {
    // Generate test data.
    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Mapper.class);
    job.setNumReduceTasks(0);

    setupFileSystems(job.getConfiguration());

    FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_INPUT));

    X.printerrln("Data generation started.");

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1), createJobInfo(job.getConfiguration()))
            .get(180000);

    X.printerrln("Data generation complete.");

    // Run main map-reduce job.
    job = Job.getInstance();

    setupFileSystems(job.getConfiguration());

    job.getConfiguration().set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
            JavaSerialization.class.getName() + "," + WritableSerialization.class.getName());

    FileInputFormat.setInputPaths(job, new Path(ggfsScheme() + PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_OUTPUT));

    job.setSortComparatorClass(JavaSerializationComparator.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(2);

    job.setMapOutputKeyClass(UUID.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    X.printerrln("Job started.");

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(180000);

    X.printerrln("Job complete.");

    // Check result.
    Path outDir = new Path(ggfsScheme() + PATH_OUTPUT);

    AbstractFileSystem fs = AbstractFileSystem.get(new URI(ggfsScheme()), job.getConfiguration());

    for (FileStatus file : fs.listStatus(outDir)) {
        X.printerrln("__ file: " + file);

        if (file.getLen() == 0)
            continue;

        FSDataInputStream in = fs.open(file.getPath());

        Scanner sc = new Scanner(in);

        UUID prev = null;

        while (sc.hasNextLine()) {
            UUID next = UUID.fromString(sc.nextLine());

            //                X.printerrln("___ check: " + next);

            if (prev != null)
                assertTrue(prev.compareTo(next) < 0);

            prev = next;
        }
    }
}
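
The GridGain test above sorts UUID map-output keys, which are not Writable: it registers JavaSerialization alongside WritableSerialization and then supplies JavaSerializationComparator, which deserializes the keys and falls back on their natural Comparable ordering. Reduced to just those settings (lifted from the test; not a complete job definition), the combination is:

    Configuration conf = job.getConfiguration();
    // Allow java.io.Serializable key types such as UUID in addition to Writables.
    conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
            JavaSerialization.class.getName() + "," + WritableSerialization.class.getName());

    job.setMapOutputKeyClass(UUID.class);
    // Without this, the framework has no RawComparator for UUID and the sort would fail.
    job.setSortComparatorClass(JavaSerializationComparator.class);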

From source file:org.hedera.mapreduce.BuildDictionary.java

License:Apache License

/**
 * Runs this tool.
 */
@Override
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + BuildDictionary.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();

    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    Job job = setup(BuildDictionary.class.getSimpleName() + ":" + input, BuildDictionary.class, input, output,
            SequenceFileInputFormat.class, NullOutputFormat.class, Text.class, PairOfIntLong.class, Text.class,
            NullWritable.class, Mapper.class, MyReducer.class, 1);

    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    FileSystem.get(getConf()).delete(new Path(output), true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:org.imageterrier.indexers.hadoop.HadoopIndexer.java

License:Mozilla Public License

protected Job createJob(HadoopIndexerOptions options) throws IOException {
    final Job job = new Job(getConf());
    job.setJobName("terrierIndexing");

    if (options.getInputMode() == InputMode.QUANTISED_FEATURES) {
        job.setMapperClass(QFIndexerMapper.class);
    } else {
        if (options.shardPerThread) {
            job.setMapperClass(MultithreadedMapper.class);
            MultithreadedMapper.setMapperClass(job, MTImageIndexerMapper.class);
            MultithreadedMapper.setNumberOfThreads(job, options.getMultithread());
        } else {
            job.setMapperClass(ImageIndexerMapper.class);
        }
    }
    // Load quantiser (if it exists), extract header, count codebook size
    if (options.getInputModeOptions().hasQuantiserFile()) {
        final String quantFile = options.getInputModeOptions().getQuantiserFile();
        System.out.println("Loading codebook to see its size");
        final SpatialClusters<?> quantiser = readClusters(options);
        System.out.println("Setting codebook size: " + quantiser.numClusters());
        job.getConfiguration().setInt(QUANTISER_SIZE, quantiser.numClusters());
        if (quantiser.numClusters() < options.getNumReducers())
            options.setNumReducers(quantiser.numClusters());
    }
    job.setReducerClass(IndexerReducer.class);

    FileOutputFormat.setOutputPath(job, options.getOutputPath());
    job.setMapOutputKeyClass(NewSplitEmittedTerm.class);
    job.setMapOutputValueClass(MapEmittedPostingList.class);
    job.getConfiguration().setBoolean("indexing.hadoop.multiple.indices", options.isDocumentPartitionMode());

    // if
    // (!job.getConfiguration().get("mapred.job.tracker").equals("local")) {
    // job.getConfiguration().set("mapred.map.output.compression.codec",
    // GzipCodec.class.getCanonicalName());
    // job.getConfiguration().setBoolean("mapred.compress.map.output",
    // true);
    // } else {
    job.getConfiguration().setBoolean("mapred.compress.map.output", false);
    // }

    job.setInputFormatClass(PositionAwareSequenceFileInputFormat.class); // important

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setSortComparatorClass(NewSplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    job.setGroupingComparatorClass(NewSplitEmittedTerm.SETRawComparatorTerm.class);

    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    SequenceFileInputFormat.setInputPaths(job, options.getInputPaths());

    job.setNumReduceTasks(options.getNumReducers());
    if (options.getNumReducers() > 1) {
        if (options.isDocumentPartitionMode()) {
            job.setPartitionerClass(NewSplitEmittedTerm.SETPartitioner.class);
        } else {
            // job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
            if (job.getConfiguration().getInt(QUANTISER_SIZE, -1) == -1) {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerHashedTerm.class);
            } else {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerCodebookAwareTerm.class);
            }

        }
    } else {
        // for JUnit tests, we seem to need to restore the original
        // partitioner class
        job.setPartitionerClass(HashPartitioner.class);
    }

    job.setJarByClass(this.getClass());

    return job;
}

From source file:org.qcri.pca.CompositeJob.java

/**
 * Computes XtX and YtX
 * 
 * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm
 * 
 * XtX = (X - Xm)' * (X - Xm)
 * YtX = (Y - Ym)' * (Y - Ym)
 * 
 * @param conf
 *          the configuration
 * @param matrixInputPath
 *          Y
 * @param inMemMatrixDir
 *          MEM, where X = Y * MEM
 * @param inMemMatrixNumRows
 *          MEM.rows
 * @param inMemMatrixNumCols
 *          MEM.cols
 * @param ymPath
 *          Ym
 * @param xmPath
 *          Xm
 * @param matrixOutputPath
 *          YtX
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, int inMemMatrixNumRows,
        int inMemMatrixNumCols, String ymPath, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    conf.set(YMPATH, ymPath);
    conf.set(XMPATH, xmPath);
    Path xtxOutputPath = getXtXPathBasedOnYm(new Path(ymPath));
    conf.set(XTXPATH, xtxOutputPath.toString());
    Job job = new Job(conf);
    job.setJobName("CompositeJob-" + matrixInputPath.getName());
    job.setJarByClass(CompositeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(CompositeWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setSortComparatorClass(CompositeWritable.class);
    job.setGroupingComparatorClass(CompositeWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exists, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exists, fail
    if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if samples path exists, fail
    if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
        if (this.conf.getDeleteTriplesSamplesPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples samples path does exist: " + this.conf.getTriplesSamplesPath());
            System.out.println("Select other path or use option -dst to overwrite");
            System.exit(-1);
        }
    }

    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    // Job to create a SequenceInputFormat
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    return jobOK;
}
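
In this second phase setSortComparatorClass is combined with TotalOrderPartitioner and InputSampler so the triples come out globally ordered: the sampled partition file assigns a key range to each reducer, and TripleSPOComparator orders keys within each reducer. Stripped down to that interaction (the sampler type and parameters below are placeholders, not the driver's actual IntervalSampler settings), the pattern is:

    job.setSortComparatorClass(TripleSPOComparator.class); // ordering within each reducer
    job.setPartitionerClass(TotalOrderPartitioner.class);  // key ranges across reducers

    // Sample the input to write the partition file of split points (placeholder sampler settings).
    InputSampler.writePartitionFile(job,
            new InputSampler.RandomSampler<TripleSPOWritable, NullWritable>(0.01, 1000));

    // Ship the partition file to the tasks, exactly as the driver above does.
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());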

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJobWithOneJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exists, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exists, fail
    if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryMapFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryReduceFile().toUri(), job.getConfiguration());

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}

From source file:ph.fingra.hadoop.mapred.parts.component.ComponentAppversionStatistic.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "component/componentappversion job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentAppversionStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentAppversionMapper.class);
    job.setReducerClass(ComponentAppversionReducer.class);

    job.setMapOutputKeyClass(ComponentAppversionKey.class);
    job.setMapOutputValueClass(ComponentAppversionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentAppversionPartitioner.class);
    job.setSortComparatorClass(ComponentAppversionSortComparator.class);
    job.setGroupingComparatorClass(ComponentAppversionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.component.ComponentCountryStatistic.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "component/componentcountry job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentCountryStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentCountryMapper.class);
    job.setReducerClass(ComponentCountryReducer.class);

    job.setMapOutputKeyClass(ComponentCountryKey.class);
    job.setMapOutputValueClass(ComponentCountryEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentCountryPartitioner.class);
    job.setSortComparatorClass(ComponentCountrySortComparator.class);
    job.setGroupingComparatorClass(ComponentCountryGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}