Example usage for org.apache.hadoop.mapreduce Job setSortComparatorClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setSortComparatorClass.

Prototype

public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException 

Document

Define the comparator that controls how the keys are sorted before they are passed to the Reducer.
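
For orientation before the examples below, here is a minimal sketch of how a sort comparator is typically defined and registered. The TextLengthComparator class and the choice of Text keys are assumptions made for illustration; they do not come from any of the projects listed on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;

// Hypothetical comparator: sorts Text keys by length instead of the default lexicographic order.
public class TextLengthComparator extends WritableComparator {

    public TextLengthComparator() {
        // 'true' asks WritableComparator to create key instances, so the
        // object-based compare() below is used during the sort.
        super(Text.class, true);
    }

    @Override
    @SuppressWarnings("rawtypes")
    public int compare(WritableComparable a, WritableComparable b) {
        return Integer.compare(((Text) a).getLength(), ((Text) b).getLength());
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "sort-comparator-sketch");
        job.setMapOutputKeyClass(Text.class);
        // Must be called before the job is submitted; on a running job it throws IllegalStateException.
        job.setSortComparatorClass(TextLengthComparator.class);
    }
}

The examples that follow use the same call for three recurring purposes: a plain custom key ordering, a secondary sort (paired with setGroupingComparatorClass), and totally ordered output (paired with TotalOrderPartitioner).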

Usage

From source file:org.freeeed.mr.FreeEedMR.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    // inventory dir holds all package (zip) files resulting from stage
    String projectFileName = args[0];
    String outputPath = args[1];
    LOGGER.info("Running Hadoop job");
    LOGGER.info("Input project file = " + projectFileName);
    LOGGER.info("Output path = " + outputPath);
    Stats.getInstance().setNumberMappers(projectFileName);
    ESIndex.getInstance().init();

    // Hadoop configuration class
    Configuration configuration = getConf();
    // No speculative execution! Do not process the same file twice
    configuration.set("mapred.reduce.tasks.speculative.execution", "false");
    // TODO even in local mode, the first argument should not be the inventory
    // but write a complete project file instead
    Project project = Project.getCurrentProject();
    if (project == null || project.isEmpty()) {
        // configure Hadoop input files
        System.out.println("Reading project file " + projectFileName);
        project = Project.loadFromFile(new File(projectFileName));
    }
    project.setProperty(ParameterProcessing.OUTPUT_DIR_HADOOP, outputPath);
    // send complete project information to all mappers and reducers
    configuration.set(ParameterProcessing.PROJECT, project.toString());

    Settings.load();
    configuration.set(ParameterProcessing.SETTINGS_STR, Settings.getSettings().toString());
    configuration.set(EmailProperties.PROPERTIES_FILE,
            Files.toString(new File(EmailProperties.PROPERTIES_FILE), Charset.defaultCharset()));
    Job job = new Job(configuration);
    job.setJarByClass(FreeEedMR.class);
    job.setJobName("FreeEedMR");

    // Hadoop processes key-value pairs
    //        job.setOutputKeyClass(Text.class);
    //        job.setOutputValueClass(MapWritable.class);

    // set map and reduce classes
    job.setMapperClass(FreeEedMapper.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setNumReduceTasks(0);
    // secondary sort for compound keys - this sorts the attachments
    job.setSortComparatorClass(KeyComparator.class);
    job.setGroupingComparatorClass(GroupComparator.class);

    // Hadoop TextInputFormat class
    //        job.setInputFormatClass(TextInputFormat.class);
    //        job.setOutputFormatClass(TextOutputFormat.class);

    LOGGER.debug("project.isEnvHadoop() = {} ", project.isEnvHadoop());
    String inputPath = projectFileName;
    if (project.isEnvHadoop() || Settings.getSettings().isHadoopDebug()) {
        inputPath = formInputPath(project);
    }

    LOGGER.debug("Ready to run, inputPath = {}, outputPath = {}", inputPath, outputPath);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if (Settings.getSettings().isHadoopDebug()) {
        if (new File(outputPath).exists()) {
            Util.deleteDirectory(new File(outputPath));
        }
    }

    LOGGER.trace("Project");
    LOGGER.trace(project.toString());

    boolean success = job.waitForCompletion(true);

    ESIndex.getInstance().destroy();

    if (project.isEnvHadoop() && project.isFsS3()) {
        transferResultsToS3(outputPath);
    }

    return success ? 0 : 1;
}
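
The FreeEed job above pairs setSortComparatorClass with setGroupingComparatorClass as a secondary sort over compound keys: the sort comparator defines the full key ordering, while the grouping comparator decides which keys end up in the same reduce group. FreeEed's KeyComparator and GroupComparator are not reproduced on this page; the following is only a generic, hypothetical sketch of the pattern, assuming Text keys of the form "primary\tsecondary".

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Sketch of the secondary-sort comparator pair; not FreeEed's actual classes.
public class SecondarySortSketch {

    private static String primary(Text t) {
        return t.toString().split("\t", 2)[0];
    }

    private static String secondary(Text t) {
        String[] parts = t.toString().split("\t", 2);
        return parts.length > 1 ? parts[1] : "";
    }

    // Sort comparator: orders by the primary field, then by the secondary field.
    public static class SortComparator extends WritableComparator {
        public SortComparator() {
            super(Text.class, true);
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            Text x = (Text) a, y = (Text) b;
            int cmp = primary(x).compareTo(primary(y));
            return cmp != 0 ? cmp : secondary(x).compareTo(secondary(y));
        }
    }

    // Grouping comparator: compares only the primary field, so all records sharing it
    // arrive in a single reduce() call, already ordered by the secondary field.
    public static class GroupComparator extends WritableComparator {
        public GroupComparator() {
            super(Text.class, true);
        }

        @Override
        @SuppressWarnings("rawtypes")
        public int compare(WritableComparable a, WritableComparable b) {
            return primary((Text) a).compareTo(primary((Text) b));
        }
    }
}

Wired onto a job, the pair mirrors the two calls above (the partitioner must also partition on the primary field for the grouping to be meaningful):

    job.setSortComparatorClass(SecondarySortSketch.SortComparator.class);
    job.setGroupingComparatorClass(SecondarySortSketch.GroupComparator.class);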

From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopSortingTest.java

License:Open Source License

/**
 * @throws Exception If failed.
 */
public void testSortSimple() throws Exception {
    // Generate test data.
    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMapperClass(Mapper.class);
    job.setNumReduceTasks(0);

    setupFileSystems(job.getConfiguration());

    FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_INPUT));

    X.printerrln("Data generation started.");

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1), createJobInfo(job.getConfiguration()))
            .get(180000);

    X.printerrln("Data generation complete.");

    // Run main map-reduce job.
    job = Job.getInstance();

    setupFileSystems(job.getConfiguration());

    job.getConfiguration().set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
            JavaSerialization.class.getName() + "," + WritableSerialization.class.getName());

    FileInputFormat.setInputPaths(job, new Path(ggfsScheme() + PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_OUTPUT));

    job.setSortComparatorClass(JavaSerializationComparator.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(2);

    job.setMapOutputKeyClass(UUID.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    X.printerrln("Job started.");

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(180000);

    X.printerrln("Job complete.");

    // Check result.
    Path outDir = new Path(ggfsScheme() + PATH_OUTPUT);

    AbstractFileSystem fs = AbstractFileSystem.get(new URI(ggfsScheme()), job.getConfiguration());

    for (FileStatus file : fs.listStatus(outDir)) {
        X.printerrln("__ file: " + file);

        if (file.getLen() == 0)
            continue;

        FSDataInputStream in = fs.open(file.getPath());

        Scanner sc = new Scanner(in);

        UUID prev = null;

        while (sc.hasNextLine()) {
            UUID next = UUID.fromString(sc.nextLine());

            //                X.printerrln("___ check: " + next);

            if (prev != null)
                assertTrue(prev.compareTo(next) < 0);

            prev = next;
        }
    }
}
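
The GridGain test above sorts UUID map-output keys, which are not Writable: it registers JavaSerialization alongside WritableSerialization and then supplies JavaSerializationComparator, which deserializes the keys and falls back on their natural Comparable ordering. Reduced to just those settings (lifted from the test; not a complete job definition), the combination is:

    Configuration conf = job.getConfiguration();
    // Allow java.io.Serializable key types such as UUID in addition to Writables.
    conf.set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
            JavaSerialization.class.getName() + "," + WritableSerialization.class.getName());

    job.setMapOutputKeyClass(UUID.class);
    // Without this, the framework has no RawComparator for UUID and the sort would fail.
    job.setSortComparatorClass(JavaSerializationComparator.class);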

From source file:org.hedera.mapreduce.BuildDictionary.java

License:Apache License

/**
 * Runs this tool.
 */
@Override
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + BuildDictionary.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();

    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    Job job = setup(BuildDictionary.class.getSimpleName() + ":" + input, BuildDictionary.class, input, output,
            SequenceFileInputFormat.class, NullOutputFormat.class, Text.class, PairOfIntLong.class, Text.class,
            NullWritable.class, Mapper.class, MyReducer.class, 1);

    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    FileSystem.get(getConf()).delete(new Path(output), true);
    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:org.imageterrier.indexers.hadoop.HadoopIndexer.java

License:Mozilla Public License

protected Job createJob(HadoopIndexerOptions options) throws IOException {
    final Job job = new Job(getConf());
    job.setJobName("terrierIndexing");

    if (options.getInputMode() == InputMode.QUANTISED_FEATURES) {
        job.setMapperClass(QFIndexerMapper.class);
    } else {
        if (options.shardPerThread) {
            job.setMapperClass(MultithreadedMapper.class);
            MultithreadedMapper.setMapperClass(job, MTImageIndexerMapper.class);
            MultithreadedMapper.setNumberOfThreads(job, options.getMultithread());
        } else {
            job.setMapperClass(ImageIndexerMapper.class);
        }
    }
    // Load quantiser (if it exists), extract header, count codebook size
    if (options.getInputModeOptions().hasQuantiserFile()) {
        final String quantFile = options.getInputModeOptions().getQuantiserFile();
        System.out.println("Loading codebook to see its size");
        final SpatialClusters<?> quantiser = readClusters(options);
        System.out.println("Setting codebook size: " + quantiser.numClusters());
        job.getConfiguration().setInt(QUANTISER_SIZE, quantiser.numClusters());
        if (quantiser.numClusters() < options.getNumReducers())
            options.setNumReducers(quantiser.numClusters());
    }
    job.setReducerClass(IndexerReducer.class);

    FileOutputFormat.setOutputPath(job, options.getOutputPath());
    job.setMapOutputKeyClass(NewSplitEmittedTerm.class);
    job.setMapOutputValueClass(MapEmittedPostingList.class);
    job.getConfiguration().setBoolean("indexing.hadoop.multiple.indices", options.isDocumentPartitionMode());

    // if
    // (!job.getConfiguration().get("mapred.job.tracker").equals("local")) {
    // job.getConfiguration().set("mapred.map.output.compression.codec",
    // GzipCodec.class.getCanonicalName());
    // job.getConfiguration().setBoolean("mapred.compress.map.output",
    // true);
    // } else {
    job.getConfiguration().setBoolean("mapred.compress.map.output", false);
    // }

    job.setInputFormatClass(PositionAwareSequenceFileInputFormat.class); // important

    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setSortComparatorClass(NewSplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    job.setGroupingComparatorClass(NewSplitEmittedTerm.SETRawComparatorTerm.class);

    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    SequenceFileInputFormat.setInputPaths(job, options.getInputPaths());

    job.setNumReduceTasks(options.getNumReducers());
    if (options.getNumReducers() > 1) {
        if (options.isDocumentPartitionMode()) {
            job.setPartitionerClass(NewSplitEmittedTerm.SETPartitioner.class);
        } else {
            // job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
            if (job.getConfiguration().getInt(QUANTISER_SIZE, -1) == -1) {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerHashedTerm.class);
            } else {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerCodebookAwareTerm.class);
            }

        }
    } else {
        // for JUnit tests, we seem to need to restore the original
        // partitioner class
        job.setPartitionerClass(HashPartitioner.class);
    }

    job.setJarByClass(this.getClass());

    return job;
}

From source file:org.qcri.pca.CompositeJob.java

/**
 * Computes XtX and YtX
 * 
 * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm
 * 
 * XtX = (X - Xm)' * (X - Xm)
 * YtX = (Y - Ym)' * (Y - Ym)
 * 
 * @param conf
 *          the configuration
 * @param matrixInputPath
 *          Y
 * @param inMemMatrixDir
 *          MEM, where X = Y * MEM
 * @param inMemMatrixNumRows
 *          MEM.rows
 * @param inMemMatrixNumCols
 *          MEM.cols
 * @param ymPath
 *          Ym
 * @param xmPath
 *          Xm
 * @param matrixOutputPath
 *          YtX
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, int inMemMatrixNumRows,
        int inMemMatrixNumCols, String ymPath, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    conf.set(YMPATH, ymPath);
    conf.set(XMPATH, xmPath);
    Path xtxOutputPath = getXtXPathBasedOnYm(new Path(ymPath));
    conf.set(XTXPATH, xtxOutputPath.toString());
    Job job = new Job(conf);
    job.setJobName("CompositeJob-" + matrixInputPath.getName());
    job.setJarByClass(CompositeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(CompositeWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setSortComparatorClass(CompositeWritable.class);
    job.setGroupingComparatorClass(CompositeWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exists, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exists, fail
    if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if samples path exists, fail
    if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
        if (this.conf.getDeleteTriplesSamplesPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples samples path does exist: " + this.conf.getTriplesSamplesPath());
            System.out.println("Select other path or use option -dst to overwrite");
            System.exit(-1);
        }
    }

    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    // Job to create a SequenceInputFormat
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    return jobOK;
}
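
In this second phase setSortComparatorClass is combined with TotalOrderPartitioner and InputSampler so the triples come out globally ordered: the sampled partition file assigns a key range to each reducer, and TripleSPOComparator orders keys within each reducer. Stripped down to that interaction (the sampler type and parameters below are placeholders, not the driver's actual IntervalSampler settings), the pattern is:

    job.setSortComparatorClass(TripleSPOComparator.class); // ordering within each reducer
    job.setPartitionerClass(TotalOrderPartitioner.class);  // key ranges across reducers

    // Sample the input to write the partition file of split points (placeholder sampler settings).
    InputSampler.writePartitionFile(job,
            new InputSampler.RandomSampler<TripleSPOWritable, NullWritable>(0.01, 1000));

    // Ship the partition file to the tasks, exactly as the driver above does.
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());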

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJobWithOneJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exists, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exists, fail
    if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryMapFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryReduceFile().toUri(), job.getConfiguration());

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}

From source file:ph.fingra.hadoop.mapred.parts.component.ComponentAppversionStatistic.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "component/componentappversion job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentAppversionStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentAppversionMapper.class);
    job.setReducerClass(ComponentAppversionReducer.class);

    job.setMapOutputKeyClass(ComponentAppversionKey.class);
    job.setMapOutputValueClass(ComponentAppversionEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentAppversionPartitioner.class);
    job.setSortComparatorClass(ComponentAppversionSortComparator.class);
    job.setGroupingComparatorClass(ComponentAppversionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}

From source file:ph.fingra.hadoop.mapred.parts.component.ComponentCountryStatistic.java

License:Apache License

public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "component/componentcountry job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentCountryStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentCountryMapper.class);
    job.setReducerClass(ComponentCountryReducer.class);

    job.setMapOutputKeyClass(ComponentCountryKey.class);
    job.setMapOutputValueClass(ComponentCountryEntity.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentCountryPartitioner.class);
    job.setSortComparatorClass(ComponentCountrySortComparator.class);
    job.setGroupingComparatorClass(ComponentCountryGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}