List of usage examples for org.apache.hadoop.mapreduce.Job#setSortComparatorClass
public void setSortComparatorClass(Class<? extends RawComparator> cls) throws IllegalStateException
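This method defines the RawComparator that controls how map output keys are sorted before they are passed to the reducer; it must be called before the job is submitted, otherwise IllegalStateException is thrown. It is frequently paired with setGroupingComparatorClass for secondary sort, as several of the examples below show. As a minimal, self-contained sketch (the ReverseSortDriver and ReverseTextComparator classes are hypothetical illustrations, not taken from any of the projects below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ReverseSortDriver {

    // Hypothetical comparator: sorts Text keys in reverse of their natural order.
    public static class ReverseTextComparator extends WritableComparator {
        protected ReverseTextComparator() {
            super(Text.class, true); // true: deserialize keys so compare() sees objects
        }

        @Override
        @SuppressWarnings({ "rawtypes", "unchecked" })
        public int compare(WritableComparable a, WritableComparable b) {
            return -a.compareTo(b); // invert the natural Text ordering
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "reverse sort");
        job.setJarByClass(ReverseSortDriver.class);

        // Identity mapper/reducer: the job only re-sorts its Text/Text input.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // Must be set before submission; afterwards it throws IllegalStateException.
        job.setSortComparatorClass(ReverseTextComparator.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Without this call, Hadoop falls back to the comparator registered for the key class (or its natural WritableComparable order), so a custom class is only needed when that order is not the one the reducer should see.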
From source file:org.freeeed.mr.FreeEedMR.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    // inventory dir holds all package (zip) files resulting from stage
    String projectFileName = args[0];
    String outputPath = args[1];
    LOGGER.info("Running Hadoop job");
    LOGGER.info("Input project file = " + projectFileName);
    LOGGER.info("Output path = " + outputPath);
    Stats.getInstance().setNumberMappers(projectFileName);
    ESIndex.getInstance().init();
    // Hadoop configuration class
    Configuration configuration = getConf();
    // No speculative execution! Do not process the same file twice
    configuration.set("mapred.reduce.tasks.speculative.execution", "false");
    // TODO even in local mode, the first argument should not be the inventory
    // but write a complete project file instead
    Project project = Project.getCurrentProject();
    if (project == null || project.isEmpty()) {
        // configure Hadoop input files
        System.out.println("Reading project file " + projectFileName);
        project = Project.loadFromFile(new File(projectFileName));
    }
    project.setProperty(ParameterProcessing.OUTPUT_DIR_HADOOP, outputPath);
    // send complete project information to all mappers and reducers
    configuration.set(ParameterProcessing.PROJECT, project.toString());
    Settings.load();
    configuration.set(ParameterProcessing.SETTINGS_STR, Settings.getSettings().toString());
    configuration.set(EmailProperties.PROPERTIES_FILE,
            Files.toString(new File(EmailProperties.PROPERTIES_FILE), Charset.defaultCharset()));
    Job job = new Job(configuration);
    job.setJarByClass(FreeEedMR.class);
    job.setJobName("FreeEedMR");
    // Hadoop processes key-value pairs
    // job.setOutputKeyClass(Text.class);
    // job.setOutputValueClass(MapWritable.class);
    // set map and reduce classes
    job.setMapperClass(FreeEedMapper.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setNumReduceTasks(0);
    // secondary sort for compound keys - this sorts the attachments
    job.setSortComparatorClass(KeyComparator.class);
    job.setGroupingComparatorClass(GroupComparator.class);
    // Hadoop TextInputFormat class
    // job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    LOGGER.debug("project.isEnvHadoop() = {} ", project.isEnvHadoop());
    String inputPath = projectFileName;
    if (project.isEnvHadoop() || Settings.getSettings().isHadoopDebug()) {
        inputPath = formInputPath(project);
    }
    LOGGER.debug("Ready to run, inputPath = {}, outputPath = {}", inputPath, outputPath);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    if (Settings.getSettings().isHadoopDebug()) {
        if (new File(outputPath).exists()) {
            Util.deleteDirectory(new File(outputPath));
        }
    }
    LOGGER.trace("Project");
    LOGGER.trace(project.toString());
    boolean success = job.waitForCompletion(true);
    ESIndex.getInstance().destroy();
    if (project.isEnvHadoop() && project.isFsS3()) {
        transferResultsToS3(outputPath);
    }
    return success ? 0 : 1;
}
From source file:org.gridgain.grid.kernal.processors.hadoop.GridHadoopSortingTest.java
License:Open Source License
/**
 * @throws Exception If failed.
 */
public void testSortSimple() throws Exception {
    // Generate test data.
    Job job = Job.getInstance();

    job.setInputFormatClass(InFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(Mapper.class);
    job.setNumReduceTasks(0);

    setupFileSystems(job.getConfiguration());

    FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_INPUT));

    X.printerrln("Data generation started.");

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 1), createJobInfo(job.getConfiguration()))
            .get(180000);

    X.printerrln("Data generation complete.");

    // Run main map-reduce job.
    job = Job.getInstance();

    setupFileSystems(job.getConfiguration());

    job.getConfiguration().set(CommonConfigurationKeys.IO_SERIALIZATIONS_KEY,
            JavaSerialization.class.getName() + "," + WritableSerialization.class.getName());

    FileInputFormat.setInputPaths(job, new Path(ggfsScheme() + PATH_INPUT));
    FileOutputFormat.setOutputPath(job, new Path(ggfsScheme() + PATH_OUTPUT));

    job.setSortComparatorClass(JavaSerializationComparator.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    job.setNumReduceTasks(2);

    job.setMapOutputKeyClass(UUID.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    X.printerrln("Job started.");

    grid(0).hadoop().submit(new GridHadoopJobId(UUID.randomUUID(), 2), createJobInfo(job.getConfiguration()))
            .get(180000);

    X.printerrln("Job complete.");

    // Check result.
    Path outDir = new Path(ggfsScheme() + PATH_OUTPUT);

    AbstractFileSystem fs = AbstractFileSystem.get(new URI(ggfsScheme()), job.getConfiguration());

    for (FileStatus file : fs.listStatus(outDir)) {
        X.printerrln("__ file: " + file);

        if (file.getLen() == 0)
            continue;

        FSDataInputStream in = fs.open(file.getPath());
        Scanner sc = new Scanner(in);

        UUID prev = null;

        while (sc.hasNextLine()) {
            UUID next = UUID.fromString(sc.nextLine());

            // X.printerrln("___ check: " + next);

            if (prev != null)
                assertTrue(prev.compareTo(next) < 0);

            prev = next;
        }
    }
}
From source file:org.hedera.mapreduce.BuildDictionary.java
License:Apache License
/**
 * Runs this tool.
 */
@Override
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of terms").create(COUNT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(COUNT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + BuildDictionary.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    Configuration conf = getConf();
    conf.set(HADOOP_OUTPUT_OPTION, output);
    conf.setInt(HADOOP_TERMS_COUNT_OPTION, Integer.parseInt(cmdline.getOptionValue(COUNT_OPTION)));
    conf.set("mapreduce.map.memory.mb", "2048");
    conf.set("mapreduce.map.java.opts", "-Xmx2048m");
    conf.set("mapreduce.reduce.memory.mb", "2048");
    conf.set("mapreduce.reduce.java.opts", "-Xmx2048m");

    Job job = setup(BuildDictionary.class.getSimpleName() + ":" + input, BuildDictionary.class, input, output,
            SequenceFileInputFormat.class, NullOutputFormat.class, Text.class, PairOfIntLong.class, Text.class,
            NullWritable.class, Mapper.class, MyReducer.class, 1);
    job.setSortComparatorClass(DictionaryTransformationStrategy.WritableComparator.class);

    FileSystem.get(getConf()).delete(new Path(output), true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:org.imageterrier.indexers.hadoop.HadoopIndexer.java
License:Mozilla Public License
protected Job createJob(HadoopIndexerOptions options) throws IOException {
    final Job job = new Job(getConf());
    job.setJobName("terrierIndexing");

    if (options.getInputMode() == InputMode.QUANTISED_FEATURES) {
        job.setMapperClass(QFIndexerMapper.class);
    } else {
        if (options.shardPerThread) {
            job.setMapperClass(MultithreadedMapper.class);
            MultithreadedMapper.setMapperClass(job, MTImageIndexerMapper.class);
            MultithreadedMapper.setNumberOfThreads(job, options.getMultithread());
        } else {
            job.setMapperClass(ImageIndexerMapper.class);
        }
    }

    // Load quantiser (if it exists), extract header, count codebook size
    if (options.getInputModeOptions().hasQuantiserFile()) {
        final String quantFile = options.getInputModeOptions().getQuantiserFile();
        System.out.println("Loading codebook to see its size");
        final SpatialClusters<?> quantiser = readClusters(options);
        System.out.println("Setting codebook size: " + quantiser.numClusters());
        job.getConfiguration().setInt(QUANTISER_SIZE, quantiser.numClusters());

        if (quantiser.numClusters() < options.getNumReducers())
            options.setNumReducers(quantiser.numClusters());
    }

    job.setReducerClass(IndexerReducer.class);

    FileOutputFormat.setOutputPath(job, options.getOutputPath());
    job.setMapOutputKeyClass(NewSplitEmittedTerm.class);
    job.setMapOutputValueClass(MapEmittedPostingList.class);
    job.getConfiguration().setBoolean("indexing.hadoop.multiple.indices", options.isDocumentPartitionMode());

    // if (!job.getConfiguration().get("mapred.job.tracker").equals("local")) {
    //     job.getConfiguration().set("mapred.map.output.compression.codec",
    //             GzipCodec.class.getCanonicalName());
    //     job.getConfiguration().setBoolean("mapred.compress.map.output", true);
    // } else {
    job.getConfiguration().setBoolean("mapred.compress.map.output", false);
    // }

    job.setInputFormatClass(PositionAwareSequenceFileInputFormat.class); // important
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setSortComparatorClass(NewSplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    job.setGroupingComparatorClass(NewSplitEmittedTerm.SETRawComparatorTerm.class);

    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    SequenceFileInputFormat.setInputPaths(job, options.getInputPaths());

    job.setNumReduceTasks(options.getNumReducers());

    if (options.getNumReducers() > 1) {
        if (options.isDocumentPartitionMode()) {
            job.setPartitionerClass(NewSplitEmittedTerm.SETPartitioner.class);
        } else {
            // job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
            if (job.getConfiguration().getInt(QUANTISER_SIZE, -1) == -1) {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerHashedTerm.class);
            } else {
                job.setPartitionerClass(NewSplitEmittedTerm.SETPartitionerCodebookAwareTerm.class);
            }
        }
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        job.setPartitionerClass(HashPartitioner.class);
    }

    job.setJarByClass(this.getClass());

    return job;
}
From source file:org.qcri.pca.CompositeJob.java
/**
 * Computes XtX and YtX
 *
 * Xc = (Y - Ym) * MEM = Y * MEM - Ym * MEM = X - Xm
 *
 * XtX = (X - Xm)' * (X - Xm)
 * YtX = (Y - Ym)' * (Y - Ym)
 *
 * @param conf
 *          the configuration
 * @param matrixInputPath
 *          Y
 * @param inMemMatrixDir
 *          MEM, where X = Y * MEM
 * @param inMemMatrixNumRows
 *          MEM.rows
 * @param inMemMatrixNumCols
 *          MEM.cols
 * @param ymPath
 *          Ym
 * @param xmPath
 *          Xm
 * @param matrixOutputPath
 *          YtX
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void run(Configuration conf, Path matrixInputPath, String inMemMatrixDir, int inMemMatrixNumRows,
        int inMemMatrixNumCols, String ymPath, String xmPath, Path matrixOutputPath)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set(MATRIXINMEMORY, inMemMatrixDir);
    conf.setInt(MATRIXINMEMORYROWS, inMemMatrixNumRows);
    conf.setInt(MATRIXINMEMORYCOLS, inMemMatrixNumCols);
    conf.set(YMPATH, ymPath);
    conf.set(XMPATH, xmPath);
    Path xtxOutputPath = getXtXPathBasedOnYm(new Path(ymPath));
    conf.set(XTXPATH, xtxOutputPath.toString());
    Job job = new Job(conf);
    job.setJobName("CompositeJob-" + matrixInputPath.getName());
    job.setJarByClass(CompositeJob.class);
    FileSystem fs = FileSystem.get(matrixInputPath.toUri(), conf);
    matrixInputPath = fs.makeQualified(matrixInputPath);
    matrixOutputPath = fs.makeQualified(matrixOutputPath);
    FileInputFormat.addInputPath(job, matrixInputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileOutputFormat.setOutputPath(job, matrixOutputPath);
    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(CompositeWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);
    job.setSortComparatorClass(CompositeWritable.class);
    job.setGroupingComparatorClass(CompositeWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.submit();
    job.waitForCompletion(true);
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exist, fail
    if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if samples path exists...
    if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
        if (this.conf.getDeleteTriplesSamplesPath()) {
            // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
        } else {
            // ... and option not provided, fail
            System.out.println("Triples samples path does exist: " + this.conf.getTriplesSamplesPath());
            System.out.println("Select other path or use option -dst to overwrite");
            System.exit(-1);
        }
    }

    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    // Job to create a SequenceInputFormat
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runTriplesJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) {
            // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else {
            // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runTriplesJobWithOneJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exist, fail
    if (!this.dictionaryFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) {
            // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else {
            // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName());

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryMapFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryReduceFile().toUri(), job.getConfiguration());

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:ph.fingra.hadoop.mapred.parts.component.ComponentAppversionStatistic.java
License:Apache License
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "component/componentappversion job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentAppversionStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentAppversionMapper.class);
    job.setReducerClass(ComponentAppversionReducer.class);

    job.setMapOutputKeyClass(ComponentAppversionKey.class);
    job.setMapOutputValueClass(ComponentAppversionEntity.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentAppversionPartitioner.class);
    job.setSortComparatorClass(ComponentAppversionSortComparator.class);
    job.setGroupingComparatorClass(ComponentAppversionGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}
From source file:ph.fingra.hadoop.mapred.parts.component.ComponentCountryStatistic.java
License:Apache License
public Job createJob(Configuration conf, Path[] inputpaths, Path outputpath, int numreduce,
        FingraphConfig finconfig) throws IOException {

    conf.setBoolean("verbose", finconfig.getDebug().isDebug_show_verbose());
    conf.setBoolean("counter", finconfig.getDebug().isDebug_show_counter());

    Job job = new Job(conf);
    String jobName = "component/componentcountry job";
    job.setJobName(jobName);

    job.setJarByClass(ComponentCountryStatistic.class);

    for (int i = 0; i < inputpaths.length; i++) {
        FileInputFormat.addInputPath(job, inputpaths[i]);
    }
    FileOutputFormat.setOutputPath(job, outputpath);

    job.setMapperClass(ComponentCountryMapper.class);
    job.setReducerClass(ComponentCountryReducer.class);

    job.setMapOutputKeyClass(ComponentCountryKey.class);
    job.setMapOutputValueClass(ComponentCountryEntity.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setPartitionerClass(ComponentCountryPartitioner.class);
    job.setSortComparatorClass(ComponentCountrySortComparator.class);
    job.setGroupingComparatorClass(ComponentCountryGroupComparator.class);

    job.setNumReduceTasks(numreduce);

    return job;
}