List of usage examples for org.apache.hadoop.mapreduce.Job.setInputFormatClass
public void setInputFormatClass(Class<? extends InputFormat> cls) throws IllegalStateException
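
Before the per-project examples, here is a minimal, self-contained sketch of the call in a typical driver. It is illustrative only, not drawn from any of the source files below; the class name and the use of args[0]/args[1] as input and output paths are placeholders. Note that setInputFormatClass must be called before the job is submitted; calling it on a running job raises the IllegalStateException declared in the signature.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetInputFormatClassExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setInputFormatClass example");
        job.setJarByClass(SetInputFormatClassExample.class);

        // The input format decides how input files are split and turned into
        // key/value records. TextInputFormat yields (byte offset, line) pairs.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // No mapper/reducer set: the identity Mapper and Reducer are used,
        // so the output key/value classes match TextInputFormat's records.
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

The examples that follow show the same call with non-default input formats: KeyValueTextInputFormat, DruidInputFormat, and WikipediaPageInputFormat.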
From source file: com.xyz.reccommendation.driver.SKU2SKUCount.java
License: Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    String envt = null;
    if (args.length > 0) {
        envt = args[0];
    } else {
        envt = "dev";
    }

    Properties prop = new Properties();
    try {
        // Load a properties file from the classpath, inside a static method.
        prop.load(SKU2SKUCount.class.getClassLoader()
                .getResourceAsStream("config-" + envt + ".properties"));
    } catch (IOException ex) {
        ex.printStackTrace();
        System.exit(1);
    }

    MongoConfigUtil.setOutputURI(conf, "mongodb://" + prop.getProperty("mongodb.ip") + "/"
            + prop.getProperty("mongodb.dbname") + ".out_stat_custom");
    log.debug("MongoDB URL : mongodb://" + prop.getProperty("mongodb.ip") + "/"
            + prop.getProperty("mongodb.dbname") + ".out_stat_custom");
    log.debug("Conf: " + conf);

    MongoConfigUtil.setCreateInputSplits(conf, false);
    args = new GenericOptionsParser(conf, args).getRemainingArgs();

    final Job job = new Job(conf,
            "Count the sku to sku mapping from pview data on hdfs in \"inputPview\" path.");

    job.setJarByClass(SKU2SKUCount.class);

    job.setMapperClass(TokenizerMapper.class);
    // job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BSONWritable.class);

    job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(MongoOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("inputPview"));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: com.yahoo.druid.hadoop.DruidInputFormatTest.java
License: Apache License

@Test
public void testSampleMRJob() throws Exception {
    Job job = Job.getInstance(new Configuration(), "Druid-Loader-Sample-Test-Job");

    job.getConfiguration().set("mapreduce.job.acl-view-job", "*");
    job.getConfiguration().set("mapreduce.map.java.opts", "-Duser.timezone=UTC");
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_OVERLORD_HOSTPORT,
            "localhost:" + overlordTestPort);
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_SCHEMA,
            "{"
                    + "\"dataSource\":\"testDataSource\","
                    + "\"interval\":\"1970-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z\","
                    + "\"granularity\":\"NONE\","
                    + "\"dimensions\":[\"host\"],"
                    + "\"metrics\":[\"visited_sum\",\"unique_hosts\"]"
                    + "}");

    job.setMapperClass(SampleMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setInputFormatClass(DruidInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    String outputPath = tempFolder.newFolder() + "/out";
    TextOutputFormat.setOutputPath(job, new Path(outputPath));

    Assert.assertTrue(job.waitForCompletion(true));

    // Verify that the SampleMapper actually ran and verified the data.
    Assert.assertTrue(
            FileUtils.readFileToString(new File(outputPath + "/part-m-00000")).startsWith("SUCCESS"));
}
From source file: com.yahoo.druid.hadoop.example.SamplePrintMRJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    // Configuration provided by the Tool interface.
    Configuration conf = this.getConf();

    // Create the job.
    Job job = new Job(conf, "Druid-Loader-Sample-Job");
    job.setJarByClass(SamplePrintMRJob.class);
    // job.setJobName("Druid-Loader-Sample-Job");

    job.getConfiguration().set("mapreduce.job.acl-view-job", "*");
    job.getConfiguration().set("mapreduce.job.queuename", "default");
    job.getConfiguration().set("mapreduce.map.java.opts", "-Duser.timezone=UTC");
    //job.getConfiguration().set("mapreduce.map.memory.mb", "1024");
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_STORAGE_STORAGE_DIR, "/tmp/druid/storage");
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_OVERLORD_HOSTPORT, "localhost:8080");
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_DATASOURCE, "wikipedia");
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_INTERVAL,
            "2009-01-01T00:00:00.000/2050-01-01T00:00:00.000");
    job.getConfiguration().set(DruidInputFormat.CONF_DRUID_SCHEMA_FILE,
            "/tmp/druid/schema/druid_fun_mr.json");

    job.setMapperClass(DruidPrintMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(DateTime.class);
    job.setOutputValueClass(Map.class);

    job.setInputFormatClass(DruidInputFormat.class);
    job.setOutputFormatClass(NullOutputFormat.class);

    System.out.println("Starting Druid Loader Sample Job.....");
    return job.waitForCompletion(true) ? 0 : 1;
    //System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file: com.yahoo.glimmer.indexing.generator.TripleIndexGenerator.java
License: Open Source License

public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(TripleIndexGenerator.class.getName(),
            "Generates a keyword index from RDF data.",
            new Parameter[] {
                    new Switch(NO_CONTEXTS_ARG, 'C', "withoutContexts",
                            "Don't process the contexts for each tuple."),
                    new FlaggedOption(METHOD_ARG, JSAP.STRING_PARSER, "horizontal", JSAP.REQUIRED, 'm',
                            METHOD_ARG, "horizontal or vertical."),
                    new FlaggedOption(PREDICATES_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
                            'p', PREDICATES_ARG, "Subset of the properties to be indexed."),
                    new FlaggedOption(RESOURCE_PREFIX_ARG, JSAP.STRING_PARSER, "@", JSAP.NOT_REQUIRED, 'r',
                            RESOURCE_PREFIX_ARG,
                            "Prefix to add to object resource hash values when indexing. Stops queries for numbers matching resource hash values. Default is '@'"),
                    new UnflaggedOption("input", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the input data."),
                    new UnflaggedOption(NUMBER_OF_DOCS_ARG, JSAP.LONG_PARSER, JSAP.REQUIRED,
                            "Number of documents to index"),
                    new UnflaggedOption("output", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the output."),
                    new UnflaggedOption(RESOURCES_HASH_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location of the resources hash file."), });

    JSAPResult jsapResult = jsap.parse(args);

    // Check whether the command line was valid; if it wasn't,
    // display usage information and exit.
    if (!jsapResult.success()) {
        System.err.println();
        System.err.println("Usage: java " + TripleIndexGenerator.class.getName());
        System.err.println("  " + jsap.getUsage());
        System.err.println();
        System.exit(1);
    }

    Job job = Job.getInstance(getConf());
    job.setJarByClass(TripleIndexGenerator.class);
    job.setJobName("TripleIndexGenerator" + System.currentTimeMillis());

    FileInputFormat.setInputPaths(job, new Path(jsapResult.getString("input")));
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(DocumentMapper.class);
    job.setMapOutputKeyClass(TermKey.class);
    job.setMapOutputValueClass(TermValue.class);

    job.setPartitionerClass(TermKey.FirstPartitioner.class);
    job.setGroupingComparatorClass(TermKey.FirstGroupingComparator.class);

    job.setReducerClass(TermReduce.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IndexRecordWriterValue.class);
    job.setOutputFormatClass(IndexRecordWriter.OutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(jsapResult.getString("output")));

    Configuration conf = job.getConfiguration();
    conf.setClass("mapred.output.key.comparator.class", TermKey.Comparator.class, WritableComparator.class);
    conf.set("mapreduce.user.classpath.first", "true");

    long numDocs = jsapResult.getLong(NUMBER_OF_DOCS_ARG);
    conf.setLong(NUMBER_OF_DOCUMENTS, numDocs);
    // Set this in an attempt to get around the 2GB-of-RAM task limit on our cluster.
    // Setting this in the hope of fixing "Direct buffer memory" errors.
    conf.setInt(INDEX_WRITER_CACHE_SIZE, 1024 * 1024);

    conf.set(OUTPUT_DIR, jsapResult.getString("output"));

    boolean withContexts = !jsapResult.getBoolean(NO_CONTEXTS_ARG, false);
    if (jsapResult.getString(METHOD_ARG).equalsIgnoreCase(METHOD_ARG_VALUE_HORIZONTAL)) {
        HorizontalDocumentFactory.setupConf(conf, withContexts, jsapResult.getString(RESOURCES_HASH_ARG),
                jsapResult.getString(RESOURCE_PREFIX_ARG));
    } else if (jsapResult.getString(METHOD_ARG).equalsIgnoreCase(METHOD_ARG_VALUE_VERTICAL)) {
        if (!jsapResult.contains(PREDICATES_ARG)) {
            throw new IllegalArgumentException("When '" + METHOD_ARG + "' is '" + METHOD_ARG_VALUE_VERTICAL
                    + "' you have to give a predicates file too.");
        }
        VerticalDocumentFactory.setupConf(conf, withContexts, jsapResult.getString(RESOURCES_HASH_ARG),
                jsapResult.getString(RESOURCE_PREFIX_ARG), jsapResult.getString(PREDICATES_ARG));
    } else {
        throw new IllegalArgumentException(METHOD_ARG + " should be '" + METHOD_ARG_VALUE_HORIZONTAL
                + "' or '" + METHOD_ARG_VALUE_VERTICAL + "'");
    }

    conf.setInt("mapreduce.input.linerecordreader.line.maxlength", 1024 * 1024);

    boolean success = job.waitForCompletion(true);

    return success ? 0 : 1;
}
From source file: com.yahoo.glimmer.indexing.preprocessor.PrepTool.java
License: Open Source License

@Override
public int run(String[] args) throws Exception {
    SimpleJSAP jsap = new SimpleJSAP(PrepTool.class.getName(), "RDF tuples pre-processor for Glimmer",
            new Parameter[] {
                    new Switch(NO_CONTEXTS_ARG, 'C', NO_CONTEXTS_ARG,
                            "Don't process the contexts for each tuple."),
                    new FlaggedOption(ONTOLOGY_ARG, JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED,
                            'O', ONTOLOGY_ARG),
                    new FlaggedOption(REDUCER_COUNT_ARG, JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT,
                            JSAP.NOT_REQUIRED, 'r', REDUCER_COUNT_ARG),
                    new UnflaggedOption(INPUT_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the input data."),
                    new UnflaggedOption(OUTPUT_ARG, JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "HDFS location for the output data."), });

    JSAPResult jsapResult = jsap.parse(args);
    if (!jsapResult.success()) {
        System.err.print(jsap.getUsage());
        System.exit(1);
    }

    Configuration config = getConf();

    boolean withContexts = !jsapResult.getBoolean(NO_CONTEXTS_ARG, false);
    config.setBoolean(TuplesToResourcesMapper.INCLUDE_CONTEXTS_KEY, withContexts);

    // The ontology, if any...
    String ontologyFilename = jsapResult.getString(ONTOLOGY_ARG);
    if (ontologyFilename != null) {
        // Load the ontology.
        InputStream ontologyInputStream = new FileInputStream(ontologyFilename);
        OWLOntology ontology = OntologyLoader.load(ontologyInputStream);
        System.out.println("Loaded ontology from " + ontologyFilename + " with " + ontology.getAxiomCount()
                + " axioms.");

        ArrayList<String> ontologyClasses = new ArrayList<String>();
        for (OWLClass owlClass : ontology.getClassesInSignature()) {
            ontologyClasses.add(owlClass.getIRI().toString());
        }
        System.out.println("Adding " + ontologyClasses.size() + " classes from ontology.");
        config.setStrings(TuplesToResourcesMapper.EXTRA_RESOURCES, ontologyClasses.toArray(new String[0]));
    } else {
        System.out.println("No ontology filename set in conf. No ontology has been loaded.");
    }

    Job job = Job.getInstance(config);
    job.setJarByClass(PrepTool.class);

    job.setJobName(PrepTool.class.getName() + "-part1-" + System.currentTimeMillis());
    job.setInputFormatClass(TextInputFormat.class);

    job.setMapperClass(TuplesToResourcesMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    int reducerCount = jsapResult.getInt(REDUCER_COUNT_ARG, DEFAULT_REDUCER_COUNT);
    job.setNumReduceTasks(reducerCount);
    if (reducerCount == 1) {
        // We assign 'global' ids in the reducer. For this to work, there
        // can be only one. But using just one reducer, we run out of local disk space during the
        // pre-reduce merge with big data sets like WCC.
        job.setReducerClass(ResourcesReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Object.class);
        job.setOutputFormatClass(ResourceRecordWriter.OutputFormat.class);
    } else {
        /*
         * TODO: Take the functionality of the reducer and move it to run on
         * the gateway. We then use n identity reducers, the output of which
         * will be read and merged as streams on the gateway.
         */
    }

    FileInputFormat.setInputPaths(job, new Path(jsapResult.getString(INPUT_ARG)));

    Path outputDir = new Path(jsapResult.getString(OUTPUT_ARG));
    FileOutputFormat.setOutputPath(job, outputDir);

    if (!job.waitForCompletion(true)) {
        System.err.println("Failed to process tuples from " + jsapResult.getString(INPUT_ARG));
        return 1;
    }

    // IF THERE WAS ONLY ONE REDUCER WE NOW HAVE:
    // One file per reducer containing lists of urls (resources) for
    // subjects, predicates, objects and contexts.
    // One file per reducer that contains all resources: subjects +
    // predicates + objects + contexts.
    // One file per reducer that contains the subjects + all <predicate>
    // <object>|"Literal" <context> on that subject.

    // IF THERE WAS MORE THAN ONE REDUCER WE NOW HAVE N FILES THAT NEED TO BE MERGED ON THE GATEWAY. TODO.

    return 0;
}
From source file: com.yahoo.semsearch.fastlinking.io.RepackWikipedia.java
License: Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output location")
            .create(OUTPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("mapping file")
            .create(MAPPING_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("block|record|none").hasArg()
            .withDescription("compression type").create(COMPRESSION_TYPE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)
            || !cmdline.hasOption(MAPPING_FILE_OPTION) || !cmdline.hasOption(COMPRESSION_TYPE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputPath = cmdline.getOptionValue(OUTPUT_OPTION);
    String mappingFile = cmdline.getOptionValue(MAPPING_FILE_OPTION);
    String compressionType = cmdline.getOptionValue(COMPRESSION_TYPE_OPTION);

    if (!"block".equals(compressionType) && !"record".equals(compressionType)
            && !"none".equals(compressionType)) {
        System.err.println("Error: \"" + compressionType + "\" unknown compression type!");
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    // This is the default block size.
    int blocksize = 1000000;

    Job job = Job.getInstance(getConf());
    job.setJarByClass(RepackWikipedia.class);
    job.setJobName(String.format("RepackWikipedia[%s: %s, %s: %s, %s: %s, %s: %s]", INPUT_OPTION, inputPath,
            OUTPUT_OPTION, outputPath, COMPRESSION_TYPE_OPTION, compressionType, LANGUAGE_OPTION, language));

    job.getConfiguration().set(DOCNO_MAPPING_FIELD, mappingFile);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - XML dump file: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - docno mapping data file: " + mappingFile);
    LOG.info(" - compression type: " + compressionType);
    LOG.info(" - language: " + language);

    if ("block".equals(compressionType)) {
        LOG.info(" - block size: " + blocksize);
    }

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    if ("none".equals(compressionType)) {
        FileOutputFormat.setCompressOutput(job, false);
    } else {
        FileOutputFormat.setCompressOutput(job, true);

        if ("record".equals(compressionType)) {
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.RECORD);
        } else {
            SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
            job.getConfiguration().setInt("io.seqfile.compress.blocksize", blocksize);
        }
    }

    if (language != null) {
        job.getConfiguration().set("wiki.language", language);
    }

    job.setInputFormatClass(WikipediaPageInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(WikipediaPageFactory.getWikipediaPageClass(language));
    //job.setOutputValueClass(EnglishWikipediaPage.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(outputPath), true);

    return job.waitForCompletion(true) ? 0 : -1;
}
From source file: com.yahoo.semsearch.fastlinking.io.WikipediaDocnoMappingBuilder.java
License: Apache License

@SuppressWarnings("static-access")
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("XML dump file").create(INPUT_OPTION));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output file")
            .create(OUTPUT_FILE_OPTION));
    options.addOption(OptionBuilder.withArgName("en|sv|de|cs|es|zh|ar|tr|it").hasArg()
            .withDescription("two-letter language code").create(LANGUAGE_OPTION));
    options.addOption(KEEP_ALL_OPTION, false, "keep all pages");

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_FILE_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String language = null;
    if (cmdline.hasOption(LANGUAGE_OPTION)) {
        language = cmdline.getOptionValue(LANGUAGE_OPTION);
        if (language.length() != 2) {
            System.err.println("Error: \"" + language + "\" unknown language!");
            return -1;
        }
    }

    String inputPath = cmdline.getOptionValue(INPUT_OPTION);
    String outputFile = cmdline.getOptionValue(OUTPUT_FILE_OPTION);
    boolean keepAll = cmdline.hasOption(KEEP_ALL_OPTION);

    String tmpPath = "tmp-" + WikipediaDocnoMappingBuilder.class.getSimpleName() + "-"
            + RANDOM.nextInt(10000);

    LOG.info("Tool name: " + this.getClass().getName());
    LOG.info(" - input: " + inputPath);
    LOG.info(" - output file: " + outputFile);
    LOG.info(" - keep all pages: " + keepAll);
    LOG.info(" - language: " + language);

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WikipediaDocnoMappingBuilder.class);
    job.setJobName(String.format("BuildWikipediaDocnoMapping[%s: %s, %s: %s, %s: %s]", INPUT_OPTION,
            inputPath, OUTPUT_FILE_OPTION, outputFile, LANGUAGE_OPTION, language));

    job.getConfiguration().setBoolean(KEEP_ALL_OPTION, keepAll);
    if (language != null) {
        job.getConfiguration().set("wiki.language", language);
    }
    job.setNumReduceTasks(1);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(tmpPath));
    FileOutputFormat.setCompressOutput(job, false);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(WikipediaPageInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapperClass(MyMapper.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    FileSystem.get(getConf()).delete(new Path(tmpPath), true);

    if (job.waitForCompletion(true)) {
        // long cnt = keepAll ? job.getCounters().findCounter(PageTypes.TOTAL).getValue()
        //         : job.getCounters().findCounter(PageTypes.ARTICLE).getValue();
        long cnt = job.getCounters()
                .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue();
        WikipediaDocnoMapping.writeDocnoMappingData(FileSystem.get(getConf()), tmpPath + "/part-r-00000",
                (int) cnt, outputFile);
        FileSystem.get(getConf()).delete(new Path(tmpPath), true);
        return 0;
    } else {
        return -1;
    }
}
From source file: com.yahoo.semsearch.fastlinking.utils.RunFELOntheGrid.java
License: Apache License

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    //Job job = Job.getInstance( conf );
    job.setJarByClass(RunFELOntheGrid.class);

    // Process custom command-line options.
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Specify various job-specific parameters.
    job.setJobName("Entity Linker");
    job.setNumReduceTasks(100);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapperClass(FELMapper.class);
    job.setReducerClass(FELReducer.class);
    job.setCombinerClass(FELReducer.class);

    job.waitForCompletion(true);

    return 0;
}
From source file: com.yahoo.semsearch.fastlinking.w2v.EntityEmbeddings.java
License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf);
    //Job job = Job.getInstance( conf );
    job.setJarByClass(EntityEmbeddings.class);

    // Process custom command-line options.
    Path in = new Path(args[0]);
    Path out = new Path(args[1]);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Specify various job-specific parameters.
    job.setJobName("Entity embeddings");
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(EntityEMapper.class);
    job.setReducerClass(EntityEReducer.class);
    job.setCombinerClass(EntityEReducer.class);

    job.waitForCompletion(true);

    return 0;
}