List of usage examples for org.apache.hadoop.mapreduce.Job#setNumReduceTasks(int)
public void setNumReduceTasks(int tasks) throws IllegalStateException
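The reducer count must be set while the job is still being defined: once the job has been submitted, the call fails with IllegalStateException. Setting the count to 0 produces a map-only job whose mapper output goes straight to the OutputFormat, a pattern most of the export jobs below rely on. A minimal sketch before the real examples (the class name, job name, and reducer count here are illustrative, not taken from the source files below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class ReducerCountExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "reducer-count-example");
        job.setJarByClass(ReducerCountExample.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // With no mapper/reducer classes set, Hadoop falls back to the identity
        // Mapper and Reducer, so this job simply repartitions its input.
        job.setNumReduceTasks(4); // run four reduce tasks
        // job.setNumReduceTasks(0) would make this a map-only job:
        // mapper output would then go straight to the OutputFormat.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
        // Calling setNumReduceTasks(...) after submission throws IllegalStateException.
    }
}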
From source file:co.nubetech.hiho.job.ExportDelimitedToDB.java
License:Apache License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(DelimitedLoadMapper.class);
    job.setJarByClass(DelimitedLoadMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(args[0]));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(DBOutputFormat.class);
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:co.nubetech.hiho.job.ExportToDB.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    job.setJobName("HihoDBExport");
    job.setMapperClass(GenericDBLoadDataMapper.class);
    job.setJarByClass(ExportToDB.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    GenericDBOutputFormat.setOutput(job, tableName, columnNames);
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:co.nubetech.hiho.job.ExportToFTPServer.java
License:Apache License
@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    Job job = new Job(conf);
    job.setMapperClass(TokenCounterMapper.class);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(FTPTextOutputFormat.class);
    FTPTextOutputFormat.setOutputPath(job, new Path(outputPath));
    job.setJarByClass(ExportToFTPServer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:co.nubetech.hiho.job.ExportToMySQLDB.java
License:Apache License
@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("MySQLBulkLoading");
    job.setMapperClass(MySQLLoadDataMapper.class);
    job.setJarByClass(MySQLLoadDataMapper.class);
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    // verify required properties are loaded
    logger.debug(conf.get(DBConfiguration.URL_PROPERTY));
    logger.debug(conf.get(DBConfiguration.USERNAME_PROPERTY));
    logger.debug(conf.get(DBConfiguration.PASSWORD_PROPERTY));
    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:co.nubetech.hiho.job.ExportToOracleDb.java
License:Apache License
@Override
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    for (Entry<String, String> entry : conf) {
        logger.debug("key, value " + entry.getKey() + "=" + entry.getValue());
    }
    for (int i = 0; i < args.length; i++) {
        logger.debug("Remaining arguments are" + " " + args[i]);
    }
    populateConfiguration(args, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }
    Job job = new Job(conf);
    job.setJobName("OracleLoading");
    job.setMapperClass(OracleLoadMapper.class);
    job.setJarByClass(ExportToOracleDb.class);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    try {
        // we first create the external table definition
        String query = conf.get(HIHOConf.EXTERNAL_TABLE_DML);
        // create table if user has specified
        if (query != null) {
            this.runQuery(query, conf);
        }
    } catch (HIHOException e1) {
        e1.printStackTrace();
    }
    // verify required properties are loaded
    job.setNumReduceTasks(0);
    job.setInputFormatClass(FileStreamInputFormat.class);
    FileStreamInputFormat.addInputPath(job, new Path(inputPath));
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    // job.setJarByClass(com.mysql.jdbc.Driver.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    // run alter table query and add locations
    try {
        this.runQuery(getAlterTableDML(new Path(inputPath), conf), conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
    }
    return ret;
}
From source file:co.nubetech.hiho.job.sf.ExportSalesForceJob.java
License:Apache License
@Override
public int run(String[] arg0) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(arg0, conf);
    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("SaleForceLoading");
    job.setMapperClass(SalesForceLoadMapper.class);
    job.setJarByClass(SalesForceLoadMapper.class);
    job.setNumReduceTasks(0);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path(inputPath));
    // NLineInputFormat.setNumLinesPerSplit(job, 10);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    int ret = 0;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
    } catch (Exception e) {
        e.printStackTrace();
    }
    return ret;
}
From source file:code.DemoWordCount.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers")
            .create(NUM_REDUCERS));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int reduceTasks = cmdline.hasOption(NUM_REDUCERS)
            ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1;

    LOG.info("Tool: " + DemoWordCount.class.getSimpleName());
    LOG.info(" - input path: " + inputPath);
    LOG.info(" - output path: " + outputPath);
    LOG.info(" - number of reducers: " + reduceTasks);

    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJobName(DemoWordCount.class.getSimpleName());
    job.setJarByClass(DemoWordCount.class);
    job.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(MyMapper.class);
    job.setCombinerClass(MyReducer.class);
    job.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already.
    Path outputDir = new Path(outputPath);
    FileSystem.get(conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    LOG.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:com.ailk.oci.ocnosql.tools.load.mutiple.MutipleColumnImportTsv.java
License:Apache License
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {
    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }
    String tableNameConf = conf.get(CommonConstants.TABLE_NAME);
    if (tableNameConf == null) {
        conf.set(CommonConstants.TABLE_NAME, tableName);
    }

    // See if a non-default Mapper was set
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(MutipleColumnImportTsv.class);
    FileInputFormat.setInputPaths(job, inputDir);

    // The InputFormat can be overridden via the importtsv.inputFormat
    // configuration; it defaults to TextInputFormat.
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        // job.setReducerClass(MutipleColumnReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        HFileOutputFormat.configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/**
 * Sets up the actual MapReduce job for importtsv.
 *
 * @param conf The current configuration.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String tableName, String inputPath,
        String tmpOutputPath) throws IOException, ClassNotFoundException {
    // Support non-XML supported characters
    // by re-encoding the passed separator as a Base64 string.
    String actualSeparator = conf.get(CommonConstants.SEPARATOR);
    if (actualSeparator != null) {
        conf.set(CommonConstants.SEPARATOR, Base64.encodeBytes(actualSeparator.getBytes()));
    }

    // See if a non-default Mapper was set; the default is SingleColumnImporterMapper.
    String mapperClassName = conf.get(MAPPER_CONF_KEY);
    Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;

    Path inputDir = new Path(inputPath);
    // Create the job.
    Job job = new Job(conf, NAME + "_" + tableName);
    // Set the jar by finding where a given class came from.
    job.setJarByClass(SingleColumnImportTsv.class);
    // Set the input path.
    FileInputFormat.setInputPaths(job, inputDir);

    // Set the job's InputFormat; it can be overridden via the
    // importtsv.inputFormat configuration and defaults to TextInputFormat.
    String inputFmtName = conf.get(CommonConstants.INPUTFORMAT,
            "org.apache.hadoop.mapreduce.lib.input.TextInputFormat");
    LOG.info(CommonConstants.INPUTFORMAT + " is " + inputFmtName);
    Class<? extends InputFormat> inputFmtClass = Class.forName(inputFmtName).asSubclass(InputFormat.class);
    job.setInputFormatClass(inputFmtClass);
    // Set the mapper.
    job.setMapperClass(mapperClass);

    String hfileOutPath = tmpOutputPath;
    if (hfileOutPath != null) {
        // Create the table if it does not exist.
        if (!doesTableExist(tableName)) {
            createTable(conf, tableName);
        }
        HTable table = new HTable(conf, tableName);
        // Set the reducer.
        job.setReducerClass(SingleColumnReducer.class);
        Path outputDir = new Path(hfileOutPath);
        // Set the output path.
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(TextArrayWritable.class);
        // Configure the job's partitioner, output format, and reduce tasks.
        configureIncrementalLoad(job, table);
    } else {
        // No reducers. Just write straight to table (via Put). Call initTableReducerJob
        // to set up the TableOutputFormat.
        TableMapReduceUtil.initTableReducerJob(tableName, null, job);
        job.setNumReduceTasks(0);
    }

    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(),
            com.google.common.base.Function.class /* Guava used by TsvParser */);
    return job;
}
From source file:com.ailk.oci.ocnosql.tools.load.single.SingleColumnImportTsv.java
License:Apache License
/**
 * Configure a MapReduce Job to perform an incremental load into the given
 * table. This
 * <ul>
 * <li>Inspects the table to configure a total order partitioner</li>
 * <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
 * <li>Sets the number of reduce tasks to match the current number of regions</li>
 * <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
 * <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
 * PutSortReducer)</li>
 * </ul>
 * The user should be sure to set the map output value class to either KeyValue or Put before
 * running this function.
 */
public static void configureIncrementalLoad(Job job, HTable table) throws IOException {
    Configuration conf = job.getConfiguration();
    Class<? extends Partitioner> topClass;
    try {
        topClass = getTotalOrderPartitionerClass();
    } catch (ClassNotFoundException e) {
        throw new IOException("Failed getting TotalOrderPartitioner", e);
    }
    // Set the partitioner.
    job.setPartitionerClass(topClass);
    // Set the key class for the job output data.
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    // Set the value class for job outputs.
    job.setOutputValueClass(KeyValue.class);
    // Write HFiles as the job output.
    job.setOutputFormatClass(HFileOutputFormat2.class);

    // Based on the configured map output class, set the correct reducer to properly
    // sort the incoming values.
    // TODO it would be nice to pick one or the other of these formats.
    if (KeyValue.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(KeyValueSortReducer.class);
    } else if (Put.class.equals(job.getMapOutputValueClass())) {
        job.setReducerClass(SingleColumnReducer.class);
    } else {
        LOG.warn("Unknown map output value type:" + job.getMapOutputValueClass());
    }

    LOG.info("Looking up current regions for table " + table);
    // Look up the start key of each region.
    List<ImmutableBytesWritable> startKeys = getRegionStartKeys(table);
    LOG.info("Configuring " + startKeys.size() + " reduce partitions "
            + "to match current region count");
    // One reduce task per region.
    job.setNumReduceTasks(startKeys.size());

    Path partitionsPath = new Path(job.getWorkingDirectory(), "partitions_" + UUID.randomUUID());
    LOG.info("Writing partition information to " + partitionsPath);

    FileSystem fs = partitionsPath.getFileSystem(conf);
    writePartitions(conf, partitionsPath, startKeys);
    partitionsPath.makeQualified(fs);

    URI cacheUri;
    try {
        // Below we make explicit reference to the bundled TOP. It's cheating.
        // We assume the define in the hbase bundled TOP is as it is in
        // hadoop (whether 0.20 or 0.22, etc.)
        /*
        cacheUri = new URI(partitionsPath.toString() + "#"
                + org.apache.hadoop.hbase.mapreduce.hadoopbackport.TotalOrderPartitioner.DEFAULT_PATH);
        */
        cacheUri = new URI(partitionsPath.toString() + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }
    DistributedCache.addCacheFile(cacheUri, conf);
    DistributedCache.createSymlink(conf);

    // Set compression algorithms based on column families.
    configureCompression(table, conf);

    TableMapReduceUtil.addDependencyJars(job);
    LOG.info("Incremental table output configured.");
}