List of usage examples for org.apache.hadoop.mapreduce Job setMapperClass
public void setMapperClass(Class<? extends Mapper> cls) throws IllegalStateException
From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java
/**
 * Configures and runs the HBase bulk-load MapReduce job and, when the job succeeds,
 * bulk-loads the generated files from the output path into the target table.
 *
 * <p>Arguments expected after generic-option parsing:
 * <ol>
 *   <li>{@code args[0]} - input path</li>
 *   <li>{@code args[1]} - output path (deleted recursively before the job is submitted)</li>
 *   <li>{@code args[2]} - HBase table</li>
 *   <li>{@code args[3]} - HBase column family</li>
 *   <li>{@code args[4]} - HBase columns (delimiter separated)</li>
 *   <li>{@code args[5]} - column delimiter</li>
 * </ol>
 *
 * @param args raw command-line arguments, including any generic Hadoop options
 * @return {@code 2} when fewer than six arguments remain; otherwise
 *     {@code HBASEBulkLoadConstants.SUCCESS} or {@code HBASEBulkLoadConstants.FAILURE}
 *     depending on whether the job completed successfully
 * @throws Exception if job setup, job execution or the bulk load fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 6) {
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];

    // Table and column names are stored trimmed and lower-cased; the family and the
    // separator are stored exactly as given.
    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);
    // Progress marker printed by the original implementation; kept so stdout is unchanged.
    System.out.println(2);

    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);

    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);

    // try-with-resources guarantees that the connection and the table are released on
    // every path (job failure, exception, success), not only after a successful load.
    try (Connection connection = ConnectionFactory.createConnection(config);
            Table hTab = connection.getTable(TableName.valueOf(hbaseTab))) {
        FileSystem.get(getConf()).delete(new Path(args[1]), true);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
        TableMapReduceUtil.addDependencyJars(job);
        HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);

        int exitCode = job.waitForCompletion(true)
                ? HBASEBulkLoadConstants.SUCCESS
                : HBASEBulkLoadConstants.FAILURE;
        // Progress marker printed by the original implementation; kept so stdout is unchanged.
        System.out.println(8);

        if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
            loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        }
        return exitCode;
    }
}
From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java
/**
 * Sets up and runs the word-count job.
 *
 * <p>After generic-option parsing, the first remaining argument is used as the input path
 * and the second as the output path. {@code TokenizerMapper} is the mapper and
 * {@code IntSumReducer} serves as both combiner and reducer; keys are {@code Text} and
 * values are {@code IntWritable} for both the map output and the final output.
 *
 * @param args raw command-line arguments, including any generic Hadoop options
 * @return {@code 2} when fewer than two arguments remain, {@code 0} when the job
 *     completes successfully, {@code 1} otherwise
 * @throws Exception if the job cannot be configured or executed
 */
@Override
public int run(String[] args) throws Exception {
    final Configuration conf = getConf();
    final String[] remaining = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (remaining.length < 2) {
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    final String jobName = this.getClass().getName() + "-wordcount";
    final Job wordCount = Job.getInstance(conf, jobName);
    wordCount.setJarByClass(SecureWordCountDriver.class);

    // Processing classes.
    wordCount.setInputFormatClass(TextInputFormat.class);
    wordCount.setMapperClass(TokenizerMapper.class);
    wordCount.setCombinerClass(IntSumReducer.class);
    wordCount.setReducerClass(IntSumReducer.class);

    // Intermediate and final key/value types.
    wordCount.setMapOutputKeyClass(Text.class);
    wordCount.setMapOutputValueClass(IntWritable.class);
    wordCount.setOutputKeyClass(Text.class);
    wordCount.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(wordCount, new Path(remaining[0]));
    FileOutputFormat.setOutputPath(wordCount, new Path(remaining[1]));

    final boolean succeeded = wordCount.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }
    return 1;
}
From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java
License:Apache License
public void testDateSplits() throws Exception { Statement s = connection.createStatement(); final String DATE_TABLE = "datetable"; final String COL = "foo"; try {//from www. jav a 2 s . c o m try { // delete the table if it already exists. s.executeUpdate("DROP TABLE " + DATE_TABLE); } catch (SQLException e) { // Ignored; proceed regardless of whether we deleted the table; // it may have simply not existed. } // Create the table. s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')"); // commit this tx. connection.commit(); Configuration conf = new Configuration(); conf.set("fs.defaultFS", "file:///"); FileSystem fs = FileSystem.getLocal(conf); fs.delete(new Path(OUT_DIR), true); // now do a dd import Job job = new Job(conf); job.setMapperClass(ValMapper.class); job.setReducerClass(Reducer.class); job.setMapOutputKeyClass(DateCol.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(DateCol.class); job.setOutputValueClass(NullWritable.class); job.setNumReduceTasks(1); job.getConfiguration().setInt("mapreduce.map.tasks", 2); FileOutputFormat.setOutputPath(job, new Path(OUT_DIR)); DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null); DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL); boolean ret = job.waitForCompletion(true); assertTrue("job failed", ret); // Check to see that we imported as much as we thought we did. assertEquals("Did not get all the records", 4, job.getCounters() .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue()); } finally { s.close(); } }
From source file:com.cloudera.sqoop.mapreduce.HBaseImportJob.java
License:Apache License
/**
 * Configures the job's output types and mapper: the output key class is
 * {@code SqoopRecord}, the output value class is {@code NullWritable}, and the mapper
 * is whatever {@code getMapperClass()} returns.
 *
 * @param job the job to configure
 * @param tableName not consulted by this implementation
 * @param tableClassName not consulted by this implementation
 * @throws IOException declared by the signature; nothing in this body throws it directly
 */
@Override
protected void configureMapper(Job job, String tableName, String tableClassName)
    throws IOException {
  // Output types first, mapper last.
  job.setOutputKeyClass(SqoopRecord.class);
  job.setOutputValueClass(NullWritable.class);

  job.setMapperClass(getMapperClass());
}
From source file:com.cloudera.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException { Configuration conf = options.getConf(); Job job = new Job(conf); String userClassName = options.getClassName(); if (null == userClassName) { // Shouldn't get here. throw new IOException("Record class name not specified with " + "--class-name."); }/* w ww . j a v a 2 s.com*/ // Set the external jar to use for the job. String existingJar = options.getExistingJarName(); if (existingJar != null) { // User explicitly identified a jar path. LOG.debug("Setting job jar to user-specified jar: " + existingJar); job.getConfiguration().set("mapred.jar", existingJar); } else { // Infer it from the location of the specified class, if it's on the // classpath. try { Class<? extends Object> userClass = conf.getClassByName(userClassName); if (null != userClass) { String userJar = Jars.getJarPathForClass(userClass); LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar); job.getConfiguration().set("mapred.jar", userJar); } else { LOG.warn("Specified class " + userClassName + " is not in a jar. 
" + "MapReduce may not find the class"); } } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } } try { Path oldPath = new Path(options.getMergeOldPath()); Path newPath = new Path(options.getMergeNewPath()); Configuration jobConf = job.getConfiguration(); FileSystem fs = FileSystem.get(jobConf); oldPath = oldPath.makeQualified(fs); newPath = newPath.makeQualified(fs); FileInputFormat.addInputPath(job, oldPath); FileInputFormat.addInputPath(job, newPath); jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString()); jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString()); jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol()); jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName); FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir())); if (ExportJobBase.isSequenceFiles(jobConf, newPath)) { job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MergeRecordMapper.class); } else { job.setMapperClass(MergeTextMapper.class); job.setOutputFormatClass(RawKeyTextOutputFormat.class); } jobConf.set("mapred.output.key.class", userClassName); job.setOutputValueClass(NullWritable.class); job.setReducerClass(MergeReducer.class); // Set the intermediate data types. job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(MergeRecord.class); // Make sure Sqoop and anything else we need is on the classpath. cacheJars(job, null); return this.runJob(job); } catch (InterruptedException ie) { throw new IOException(ie); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } }
From source file:com.cloudera.sqoop.mapreduce.MySQLDumpImportJob.java
License:Apache License
/** * Set the mapper class implementation to use in the job, * as well as any related configuration (e.g., map output types). *///from ww w . java 2 s. com protected void configureMapper(Job job, String tableName, String tableClassName) throws ClassNotFoundException, IOException { job.setMapperClass(getMapperClass()); job.setOutputKeyClass(String.class); job.setOutputValueClass(NullWritable.class); }
From source file:com.cloudera.test.UseHCat.java
License:Apache License
public int run(String[] args) throws Exception { Configuration conf = getConf(); args = new GenericOptionsParser(conf, args).getRemainingArgs(); // Get the input and output table names as arguments String inputTableName = args[0]; String outputTableName = args[1]; // Assume the default database String dbName = null;/*from w w w. j av a2 s . c om*/ Job job = new Job(conf, "UseHCat"); HCatInputFormat.setInput(job, dbName, inputTableName); job.setJarByClass(UseHCat.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); // An HCatalog record as input job.setInputFormatClass(HCatInputFormat.class); // Mapper emits a string as key and an integer as value job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // Ignore the key for the reducer output; emitting an HCatalog record as value job.setOutputKeyClass(WritableComparable.class); job.setOutputValueClass(DefaultHCatRecord.class); job.setOutputFormatClass(HCatOutputFormat.class); HCatOutputFormat.setOutput(job, OutputJobInfo.create(dbName, outputTableName, null)); HCatSchema s = HCatOutputFormat.getTableSchema(job); System.err.println("INFO: output schema explicitly set for writing:" + s); HCatOutputFormat.setSchema(job, s); return (job.waitForCompletion(true) ? 0 : 1); }
From source file:com.cloudera.traffic.AveragerRunner.java
License:Apache License
/**
 * Command-line entry point for the averaging job.
 *
 * <p>After generic-option parsing, the first remaining argument is the input path and
 * the second is the output path. {@code AveragerReducer} is used as both combiner and
 * reducer. The process exits with status {@code 2} when the paths are missing and with
 * status {@code 1} when the job does not complete successfully; on success the method
 * returns normally.
 *
 * @param args raw command-line arguments, including any generic Hadoop options
 * @throws IOException if job setup or submission fails
 * @throws ClassNotFoundException if a job class cannot be resolved
 * @throws InterruptedException if the wait for job completion is interrupted
 */
public static void main(String[] args)
    throws IOException, ClassNotFoundException, InterruptedException {
  Configuration conf = new Configuration();
  String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

  // Report missing paths explicitly rather than failing with an
  // ArrayIndexOutOfBoundsException further down.
  if (otherArgs.length < 2) {
    System.err.println("Usage: AveragerRunner <input path> <output path>");
    System.exit(2);
  }

  Job job = new Job(conf);
  job.setJarByClass(AveragerRunner.class);
  job.setMapperClass(AveragerMapper.class);
  job.setReducerClass(AveragerReducer.class);
  job.setCombinerClass(AveragerReducer.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(AverageWritable.class);
  job.setInputFormatClass(TextInputFormat.class);

  FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
  FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

  // Propagate job failure to the caller's shell; previously the result was discarded
  // and the process exited with status 0 even when the job failed.
  if (!job.waitForCompletion(true)) {
    System.exit(1);
  }
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(new Configuration(conf)); Configuration jobConf = job.getConfiguration(); if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); }// w w w.ja v a 2 s . c o m job.setJarByClass(mapper); job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); job.setMapOutputKeyClass(mapperKey); job.setMapOutputValueClass(mapperValue); job.setOutputKeyClass(mapperKey); job.setOutputValueClass(mapperValue); jobConf.setBoolean("mapred.compress.map.output", true); job.setNumReduceTasks(0); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/** * Create a map and reduce Hadoop job. Does not set the name on the job. * @param inputPath The input {@link org.apache.hadoop.fs.Path} * @param outputPath The output {@link org.apache.hadoop.fs.Path} * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat} * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If the Mapper is a no-op, * this value may be null * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class. If the Mapper is a no-op, * this value may be null * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use * @param reducerKey The reducer key class. * @param reducerValue The reducer value class. * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}. * @param conf The {@link org.apache.hadoop.conf.Configuration} to use. * @return The {@link org.apache.hadoop.mapreduce.Job}. * @throws IOException if there is a problem with the IO. * * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class) * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class, * org.apache.hadoop.conf.Configuration) *//*from ww w . j a va 2s . c om*/ public static Job prepareJob(Path inputPath, Path outputPath, Class<? extends InputFormat> inputFormat, Class<? extends Mapper> mapper, Class<? extends Writable> mapperKey, Class<? extends Writable> mapperValue, Class<? extends Reducer> reducer, Class<? extends Writable> reducerKey, Class<? extends Writable> reducerValue, Class<? 
extends OutputFormat> outputFormat, Configuration conf) throws IOException { Job job = new Job(conf); Configuration jobConf = job.getConfiguration(); if (reducer.equals(Reducer.class)) { if (mapper.equals(Mapper.class)) { throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer"); } job.setJarByClass(mapper); } else { job.setJarByClass(reducer); } job.setInputFormatClass(inputFormat); jobConf.set("mapred.input.dir", inputPath.toString()); job.setMapperClass(mapper); if (mapperKey != null) { job.setMapOutputKeyClass(mapperKey); } if (mapperValue != null) { job.setMapOutputValueClass(mapperValue); } // jobConf.setBoolean("mapred.compress.map.output", true); job.setReducerClass(reducer); job.setOutputKeyClass(reducerKey); job.setOutputValueClass(reducerValue); job.setOutputFormatClass(outputFormat); jobConf.set("mapred.output.dir", outputPath.toString()); return job; }