List of usage examples for org.apache.hadoop.mapreduce Job setOutputValueClass
public void setOutputValueClass(Class<?> theClass) throws IllegalStateException
From source file:com.cloudera.hbase.WordCount.java
License:Open Source License
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: wordcount <in> <out>"); return 2; }//from ww w . j a v a2 s . c om Configuration conf = getConf(); Job job = new Job(conf, "word count"); job.setJarByClass(WordCount.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.mapreduce.RecordCount.java
License:Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 2) { System.err.println("Usage: RecordCount <input_query> <output_path>"); System.exit(1);/*from www . ja v a 2 s . com*/ } String inputQuery = args[0]; String output = args[1]; Job job = Job.getInstance(getConf()); job.setJobName("recordcount"); job.setJarByClass(RecordCount.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setNumReduceTasks(1); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(LongWritable.class); RecordServiceConfig.setInputQuery(job.getConfiguration(), inputQuery); job.setInputFormatClass(RecordServiceInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); FileSystem fs = FileSystem.get(job.getConfiguration()); Path outputPath = new Path(output); if (fs.exists(outputPath)) fs.delete(outputPath, true); FileOutputFormat.setOutputPath(job, outputPath); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.terasort.TeraChecksum.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; Job job = Job.getInstance(getConf()); if (args.length != 2 && args.length != 3) { usage();/*from w w w.j a va 2s .c o m*/ return 2; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraSum"); job.setJarByClass(TeraChecksum.class); job.setMapperClass(ChecksumMapper.class); job.setReducerClass(ChecksumReducer.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Unsigned16.class); // force a single reducer job.setNumReduceTasks(1); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); } else { TeraInputFormat.setInputPaths(job, new Path(args[0])); job.setInputFormatClass(TeraInputFormat.class); } return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.terasort.TeraGen.java
License:Apache License
/** * @param args the cli arguments/*from w w w.ja v a2 s.co m*/ */ @Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Job job = Job.getInstance(getConf()); if (args.length != 2) { usage(); return 2; } setNumberOfRows(job, parseHumanLong(args[0])); Path outputDir = new Path(args[1]); if (outputDir.getFileSystem(getConf()).exists(outputDir)) { throw new IOException("Output directory " + outputDir + " already exists."); } FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraGen"); job.setJarByClass(TeraGen.class); job.setMapperClass(SortGenMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setInputFormatClass(RangeInputFormat.class); job.setOutputFormatClass(TeraOutputFormat.class); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.recordservice.examples.terasort.TeraSort.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; if (args.length != 2 && args.length != 3) { usage();/*from w w w. ja va2s .c om*/ return 1; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } LOG.info("starting"); Job job = Job.getInstance(getConf()); boolean useSimplePartitioner = getUseSimplePartitioner(job); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); useSimplePartitioner = true; } else { Path inputDir = new Path(args[0]); TeraInputFormat.setInputPaths(job, inputDir); job.setInputFormatClass(TeraInputFormat.class); } Path outputDir = new Path(args[1]); FileOutputFormat.setOutputPath(job, outputDir); job.setJobName("TeraSort"); job.setJarByClass(TeraSort.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TeraOutputFormat.class); if (useSimplePartitioner) { job.setPartitionerClass(SimplePartitioner.class); } else { long start = System.currentTimeMillis(); Path partitionFile = new Path(outputDir, TeraInputFormat.PARTITION_FILENAME); URI partitionUri = new URI(partitionFile.toString() + "#" + TeraInputFormat.PARTITION_FILENAME); try { TeraInputFormat.writePartitionFile(job, partitionFile); } catch (Throwable e) { LOG.error(e.getMessage()); return -1; } job.addCacheFile(partitionUri); long end = System.currentTimeMillis(); System.out.println("Spent " + (end - start) + "ms computing partitions."); job.setPartitionerClass(TotalOrderPartitioner.class); } job.getConfiguration().setInt("dfs.replication", getOutputReplication(job)); TeraOutputFormat.setFinalSync(job, true); int ret = job.waitForCompletion(true) ? 0 : 1; LOG.info("done"); return ret; }
From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java
License:Apache License
@Override public int run(String[] args) throws Exception { boolean useRecordService = false; if (args.length != 2 && args.length != 3) { usage();/*from w w w . j ava2 s . c o m*/ return 1; } if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } Job job = Job.getInstance(getConf()); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.setJobName("TeraValidate"); job.setJarByClass(TeraValidate.class); job.setMapperClass(ValidateMapper.class); job.setReducerClass(ValidateReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); // force a single reducer job.setNumReduceTasks(1); // force a single split FileInputFormat.setMinInputSplitSize(job, Long.MAX_VALUE); if (useRecordService) { RecordServiceConfig.setInputTable(job.getConfiguration(), null, args[0]); job.setInputFormatClass(RecordServiceTeraInputFormat.class); } else { TeraInputFormat.setInputPaths(job, new Path(args[0])); job.setInputFormatClass(TeraInputFormat.class); } return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java
@Override public int run(String[] args) throws Exception { Configuration config = getConf(); args = new GenericOptionsParser(config, args).getRemainingArgs(); if (args.length < 2) { ToolRunner.printGenericCommandUsage(System.out); return 2; }//from w ww.j ava2s . c o m Job job = Job.getInstance(config, this.getClass().getName() + "-wordcount"); job.setJarByClass(SecureWordCountDriver.class); job.setInputFormatClass(TextInputFormat.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java
License:Apache License
public void testDateSplits() throws Exception { Statement s = connection.createStatement(); final String DATE_TABLE = "datetable"; final String COL = "foo"; try {/*from www .ja v a 2 s . com*/ try { // delete the table if it already exists. s.executeUpdate("DROP TABLE " + DATE_TABLE); } catch (SQLException e) { // Ignored; proceed regardless of whether we deleted the table; // it may have simply not existed. } // Create the table. s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')"); s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')"); // commit this tx. connection.commit(); Configuration conf = new Configuration(); conf.set("fs.defaultFS", "file:///"); FileSystem fs = FileSystem.getLocal(conf); fs.delete(new Path(OUT_DIR), true); // now do a dd import Job job = new Job(conf); job.setMapperClass(ValMapper.class); job.setReducerClass(Reducer.class); job.setMapOutputKeyClass(DateCol.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(DateCol.class); job.setOutputValueClass(NullWritable.class); job.setNumReduceTasks(1); job.getConfiguration().setInt("mapreduce.map.tasks", 2); FileOutputFormat.setOutputPath(job, new Path(OUT_DIR)); DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL, (String) null, (String) null); DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL); boolean ret = job.waitForCompletion(true); assertTrue("job failed", ret); // Check to see that we imported as much as we thought we did. assertEquals("Did not get all the records", 4, job.getCounters() .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue()); } finally { s.close(); } }
From source file:com.cloudera.sqoop.mapreduce.HBaseImportJob.java
License:Apache License
@Override protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException { job.setOutputKeyClass(SqoopRecord.class); job.setOutputValueClass(NullWritable.class); job.setMapperClass(getMapperClass()); }
From source file:com.cloudera.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException { Configuration conf = options.getConf(); Job job = new Job(conf); String userClassName = options.getClassName(); if (null == userClassName) { // Shouldn't get here. throw new IOException("Record class name not specified with " + "--class-name."); }/*w w w. j a va 2s .c o m*/ // Set the external jar to use for the job. String existingJar = options.getExistingJarName(); if (existingJar != null) { // User explicitly identified a jar path. LOG.debug("Setting job jar to user-specified jar: " + existingJar); job.getConfiguration().set("mapred.jar", existingJar); } else { // Infer it from the location of the specified class, if it's on the // classpath. try { Class<? extends Object> userClass = conf.getClassByName(userClassName); if (null != userClass) { String userJar = Jars.getJarPathForClass(userClass); LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar); job.getConfiguration().set("mapred.jar", userJar); } else { LOG.warn("Specified class " + userClassName + " is not in a jar. " + "MapReduce may not find the class"); } } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } } try { Path oldPath = new Path(options.getMergeOldPath()); Path newPath = new Path(options.getMergeNewPath()); Configuration jobConf = job.getConfiguration(); FileSystem fs = FileSystem.get(jobConf); oldPath = oldPath.makeQualified(fs); newPath = newPath.makeQualified(fs); FileInputFormat.addInputPath(job, oldPath); FileInputFormat.addInputPath(job, newPath); jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString()); jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString()); jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol()); jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName); FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir())); if (ExportJobBase.isSequenceFiles(jobConf, newPath)) { job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MergeRecordMapper.class); } else { job.setMapperClass(MergeTextMapper.class); job.setOutputFormatClass(RawKeyTextOutputFormat.class); } jobConf.set("mapred.output.key.class", userClassName); job.setOutputValueClass(NullWritable.class); job.setReducerClass(MergeReducer.class); // Set the intermediate data types. job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(MergeRecord.class); // Make sure Sqoop and anything else we need is on the classpath. cacheJars(job, null); return this.runJob(job); } catch (InterruptedException ie) { throw new IOException(ie); } catch (ClassNotFoundException cnfe) { throw new IOException(cnfe); } }