List of usage examples for org.apache.hadoop.mapred JobConf setOutputValueClass
public void setOutputValueClass(Class<?> theClass)
From source file:com.cloudera.hbase.OldWordCount.java
License:Open Source License
public static void main(String[] args) throws Exception { JobConf conf = new JobConf(WordCount.class); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//from ww w . ja v a2 s. c o m } conf.setJobName("word count"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(conf, new Path(otherArgs[1])); JobClient.runJob(conf); return; }
From source file:com.cloudera.recordservice.examples.mapreduce.WordCount.java
License:Apache License
public void run(String[] args) throws Exception { boolean useRecordService = true; if (args.length == 3) { useRecordService = Boolean.parseBoolean(args[2]); } else if (args.length != 2) { System.err.println("Usage: WordCount <input path> <output path>"); System.exit(-1);/* w ww .j a va 2 s . com*/ } String input = args[0].trim(); String output = args[1]; JobConf conf = new JobConf(WordCount.class); conf.setJobName("wordcount-" + (useRecordService ? "with" : "without") + "-RecordService"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); if (useRecordService) { conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class); RecordServiceConfig.setInput(conf, input); } else { conf.setInputFormat(TextInputFormat.class); FileInputFormat.setInputPaths(conf, new Path(input)); } FileSystem fs = FileSystem.get(conf); Path outputPath = new Path(output); if (fs.exists(outputPath)) fs.delete(outputPath, true); conf.setOutputFormat(TextOutputFormat.class); FileOutputFormat.setOutputPath(conf, outputPath); JobClient.runJob(conf); System.out.println("Done"); }
From source file:com.cloudera.recordservice.mapreduce.testapps.RecordCount.java
License:Apache License
public static long countRecords(String path) throws IOException { String output = TestUtil.getTempDirectory(); Path inputPath = new Path(path); Path outputPath = new Path(output); JobConf conf = new JobConf(RecordCount.class); conf.setJobName("recordcount"); conf.setOutputKeyClass(NullWritable.class); conf.setOutputValueClass(LongWritable.class); conf.setInt("mapreduce.job.reduces", 1); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); FileInputFormat.setInputPaths(conf, inputPath); FileOutputFormat.setOutputPath(conf, outputPath); JobClient.runJob(conf);/*from ww w . ja va2s. co m*/ // Read the result and return it. Since we set the number of reducers to 1, // there is always just one file containing the value. FileSystem fs = outputPath.getFileSystem(conf); FSDataInputStream resultStream = fs.open(new Path(output + "/part-00000")); byte[] bytes = new byte[16]; int length = resultStream.read(bytes); String result = new String(bytes, 0, length).trim(); return Long.parseLong(result); }
From source file:com.cloudera.recordservice.tests.TestMiniClusterController.java
License:Apache License
public static void fillInWordCountMRJobConf(JobConf conf) { String input = "select n_comment from tpch.nation"; conf.setJobName("samplejob-wordcount"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(Map.class); conf.setCombinerClass(Reduce.class); conf.setReducerClass(Reduce.class); conf.setInputFormat(com.cloudera.recordservice.mapred.TextInputFormat.class); conf.setOutputFormat(TextOutputFormat.class); com.cloudera.recordservice.mr.RecordServiceConfig.setInputQuery(conf, input); setRandomOutputDir(conf);/*from w ww . ja v a 2 s.c om*/ }
From source file:com.cloudera.sqoop.orm.TestParseMethods.java
License:Apache License
public void runParseTest(String fieldTerminator, String lineTerminator, String encloser, String escape, boolean encloseRequired) throws IOException { ClassLoader prevClassLoader = null; String[] argv = getArgv(true, fieldTerminator, lineTerminator, encloser, escape, encloseRequired); runImport(argv);/*from w w w . j av a 2 s.c om*/ try { String tableClassName = getTableName(); argv = getArgv(false, fieldTerminator, lineTerminator, encloser, escape, encloseRequired); SqoopOptions opts = new ImportTool().parseArguments(argv, null, null, true); CompilationManager compileMgr = new CompilationManager(opts); String jarFileName = compileMgr.getJarFilename(); // Make sure the user's class is loaded into our address space. prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, tableClassName); JobConf job = new JobConf(); job.setJar(jarFileName); // Tell the job what class we're testing. job.set(ReparseMapper.USER_TYPE_NAME_KEY, tableClassName); // use local mode in the same JVM. ConfigurationHelper.setJobtrackerAddr(job, "local"); if (!BaseSqoopTestCase.isOnPhysicalCluster()) { job.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); } String warehouseDir = getWarehouseDir(); Path warehousePath = new Path(warehouseDir); Path inputPath = new Path(warehousePath, getTableName()); Path outputPath = new Path(warehousePath, getTableName() + "-out"); job.setMapperClass(ReparseMapper.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); JobClient.runJob(job); } catch (InvalidOptionsException ioe) { fail(ioe.toString()); } catch (ParseException pe) { fail(pe.toString()); } finally { if (null != prevClassLoader) { ClassLoaderStack.setCurrentClassLoader(prevClassLoader); } } }
From source file:com.cloudera.sqoop.orm.TestParseMethods.java
License:Apache License
public void testFieldSetter() throws IOException { ClassLoader prevClassLoader = null; String[] types = { "VARCHAR(32)", "VARCHAR(32)" }; String[] vals = { "'meep'", "'foo'" }; createTableWithColTypes(types, vals); String[] argv = getArgv(true, ",", "\\n", "\\\'", "\\", false); runImport(argv);// w ww . ja v a 2 s.c om try { String tableClassName = getTableName(); argv = getArgv(false, ",", "\\n", "\\\'", "\\", false); SqoopOptions opts = new ImportTool().parseArguments(argv, null, null, true); CompilationManager compileMgr = new CompilationManager(opts); String jarFileName = compileMgr.getJarFilename(); // Make sure the user's class is loaded into our address space. prevClassLoader = ClassLoaderStack.addJarFile(jarFileName, tableClassName); JobConf job = new JobConf(); job.setJar(jarFileName); // Tell the job what class we're testing. job.set(ExplicitSetMapper.USER_TYPE_NAME_KEY, tableClassName); job.set(ExplicitSetMapper.SET_COL_KEY, BASE_COL_NAME + "0"); job.set(ExplicitSetMapper.SET_VAL_KEY, "this-is-a-test"); // use local mode in the same JVM. ConfigurationHelper.setJobtrackerAddr(job, "local"); if (!BaseSqoopTestCase.isOnPhysicalCluster()) { job.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); } String warehouseDir = getWarehouseDir(); Path warehousePath = new Path(warehouseDir); Path inputPath = new Path(warehousePath, getTableName()); Path outputPath = new Path(warehousePath, getTableName() + "-out"); job.setMapperClass(ExplicitSetMapper.class); job.setNumReduceTasks(0); FileInputFormat.addInputPath(job, inputPath); FileOutputFormat.setOutputPath(job, outputPath); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); JobClient.runJob(job); } catch (InvalidOptionsException ioe) { fail(ioe.toString()); } catch (ParseException pe) { fail(pe.toString()); } finally { if (null != prevClassLoader) { ClassLoaderStack.setCurrentClassLoader(prevClassLoader); } } }
From source file:com.datasalt.pangool.benchmark.urlresolution.HadoopUrlResolution.java
License:Apache License
public final static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 3) { System.err.println("Usage: urlresolution <url-map> <url-register> <out>"); System.exit(2);// w w w . j a va 2s. c om } JobConf job = new JobConf(conf); FileSystem fS = FileSystem.get(conf); fS.delete(new Path(otherArgs[2]), true); MultipleInputs.addInputPath(job, new Path(otherArgs[0]), TextInputFormat.class, UrlMapClass.class); MultipleInputs.addInputPath(job, new Path(otherArgs[1]), TextInputFormat.class, UrlRegisterMapClass.class); job.setJarByClass(HadoopUrlResolution.class); job.setPartitionerClass(KeyPartitioner.class); job.setOutputValueGroupingComparator(GroupingComparator.class); job.setMapOutputKeyClass(UrlRegJoinUrlMap.class); job.setMapOutputValueClass(NullWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(NullWritable.class); FileOutputFormat.setOutputPath(job, new Path(otherArgs[2])); Job j = new Job(job); j.setReducerClass(Reduce.class); j.waitForCompletion(true); }
From source file:com.datascience.cascading.scheme.CsvScheme.java
License:Apache License
@Override public void sinkConfInit(FlowProcess<JobConf> flowProcess, Tap<JobConf, RecordReader, OutputCollector> tap, JobConf conf) { conf.setOutputKeyClass(LongWritable.class); conf.setOutputValueClass(ListWritable.class); conf.setOutputFormat(CsvOutputFormat.class); configureWriterFormat(format, conf); }
From source file:com.datatorrent.demos.mroperator.LineIndexer.java
License:Open Source License
/** * The actual main() method for our program; this is the * "driver" for the MapReduce job.//w w w . j a v a 2s.co m */ public static void main(String[] args) { JobClient client = new JobClient(); JobConf conf = new JobConf(LineIndexer.class); conf.setJobName("LineIndexer"); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(Text.class); FileInputFormat.addInputPath(conf, new Path("input")); FileOutputFormat.setOutputPath(conf, new Path("output")); conf.setMapperClass(LineIndexMapper.class); conf.setReducerClass(LineIndexReducer.class); client.setConf(conf); try { JobClient.runJob(conf); } catch (Exception e) { e.printStackTrace(); } }
From source file:com.datatorrent.demos.mroperator.LogCountsPerHour.java
License:Open Source License
public int run(String[] args) throws Exception { // Create a configuration Configuration conf = getConf(); // Create a job from the default configuration that will use the WordCount class JobConf job = new JobConf(conf, LogCountsPerHour.class); // Define our input path as the first command line argument and our output path as the second Path in = new Path(args[0]); Path out = new Path(args[1]); // Create File Input/Output formats for these paths (in the job) FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); // Configure the job: name, mapper, reducer, and combiner job.setJobName("LogAveragePerHour"); job.setMapperClass(LogMapClass.class); job.setReducerClass(LogReduce.class); job.setCombinerClass(LogReduce.class); // Configure the output job.setOutputFormat(TextOutputFormat.class); job.setOutputKeyClass(DateWritable.class); job.setOutputValueClass(IntWritable.class); // Run the job JobClient.runJob(job);/*from ww w. java 2 s . c o m*/ return 0; }