List of usage examples for org.apache.hadoop.mapred JobConf setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass)
From source file:edu.umd.cloud9.demo.DemoWordCondProbJSON.java
License:Apache License
/** * Runs this tool./*from w ww. j a v a 2 s . c om*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCondProbJSON"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(DemoWordCondProbJSON.class); conf.setJobName("DemoWordCondProbJSON"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setOutputKeyClass(MyTuple.class); conf.setOutputValueClass(FloatWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(MyMapper.class); // this is a potential gotcha! can't use ReduceClass for combine because // we have not collected all the counts yet, so we can't divide through // to compute the conditional probabilities conf.setCombinerClass(IdentityReducer.class); conf.setReducerClass(MyReducer.class); conf.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.demo.DemoWordCondProbTuple.java
License:Apache License
/** * Runs this tool./*from ww w . j a va2 s .co m*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCondProbTuple"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(DemoWordCondProbTuple.class); conf.setJobName("DemoWordCondProbTuple"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setOutputKeyClass(Tuple.class); conf.setOutputValueClass(FloatWritable.class); conf.setOutputFormat(TextOutputFormat.class); conf.setMapperClass(MyMapper.class); // this is a potential gotcha! can't use ReduceClass for combine because // we have not collected all the counts yet, so we can't divide through // to compute the conditional probabilities conf.setCombinerClass(IdentityReducer.class); conf.setReducerClass(MyReducer.class); conf.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.demo.DemoWordCount.java
License:Apache License
/** * Runs this tool./*from w w w. j a va2 s . co m*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCount"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(DemoWordCount.class); conf.setJobName("DemoWordCount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MyMapper.class); conf.setCombinerClass(MyReducer.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.demo.DemoWordCountJSON.java
License:Apache License
/** * Runs this tool./*from w ww. ja v a 2 s. c o m*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int numMapTasks = Integer.parseInt(args[2]); int numReduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCountJSON"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + numMapTasks); sLogger.info(" - number of reducers: " + numReduceTasks); JobConf conf = new JobConf(DemoWordCountTuple1.class); conf.setJobName("DemoWordCountJSON"); conf.setNumMapTasks(numMapTasks); conf.setNumReduceTasks(numReduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputKeyClass(MyKey.class); conf.setOutputValueClass(IntWritable.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(MyMapper.class); conf.setCombinerClass(MyReducer.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.demo.DemoWordCountTuple1.java
License:Apache License
/** * Runs this tool.//from ww w . j a v a 2 s . co m */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int numMapTasks = Integer.parseInt(args[2]); int numReduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCountTuple1"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + numMapTasks); sLogger.info(" - number of reducers: " + numReduceTasks); JobConf conf = new JobConf(DemoWordCountTuple1.class); conf.setJobName("DemoWordCountTuple1"); conf.setNumMapTasks(numMapTasks); conf.setNumReduceTasks(numReduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputKeyClass(Tuple.class); conf.setOutputValueClass(IntWritable.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(MyMapper.class); conf.setCombinerClass(MyReducer.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.demo.DemoWordCountTuple2.java
License:Apache License
/** * Runs this tool./* w w w . j a v a 2s. c o m*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int numMapTasks = Integer.parseInt(args[2]); int numReduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCountTuple2"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + numMapTasks); sLogger.info(" - number of reducers: " + numReduceTasks); JobConf conf = new JobConf(DemoWordCountTuple2.class); conf.setJobName("DemoWordCountTuple2"); conf.setNumMapTasks(numMapTasks); conf.setNumReduceTasks(numReduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); conf.setInputFormat(SequenceFileInputFormat.class); conf.setOutputKeyClass(Tuple.class); conf.setOutputValueClass(IntWritable.class); conf.setOutputFormat(SequenceFileOutputFormat.class); conf.setMapperClass(MapClass.class); conf.setCombinerClass(ReduceClass.class); conf.setReducerClass(ReduceClass.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.examples.BigramCount.java
License:Apache License
/** * Runs this tool./*from w w w . ja va2 s . c om*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: BigramCount"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(BigramCount.class); conf.setJobName("BigramCount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MyMapper.class); conf.setCombinerClass(MyReducer.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.examples.BigramRelativeFrequency.java
License:Apache License
/** * Runs this tool.//from ww w . java 2 s . c o m */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: BigramRelativeFrequency"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(BigramRelativeFrequency.class); conf.setJobName("BigramRelativeFrequency"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(PairOfStrings.class); conf.setOutputValueClass(FloatWritable.class); conf.setMapperClass(MyMapper.class); conf.setCombinerClass(MyCombiner.class); conf.setReducerClass(MyReducer.class); conf.setPartitionerClass(MyPartitioner.class); //Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.examples.BuildInvertedIndex.java
License:Apache License
/** * Runs this tool.//from w ww. j a v a 2 s . c o m */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: BuildInvertedIndex"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(BuildInvertedIndex.class); conf.setJobName("BuildInvertedIndex"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(PairOfInts.class); conf.setMapperClass(MyMapper.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:edu.umd.cloud9.examples.DemoWordCount.java
License:Apache License
/** * Runs this tool./*w ww . ja v a 2s .c o m*/ */ public int run(String[] args) throws Exception { if (args.length != 4) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int mapTasks = Integer.parseInt(args[2]); int reduceTasks = Integer.parseInt(args[3]); sLogger.info("Tool: DemoWordCount"); sLogger.info(" - input path: " + inputPath); sLogger.info(" - output path: " + outputPath); sLogger.info(" - number of mappers: " + mapTasks); sLogger.info(" - number of reducers: " + reduceTasks); JobConf conf = new JobConf(DemoWordCount.class); conf.setJobName("DemoWordCount"); conf.setNumMapTasks(mapTasks); conf.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(conf, new Path(inputPath)); FileOutputFormat.setOutputPath(conf, new Path(outputPath)); FileOutputFormat.setCompressOutput(conf, false); /** * Note that these must match the Class arguments given in the mapper */ conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(IntWritable.class); conf.setMapperClass(MyMapper.class); conf.setCombinerClass(MyReducer.class); conf.setReducerClass(MyReducer.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true); long startTime = System.currentTimeMillis(); JobClient.runJob(conf); sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }