List of usage examples for org.apache.hadoop.mapreduce Job setPartitionerClass
public void setPartitionerClass(Class<? extends Partitioner> cls) throws IllegalStateException
From source file:WordCount_NoCombiner.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);//w w w . j a v a2 s . c o m } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount_NoCombiner.class); job.setMapperClass(TokenizerMapper.class); // delete this line to disable combining // job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(WordPartitioner.class); job.setNumReduceTasks(5); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:DescSorter.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: flights <in> <in> <out>"); System.exit(2);//from w w w . ja v a 2 s. c o m } Job job = new Job(conf, "AvgDelays"); job.setJarByClass(DescSorter.class); job.setMapperClass(FlightMapper.class); job.setMapOutputKeyClass(CompositeKey.class); job.setMapOutputValueClass(IntWritable.class); job.setPartitionerClass(CompositeKeyPartitioner.class); job.setSortComparatorClass(SortComparator.class); job.setGroupingComparatorClass(GroupingComparator.class); job.setReducerClass(AvgDelayReducer.class); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:ComputeCooccurrenceMatrixPairs.java
License:Apache License
/** * Runs this tool./* www. j a va 2s. c o m*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("window size").create(WINDOW)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; int window = cmdline.hasOption(WINDOW) ? Integer.parseInt(cmdline.getOptionValue(WINDOW)) : 2; LOG.info("Tool: " + ComputeCooccurrenceMatrixPairs.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - window: " + window); LOG.info(" - number of reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(ComputeCooccurrenceMatrixPairs.class.getSimpleName()); job.setJarByClass(ComputeCooccurrenceMatrixPairs.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job.getConfiguration().setInt("window", window); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(PairOfStrings.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(PairOfStrings.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyReducer.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:ExtractTopPersonalizedPageRankNodes.java
License:Apache License
/** * Runs this tool./* www.j a va 2s.com*/ */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("top n").create(TOP)); options.addOption(OptionBuilder.withArgName("src").hasArg().withDescription("source node").create(SRC)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(TOP)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } String inputPath = cmdline.getOptionValue(INPUT); String outputPath = "abc";//cmdline.getOptionValue(OUTPUT); int n = Integer.parseInt(cmdline.getOptionValue(TOP)); //LOG.info("Tool name: " + ExtractTopPersonalizedPageRankNodes.class.getSimpleName()); //LOG.info(" - input: " + inputPath); //LOG.info(" - output: " + outputPath); //LOG.info(" - top: " + n); Configuration conf = getConf(); conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024); conf.setInt(TOP_PG, n); Job job = Job.getInstance(conf); job.setJobName(ExtractTopPersonalizedPageRankNodes.class.getName() + ":" + inputPath); job.setJarByClass(ExtractTopPersonalizedPageRankNodes.class); job.setNumReduceTasks(1); FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(PairOfIntFloat.class); job.setMapOutputValueClass(IntWritable.class); job.setOutputKeyClass(FloatWritable.class); job.setOutputValueClass(IntWritable.class); job.setMapperClass(MyMapper.class); job.setPartitionerClass(MyPartitioner.class); job.setReducerClass(MyReducer.class); // Delete the output directory if it exists already. FileSystem.get(conf).delete(new Path(outputPath), true); job.waitForCompletion(true); FileSystem fileSystem = FileSystem.get(conf); Path path = new Path(outputPath + "/part-r-00000"); ; //MapFile.Reader reader = new MapFile.Reader(new Path(outputPath+ "/part-r-00000"),conf); // InputStream fis=new FileInputStream(outputPath+"/part-r-00000"); BufferedReader br = new BufferedReader(new InputStreamReader(fileSystem.open(path))); String s; float key;//=new FloatWritable(); int value;//=new IntWritable(); while ((s = br.readLine()) != null) { String[] sources = s.split("\\s+"); key = Float.parseFloat(sources[0]); value = Integer.parseInt(sources[1]); if (key == 0.0f) { System.out.print("\n" + "Source: " + value + "\n"); } else { System.out.print(String.format("%.5f %d", key, value) + "\n"); } } //reader.close(); br.close(); //while(!SysOut.isEmpty()) //{ // System.out.print(SysOut.poll()); //} return 0; }
From source file:BigramRelativeFrequencyJson.java
License:Apache License
/** * Runs this tool.//from ww w. j av a2s. c om */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int reduceTasks = Integer.parseInt(args[2]); LOG.info("Tool name: " + BigramRelativeFrequencyJson.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BigramRelativeFrequencyJson.class.getSimpleName()); job.setJarByClass(BigramRelativeFrequencyJson.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(MyTuple.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(MyTuple.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }
From source file:PairsPMI_M.java
License:Apache License
/** * Runs this tool.//from w ww. j ava 2 s. co m */ @SuppressWarnings({ "static-access" }) public int run(String[] args) throws Exception { Options options = new Options(); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT)); options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT)); options.addOption(OptionBuilder.withArgName("num").hasArg().withDescription("number of reducers") .create(NUM_REDUCERS)); CommandLine cmdline; CommandLineParser parser = new GnuParser(); try { cmdline = parser.parse(options, args); } catch (ParseException exp) { System.err.println("Error parsing command line: " + exp.getMessage()); return -1; } if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT)) { System.out.println("args: " + Arrays.toString(args)); HelpFormatter formatter = new HelpFormatter(); formatter.setWidth(120); formatter.printHelp(this.getClass().getName(), options); ToolRunner.printGenericCommandUsage(System.out); return -1; } // First MapReduce Job String inputPath = cmdline.getOptionValue(INPUT); String outputPath = cmdline.getOptionValue(OUTPUT); int reduceTasks = cmdline.hasOption(NUM_REDUCERS) ? Integer.parseInt(cmdline.getOptionValue(NUM_REDUCERS)) : 1; LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - tmp path: " + outputPath + "/tmp"); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(PairsPMI_M.class.getSimpleName()); job.setJarByClass(PairsPMI_M.class); // Delete the tmp directory if it exists already Path tmpDir = new Path("tmp_wj"); FileSystem.get(getConf()).delete(tmpDir, true); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path("tmp_wj")); job.setMapOutputKeyClass(PairOfStrings.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(PairOfStrings.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); // job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); double time1 = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Job Finished in " + time1 + " seconds"); numRecords = job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS") .getValue(); /* * Second MapReduce Job */ LOG.info("Tool name: " + PairsPMI_M.class.getSimpleName()); LOG.info("second stage of MapReduce"); LOG.info(" - input from tmp path: " + outputPath + "/tmp_wj"); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); // set the global variable Configuration conf = getConf(); conf.setLong("numRec", numRecords); job = Job.getInstance(getConf()); job.setJobName(PairsPMI_M.class.getSimpleName()); job.setJarByClass(PairsPMI_M.class); // Delete the output directory if it exists already Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path("tmp_wj/part*")); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(PairOfStrings.class); job.setMapOutputValueClass(FloatWritable.class); // job.setOutputKeyClass(PairOfStrings.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(FloatWritable.class); job.setInputFormatClass(SequenceFileInputFormat.class); // job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(MyMapperSecond.class); // job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducerSecond.class); job.setPartitionerClass(MyPartitioner.class); startTime = System.currentTimeMillis(); job.waitForCompletion(true); double time2 = (System.currentTimeMillis() - startTime) / 1000.0; System.out.println("Second job finished in " + time2 + " seconds"); System.out.println("Total time: " + (time1 + time2) + " seconds"); return 0; }
From source file:WordCountB.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length < 2) { System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2);//from w w w . j av a 2 s .c om } Job job = Job.getInstance(conf, "word count"); job.setJarByClass(WordCountB.class); job.setMapperClass(TokenizerMapper.class); // Setup the Combiner job.setCombinerClass(IntSumReducer.class); // Setup the Partitioner job.setPartitionerClass(Letterpartitioner.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); for (int i = 0; i < otherArgs.length - 1; ++i) { FileInputFormat.addInputPath(job, new Path(otherArgs[i])); } FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:PerTaskTally.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);// w ww. ja v a 2s . c om } Job job = new Job(conf, "word count"); job.setJarByClass(PerMapTally.class); job.setMapperClass(TokenizerMapper.class); // Aniket changes starts /* Here the partitioner is being called*/ job.setPartitionerClass(WordPartitioner.class); // Aniket changes ends // Part 4 Aniket changes starts /* Here I am just disabling the combiner */ // job.setCombinerClass(IntSumReducer.class); // Part 4 Aniket changes ends job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:WordCount_PerTaskTally.java
License:Apache License
public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); GenericOptionsParser parser = new GenericOptionsParser(conf, args); String[] otherArgs = parser.getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: wordcount <in> <out>"); System.exit(2);/*from w ww.j a v a 2 s .c om*/ } Job job = new Job(conf, "word count"); job.setJarByClass(WordCount_PerTaskTally.class); job.setMapperClass(TokenizerMapper.class); // disable combiner // job.setCombinerClass(IntSumReducer.class); job.setPartitionerClass(WordPartitioner.class); job.setNumReduceTasks(5); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(otherArgs[0])); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:BigramRelativeFrequencyTuple.java
License:Apache License
/** * Runs this tool.//from w w w.j ava2 s . com */ public int run(String[] args) throws Exception { if (args.length != 3) { printUsage(); return -1; } String inputPath = args[0]; String outputPath = args[1]; int reduceTasks = Integer.parseInt(args[2]); LOG.info("Tool name: " + BigramRelativeFrequencyTuple.class.getSimpleName()); LOG.info(" - input path: " + inputPath); LOG.info(" - output path: " + outputPath); LOG.info(" - num reducers: " + reduceTasks); Job job = Job.getInstance(getConf()); job.setJobName(BigramRelativeFrequencyTuple.class.getSimpleName()); job.setJarByClass(BigramRelativeFrequencyTuple.class); job.setNumReduceTasks(reduceTasks); FileInputFormat.setInputPaths(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); job.setMapOutputKeyClass(BinSedesTuple.class); job.setMapOutputValueClass(FloatWritable.class); job.setOutputKeyClass(BinSedesTuple.class); job.setOutputValueClass(FloatWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(MyMapper.class); job.setCombinerClass(MyCombiner.class); job.setReducerClass(MyReducer.class); job.setPartitionerClass(MyPartitioner.class); // Delete the output directory if it exists already. Path outputDir = new Path(outputPath); FileSystem.get(getConf()).delete(outputDir, true); long startTime = System.currentTimeMillis(); job.waitForCompletion(true); System.out.println("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds"); return 0; }